
Commit 06c8aeb

Update the Judge LLM settings in the examples to avoid retries (#204)
Closes #202

## By Submitting this PR I confirm:
- I am familiar with the [Contributing Guidelines](https://github.com/NVIDIA/AIQToolkit/blob/develop/docs/source/advanced/contributing.md).
- We require that all contributors "sign-off" on their commits. This certifies that the contribution is your original work, or you have rights to submit it under the same license, or a compatible license.
- Any contribution which contains commits that are not Signed-Off will not be accepted.
- When the PR is ready for review, new or existing tests cover these changes.
- When the PR is ready for review, the documentation is up to date with these changes.

Authors:
- Anuradha Karuppiah (https://github.com/AnuradhaKaruppiah)

Approvers:
- Eric Evans II (https://github.com/ericevans-nv)

URL: #204
1 parent 7a72e01 commit 06c8aeb
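
Summarizing the change: across the docs and example configs, this commit drops the near-zero `temperature`/`top_p` sampling settings and the 2-token output cap on the judge LLM, replacing them with a plain 8-token budget (and, where the configs differed, standardizing the judge model on `meta/llama-3.1-70b-instruct`). The resulting judge-LLM entry, as it appears in the updated configs:

```yaml
llms:
  nim_rag_eval_llm:
    _type: nim
    model_name: meta/llama-3.1-70b-instruct
    max_tokens: 8
```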

File tree: 11 files changed, +23 −35 lines

`docs/source/guides/evaluate.md` (+12 −4)

````diff
@@ -112,11 +112,19 @@ These metrics use a judge LLM for evaluating the generated output and retrieved
 llms:
   nim_rag_eval_llm:
     _type: nim
-    model_name: meta/llama-3.3-70b-instruct
-    temperature: 0.0000001
-    top_p: 0.0001
-    max_tokens: 2
+    model_name: meta/llama-3.1-70b-instruct
+    max_tokens: 8
 ```
+For these metrics, it is recommended to use 8 tokens for the judge LLM.
+
+Evaluation is dependent on the judge LLM's ability to accurately evaluate the generated output and retrieved context. This is the leadership board for the judge LLM:
+```
+1)- mistralai/mixtral-8x22b-instruct-v0.1
+2)- mistralai/mixtral-8x7b-instruct-v0.1
+3)- meta/llama-3.1-70b-instruct
+4)- meta/llama-3.3-70b-instruct
+```
+For a complete list of up-to-date judge LLMs, refer to the [RAGAS NV metrics leadership board](https://github.com/explodinggradients/ragas/blob/main/src/ragas/metrics/_nv_metrics.py)
 
 ### Trajectory Evaluator
 This evaluator uses the intermediate steps generated by the workflow to evaluate the workflow trajectory. The evaluator configuration includes the evaluator type and any additional parameters required by the evaluator.
````
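
The retries mentioned in the commit title typically arise when the evaluator cannot parse a truncated judge reply and re-issues the call. A minimal illustration of that failure mode (the `parse_verdict` helper and the JSON verdict shape below are hypothetical, not AIQ Toolkit or RAGAS APIs): a 2-token cap can cut the judge's reply off mid-structure, while an 8-token budget leaves room for a complete verdict.

```python
import json

def parse_verdict(reply: str) -> int:
    """Hypothetical helper: parse a JSON verdict such as {"score": 1}."""
    return json.loads(reply)["score"]

complete = '{"score": 1}'   # fits comfortably within an 8-token budget
truncated = '{"scor'        # the kind of fragment a 2-token cap can leave behind

assert parse_verdict(complete) == 1

retried = False
try:
    parse_verdict(truncated)
except json.JSONDecodeError:
    retried = True  # a real evaluator would re-issue the judge call here
assert retried
```

This is also why the near-zero `temperature`/`top_p` overrides could be dropped: once the reply fits the budget, exotic sampling settings are no longer needed to coax out a parseable answer.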

`examples/documentation_guides/workflows/text_file_ingest/src/text_file_ingest/configs/config.yml` (+1 −3)

```diff
@@ -35,9 +35,7 @@ llms:
   nim_rag_eval_llm:
     _type: nim
     model_name: meta/llama-3.1-70b-instruct
-    temperature: 0.0000001
-    top_p: 0.0001
-    max_tokens: 2
+    max_tokens: 8
   nim_rag_eval_large_llm:
     _type: nim
     model_name: meta/llama-3.1-70b-instruct
```

`examples/email_phishing_analyzer/configs/config-llama-3.1-8b-instruct.yml` (+1 −3)

```diff
@@ -51,9 +51,7 @@ llms:
   nim_rag_eval_llm:
     _type: nim
     model_name: meta/llama-3.1-70b-instruct
-    temperature: 0.0000001
-    top_p: 0.0001
-    max_tokens: 2
+    max_tokens: 8
   nim_trajectory_eval_llm:
     _type: nim
     model_name: meta/llama-3.1-70b-instruct
```

`examples/email_phishing_analyzer/configs/config-llama-3.3-70b-instruct.yml` (+1 −3)

```diff
@@ -51,9 +51,7 @@ llms:
   nim_rag_eval_llm:
     _type: nim
     model_name: meta/llama-3.1-70b-instruct
-    temperature: 0.0000001
-    top_p: 0.0001
-    max_tokens: 2
+    max_tokens: 8
   nim_trajectory_eval_llm:
     _type: nim
     model_name: meta/llama-3.1-70b-instruct
```

`examples/email_phishing_analyzer/configs/config-mixtral-8x22b-instruct-v0.1.yml` (+1 −3)

```diff
@@ -51,9 +51,7 @@ llms:
   nim_rag_eval_llm:
     _type: nim
     model_name: meta/llama-3.1-70b-instruct
-    temperature: 0.0000001
-    top_p: 0.0001
-    max_tokens: 2
+    max_tokens: 8
   nim_trajectory_eval_llm:
     _type: nim
     model_name: meta/llama-3.1-70b-instruct
```

`examples/email_phishing_analyzer/configs/config-phi-3-medium-4k-instruct.yml` (+1 −3)

```diff
@@ -50,9 +50,7 @@ llms:
   nim_rag_eval_llm:
     _type: nim
     model_name: meta/llama-3.1-70b-instruct
-    temperature: 0.0000001
-    top_p: 0.0001
-    max_tokens: 2
+    max_tokens: 8
   nim_trajectory_eval_llm:
     _type: nim
     model_name: meta/llama-3.1-70b-instruct
```

`examples/email_phishing_analyzer/configs/config-phi-3-mini-4k-instruct.yml` (+1 −3)

```diff
@@ -51,9 +51,7 @@ llms:
   nim_rag_eval_llm:
     _type: nim
     model_name: meta/llama-3.1-70b-instruct
-    temperature: 0.0000001
-    top_p: 0.0001
-    max_tokens: 2
+    max_tokens: 8
   nim_trajectory_eval_llm:
     _type: nim
     model_name: meta/llama-3.1-70b-instruct
```

`examples/email_phishing_analyzer/configs/config-reasoning.yml` (+1 −3)

```diff
@@ -60,9 +60,7 @@ llms:
   nim_rag_eval_llm:
     _type: nim
     model_name: meta/llama-3.1-70b-instruct
-    temperature: 0.0000001
-    top_p: 0.0001
-    max_tokens: 2
+    max_tokens: 8
   r1_model:
     _type: nim
     model_name: deepseek-ai/deepseek-r1
```

`examples/email_phishing_analyzer/configs/config.yml` (+1 −3)

```diff
@@ -51,9 +51,7 @@ llms:
   nim_rag_eval_llm:
     _type: nim
     model_name: meta/llama-3.1-70b-instruct
-    temperature: 0.0000001
-    top_p: 0.0001
-    max_tokens: 2
+    max_tokens: 8
   nim_trajectory_eval_llm:
     _type: nim
     model_name: meta/llama-3.1-70b-instruct
```

`examples/simple/src/aiq_simple/configs/eval_config.yml` (+1 −3)

```diff
@@ -34,9 +34,7 @@ llms:
   nim_rag_eval_llm:
     _type: nim
     model_name: meta/llama-3.1-70b-instruct
-    temperature: 0.0000001
-    top_p: 0.0001
-    max_tokens: 6
+    max_tokens: 8
   nim_trajectory_eval_llm:
     _type: nim
     model_name: meta/llama-3.1-70b-instruct
```

`examples/simple/src/aiq_simple/configs/eval_upload_config.yml` (+2 −4)

```diff
@@ -38,10 +38,8 @@ llms:
     temperature: 0.0
   nim_rag_eval_llm:
     _type: nim
-    model_name: meta/llama-3.3-70b-instruct
-    temperature: 0.0000001
-    top_p: 0.0001
-    max_tokens: 2
+    model_name: meta/llama-3.1-70b-instruct
+    max_tokens: 8
   nim_trajectory_eval_llm:
     _type: nim
     model_name: meta/llama-3.1-70b-instruct
```
