Added prompt file and added logging of prompt

ipa-lab · andreashappe · May 23, 2025 · May 20, 2025 · May 23, 2025 · May 23, 2025
commit bb2bd3be9767f2339f9519ab717bd0650e119f3c
@@ -42,7 +42,8 @@ def __init__(self, llm_handler: LLMHandler, response_handler: ResponseHandler, s
         self.query_params = {}
         self.endpoint_methods = {}
         self.endpoint_examples = {}
-        self.filename = f"{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.yaml"
+        date = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
+        self.filename = f"{name}_spec.yaml"
         self.openapi_spec = {
             "openapi": "3.0.0",
             "info": {
@@ -57,7 +58,7 @@ def __init__(self, llm_handler: LLMHandler, response_handler: ResponseHandler, s
         self.llm_handler = llm_handler
         current_path = os.path.dirname(os.path.abspath(__file__))
 
-        self.file_path = os.path.join(current_path, "openapi_spec", str(strategy).split(".")[1].lower(), name.lower())
+        self.file_path = os.path.join(current_path, "openapi_spec", str(strategy).split(".")[1].lower(), name.lower(), date)
         os.makedirs(self.file_path, exist_ok=True)
         self.file = os.path.join(self.file_path, self.filename)
 

@@ -960,7 +960,6 @@ def check_if_successful(self, is_successful, request_path, result_dict, result_s
                 if error_msg not in self.prompt_helper.correct_endpoint_but_some_error:
                     self.prompt_helper.correct_endpoint_but_some_error[error_msg] = []
                 self.prompt_helper.correct_endpoint_but_some_error[error_msg].append(request_path)
-                self.prompt_helper.hint_for_next_round = error_msg
             else:
                 self.prompt_helper.unsuccessful_paths.append(request_path)
                 status_message = f"{request_path} is not a correct endpoint; Reason: {error_msg}"

@@ -162,7 +162,7 @@ def _initialize_handlers(self, config, description, token, name, initial_prompt)
 
         self._prompt_engineer = PromptEngineer(
             strategy=self.strategy,
-            context=None,
+            context=PromptContext.DOCUMENTATION,
             prompt_helper=self.prompt_helper,
             open_api_spec=self._documentation_handler.openapi_spec,
             rest_api_info=(token, self.host, self._correct_endpoints, self.categorized_endpoints),

@@ -84,27 +84,15 @@ def generate_prompt(
         pass
 
     def get_documentation_steps(self):
-        """
-        Returns a predefined list of endpoint exploration steps based on the target API host.
-
-        These steps are used to guide automated documentation of a web API by progressively
-        discovering and querying endpoints using GET requests. The process follows a structured
-        hierarchy from root-level endpoints to more complex nested endpoints and those with query parameters.
-
-        Returns:
-            List[List[str]]: A list of steps, each step being a list of instruction strings.
-        """
-
-        # Define specific documentation steps based on the given strategy
-
         return [
-            [f"Objective: Identify all accessible endpoints via GET requests for {self.prompt_helper.host}. {self.prompt_helper._description}"],
+            [
+                f"Objective: Identify all accessible endpoints via GET requests for {self.prompt_helper.host}. {self.prompt_helper._description}"],
             [
                 f""" Query root-level resource endpoints.
-                               Find root-level endpoints for {self.prompt_helper.host}.
-                               Only send GET requests to root-level endpoints with a single path component after the root. This means each path should have exactly one '/' followed by a single word (e.g., '/users', '/products').  
-                               1. Send GET requests to new paths only, avoiding any in the lists above.
-                               2. Do not reuse previously tested paths."""
+                                      Find root-level endpoints for {self.prompt_helper.host}.
+                                      Only send GET requests to root-level endpoints with a single path component after the root. This means each path should have exactly one '/' followed by a single word (e.g., '/users', '/products').  
+                                      1. Send GET requests to new paths only, avoiding any in the lists above.
+                                      2. Do not reuse previously tested paths."""
 
             ],
             [
@@ -119,7 +107,6 @@ def get_documentation_steps(self):
                 "Identify subresource endpoints of the form `/resource/other_resource`.",
                 "Query these endpoints to check if they return data related to the main resource without requiring an `id` parameter."
 
-
             ],
 
             [
@@ -138,6 +125,7 @@ def get_documentation_steps(self):
                 "Construct and make GET requests to these endpoints using common query parameters (e.g. `/resource?param1=1&param2=3`) or based on documentation hints, testing until a valid request with query parameters is achieved."
             ]
         ]
+
     def extract_properties(self):
         """
            Extracts example values and data types from the 'Post' schema in the OpenAPI specification.

@@ -61,21 +61,22 @@ def generate_prompt(
             str: The generated prompt.
         """
         if self.context == PromptContext.DOCUMENTATION:
-            steps = self._get_documentation_steps(move_type=move_type, previous_prompt=previous_prompt)
+            steps = self._get_documentation_steps(move_type=move_type, previous_prompt=previous_prompt, doc_steps=self.get_documentation_steps())
         elif self.context == PromptContext.PENTESTING:
             steps = self._get_pentesting_steps(move_type=move_type)
         else:
-            steps = self._get_documentation_steps(move_type=move_type, previous_prompt=previous_prompt)
+            steps = self.parse_prompt_file()
+            steps = self._get_documentation_steps(move_type=move_type, previous_prompt=previous_prompt,
+                                                  doc_steps=steps)
 
-            #steps = self.parse_prompt_file()
 
 
         if hint:
             steps = steps + [hint]
 
         return self.prompt_helper._check_prompt(previous_prompt=previous_prompt, steps=steps)
 
-    def _get_documentation_steps(self, move_type: str, previous_prompt) -> List[str]:
+    def _get_documentation_steps(self, move_type: str, previous_prompt, doc_steps: Any) -> List[str]:
         # Extract properties and example response
         if "endpoints" in self.open_api_spec:
             properties = self.extract_properties()
@@ -106,7 +107,6 @@ def _get_documentation_steps(self, move_type: str, previous_prompt) -> List[str]
             icl_prompt = ""
 
         if move_type == "explore":
-            doc_steps = self.get_documentation_steps()
             icl = [[f"Based on this information :\n{icl_prompt}\n" + doc_steps[0][0]]]
             # if self.current_step == 0:
             #   self.current_step == 1
@@ -356,7 +356,3 @@ def get_props(self, data, result ):
         return result
 
 
-
-
-
-
@@ -50,21 +50,78 @@ def generate_prompt(
         """
         if self.context == PromptContext.DOCUMENTATION:
             self.purpose = PromptPurpose.DOCUMENTATION
-            chain_of_thought_steps = self._get_documentation_steps( [],move_type)
+            chain_of_thought_steps = self._get_documentation_steps([],move_type, self.get_documentation_steps())
             chain_of_thought_steps = [chain_of_thought_steps[0]] + [
                 "Let's think step by step"] + chain_of_thought_steps[1:]
 
         elif self.context == PromptContext.PENTESTING:
             chain_of_thought_steps = self._get_pentesting_steps(move_type,"")
         else:
-            chain_of_thought_steps = self.parse_prompt_file()
+            steps = self.parse_prompt_file()
+            chain_of_thought_steps = self._get_documentation_steps([],move_type, steps)
+
             chain_of_thought_steps = [chain_of_thought_steps[0]] + [
                 "Let's think step by step"] + chain_of_thought_steps[1:]
         if hint:
             chain_of_thought_steps.append(hint)
 
         return self.prompt_helper._check_prompt(previous_prompt=previous_prompt, steps=chain_of_thought_steps)
 
+    def get_documentation_steps(self):
+        """
+        Return the built-in list of endpoint exploration steps for the target API host.
+
+        The first step states the objective (it interpolates the prompt helper's host and
+        description); the following steps go from root-level endpoints through instance,
+        subresource, related and multi-level endpoints to endpoints with query parameters.
+
+        Returns:
+            List[List[str]]: A list of steps, each step being a list of instruction strings.
+        """
+
+        # Each inner list is one step; adjacent literals without a comma merge into one string.
+
+        return [
+            [f"Objective: Identify all accessible endpoints via GET requests for {self.prompt_helper.host}. {self.prompt_helper._description}"],
+            [
+                f""" Query root-level resource endpoints.
+                               Find root-level endpoints for {self.prompt_helper.host}.
+                               Only send GET requests to root-level endpoints with a single path component after the root. This means each path should have exactly one '/' followed by a single word (e.g., '/users', '/products').  
+                               1. Send GET requests to new paths only, avoiding any in the lists above.
+                               2. Do not reuse previously tested paths."""
+
+            ],
+            [
+                "Query Instance-level resource endpoint with id",
+                "Look for Instance-level resource endpoint : Identify endpoints of type `/resource/id` where id is the parameter for the id.",
+                "Query these `/resource/id` endpoints to see if an `id` parameter resolves the request successfully.",
+                "Ids can be integers, longs or base62."
+
+            ],
+            [
+                "Query Subresource Endpoints",
+                "Identify subresource endpoints of the form `/resource/other_resource`.",
+                "Query these endpoints to check if they return data related to the main resource without requiring an `id` parameter."
+
+
+            ],
+
+            [
+                "Query for related resource endpoints",
+                "Identify related resource endpoints that match the format `/resource/id/other_resource`: "
+                "First, scan for the following endpoints where an `id` in the middle position and follow them by another resource identifier.",
+                "Second, look for other endpoints and query these endpoints with appropriate `id` values to determine their behavior and document responses or errors."
+            ],
+            [
+                "Query multi-level resource endpoints",
+                "Search for multi-level endpoints of type `/resource/other_resource/another_resource`: Identify any endpoints in the format with three resource identifiers.",
+                "Test requests to these endpoints, adjusting resource identifiers as needed, and analyze responses to understand any additional parameters or behaviors."
+            ],
+            [
+                "Query endpoints with query parameters",
+                "Construct and make GET requests to these endpoints using common query parameters (e.g. `/resource?param1=1&param2=3`) or based on documentation hints, testing until a valid request with query parameters is achieved."
+            ]
+        ]
 
 
     def transform_into_prompt_structure(self, test_case, purpose):

@@ -49,7 +49,7 @@ def __init__(self, context: PromptContext, prompt_helper, strategy: PromptStrate
         self.transformed_steps = {}
         self.pentest_steps = None
 
-    def _get_documentation_steps(self, common_steps: List[str], move_type: str) -> List[str]:
+    def _get_documentation_steps(self, common_steps: List[str], move_type: str, steps: Any) -> List[str]:
         """
         Provides the steps for the chain-of-thought strategy when the context is documentation.
 
@@ -61,7 +61,7 @@ def _get_documentation_steps(self, common_steps: List[str], move_type: str) -> L
             List[str]: A list of steps for the chain-of-thought strategy in the documentation context.
         """
         if move_type == "explore":
-            doc_steps = self.generate_documentation_steps()
+            doc_steps = self.generate_documentation_steps(steps)
             return self.prompt_helper.get_initial_documentation_steps(
                                                                        strategy_steps= doc_steps)
         else:

@@ -54,14 +54,17 @@ def generate_prompt(self, move_type: str, hint: Optional[str], previous_prompt:
         common_steps = self._get_common_steps()
         if self.context == PromptContext.DOCUMENTATION:
             self.purpose = PromptPurpose.DOCUMENTATION
-            tree_of_thought_steps = self._get_documentation_steps(common_steps, move_type)
+            tree_of_thought_steps = self._get_documentation_steps(common_steps, move_type, self.get_documentation_steps())
             tree_of_thought_steps = [
                                         "Imagine three experts each proposing one step at a time. If an expert realizes their step was incorrect, they leave. The question is:"] + tree_of_thought_steps
 
         elif self.context == PromptContext.PENTESTING:
             tree_of_thought_steps = self._get_pentesting_steps(move_type)
         else:
-            tree_of_thought_steps = self.parse_prompt_file()
+            steps = self.parse_prompt_file()
+
+            tree_of_thought_steps = self._get_documentation_steps(common_steps, move_type, steps)
+
 
             tree_of_thought_steps = ([
                                         "Imagine three experts each proposing one step at a time. If an expert realizes their step was incorrect, they leave. The question is:"] +
@@ -266,10 +269,10 @@ def transform_to_tree_of_thoughtx(self, prompts: Dict[str, List[List[str]]]) ->
 
         return tot_prompts
 
-
-    def generate_documentation_steps(self):
-       return [
-           [f"Objective: Identify all accessible endpoints via GET requests for {self.prompt_helper.host}. {self.prompt_helper._description}"],
+    def get_documentation_steps(self):
+        return [
+            [
+                f"Objective: Identify all accessible endpoints via GET requests for {self.prompt_helper.host}. {self.prompt_helper._description}"],
             [
                 "Start by querying root-level resource endpoints.",
                 "Focus on sending GET requests only to those endpoints that consist of a single path component directly following the root.",
@@ -282,10 +285,10 @@ def generate_documentation_steps(self):
                 "Attempt to query these endpoints to validate whether the 'id' parameter correctly retrieves individual resource instances.",
                 "Consider testing with various ID formats, such as integers, longs, or base62 encodings like '6rqhFgbbKwnb9MLmUQDhG6'."
             ],
-           ["Now, move to query Subresource Endpoints.",
-                "Identify subresource endpoints of the form `/resource/other_resource`.",
-                "Query these endpoints to check if they return data related to the main resource without requiring an `id` parameter."
-],
+            ["Now, move to query Subresource Endpoints.",
+             "Identify subresource endpoints of the form `/resource/other_resource`.",
+             "Query these endpoints to check if they return data related to the main resource without requiring an `id` parameter."
+             ],
             [
                 "Proceed to analyze related resource endpoints.",
                 "Identify patterns where a resource is associated with another through an 'id', formatted as `/resource/id/other_resource`.",
@@ -305,3 +308,5 @@ def generate_documentation_steps(self):
             ]
         ]
 
+    def generate_documentation_steps(self, steps):  # hook: receives the step list chosen by the caller
+        return steps  # identity; calling self.generate_documentation_steps(steps) here never terminates