Added evaluation to documentation

ipa-lab · andreashappe · Aug 27, 2025 · Jul 16, 2024 · Sep 3, 2024 · Sep 3, 2024
commit e228cd8bd9903a19bb4644247d84767b7306bd96
@@ -43,6 +43,7 @@ def __init__(self, llm_handler: LLMHandler, response_handler: ResponseHandler, s
         """
         self.response_handler = response_handler
         self.schemas = {}
+        self.query_params = {}
         print(f'Name:{name}')
         self.endpoint_methods = {}
         self.filename = f"{name.lower()}_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.yaml"
@@ -119,6 +120,7 @@ def update_openapi_spec(self, resp, result, result_str):
             )
             self.schemas = self.openapi_spec["components"]["schemas"]
 
+
             # Add example and reference to the method's responses if available
             if example or reference or status_message == "No Content":
                 if path in endpoints.keys() and method.lower() not in endpoints[path].values():
@@ -143,6 +145,25 @@ def update_openapi_spec(self, resp, result, result_str):
                     # Ensure uniqueness of methods for each path
                     endpoint_methods[path] = list(set(endpoint_methods[path]))
 
+            # Add query parameters to the OpenAPI entry for this path/method.
+            # extract_query_params returns a {name: value} dict for the path.
+            query_params_dict = self.pattern_matcher.extract_query_params(path)
+            if query_params_dict:
+                parameters = endpoints[path][method.lower()].setdefault('parameters', [])
+                # Iterate the dict itself: a keys() view has no .items(), so looping
+                # over `query_params_dict.keys().items()` raised AttributeError for
+                # every path that carried a query string.
+                for param, value in query_params_dict.items():
+                    parameters.append({
+                        "name": param,
+                        "in": "query",
+                        "required": True,  # Change this as needed
+                        "schema": {
+                            # Type is inferred from the value seen in the path
+                            "type": self.get_type(value)
+                        }
+                    })
+                    # Remember which parameter names were seen for this path
+                    self.query_params.setdefault(path, []).append(param)
+
         return list(self.openapi_spec["endpoints"].keys())
 
     def write_openapi_to_yaml(self):
@@ -213,18 +234,16 @@ def found_all_endpoints(self):
         else:
             return True
 
-    def match_patterns(self, path):
-        if bool(re.search(r"/\d+", path)):
-            path = re.sub(r"/\d+", "/:id", path)
-
-        # Check if the path matches the pattern
-        if re.match(r"^/api/books/\d+/characters\?page=\d+$", path):
-            path = re.sub(r"(?<=page=)\d+", ":id", path)
-            pattern = r"^characters\?page=\d+&pageSize=\d+$"
-        pattern = r"^characters\?page=\d+&pageSize=\d+$"
-        # Check if the pattern matches the entire string
-        if re.match(pattern, path ):
-                updated_path = re.sub(r"(page=)\d+", r"\1{page}", path)
-                updated_path = re.sub(r"(pageSize=)\d+", r"\1{pagesize}", updated_path)
-
-        return path
+
+
+    def get_type(self, value):
+        """
+        Infer a schema type label for a query-parameter value.
+
+        Args:
+            value: The raw parameter value. It is converted with ``str`` before
+                matching, so non-string inputs no longer raise AttributeError.
+
+        Returns:
+            str: "integer" for optionally signed whole numbers, "double" for
+            numbers containing a decimal point (optionally with an exponent),
+            and "string" for everything else, including the empty string.
+        """
+        text = str(value)
+        # Accept a leading sign so "-5" is an integer; str.isdigit() rejects signs
+        # (and accepts characters such as "²"), unlike the decimal pattern below.
+        if re.fullmatch(r"[+-]?\d+", text):
+            return "integer"
+        # A decimal point is required here, so whole numbers never match.
+        # NOTE(review): OpenAPI schemas spell this as type "number" with format
+        # "double"; the label is kept unchanged for existing callers.
+        if re.fullmatch(r"[+-]?(\d+\.\d*|\.\d+)([eE][+-]?\d+)?", text):
+            return "double"
+        return "string"
+
@@ -54,6 +54,14 @@ def replace_according_to_pattern(self, path):
             return self.replace_parameters(path)
         return path
 
+    def extract_query_params(self, path):
+        """
+        Collect the query parameters found in a path.
+
+        Args:
+            path: The path string searched with the 'query_params' pattern.
+
+        Returns:
+            dict: Parameter name mapped to its value; when a name occurs more
+            than once, the last occurrence wins.
+        """
+        # Each match is a 3-tuple; only its last two items (name, value) are kept.
+        found = self.patterns['query_params'].findall(path)
+        return {name: val for _, name, val in found}
+
 
 if __name__ == "__main__":
     # Example usage
@@ -67,3 +75,9 @@ def replace_according_to_pattern(self, path):
 
     print(modified_path)
     print(modified_nested_path)
+    print(f'{example_path}')
+
+    print(f'extracted parameters: {matcher.extract_query_params(example_path)}')
+    print(f'{example_nested_path}')
+
+    print(f'extracted parameters: {matcher.extract_query_params(example_nested_path)}')
@@ -289,7 +289,7 @@ def evaluate_result(self, result: Any, prompt_history: Prompt) -> Any:
     def extract_key_elements_of_response(self, raw_response: Any) ->str:
         status_code, headers, body = self.response_analyzer.parse_http_response(raw_response)
         return "Status Code: " + str(status_code) + "\nHeaders:"+ str(headers)+ "\nBody"+ str(body)
-    def evaluate_response(self, response, completion, prompt_history, log, categorized_endpoints):
+    def handle_response(self, response, completion, prompt_history, log, categorized_endpoints):
         """
         Evaluates the response to determine if it is acceptable.
 
@@ -310,6 +310,15 @@ def evaluate_response(self, response, completion, prompt_history, log, categoriz
             self.repeat_counter = 0
             self.prompt_helper.hint_for_next_round = f'Try this endpoint in the next round {next(self.common_endpoints)}'
 
+        if response.__class__.__name__ == "RecordNote":
+            prompt_history.append(tool_message(response, tool_call_id))
+            return False, prompt_history, None, None
+
+        else:
+            return self.handle_http_response(response, prompt_history, log, completion, message, categorized_endpoints, tool_call_id)
+
+
+    def handle_http_response(self, response: Any, prompt_history: Any, log: Any, completion: Any, message: Any, categorized_endpoints, tool_call_id) -> Any:
         parts = parts = [part for part in response.action.path.split("/") if part]
         if response.action.path == self.last_path or response.action.path in self.prompt_helper.unsuccessful_paths or response.action.path in self.prompt_helper.found_endpoints:
             self.prompt_helper.hint_for_next_round = f"DO not try this path {self.last_path}. You already tried this before!"
@@ -377,7 +386,6 @@ def evaluate_response(self, response, completion, prompt_history, log, categoriz
                                                                     categorized_endpoints)
             self.query_counter = 0
 
-        # Append status message to prompt history
         prompt_history.append(tool_message(status_message, tool_call_id))
 
         return is_successful, prompt_history, result, result_str
@@ -402,3 +410,4 @@ def extract_json(self, response: str) -> dict:
         except (ValueError, json.JSONDecodeError) as e:
             print(f"Error extracting JSON: {e}")
             return {}
+
@@ -227,7 +227,7 @@ def run_documentation(self, turn: int, move_type: str) -> None:
         while not is_good:
             prompt = self.prompt_engineer.generate_prompt(turn=turn, move_type=move_type,log=self._log , prompt_history=self._prompt_history, llm_handler =self.llm_handler)
             response, completion = self.llm_handler.execute_prompt(prompt=prompt)
-            is_good, self._prompt_history, result, result_str = self.response_handler.evaluate_response(response, completion, self._prompt_history, self._log, self.categorized_endpoints)
+            is_good, self._prompt_history, result, result_str = self.response_handler.handle_response(response, completion, self._prompt_history, self._log, self.categorized_endpoints)
             if result == None:
                 continue
             self._prompt_history, self.prompt_engineer = self.documentation_handler.document_response(
@@ -238,16 +238,10 @@ def run_documentation(self, turn: int, move_type: str) -> None:
                 is_good = True
                 self.all_steps_done = True
 
-            # Use evaluator to record routes and parameters found
-            #routes_found = self.all_http_methods_found(turn)
-            #query_params_found = self.evaluator.all_query_params_found(turn)  # This function should return the number found
-            #false_positives = self.evaluator.check_false_positives(response)  # Define this function to determine FP count
+            self.evaluator.evaluate_response(turn, response, self.prompt_engineer.prompt_helper.found_endpoints)
 
-            # Record these results in the evaluator
-            #self.evaluator.results["routes_found"].append(routes_found)
-            #self.evaluator.results["query_params_found"].append(query_params_found)
-            #self.evaluator.results["false_positives"].append(false_positives)
-       # self.finalize_documentation_metrics()
+
+        self.finalize_documentation_metrics()
 
         self.all_http_methods_found(turn)
 

@@ -1,5 +1,10 @@
+from hackingBuddyGPT.usecases.web_api_testing.documentation.pattern_matcher import PatternMatcher
+
+
 class Evaluator:
-    def __init__(self, num_runs=10, config:str=""):
+    def __init__(self, num_runs=10, config=None):
+        self.pattern_matcher = PatternMatcher()
+        # `config` defaults to None, so guard before reading it. A missing or
+        # empty "query_params" entry becomes an empty list so that membership
+        # checks against self.documented_query_params do not fail on None.
+        self.documented_query_params = (config or {}).get("query_params") or []
         self.num_runs = num_runs
         self.get_routes_documented = 20  # Example documented GET routes
         self.query_params_documented = 12  # Example documented query parameters
@@ -13,11 +18,11 @@ def calculate_metrics(self):
         Calculate evaluation metrics based on the simulated runs.
         """
         # Average percentages of documented routes and parameters found
-        avg_routes_found = sum(self.results["routes_found"]) / self.num_runs
-        avg_query_params_found = sum(self.results["query_params_found"]) / self.num_runs
+        routes_found = len(self.results["routes_found"])
+        query_params_found = len(self.results["query_params_found"])
 
-        percent_routes_found = (avg_routes_found / self.get_routes_documented) * 100
-        percent_params_found = (avg_query_params_found / self.query_params_documented) * 100
+        percent_routes_found = (routes_found / self.get_routes_documented) * 100
+        percent_params_found = (query_params_found / self.query_params_documented) * 100
 
         # Average false positives
         avg_false_positives = sum(self.results["false_positives"]) / self.num_runs
@@ -38,7 +43,7 @@ def calculate_metrics(self):
 
         return metrics
 
-    def check_false_positives(self, response):
+    def check_false_positives(self, path):
         """
         Identify and count false positive query parameters in the response.
 
@@ -49,13 +54,11 @@ def check_false_positives(self, response):
             int: The count of false positive query parameters.
         """
         # Example list of documented query parameters
-        documented_query_params = ["user_id", "post_id", "page", "limit"]
-
         # Extract the query parameters from the response
-        response_query_params = self.extract_query_params_from_response_data(response)
+        response_query_params = self.pattern_matcher.extract_query_params(path).keys()
 
         # Identify false positives
-        false_positives = [param for param in response_query_params if param not in documented_query_params]
+        false_positives = [param for param in response_query_params if param not in self.documented_query_params]
 
         return len(false_positives)
 
@@ -72,7 +75,7 @@ def extract_query_params_from_response_data(self, response):
         # Placeholder code: Replace with actual logic to parse response and extract query parameters
         return response.get("query_params", [])
 
-    def all_query_params_found(self, turn):
+    def all_query_params_found(self, path):
         """
         Count the number of documented query parameters found in a response.
 
@@ -83,17 +86,16 @@ def all_query_params_found(self, turn):
             int: The count of documented query parameters found in this turn.
         """
         # Example list of documented query parameters
-        documented_query_params = ["user_id", "post_id", "page", "limit"]
 
         # Simulate response query parameters found (this would usually come from the response data)
-        response_query_params = self.extract_query_params_from_response(turn)
+        response_query_params = self.pattern_matcher.extract_query_params(path).keys()
 
         # Count the valid query parameters found in the response
-        valid_query_params = [param for param in response_query_params if param in documented_query_params]
+        valid_query_params = [param for param in response_query_params if param in self.documented_query_params]
 
         return len(valid_query_params)
 
-    def extract_query_params_from_response(self, turn):
+    def extract_query_params_from_response(self, path):
         """
         Extract query parameters from the response in a specific turn.
 
@@ -104,11 +106,17 @@ def extract_query_params_from_response(self, turn):
             list: A list of query parameter names found in the response.
         """
         # Placeholder code: Replace this with actual extraction logic
-        # Here, you should parse the actual API response to identify query parameters
-        example_responses = {
-            1: ["user_id", "page", "unknown_param"],
-            2: ["post_id", "limit"],
-            3: ["user_id", "limit", "extra_param"],
-        }
-        return example_responses.get(turn, [])
+        return self.pattern_matcher.extract_query_params(path).keys()
+
+    def evaluate_response(self, turn, response, routes_found):
+        """
+        Record evaluation data for a single response.
+
+        Responses whose class is named "RecordNote" are skipped and nothing is
+        recorded for them.
+
+        Args:
+            turn: The current turn; not used by this method.
+            response: Object whose ``action.path`` is analysed.
+            routes_found: Value appended unchanged to ``results["routes_found"]``.
+        """
+        if response.__class__.__name__ == "RecordNote":
+            return
+
+        request_path = response.action.path
+        # Count documented parameters first, then the undocumented ones.
+        matched_params = self.all_query_params_found(request_path)
+        unexpected_params = self.check_false_positives(request_path)
+
+        # Store this response's figures in the evaluator results
+        recorded = self.results
+        recorded["routes_found"].append(routes_found)
+        recorded["query_params_found"].append(matched_params)
+        recorded["false_positives"].append(unexpected_params)