ipa-lab · andreashappe · Sep 4, 2025 · Sep 4, 2025 · Sep 4, 2025 · Sep 4, 2025
@@ -0,0 +1,70 @@
+# Copilot Instructions for hackingBuddyGPT
+
+## Project Summary
+
+hackingBuddyGPT is a research-driven Python framework that helps security researchers and penetration testers use Large Language Models (LLMs) to automate and experiment with security testing, especially privilege escalation and web/API pentesting. It supports both local shell and SSH connections to targets, and is designed for rapid prototyping of new agent-based use cases. **Warning:** This tool executes real commands on live systems—use only in safe, isolated environments.
+
+## Tech Stack
+- **Language:** Python 3.10+
+- **Core dependencies:** See `pyproject.toml` (notable: `fabric`, `requests`, `pydantic`; `pytest` is installed through the `testing` extra)
+- **CLI Entrypoint:** `wintermute` (see `src/hackingBuddyGPT/cli/wintermute.py`)
+- **Web viewer:** Optional, for log viewing (`wintermute Viewer`)
+- **RAG/Knowledge base:** Markdown files in `rag/`
+- **Container/VM orchestration:** Bash scripts in `scripts/`, Ansible playbooks (`tasks.yaml`)
+
+## Project Structure
+- `src/hackingBuddyGPT/` — Main Python package
+  - `cli/` — CLI entrypoint (`wintermute.py`)
+  - `capabilities/` — Modular agent actions (e.g., SSH, HTTP, note-taking)
+  - `usecases/` — Agent logic for each use case (Linux privesc, web, API, etc.)
+  - `utils/` — Shared helpers (LLM, logging, config, prompt generation)
+- `tests/` — Pytest-based unit and integration tests
+- `scripts/` — Setup, orchestration, and run scripts for Mac, Codespaces, and containers
+- `rag/` — Markdown knowledge base for RAG (GTFOBins, HackTricks)
+- `docs/` — Minimal, see https://docs.hackingbuddy.ai for full docs
+
+## Setup & Usage
+- **Python:** Use 3.10+ (see `pyproject.toml`).
+- **Install:**
+  ```bash
+  python -m venv venv
+  source venv/bin/activate
+  pip install -e .
+  ```
+- **Run:**
+  - List use cases: `python src/hackingBuddyGPT/cli/wintermute.py`
+  - Example: `python src/hackingBuddyGPT/cli/wintermute.py LinuxPrivesc --llm.api_key=... --conn=ssh ...`
+  - See `README.md`, `MAC.md`, `CODESPACES.md` for platform-specific instructions.
+- **Testing:** `pip install '.[testing]' && pytest`
+- **Linting:** `ruff` (config in `pyproject.toml`)
+- **Container/VM setup:** Use scripts in `scripts/` (see comments in each script for prerequisites and usage).
+
+## Coding Guidelines
+- Follow PEP 8 and use `ruff` for linting (see `[tool.ruff]` in `pyproject.toml`).
+- Use type hints and docstrings for all public functions/classes.
+- Place new agent logic in `usecases/`, new capabilities in `capabilities/`.
+- Prefer composition (capabilities, helpers) over inheritance.
+- Use the logging utilities in `utils/logging.py`.
+- Document all new scripts and major changes in the `README.md` or relevant `.md` files.
+- Mark all workarounds or hacks with `HACK`, `TODO`, or `FIXME`.
+
+## Existing Tools & Resources
+- **Documentation:** https://docs.hackingbuddy.ai
+- **Community/Support:** Discord link in `README.md`
+- **Security Policy:** See `SECURITY.md`
+- **Code of Conduct:** See `CODE_OF_CONDUCT.md`
+- **Contribution Guide:** See `CONTRIBUTING.md`
+- **Citations:** See `CITATION.cff`
+- **Benchmarks:** https://github.com/ipa-lab/benchmark-privesc-linux
+
+## Tips to Minimize Bash/Build Failures
+- Always use the provided scripts for environment/container setup; do not run ad-hoc commands unless necessary.
+- Ensure Bash version 4+ (Mac: install via Homebrew).
+- Use virtual environments for Python dependencies.
+- For Codespaces/Mac, follow the step-by-step guides in `CODESPACES.md` and `MAC.md`.
+- Never expose the web viewer to the public internet.
+- Always set API keys and credentials in `.env` or as prompted by scripts.
+- For RAG, add new markdown files to the appropriate `rag/` subfolder.
+
+---
+For further details, see the `README.md` and https://docs.hackingbuddy.ai. When in doubt, prefer existing patterns and scripts over inventing new ones.
@@ -26,9 +26,10 @@ scripts/mac_ansible_id_rsa
 scripts/mac_ansible_id_rsa.pub
 .aider*
 
-src/hackingBuddyGPT/usecases/web_api_testing/documentation/openapi_spec/
 src/hackingBuddyGPT/usecases/web_api_testing/documentation/reports/
 src/hackingBuddyGPT/usecases/web_api_testing/retrieve_spotify_token.py
 config/my_configs/*
 config/configs/*
-config/configs/
+config/configs/
+
+src/hackingBuddyGPT/usecases/web_api_documentation/openapi_spec/
@@ -1,4 +1,5 @@
 from .web import *
+from .web_api_documentation import *
 from .web_api_testing import *
 from .viewer import *
 from .minimal_linux_privesc import *

@@ -0,0 +1 @@
+from .simple_openapi_documentation import SimpleWebAPIDocumentation
@@ -1,7 +1,6 @@
 import copy
-from itertools import chain
 
-from hackingBuddyGPT.usecases.web_api_testing.documentation.pattern_matcher import PatternMatcher
+from hackingBuddyGPT.utils.web_api.pattern_matcher import PatternMatcher
 
 
 class Evaluator:

@@ -1,21 +1,22 @@
+import copy
+import json
 import os
 import re
 from collections import defaultdict
 from datetime import datetime
+from typing import Any, Dict, Optional, Tuple
 import yaml
 from hackingBuddyGPT.capabilities.yamlFile import YAMLFile
-from hackingBuddyGPT.usecases.web_api_testing.documentation.pattern_matcher import PatternMatcher
+from hackingBuddyGPT.utils.web_api.pattern_matcher import PatternMatcher
 from hackingBuddyGPT.utils.prompt_generation.information import PromptStrategy
-from hackingBuddyGPT.usecases.web_api_testing.response_processing import ResponseHandler
-from hackingBuddyGPT.usecases.web_api_testing.utils import LLMHandler
+from hackingBuddyGPT.utils.web_api.llm_handler import LLMHandler
 
 
 class OpenAPISpecificationHandler(object):
     """
     Handles the generation and updating of an OpenAPI specification document based on dynamic API responses.
 
     Attributes:
-        response_handler (object): An instance of the response handler for processing API responses.
         schemas (dict): A dictionary to store API schemas.
         filename (str): The filename for the OpenAPI specification file.
         openapi_spec (dict): The OpenAPI specification document structure.
@@ -26,18 +27,16 @@ class OpenAPISpecificationHandler(object):
         _capabilities (dict): A dictionary to store capabilities related to YAML file handling.
     """
 
-    def __init__(self, llm_handler: LLMHandler, response_handler: ResponseHandler, strategy: PromptStrategy, url: str,
+    def __init__(self, llm_handler: LLMHandler, strategy: PromptStrategy, url: str,
                  description: str, name: str) -> None:
         """
         Initializes the handler with a template OpenAPI specification.
 
         Args:
             llm_handler (object): An instance of the LLM handler for interacting with the LLM.
-            response_handler (object): An instance of the response handler for processing API responses.
             strategy (PromptStrategy): An instance of the PromptStrategy class.
         """
         self.unsuccessful_methods = {}
-        self.response_handler = response_handler
         self.schemas = {}
         self.query_params = {}
         self.endpoint_methods = {}
@@ -103,6 +102,143 @@ def is_partial_match(self, element, string_list):
 
         return False
 
+    def parse_http_response_to_openapi_example(
+            self, openapi_spec: Dict[str, Any], http_response: str, path: str, method: str
+    ) -> Tuple[Optional[Dict[str, Any]], Optional[str], Dict[str, Any]]:
+        """
+        Parses a raw HTTP response and derives an example entry for the OpenAPI specification.
+
+        The response is split into headers and body at the first CRLF-CRLF separator and the
+        body is decoded as JSON. The decoded body is passed to `parse_http_response_to_schema`
+        to obtain the schema reference and object name, and the extracted entries are handed
+        to `self.llm_handler._add_created_object`.
+
+        Args:
+            openapi_spec (Dict[str, Any]): The OpenAPI specification to update.
+            http_response (str): The raw HTTP response (headers and body) to parse.
+            path (str): The API path; forwarded to `parse_http_response_to_schema`.
+            method (str): The HTTP method. Not used inside this method.
+
+        Returns:
+            Tuple[Optional[Dict[str, Any]], Optional[str], Dict[str, Any]]: A tuple containing the entry dictionary, reference, and updated OpenAPI specification.
+            The entry and the reference are both None when the body is not valid JSON. Despite the
+            annotation, the entry can also be an empty string (empty list body) or whatever JSON
+            value was found under "data".
+
+        Raises:
+            ValueError: If `http_response` contains no CRLF-CRLF separator, because the two-way
+                unpacking of the split result fails.
+        """
+
+        # `headers` is unpacked but never read afterwards; only the body is processed.
+        headers, body = http_response.split("\r\n\r\n", 1)
+        try:
+            body_dict = json.loads(body)
+        except json.decoder.JSONDecodeError:
+            # Non-JSON bodies produce no example; the specification is returned untouched.
+            return None, None, openapi_spec
+
+        reference, object_name, openapi_spec = self.parse_http_response_to_schema(openapi_spec, body_dict, path)
+        entry_dict = {}
+        # Keep an unmodified copy: `body_dict` is rebound below, and the top-level keys
+        # are needed again for the final merge.
+        old_body_dict = copy.deepcopy(body_dict)
+
+        # NOTE(review): len() raises TypeError when the JSON body is a bare number,
+        # boolean or null - confirm whether such bodies can reach this point.
+        if len(body_dict) == 1 and "data" not in body_dict:
+            # A body with exactly one key/element and no "data" entry is stored whole under "id".
+            entry_dict["id"] = body_dict
+            self.llm_handler._add_created_object(entry_dict, object_name)
+        else:
+            if "data" in body_dict:
+                # Unwrap the "data" envelope; for a non-empty list continue with its first element.
+                body_dict = body_dict["data"]
+                if isinstance(body_dict, list) and len(body_dict) > 0:
+                    body_dict = body_dict[0]
+                    # Only true when "data" is a list of lists.
+                    if isinstance(body_dict, list):
+                        for entry in body_dict:
+                            # Entries are keyed by the first truthy value of title, name or id.
+                            key = entry.get("title") or entry.get("name") or entry.get("id")
+                            entry_dict[key] = {"value": entry}
+                            self.llm_handler._add_created_object(entry_dict[key], object_name)
+                            # Stop once more than three examples have been collected.
+                            if len(entry_dict) > 3:
+                                break
+
+
+            # NOTE(review): for a top-level JSON list of objects this branch only rebinds
+            # `body_dict` to the first element; `entry_dict` stays empty and
+            # `_add_created_object` is not called - confirm that this is intended.
+            if isinstance(body_dict, list) and len(body_dict) > 0:
+                body_dict = body_dict[0]
+                if isinstance(body_dict, list):
+
+                    for entry in body_dict:
+                        key = entry.get("title") or entry.get("name") or entry.get("id")
+                        # Unlike the loop above, the entry is stored without a {"value": ...} wrapper.
+                        entry_dict[key] = entry
+                        self.llm_handler._add_created_object(entry_dict[key], object_name)
+                        if len(entry_dict) > 3:
+                            break
+            else:
+                if isinstance(body_dict, list) and len(body_dict) == 0:
+                    # An empty list yields an empty-string example.
+                    entry_dict = ""
+                elif isinstance(body_dict, dict) and "data" in body_dict.keys():
+                    # A nested "data" envelope is unwrapped once more.
+                    entry_dict = body_dict["data"]
+                    if isinstance(entry_dict, list) and len(entry_dict) > 0:
+                        entry_dict = entry_dict[0]
+                else:
+                    entry_dict= body_dict
+                self.llm_handler._add_created_object(entry_dict, object_name)
+        # When the original body was a {"data": ..., ...} envelope, merge its remaining
+        # top-level keys into the example; on key clashes the top-level value wins.
+        # The second isinstance(old_body_dict, dict) check repeats the first one.
+        if isinstance(old_body_dict, dict) and len(old_body_dict.keys()) > 0 and "data" in old_body_dict.keys() and isinstance(old_body_dict, dict) \
+                and isinstance(entry_dict, dict):
+            old_body_dict.pop("data")
+            entry_dict = {**entry_dict, **old_body_dict}
+
+
+        return entry_dict, reference, openapi_spec
+
+    def parse_http_response_to_schema(
+            self, openapi_spec: Dict[str, Any], body_dict: Dict[str, Any], path: str
+    ) -> Tuple[Optional[str], Optional[str], Dict[str, Any]]:
+        """
+        Derives an OpenAPI schema from a parsed response body and registers it in the specification.
+
+        The schema name is taken from the first path segment (capitalized, trailing "s"
+        characters removed). An already registered schema of the same name is never
+        overwritten.
+
+        Args:
+            openapi_spec (Dict[str, Any]): The OpenAPI specification to update. A missing
+                "components"/"schemas" section is created on demand.
+            body_dict (Dict[str, Any]): The HTTP response body as a dictionary or list. For a
+                list, only its first element is inspected, and only if it is a dictionary.
+            path (str): The API path.
+
+        Returns:
+            Tuple[Optional[str], Optional[str], Dict[str, Any]]: A tuple containing the reference,
+            object name, and updated OpenAPI specification. Reference and object name are None
+            when `path` contains no "/".
+        """
+        if "/" not in path:
+            return None, None, openapi_spec
+
+        # NOTE(review): rstrip("s") strips *every* trailing "s" ("class" -> "Cla").
+        # Kept as is because other code may derive schema names the same way.
+        object_name = path.split("/")[1].capitalize().rstrip("s")
+
+        # Pick the mapping whose keys describe the object: the body itself, or the
+        # first element of a list body. Anything else yields an empty property set.
+        if isinstance(body_dict, dict):
+            sample = body_dict
+        elif isinstance(body_dict, list) and len(body_dict) > 0 and isinstance(body_dict[0], dict):
+            sample = body_dict[0]
+        else:
+            sample = {}
+
+        properties_dict: Dict[str, Any] = {}
+        for key, value in sample.items():
+            properties_dict = self.extract_keys(key, value, properties_dict)
+
+        # setdefault avoids a KeyError on a specification that has no
+        # "components"/"schemas" section yet.
+        schemas = openapi_spec.setdefault("components", {}).setdefault("schemas", {})
+        if object_name not in schemas:
+            schemas[object_name] = {"type": "object", "properties": properties_dict}
+
+        reference = f"#/components/schemas/{object_name}"
+        return reference, object_name, openapi_spec
+
+    def extract_keys(self, key: str, value: Any, properties_dict: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Records one response field as an OpenAPI property description.
+
+        The property is described by the Python type name of `value` and the stringified
+        value as example. A field named "id" is additionally tagged with the "uuid" format.
+
+        Args:
+            key (str): The field name; used as the property name.
+            value (Any): The field value the description is derived from.
+            properties_dict (Dict[str, Any]): The property collection; updated in place.
+
+        Returns:
+            Dict[str, Any]: The same `properties_dict` object, including the new property.
+        """
+        # NOTE(review): the recorded type is the Python type name ("str", "int", ...),
+        # not an OpenAPI type name ("string", "integer", ...) - confirm consumers expect this.
+        descriptor = {"type": str(type(value).__name__)}
+        if key == "id":
+            descriptor["format"] = "uuid"
+        descriptor["example"] = str(value)
+
+        properties_dict[key] = descriptor
+        return properties_dict
+
+
     def update_openapi_spec(self, resp, result, prompt_engineer):
         """
         Updates the OpenAPI specification based on the API response provided.
@@ -156,7 +292,7 @@ def update_openapi_spec(self, resp, result, prompt_engineer):
                 return list(self.openapi_spec["endpoints"].keys())
 
             # Parse the response into OpenAPI example and reference
-            example, reference, self.openapi_spec = self.response_handler.parse_http_response_to_openapi_example(
+            example, reference, self.openapi_spec = self.parse_http_response_to_openapi_example(
                 self.openapi_spec, result, path, method
             )
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		from .simple_openapi_documentation import SimpleWebAPIDocumentation