12 changes: 6 additions & 6 deletions README.md
@@ -41,7 +41,7 @@ series = {ESEC/FSE 2023}

# Example runs

-- more can be seen at [history notes](https://github.com/ipa-lab/hackingBuddyGPT/blob/v3/history_notes.md)
+- more can be seen at [history notes](https://github.com/ipa-lab/hackingBuddyGPT/blob/v3/docs/history_notes.md)

## updated version using GPT-4

@@ -51,11 +51,11 @@ This happened during a recent run:

Some things to note:

-- the panel labeled 'my new fact list' is generated by the LLM. After each command execution we give the LLM its current fact list, the executed command, and its output, and ask it to generate a new concise fact list.
-- the table contains all executed commands. The columns 'success?' and 'reason' are populated by asking the LLM whether the executed command (and its output) helps with getting root access, and to reason about the command's output.
-- at the bottom you see the last executed command (`/tmp/bash -p`) and its output.
-
-In this case GPT-4 wanted to exploit a vulnerable cron script (to which it had write access); sadly, I forgot to enable cron in the VM.
+- initially the current configuration is output. Yay, so many colors!
+- "Got command from LLM" shows the generated command, while the panel after it has the given command as its title and the command's output as its content.
+- the table contains all executed commands. ThinkTime denotes the time that was needed to generate the command (Tokens shows the token count for the prompt and its response). StateUpdTime shows the time that was needed to generate a new state (the next column also gives the token count).
+- "What does the LLM know about the system?" gives an LLM-generated list of system facts. To generate it, the LLM is given the latest executed command (and its output) as well as the current list of system facts. This is the operation whose time/token usage is shown in the overview table as StateUpdTime/StateUpdTokens. As the state update takes forever, it is disabled by default and has to be enabled through a command-line switch (see the sketch after this list).
+- then the next round starts. The next given command (`sudo tar`) will lead to a pwn'd system, BTW.
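
For illustration, a minimal sketch of that state-update step (the prompt wording and the `query_llm` callable are assumptions made for this example, not the project's actual code):

```python
# hypothetical sketch: ask the LLM to merge the old fact list with the
# latest command and its output into a new concise fact list
from typing import Callable

def update_state(query_llm: Callable[[str], str], facts: str, cmd: str, output: str) -> str:
    prompt = (
        "You are tracking facts about a target system.\n\n"
        f"Current fact list:\n{facts}\n\n"
        f"Executed command:\n{cmd}\n\n"
        f"Command output:\n{output}\n\n"
        "Return a new, concise fact list."
    )
    return query_llm(prompt)
```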

## High-Level Description

73 changes: 73 additions & 0 deletions args.py
@@ -0,0 +1,73 @@
import argparse
import json
import os

from dataclasses import dataclass
from typing import Optional

from dotenv import load_dotenv
from llms.llm_connection import get_potential_llm_connections

@dataclass
class ConfigTarget:
    ip: Optional[str] = None
    hostname: Optional[str] = None
    user: Optional[str] = None
    password: Optional[str] = None
    os: Optional[str] = None
    hint: Optional[str] = None

@dataclass
class Config:
    enable_explanation: bool = False
    enable_update_state: bool = False

    target: Optional[ConfigTarget] = None

    log: str = ':memory:'
    max_rounds: int = 10
    llm_connection: Optional[str] = None
    llm_server_base_url: Optional[str] = None
    model: Optional[str] = None
    context_size: int = 4096
    tag: Optional[str] = None

def parse_args_and_env(console) -> Config:
    # setup dotenv
    load_dotenv()

    # perform argument parsing
    # for defaults we are using .env but allow overwrite through cli arguments
    parser = argparse.ArgumentParser(description='Run an LLM vs an SSH connection.')
    parser.add_argument('--enable-explanation', help="let the LLM explain each round's result", action="store_true")
    parser.add_argument('--enable-update-state', help='ask the LLM to keep a multi-round state with findings', action="store_true")
    parser.add_argument('--log', type=str, help='sqlite3 db for storing log files', default=os.getenv("LOG_DESTINATION") or ':memory:')
    parser.add_argument('--target-ip', type=str, help='ssh hostname to use to connect to target system', default=os.getenv("TARGET_IP") or '127.0.0.1')
    parser.add_argument('--target-hostname', type=str, help='safety: what hostname to expect at the target IP', default=os.getenv("TARGET_HOSTNAME") or "debian")
    parser.add_argument('--target-user', type=str, help='ssh username to use to connect to target system', default=os.getenv("TARGET_USER") or 'lowpriv')
    parser.add_argument('--target-password', type=str, help='ssh password to use to connect to target system', default=os.getenv("TARGET_PASSWORD") or 'trustno1')
    # int(os.getenv(..) or default) so that an unset variable doesn't crash int()
    parser.add_argument('--max-rounds', type=int, help='how many cmd-rounds to execute at max', default=int(os.getenv("MAX_ROUNDS") or 10))
    parser.add_argument('--llm-connection', type=str, help='which LLM driver to use', choices=get_potential_llm_connections(), default=os.getenv("LLM_CONNECTION") or "openai_rest")
    parser.add_argument('--target-os', type=str, help='What is the target operating system?', choices=["linux", "windows"], default="linux")
    parser.add_argument('--model', type=str, help='which LLM to use', default=os.getenv("MODEL") or "gpt-3.5-turbo")
    parser.add_argument('--llm-server-base-url', type=str, help='which LLM server to use', default=os.getenv("LLM_SERVER_BASE_URL") or "https://api.openai.com")
    parser.add_argument('--tag', type=str, help='tag run with string', default="")
    parser.add_argument('--context-size', type=int, help='model context size to use', default=int(os.getenv("CONTEXT_SIZE") or 4096))
    parser.add_argument('--hints', type=argparse.FileType('r', encoding='latin-1'), help='json file with a hint per tested hostname', default=None)

    args = parser.parse_args()
    hint = get_hint(args, console)

    target = ConfigTarget(args.target_ip, args.target_hostname, args.target_user, args.target_password, args.target_os, hint)

    return Config(args.enable_explanation, args.enable_update_state, target, args.log, args.max_rounds, args.llm_connection, args.llm_server_base_url, args.model, args.context_size, args.tag)

def get_hint(args, console):
    if args.hints:
        try:
            hints = json.load(args.hints)
            if args.target_hostname in hints:
                hint = hints[args.target_hostname]
                console.print(f"[bold green]Using the following hint: '{hint}'")
                return hint
        except Exception:
            console.print("[yellow]Was not able to load hint file")
    return None
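
For context, a hypothetical sketch of how this parser might be consumed. The entry-point name `wintermute.py` and the use of a `rich` Console are assumptions (the `[bold green]` markup in `get_hint` suggests rich), not something this diff confirms:

```python
# hypothetical caller for args.py; assumes the console is a rich Console
from rich.console import Console

from args import parse_args_and_env

if __name__ == "__main__":
    config = parse_args_and_env(Console())
    print(config.target.ip, config.target.user, config.model, config.max_rounds)
```

Since every CLI flag defaults to its .env counterpart, an invocation like `python wintermute.py --target-ip 192.168.56.101 --hints hints.json` overrides only those two values; the hints file is JSON mapping hostnames to hint strings, e.g. `{"debian": "check the cron jobs"}` (file name and hint text made up for illustration).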
File renamed without changes
Binary file added docs/example_run_gpt4.png
14 changes: 14 additions & 0 deletions history_notes.md → docs/history_notes.md
@@ -1,3 +1,17 @@
## updated version using GPT-4 (approx. end of August 2023)

This happened during a recent run:

![Example wintermute run](example_run_gpt4.png)

Some things to note:

- the panel labeled 'my new fact list' is generated by the LLM. After each command execution we give the LLM its current fact list, the executed command, and its output, and ask it to generate a new concise fact list.
- the table contains all executed commands. The columns 'success?' and 'reason' are populated by asking the LLM whether the executed command (and its output) helps with getting root access, and to reason about the command's output (a sketch of this judging step follows below).
- at the bottom you see the last executed command (`/tmp/bash -p`) and its output.

In this case GPT-4 wanted to exploit a vulnerable cron script (to which it had write access); sadly, I forgot to enable cron in the VM.
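
A minimal sketch of how those two columns can be filled; the prompt text and the `query_llm` callable are assumptions for illustration, not the project's actual code:

```python
# hypothetical sketch: ask the LLM whether a command helped and why
from typing import Callable, Tuple

def judge_command(query_llm: Callable[[str], str], cmd: str, output: str) -> Tuple[bool, str]:
    reason = query_llm(
        f"The command `{cmd}` returned:\n{output}\n\n"
        "Does this help with getting root access? Answer 'yes' or 'no', then explain briefly."
    )
    return reason.lower().startswith("yes"), reason
```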

# initial version (tagged as fse23-ivr) using gpt-3.5-turbo

This happened during a recent run:
Binary file modified example_run_gpt4.png
59 changes: 49 additions & 10 deletions handlers.py
@@ -1,26 +1,65 @@
+import dataclasses
 import paramiko
+import re

 from targets.ssh import SSHHostConn

 def handle_cmd(conn, input):
-    result, gotRoot = conn.run(input["cmd"])
-    return input["cmd"], result, gotRoot
+    cmd = cmd_output_fixer(input)
+    result, gotRoot = conn.run(cmd)
+    return cmd, result, gotRoot


-def handle_ssh(target_host, target_hostname, input):
-    user = input["username"]
-    password = input["password"]
+def handle_ssh(target, input):
+    cmd_parts = input.split(" ")
+    assert cmd_parts[0] == "test_credentials"

-    cmd = f"test_credentials {user} {password}\n"
+    if len(cmd_parts) != 3:
+        return input, "didn't provide username/password", False

-    test = SSHHostConn(target_host, target_hostname, user, password)
+    test_target = dataclasses.replace(target, user=cmd_parts[1], password=cmd_parts[2])
+    test = SSHHostConn(test_target)
     try:
         test.connect()
         user = test.run("whoami")[0].strip('\n\r ')
         if user == "root":
-            return cmd, "Login as root was successful\n", True
+            return input, "Login as root was successful\n", True
         else:
-            return cmd, "Authentication successful, but user is not root\n", False
+            return input, "Authentication successful, but user is not root\n", False

     except paramiko.ssh_exception.AuthenticationException:
-        return cmd, "Authentication error, credentials are wrong\n", False
+        return input, "Authentication error, credentials are wrong\n", False
+
+
+def remove_wrapping_characters(cmd, wrappers):
+    if len(cmd) >= 2 and cmd[0] == cmd[-1] and cmd[0] in wrappers:
+        print("will remove a wrapper from: " + cmd)
+        return remove_wrapping_characters(cmd[1:-1], wrappers)
+    return cmd
+
+# often the LLM produces a wrapped command
+def cmd_output_fixer(cmd):
+    if len(cmd) < 2:
+        return cmd
+
+    # unwrap a ```-fenced code block down to its single command line
+    stupidity = re.compile(r"^[ \n\r]*```.*\n(.*)\n```$", re.MULTILINE)
+    result = stupidity.search(cmd)
+    if result:
+        print("this would have been captured by the multi-line regex 1")
+        cmd = result.group(1)
+        print("new command: " + cmd)
+
+    # same for a ~~~-fenced code block
+    stupidity = re.compile(r"^[ \n\r]*~~~.*\n(.*)\n~~~$", re.MULTILINE)
+    result = stupidity.search(cmd)
+    if result:
+        print("this would have been captured by the multi-line regex 2")
+        cmd = result.group(1)
+        print("new command: " + cmd)
+
+    # remove wrapping quotes/backticks and a leading shell prompt
+    cmd = remove_wrapping_characters(cmd, "`'\"")
+
+    if cmd.startswith("$ "):
+        cmd = cmd[2:]
+
+    return cmd
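
To make the unwrapping concrete, a few hypothetical LLM replies and what `cmd_output_fixer` reduces them to (the example strings are made up):

```python
# hypothetical inputs for cmd_output_fixer from handlers.py above
from handlers import cmd_output_fixer

fenced = "~~~sh\nsudo tar -cf /dev/null /etc --checkpoint=1 --checkpoint-action=exec=/bin/sh\n~~~"
print(cmd_output_fixer(fenced))      # fence stripped, inner command kept
print(cmd_output_fixer("`id`"))      # wrapping backticks removed -> id
print(cmd_output_fixer("$ whoami"))  # leading "$ " prompt removed -> whoami
```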
101 changes: 0 additions & 101 deletions helper.py

This file was deleted.
