12 changes: 6 additions & 6 deletions README.md
@@ -41,7 +41,7 @@ series = {ESEC/FSE 2023}

# Example runs

-- more can be seen at [history notes](https://github.com/ipa-lab/hackingBuddyGPT/blob/v3/history_notes.md)
+- more can be seen at [history notes](https://github.com/ipa-lab/hackingBuddyGPT/blob/v3/docs/history_notes.md)

## updated version using GPT-4

@@ -51,11 +51,11 @@ This happened during a recent run:

Some things to note:

-- the panel labeled 'my new fact list' is generated by the LLM. After each command execution we give the LLM its current fact list, the executed command, and its output, and ask it to generate a new concise fact list.
-- the table contains all executed commands. The columns 'success?' and 'reason' are populated by asking the LLM whether the executed command (and its output) helps with getting root access, and to reason about the command's output.
-- at the bottom you see the last executed command (`/tmp/bash -p`) and its output.
-
-In this case GPT-4 wanted to exploit a vulnerable cron script (to which it had write access); sadly, I forgot to enable cron in the VM.
+- initially the current configuration is output. Yay, so many colors!
+- "Got command from LLM" shows the generated command, while the panel after it has the given command as its title and the command's output as its content.
+- the table contains all executed commands. ThinkTime denotes the time that was needed to generate the command (Tokens shows the token count for the prompt and its response). StateUpdTime shows the time that was needed to generate a new state (the next column also gives the token count).
+- "What does the LLM know about the system?" gives an LLM-generated list of system facts. To generate it, the LLM is given the latest executed command (and its output) as well as the current list of system facts. This is the operation whose time/token usage is shown in the overview table as StateUpdTime/StateUpdTokens. As the state update takes forever, it is disabled by default and has to be enabled through a command-line switch (see the sketch after this list).
+- then the next round starts. The next given command (`sudo tar`) will lead to a pwn'd system, BTW.
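
For illustration, a minimal sketch of that state-update step (the prompt wording and the `query_llm` callable are assumptions made for this example, not the project's actual code):

```python
# hypothetical sketch: ask the LLM to merge the old fact list with the
# latest command and its output into a new concise fact list
from typing import Callable

def update_state(query_llm: Callable[[str], str], facts: str, cmd: str, output: str) -> str:
    prompt = (
        "You are tracking facts about a target system.\n\n"
        f"Current fact list:\n{facts}\n\n"
        f"Executed command:\n{cmd}\n\n"
        f"Command output:\n{output}\n\n"
        "Return a new, concise fact list."
    )
    return query_llm(prompt)
```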

## High-Level Description

73 changes: 73 additions & 0 deletions args.py
@@ -0,0 +1,73 @@
import argparse
import json
import os

from dataclasses import dataclass
from typing import Optional

from dotenv import load_dotenv
from llms.llm_connection import get_potential_llm_connections

@dataclass
class ConfigTarget:
    ip: Optional[str] = None
    hostname: Optional[str] = None
    user: Optional[str] = None
    password: Optional[str] = None
    os: Optional[str] = None
    hint: Optional[str] = None

@dataclass
class Config:
    enable_explanation: bool = False
    enable_update_state: bool = False

    target: Optional[ConfigTarget] = None

    log: str = ':memory:'
    max_rounds: int = 10
    llm_connection: Optional[str] = None
    llm_server_base_url: Optional[str] = None
    model: Optional[str] = None
    context_size: int = 4096
    tag: Optional[str] = None

def parse_args_and_env(console) -> Config:
    # setup dotenv
    load_dotenv()

    # perform argument parsing
    # for defaults we are using .env but allow overwrite through cli arguments
    parser = argparse.ArgumentParser(description='Run an LLM vs an SSH connection.')
    parser.add_argument('--enable-explanation', help="let the LLM explain each round's result", action="store_true")
    parser.add_argument('--enable-update-state', help='ask the LLM to keep a multi-round state with findings', action="store_true")
    parser.add_argument('--log', type=str, help='sqlite3 db for storing log files', default=os.getenv("LOG_DESTINATION") or ':memory:')
    parser.add_argument('--target-ip', type=str, help='ssh hostname to use to connect to target system', default=os.getenv("TARGET_IP") or '127.0.0.1')
    parser.add_argument('--target-hostname', type=str, help='safety: what hostname to expect at the target IP', default=os.getenv("TARGET_HOSTNAME") or "debian")
    parser.add_argument('--target-user', type=str, help='ssh username to use to connect to target system', default=os.getenv("TARGET_USER") or 'lowpriv')
    parser.add_argument('--target-password', type=str, help='ssh password to use to connect to target system', default=os.getenv("TARGET_PASSWORD") or 'trustno1')
    # int(os.getenv(..) or default) so that an unset variable doesn't crash int()
    parser.add_argument('--max-rounds', type=int, help='how many cmd-rounds to execute at max', default=int(os.getenv("MAX_ROUNDS") or 10))
    parser.add_argument('--llm-connection', type=str, help='which LLM driver to use', choices=get_potential_llm_connections(), default=os.getenv("LLM_CONNECTION") or "openai_rest")
    parser.add_argument('--target-os', type=str, help='What is the target operating system?', choices=["linux", "windows"], default="linux")
    parser.add_argument('--model', type=str, help='which LLM to use', default=os.getenv("MODEL") or "gpt-3.5-turbo")
    parser.add_argument('--llm-server-base-url', type=str, help='which LLM server to use', default=os.getenv("LLM_SERVER_BASE_URL") or "https://api.openai.com")
    parser.add_argument('--tag', type=str, help='tag run with string', default="")
    parser.add_argument('--context-size', type=int, help='model context size to use', default=int(os.getenv("CONTEXT_SIZE") or 4096))
    parser.add_argument('--hints', type=argparse.FileType('r', encoding='latin-1'), help='json file with a hint per tested hostname', default=None)

    args = parser.parse_args()
    hint = get_hint(args, console)

    target = ConfigTarget(args.target_ip, args.target_hostname, args.target_user, args.target_password, args.target_os, hint)

    return Config(args.enable_explanation, args.enable_update_state, target, args.log, args.max_rounds, args.llm_connection, args.llm_server_base_url, args.model, args.context_size, args.tag)

def get_hint(args, console):
    if args.hints:
        try:
            hints = json.load(args.hints)
            if args.target_hostname in hints:
                hint = hints[args.target_hostname]
                console.print(f"[bold green]Using the following hint: '{hint}'")
                return hint
        except Exception:
            console.print("[yellow]Was not able to load hint file")
    return None
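
For context, a hypothetical sketch of how this parser might be consumed. The entry-point name `wintermute.py` and the use of a `rich` Console are assumptions (the `[bold green]` markup in `get_hint` suggests rich), not something this diff confirms:

```python
# hypothetical caller for args.py; assumes the console is a rich Console
from rich.console import Console

from args import parse_args_and_env

if __name__ == "__main__":
    config = parse_args_and_env(Console())
    print(config.target.ip, config.target.user, config.model, config.max_rounds)
```

Since every CLI flag defaults to its .env counterpart, an invocation like `python wintermute.py --target-ip 192.168.56.101 --hints hints.json` overrides only those two values; the hints file is JSON mapping hostnames to hint strings, e.g. `{"debian": "check the cron jobs"}` (file name and hint text made up for illustration).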
File renamed without changes
Binary file added docs/example_run_gpt4.png
14 changes: 14 additions & 0 deletions history_notes.md → docs/history_notes.md
@@ -1,3 +1,17 @@
## updated version using GPT-4 (approx. end of August 2023)

This happened during a recent run:

![Example wintermute run](example_run_gpt4.png)

Some things to note:

- the panel labeled 'my new fact list' is generated by the LLM. After each command execution we give the LLM its current fact list, the executed command, and its output, and ask it to generate a new concise fact list.
- the table contains all executed commands. The columns 'success?' and 'reason' are populated by asking the LLM whether the executed command (and its output) helps with getting root access, and to reason about the command's output (a sketch of this judging step follows below).
- at the bottom you see the last executed command (`/tmp/bash -p`) and its output.

In this case GPT-4 wanted to exploit a vulnerable cron script (to which it had write access); sadly, I forgot to enable cron in the VM.
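
A minimal sketch of how those two columns can be filled; the prompt text and the `query_llm` callable are assumptions for illustration, not the project's actual code:

```python
# hypothetical sketch: ask the LLM whether a command helped and why
from typing import Callable, Tuple

def judge_command(query_llm: Callable[[str], str], cmd: str, output: str) -> Tuple[bool, str]:
    reason = query_llm(
        f"The command `{cmd}` returned:\n{output}\n\n"
        "Does this help with getting root access? Answer 'yes' or 'no', then explain briefly."
    )
    return reason.lower().startswith("yes"), reason
```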

# initial version (tagged as fse23-ivr) using gpt-3.5-turbo

This happened during a recent run:
Binary file modified example_run_gpt4.png
59 changes: 49 additions & 10 deletions handlers.py
@@ -1,26 +1,65 @@
+import dataclasses
 import paramiko
+import re

 from targets.ssh import SSHHostConn

 def handle_cmd(conn, input):
-    result, gotRoot = conn.run(input["cmd"])
-    return input["cmd"], result, gotRoot
+    cmd = cmd_output_fixer(input)
+    result, gotRoot = conn.run(cmd)
+    return cmd, result, gotRoot


-def handle_ssh(target_host, target_hostname, input):
-    user = input["username"]
-    password = input["password"]
+def handle_ssh(target, input):
+    cmd_parts = input.split(" ")
+    assert cmd_parts[0] == "test_credentials"

-    cmd = f"test_credentials {user} {password}\n"
+    if len(cmd_parts) != 3:
+        return input, "didn't provide username/password", False

-    test = SSHHostConn(target_host, target_hostname, user, password)
+    test_target = dataclasses.replace(target, user=cmd_parts[1], password=cmd_parts[2])
+    test = SSHHostConn(test_target)
     try:
         test.connect()
         user = test.run("whoami")[0].strip('\n\r ')
         if user == "root":
-            return cmd, "Login as root was successful\n", True
+            return input, "Login as root was successful\n", True
         else:
-            return cmd, "Authentication successful, but user is not root\n", False
+            return input, "Authentication successful, but user is not root\n", False

     except paramiko.ssh_exception.AuthenticationException:
-        return cmd, "Authentication error, credentials are wrong\n", False
+        return input, "Authentication error, credentials are wrong\n", False
+
+
+def remove_wrapping_characters(cmd, wrappers):
+    if len(cmd) >= 2 and cmd[0] == cmd[-1] and cmd[0] in wrappers:
+        print("will remove a wrapper from: " + cmd)
+        return remove_wrapping_characters(cmd[1:-1], wrappers)
+    return cmd
+
+# often the LLM produces a wrapped command
+def cmd_output_fixer(cmd):
+    if len(cmd) < 2:
+        return cmd
+
+    # unwrap a ```-fenced code block down to its single command line
+    stupidity = re.compile(r"^[ \n\r]*```.*\n(.*)\n```$", re.MULTILINE)
+    result = stupidity.search(cmd)
+    if result:
+        print("this would have been captured by the multi-line regex 1")
+        cmd = result.group(1)
+        print("new command: " + cmd)
+
+    # same for a ~~~-fenced code block
+    stupidity = re.compile(r"^[ \n\r]*~~~.*\n(.*)\n~~~$", re.MULTILINE)
+    result = stupidity.search(cmd)
+    if result:
+        print("this would have been captured by the multi-line regex 2")
+        cmd = result.group(1)
+        print("new command: " + cmd)
+
+    # remove wrapping quotes/backticks and a leading shell prompt
+    cmd = remove_wrapping_characters(cmd, "`'\"")
+
+    if cmd.startswith("$ "):
+        cmd = cmd[2:]
+
+    return cmd
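
To make the unwrapping concrete, a few hypothetical LLM replies and what `cmd_output_fixer` reduces them to (the example strings are made up):

```python
# hypothetical inputs for cmd_output_fixer from handlers.py above
from handlers import cmd_output_fixer

fenced = "~~~sh\nsudo tar -cf /dev/null /etc --checkpoint=1 --checkpoint-action=exec=/bin/sh\n~~~"
print(cmd_output_fixer(fenced))      # fence stripped, inner command kept
print(cmd_output_fixer("`id`"))      # wrapping backticks removed -> id
print(cmd_output_fixer("$ whoami"))  # leading "$ " prompt removed -> whoami
```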
101 changes: 0 additions & 101 deletions helper.py

This file was deleted.
