Skip to content

Commit 494e7cd

Browse files
authored
Merge pull request #8 from ipa-lab/v6
V6
2 parents 5fda594 + f6edf78 commit 494e7cd

18 files changed

+309
-297
lines changed

‎README.md‎

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ series = {ESEC/FSE 2023}
4141

4242
# Example runs
4343

44-
- more can be seen at [history notes](https://github.com/ipa-lab/hackingBuddyGPT/blob/v3/history_notes.md)
44+
- more can be seen at [history notes](https://github.com/ipa-lab/hackingBuddyGPT/blob/v3/docs/history_notes.md)
4545

4646
## updated version using GPT-4
4747

@@ -51,11 +51,11 @@ This happened during a recent run:
5151

5252
Some things to note:
5353

54-
- the panel labeled 'my new fact list' is generated by the LLM. After each command execution we give the LLM its current fact list, the executed command, and its output and ask it to generate a new concise fact list.
55-
- the table contains all executed commands. The columns 'success?' and 'reason' are populated by asking the LLM whether the executed command (and its output) helps with getting root access, as well as to reason about the command's output
56-
- in the bottom you see the last executed command (`/tmp/bash -p`) and it's output.
57-
58-
In this case GPT-4 wanted to exploit a vulnerable cron script (to which it had write access), sadly I forgot to enable cron in the VM.
54+
- initially the current configuration is output. Yay, so many colors!
55+
- "Got command from LLM" shows the generated command while the panel afterwards has the given command as title and the command's output as content.
56+
- the table contains all executed commands. ThinkTime denotes the time that was needed to generate the command (Tokens show the token count for the prompt and its response). StateUpdTime shows the time that was needed to generate a new state (the next column also gives the token count)
57+
- "What does the LLM know about the system?" gives an LLM generated list of system facts. To generate it, it is given the latest executed command (and it's output) as well as the current list of system facts. This is the operation which time/token usage is shown in the overview table as StateUpdTime/StateUpdTokens. As the state update takes forever, this is disabled by default and has to be enabled through a command line switch.
58+
- Then the next round starts. The next given command (`sudo tar`) will lead to a pwn'd system BTW.
5959

6060
## High-Level Description
6161

‎args.py‎

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import argparse
import json
import os

from dataclasses import dataclass
from typing import Optional

from dotenv import load_dotenv
from llms.llm_connection import get_potential_llm_connections
8+
9+
@dataclass
class ConfigTarget:
    """Connection data for the target system that should be attacked over SSH."""

    ip : Optional[str] = None        # IP (or hostname) used for the SSH connection
    hostname : Optional[str] = None  # expected hostname at that IP (safety check)
    user : Optional[str] = None      # SSH username
    password : Optional[str] = None  # SSH password
    os : Optional[str] = None        # target operating system ("linux" or "windows")
    hint : Optional[str] = None      # optional per-host hint handed to the LLM
17+
18+
@dataclass
class Config:
    """Run configuration, assembled from .env defaults and CLI arguments."""

    enable_explanation : bool = False   # let the LLM explain each round's result
    enable_update_state : bool = False  # keep an LLM-generated fact list between rounds

    # forward reference as a string so this class is self-contained at definition time
    target : Optional["ConfigTarget"] = None  # the system under test

    log : str = ':memory:'              # sqlite3 destination for run logs
    max_rounds : int = 10               # maximum number of command rounds
    llm_connection : Optional[str] = None       # which LLM driver to use
    llm_server_base_url : Optional[str] = None  # which LLM server to use
    model : Optional[str] = None        # which LLM model to use
    context_size : int = 4096           # model context size in tokens
    tag : Optional[str] = None          # free-form tag identifying this run
32+
33+
def parse_args_and_env(console) -> Config:
    """Build a Config from .env defaults overridden by CLI arguments.

    Parameters:
        console: rich console used for user-facing output (hint loading).

    Returns:
        A fully populated Config, including the ConfigTarget and an
        optional hint looked up for the target hostname.
    """
    # setup dotenv
    load_dotenv()

    # perform argument parsing
    # for defaults we are using .env but allow overwrite through cli arguments
    parser = argparse.ArgumentParser(description='Run an LLM vs a SSH connection.')
    parser.add_argument('--enable-explanation', help="let the LLM explain each round's result", action="store_true")
    parser.add_argument('--enable-update-state', help='ask the LLM to keep a multi-round state with findings', action="store_true")
    parser.add_argument('--log', type=str, help='sqlite3 db for storing log files', default=os.getenv("LOG_DESTINATION") or ':memory:')
    parser.add_argument('--target-ip', type=str, help='ssh hostname to use to connect to target system', default=os.getenv("TARGET_IP") or '127.0.0.1')
    parser.add_argument('--target-hostname', type=str, help='safety: what hostname to expect at the target IP', default=os.getenv("TARGET_HOSTNAME") or "debian")
    parser.add_argument('--target-user', type=str, help='ssh username to use to connect to target system', default=os.getenv("TARGET_USER") or 'lowpriv')
    parser.add_argument('--target-password', type=str, help='ssh password to use to connect to target system', default=os.getenv("TARGET_PASSWORD") or 'trustno1')
    # NOTE: int(os.getenv(..) or default) -- the previous int(os.getenv(..)) or default
    # raised TypeError when the environment variable was unset (int(None))
    parser.add_argument('--max-rounds', type=int, help='how many cmd-rounds to execute at max', default=int(os.getenv("MAX_ROUNDS") or 10))
    parser.add_argument('--llm-connection', type=str, help='which LLM driver to use', choices=get_potential_llm_connections(), default=os.getenv("LLM_CONNECTION") or "openai_rest")
    parser.add_argument('--target-os', type=str, help='What is the target operating system?', choices=["linux", "windows"], default="linux")
    parser.add_argument('--model', type=str, help='which LLM to use', default=os.getenv("MODEL") or "gpt-3.5-turbo")
    parser.add_argument('--llm-server-base-url', type=str, help='which LLM server to use', default=os.getenv("LLM_SERVER_BASE_URL") or "https://api.openai.com")
    parser.add_argument('--tag', type=str, help='tag run with string', default="")
    parser.add_argument('--context-size', type=int, help='model context size to use', default=int(os.getenv("CONTEXT_SIZE") or 4096))
    parser.add_argument('--hints', type=argparse.FileType('r', encoding='latin-1'), help='json file with a hint per tested hostname', default=None)

    args = parser.parse_args()
    hint = get_hint(args, console)

    target = ConfigTarget(args.target_ip, args.target_hostname, args.target_user, args.target_password, args.target_os, hint)

    return Config(args.enable_explanation, args.enable_update_state, target, args.log, args.max_rounds, args.llm_connection, args.llm_server_base_url, args.model, args.context_size, args.tag)
62+
63+
def get_hint(args, console):
    """Look up a hint for the configured target hostname.

    args.hints is an optional open file handle containing a JSON object
    mapping hostname -> hint string.

    Returns the hint for args.target_hostname, or None if no hints file
    was given, the file could not be parsed, or the hostname has no entry.
    """
    if args.hints:
        try:
            hints = json.load(args.hints)
            if args.target_hostname in hints:
                hint = hints[args.target_hostname]
                console.print(f"[bold green]Using the following hint: '{hint}'")
                return hint
        # a broken hints file should not abort the run -- warn and continue,
        # but don't use a bare except (it would also swallow KeyboardInterrupt)
        except (json.JSONDecodeError, OSError, UnicodeDecodeError):
            console.print("[yellow]Was not able to load hint file")
    return None
File renamed without changes.

‎docs/example_run_gpt4.png‎

130 KB
Loading

‎history_notes.md‎ renamed to ‎docs/history_notes.md‎

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,17 @@
1+
## updated version using GPT-4 (approx. End of August 2023)
2+
3+
This happened during a recent run:
4+
5+
![Example wintermute run](example_run_gpt4.png)
6+
7+
Some things to note:
8+
9+
- the panel labeled 'my new fact list' is generated by the LLM. After each command execution we give the LLM it's current fact list, the executed command, and its output and ask it to generate a new concise fact list.
10+
- the table contains all executed commands. The columns 'success?' and 'reason' are populated by asking the LLM whether the executed command (and its output) helps with getting root access, as well as to reason about the command's output
11+
- in the bottom you see the last executed command (`/tmp/bash -p`) and it's output.
12+
13+
In this case GPT-4 wanted to exploit a vulnerable cron script (to which it had write access), sadly I forgot to enable cron in the VM.
14+
115
# initial version (tagged as fse23-ivr) using gpt-3.5-turbo
216

317
This happened during a recent run:

‎example_run_gpt4.png‎

209 KB
Loading

‎handlers.py‎

Lines changed: 49 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,65 @@
1+
import dataclasses
12
import paramiko
3+
import re
24

35
from targets.ssh import SSHHostConn
46

57
def handle_cmd(conn, input):
    # Execute a single LLM-proposed command over the given connection.
    # The raw LLM output is first cleaned up by cmd_output_fixer (markdown
    # fences, wrapping quotes, leading "$ " prompt) before execution.
    # Returns a (cleaned command, command output, got-root flag) tuple.
    cmd = cmd_output_fixer(input)
    result, gotRoot = conn.run(cmd)
    return cmd, result, gotRoot
811

912

10-
def handle_ssh(target, input):
    """Handle a `test_credentials <user> <password>` command from the LLM.

    Opens a fresh SSH connection to the configured target with the supplied
    credentials and reports whether login worked and whether the resulting
    user is root.

    Returns a (command, result-text, got-root) tuple.
    """
    cmd_parts = input.split(" ")

    # validate explicitly instead of `assert`: asserts are stripped under -O
    if cmd_parts[0] != "test_credentials":
        return input, "not a test_credentials command", False

    if len(cmd_parts) != 3:
        return input, "didn't provide username/password", False

    # clone the target config, swapping in the credentials to be tested
    test_target = dataclasses.replace(target, user=cmd_parts[1], password=cmd_parts[2])
    test = SSHHostConn(test_target)
    try:
        test.connect()
        user = test.run("whoami")[0].strip('\n\r ')
        if user == "root":
            return input, "Login as root was successful\n", True
        else:
            return input, "Authentication successful, but user is not root\n", False

    except paramiko.ssh_exception.AuthenticationException:
        return input, "Authentication error, credentials are wrong\n", False
32+
33+
34+
def remove_wrapping_characters(cmd, wrappers):
    """Strip matching wrapper characters (quotes/backticks) from both ends.

    Repeats until the first and last character differ, are not in
    `wrappers`, or the string becomes too short to be wrapped.
    """
    # need at least two characters to form a matching wrapper pair; the
    # length guard also fixes an IndexError on empty input (e.g. "''")
    while len(cmd) >= 2 and cmd[0] == cmd[-1] and cmd[0] in wrappers:
        cmd = cmd[1:-1]
    return cmd


# often the LLM produces a wrapped command
def cmd_output_fixer(cmd):
    """Clean up a raw LLM-generated command.

    Removes markdown code fences (``` and ~~~), matching wrapping
    quotes/backticks, and a leading "$ " shell prompt.
    """
    if len(cmd) < 2:
        return cmd

    # the LLM likes to wrap commands in markdown code fences; both fence
    # styles share the same pattern, so build it once per fence type
    for fence in ("```", "~~~"):
        fenced = re.compile(r"^[ \n\r]*" + fence + r".*\n(.*)\n" + fence + r"$", re.MULTILINE)
        match = fenced.search(cmd)
        if match:
            cmd = match.group(1)

    cmd = remove_wrapping_characters(cmd, "`'\"")

    # strip a copied-in shell prompt
    if cmd.startswith("$ "):
        cmd = cmd[2:]

    return cmd

‎helper.py‎

Lines changed: 0 additions & 101 deletions
This file was deleted.

0 commit comments

Comments
 (0)