from __future__ import annotations
import logging
import os
import re
import shlex
import subprocess
import textwrap
import datetime
from concurrent import futures
from contextlib import redirect_stdout
from functools import partial
from pathlib import Path
from collections import Counter
from typing import Iterable
import click
logger = logging.getLogger(__name__)
logging.basicConfig()
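# Overall flow: download the day's puzzle and input, prompt the model for python
# that solves it, exec the generated code, tally the printed answers across many
# runs, and submit once one answer clearly leads the others.
#
# Prompts are sent through the `openai` CLI (`openai api completions.create`
# against text-davinci-003); the CLI is assumed to pick up the API key from the
# environment (normally OPENAI_API_KEY).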
LLM_COMMAND_PREFIX = (
f"openai api completions.create --engine=text-davinci-003 --max-tokens=2000 -p "
)
RESPONSES_PATH = "llm_responses"
REQUESTS_PATH = "llm_requests"
OUTPUTS_PATH = "python_outputs"
PUZZLE_FILE = "puzzle.md"
THRESHOLD = 2
def year_day_path(year: int, day: int) -> Path:
return Path(f"{year}-{day}")
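# Fetch the input and puzzle text into the current directory. This shells out to
# the `aoc` CLI (aoc-cli), which is assumed to already be configured with an
# Advent of Code session token.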
def write_instructions(day: int, year: int) -> None:
subprocess.run(
f"aoc download --overwrite --input-file=input.txt --day={day} --year={year}",
shell=True,
capture_output=True,
check=True,
text=True,
    )
subprocess.run(
f"aoc read --overwrite --puzzle-file=puzzle.md --day={day} --year={year}",
shell=True,
capture_output=True,
check=True,
text=True,
    )
def read_instructions(day: int) -> str:
return Path(PUZZLE_FILE).read_text()
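# Strip puzzle.md down to just Part One: drop the day header and "-----" rules,
# remove the "Your puzzle answer was ..." lines, and cut everything from Part Two on.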
def extract_part_1(text):
    text = re.sub(".*--- Day.*", "", text)
    text = re.sub(".*-----", "", text)
    # `aoc read` escapes the leading dashes, so turn `\--- Part` back into `--- Part`
    text = re.sub(r"\\(--- Part)", r"\1", text)
    text = text.replace("\n\n\n", "\n")
    text, _ = re.subn("Your puzzle answer was.*", "", text)
    return text.split("--- Part Two ---")[0]
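# Keep the full puzzle text (both parts), drop the answer lines and the
# completion footer, and prepend a "--- Part One ---" header so the two parts
# are clearly delimited for the model.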
def extract_part_2(text):
text = re.sub(".*--- Day.*", "", text)
text = re.sub(".*-----", "", text)
text, _ = re.subn("Your puzzle answer was.*", "", text)
# awkward way of transforming ` \--- Part Two ---` into `--- Part Two ---`
text = re.sub(r"(.*)\\(--- Part)", r"\1\2", text)
text = text.split("If you still want to see it,")[0]
text = text.split("Both parts of this puzzle are complete")[0].strip()
    # The remaining text has a Part Two header but no Part One header, so add one
    # to make the structure clearer
#
# TODO: weirdly this once caused it to do much worse. Need to test that; it's
# very possible it's something else that caused it.
return f"--- Part One ---\n\n{text}"
def amend_instructions(text, part):
# Telling it it's an AoC problem seems to help slightly (I got that advice from
# GPT-Chat, no joke)
#
# Adding "difficult" seemed to help at one point, but doesn't seem to any longer
llm_prefix = (
"Here's an Advent of Code puzzle, which is a cryptic puzzle told through "
"an imaginary story. You need to write code which solves it. The description "
"includes an example of solving a simplified version of the puzzle.\n\n-----\n"
)
# "with any relevant details on the same line" seems to help (without saying the
# same line, it'll do things like print every value as it's looping).
llm_instructions = (
f"Now write python3 code which prints the correct answer"
f"{'' if part == 1 else ' to Part Two'}. "
"Print any information which supports your answer on the same line as the answer. "
"The input is in a file `input.txt`. "
"\n\n"
"```python"
)
# I also tried prompts like these, but they didn't seem to work as well:
#
# This is a difficult problem! Think about it carefully, step-by-step. If you're not \
# confident, have the python print "I'm not confident" instead.
# llm_instructions = f"""
# Write python3 code which prints the correct answer{'' if part == 1 else ' to Part Two'}, \
# as a sentence. Include any other useful numbers in the sentence. The input is in a
# file `input.txt`.
#
# It might be more difficult than it looks, so go step-by-step.
# ```python
# """
text = f"{llm_prefix}{textwrap.indent(text, ' ')}\n\n-----\n\n{llm_instructions}"
(Path(REQUESTS_PATH) / f"part-{part}.txt").write_text(text)
return shlex.quote(text)
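# One attempt: build the prompt for the requested part, ask the model for code,
# save the raw response, exec it with stdout redirected to a per-run file, and
# return the captured output (or None if anything failed).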
def do_part(n: int, day: int, part: int) -> str | None:
# TODO: this part can be split up; doesn't need to be done by each process (but it's
# fast so not a big impact)
instructions = read_instructions(day)
if part == 1:
instructions = amend_instructions(extract_part_1(instructions), part=part)
elif part == 2:
instructions = amend_instructions(extract_part_2(instructions), part=part)
else:
raise ValueError
try:
llm_response = request_python_from_llm(instructions)
except Exception as e:
print(f"Run {n} didn't generate a python excerpt")
logger.info(f"Run {n} didn't generate a python excerpt")
(Path(RESPONSES_PATH) / f"part_{part}_{n}.error").write_text(str(e))
return None
(Path(RESPONSES_PATH) / f"part_{part}_{n}.py").write_text(llm_response)
    try:
        # Run the code, writing the output to a file. Close the file before reading
        # it back so the script's output is actually flushed to disk.
        f = Path(OUTPUTS_PATH) / f"part_{part}_{n}.txt"
        with f.open("w+") as output_file, redirect_stdout(output_file):
            exec(llm_response)
        return f.read_text()
except Exception as e:
print(f"Run {n} failed with `{e}`")
logger.info(f"Run {n} failed with `{e}`")
return None
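# Shell out to the `openai` CLI and pull the first ```python fenced block out of
# the completion; anything else is treated as a failure.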
def request_python_from_llm(instructions) -> str:
llm_command = f"{LLM_COMMAND_PREFIX} {instructions}"
    llm_response = subprocess.run(
        llm_command,
        shell=True,
        capture_output=True,
        text=True,
    )
    if llm_response.returncode == 0:
        try:
            return llm_response.stdout.split("```python")[1].split("```")[0]
        except Exception:
            raise ValueError(
                f"Couldn't parse python from LLM: {llm_response.stdout}, {llm_response.stderr}"
            )
    else:
        raise ValueError(
            f"Error code from openai: {llm_response.stdout}, {llm_response.stderr}"
        )
def parse_answer(text) -> Iterable[str]:
    """
    Returns the numbers from the first line, skipping values that are probably
    mistakes.
    """
    if not text:
        return []
    # Only grab the first line. Some scripts will write lots, but most only
    # write one (we ask it to only write one).
    line = text.splitlines()[0]
    numbers = re.findall(r"\d+", line)
    # Sometimes there are multiple numbers; include all of them as long as there
    # aren't something like 10. Also filter out `0` and `1`, as they're probably
    # mistakes.
    if numbers and len(numbers) < 10:
        return [answer for answer in numbers if answer not in ("0", "1")]
    return []
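# Re-tally answers from outputs saved by earlier runs. Not called from `run`;
# presumably a helper for re-checking a day's results after the fact.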
def read_results(part: int) -> Counter:
c: Counter = Counter()
for n in range(1, 100):
f = Path(OUTPUTS_PATH) / f"part_{part}_{n}.txt"
if not f.exists():
continue
answer = parse_answer(f.read_text())
if answer:
c.update(answer)
return c
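# Submit an answer for the given part via the `aoc` CLI.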
def submit_result(day, year, part, answer):
logger.info(f"Submitting {answer} for part {part}")
return subprocess.run(
f"aoc submit --day={day} --year={year} {part} {answer}",
shell=True,
capture_output=True,
check=True,
text=True,
).stdout
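# The voting loop: fan `runs` attempts out across a process pool, tally every
# parsed answer, and submit as soon as the most common answer leads the
# runner-up by at least `threshold`.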
def run_parallel(
day: int,
year: int,
part: int,
stop_when_submitted: bool,
n_workers: int,
runs: int,
threshold: int = THRESHOLD,
):
print(f"Starting {n_workers} workers")
c: Counter = Counter()
submitted = False
func = partial(do_part, day=day, part=part)
with futures.ProcessPoolExecutor(max_workers=n_workers) as executor:
fs = []
        # If we can't do it in `runs` attempts, we're probably not going to do it at all
for n in range(runs):
fs.append(executor.submit(func, n))
for f in futures.as_completed(fs):
result = f.result()
logger.debug(f"Got result {result}")
answer = parse_answer(result)
if answer:
c.update(answer)
if submitted:
print(
f"Already submitted but continuing collecting results. Current counts: {c}"
)
continue
            # Only submit if the top answer is `threshold` or more above the next answer
            top_results = c.most_common(2)
            if len(top_results) >= 1:
                top = top_results[0][1]
                # Only one result, so use zero for the runner-up
                runner_up = top_results[1][1] if len(top_results) == 2 else 0
                if top - runner_up >= threshold:
print(f"Submitting {top_results[0][0]}. Counts were {c}")
out = submit_result(day, year, part, c.most_common(1)[0][0])
print(out)
submitted = True
if stop_when_submitted:
print(f"Now stopping on part {part}. Final results: {c}")
# We can't seem to cancel everything, so this hangs until all
# the running futures are complete. That's annoying when we want
# to start part 2 after submitting part 1. Looks not that easy
# to do
# https://stackoverflow.com/questions/29177490/how-do-you-kill-futures-once-they-have-started
executor.shutdown(wait=False, cancel_futures=True)
return
print(f"No answer hit the threshold, not submitting. Current counts: {c}")
print(f"Reached all attempts without success, stopping. Final results: {c}")
return
#def get_year():
# return int(
# subprocess.run("date +%Y", shell=True, capture_output=True, text=True).stdout
# )
def get_year():
return datetime.datetime.now().year
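# Example invocation (hypothetical values; assumes the `aoc` CLI is configured
# with a session token and the `openai` CLI can find an API key):
#
#   python openai.py --day=5 --year=2022 --n-workers=8 --runs=50 --stop-when-submitted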
@click.command()
@click.option("--part", type=int)
@click.option("--day", type=int, required=True)
@click.option("--year", type=int, required=False, default=get_year())
@click.option("--n-workers", type=int, default=1)
@click.option("--runs", type=int, required=False, default=200)
@click.option("--stop-when-submitted", is_flag=True)
def run(
day: int, year: int, part: int, n_workers: int, runs: int, stop_when_submitted: bool
) -> None:
print(f"Running {part=}, {day=}, {year=}")
year_day_path(year, day).mkdir(exist_ok=True)
(year_day_path(year, day) / RESPONSES_PATH).mkdir(exist_ok=True)
(year_day_path(year, day) / OUTPUTS_PATH).mkdir(exist_ok=True)
(year_day_path(year, day) / REQUESTS_PATH).mkdir(exist_ok=True)
    # Slightly hacky way to make the day path the working directory, which matters for
    # executing the generated python: we don't want to confuse the AI by giving it a
    # more complicated path than `input.txt`.
os.chdir(year_day_path(year, day))
write_instructions(day, year)
if part is None:
run_parallel(day, year, 1, stop_when_submitted, n_workers, runs)
run_parallel(day, year, 2, stop_when_submitted, n_workers, runs)
else:
if n_workers == 1:
do_part(0, day, part)
else:
run_parallel(day, year, part, stop_when_submitted, n_workers, runs)
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, force=True)
run()