
Commit e2ed131

Merge pull request llmware-ai#869 from llmware-ai/update-prompt-fact-check
update prompt fact checking
2 parents: e7b7192 + b6c11c7


llmware/prompts.py

Lines changed: 20 additions & 22 deletions
@@ -57,7 +57,7 @@ class Prompt:
         The name of the llm to be used.
 
     tokenizer : object, default=None
-        The tokenzier to use. The default is to use the tokenizer specified by the ``Utilities`` class.
+        The tokenizer to use. The default is to use the tokenizer specified by the ``Utilities`` class.
 
     model_card : dict, default=None
         A dictionary describing the model to be used. If the dictionary contains the key ``model_name``,
@@ -68,7 +68,7 @@ class Prompt:
         A ``Library`` object.
 
     account_name : str, default="llmware"
-        The name of the account to be used. This is one of the states a the prompt.
+        The name of the account to be used. This is one of the attributes of the prompt.
 
     prompt_id : int, default=None
         The ID of the prompt. If a prompt ID is given, then the state of this prompt is loaded. Otherwise, a
@@ -100,17 +100,8 @@ class Prompt:
         Sets whether the large language model should follow instructions. Note that this has an effect
         if and only if the model specified has a version that is trained to follow instructions.
 
-    Examples
-    ----------
-    >>> import os
-    >>> from llmware.prompts import Prompt
-    >>> openai_api_key = os.environ.get("OPENAI_API_KEY", "")
-    >>> prompter = Prompt(llm_name='gpt-4', llm_api_key=openai_api_key)
-    >>> prompt = 'How old is my brother?'
-    >>> context = 'My brother is 20 years old and my sister is 1.5 times older'
-    >>> response = prompter.prompt_main(prompt=prompt, context=context)
-    >>> response['llm_response']
     """
+
     def __init__(self, llm_name=None, tokenizer=None, model_card=None, library=None, account_name="llmware",
                  prompt_id=None, save_state=True, llm_api_key=None, llm_model=None, from_hf=False,
                  prompt_catalog=None, temperature=0.3, prompt_wrapper="human_bot", instruction_following=False):
@@ -1667,9 +1658,9 @@ class QualityCheck:
     >>> library = Library().create_new_library('prompt_with_sources')
     >>> sample_files_path = Setup().load_sample_files(over_write=False)
     >>> parsing_output = library.add_files(os.path.join(sample_files_path, "Agreements"))
-    >>> prompt = Prompt().load_model('llmware/bling-1b-0.1')
-    >>> prompt.add_source_document(os.path.join(sample_files_path, "Agreements"), 'Apollo EXECUTIVE EMPLOYMENT AGREEMENT.pdf')
-    >>> result = prompt.prompt_with_source(prompt='What is the base salery amount?', prompt_name='default_with_context')
+    >>> prompter = Prompt().load_model('llmware/bling-1b-0.1')
+    >>> prompter.add_source_document(os.path.join(sample_files_path, "Agreements"), 'Apollo EXECUTIVE EMPLOYMENT AGREEMENT.pdf')
+    >>> result = prompter.prompt_with_source(prompt='What is the base salery amount?', prompt_name='default_with_context')
     >>> result[0]['llm_response']
     ' $1,000,000.00'
     >>> ev_numbers = prompter.evidence_check_numbers(result)
@@ -1752,7 +1743,6 @@ def fact_checker_numbers (self, response_dict):
         ai_gen_output = response_dict["llm_response"]
         evidence = response_dict["evidence"]
         evidence_metadata = response_dict["evidence_metadata"]
-        add_markup= False
 
         # looks for numbers only right now
         llm_response_markup = ""
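
The in-code comment above says the checker looks for numbers only right now. This diff does not show how ai_numbers is populated, so the following is a hypothetical, regex-based sketch of that kind of extraction (tolerating a leading $ and thousands separators), not the llmware implementation:

import re

def extract_numbers(text):
    # hypothetical helper: optional '$', digits with optional
    # thousands separators, and an optional decimal part
    pattern = r"\$?(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?"
    return [float(m.lstrip("$").replace(",", "")) for m in re.findall(pattern, text)]

print(extract_numbers("base salary of $1,000,000.00 per year"))   # [1000000.0]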
@@ -1768,6 +1758,7 @@ def fact_checker_numbers (self, response_dict):
         tokens = ai_gen_output.split(" ")
         percent_on = -1
         char_counter = 0
+
         for i, tok in enumerate(tokens):
 
             tok_len = len(tok)
@@ -1832,7 +1823,7 @@ def fact_checker_numbers (self, response_dict):
             if tok.endswith("\n"):
                 tok = tok[:-1]
 
-            current_str_token = tok
+            # current_str_token = tok
 
             if tok.endswith(",") or tok.endswith(".") or tok.endswith("-") or tok.endswith(";") or \
                     tok.endswith(")") or tok.endswith("]"):
@@ -1847,6 +1838,9 @@ def fact_checker_numbers (self, response_dict):
 
             tok = re.sub("[,-]","",tok)
 
+            # current_str_token set to the 'cleaned' tok
+            current_str_token = tok
+
             if Utilities().isfloat(tok):
                 tok = float(tok)
                 if percent_on == 1:
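
The substantive change in the two hunks above is that current_str_token is now captured after the token has been cleaned (trailing newline and punctuation stripped, commas and hyphens removed) instead of before. A minimal sketch of the cleaning path, assuming whitespace-split tokens and substituting a plain float() try for Utilities().isfloat():

import re

def clean_token(tok):
    # sketch of the steps visible in the hunks above, not the library code
    if tok.endswith("\n"):
        tok = tok[:-1]
    if tok and tok[-1] in ",.-;)]":       # single trailing-punctuation strip
        tok = tok[:-1]
    tok = re.sub("[,-]", "", tok)         # drop thousands separators / hyphens
    current_str_token = tok               # recorded post-cleaning, as in the patch
    try:
        return current_str_token, float(tok)
    except ValueError:
        return current_str_token, None

print(clean_token("1,000,000.00,"))       # ('1000000.00', 1000000.0)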
@@ -1855,6 +1849,7 @@
                     percent_on = -1
 
             if tok == ai_numbers[x]:
+
                 match_token = i
 
                 if i > 10:
@@ -1911,7 +1906,9 @@ def fact_checker_numbers (self, response_dict):
                     evidence_char_counter += tok_len + 1
 
             if match_tmp == -1:
-                new_fact_check_entry = {"fact": current_str_token,
+
+                # change here - replace 'current_str_token'
+                new_fact_check_entry = {"fact": str(ai_numbers[x]),
                                         "status": "Not Confirmed",
                                         "text": "",
                                         "page_num": "",
@@ -1922,6 +1919,11 @@ def fact_checker_numbers (self, response_dict):
 
         # provide markup highlighting confirmations and non-confirmations
         confirm_updates = []
+
+        # add_markup feature turned to OFF by default
+        # -- may be reworked or deleted in future releases
+        add_markup = False
+
         if add_markup:
             for i,f in enumerate(fact_check):
 
@@ -1973,10 +1975,6 @@ def source_reviewer (self, response_dict):
         evidence_metadata = response_dict["evidence_metadata"]
         add_markup = False
 
-        # insert test starts here
-        # text_snippet_dict = self._evidence_token_matcher(ai_tmp_output, evidence_batch)
-        # end - insert test here
-
         min_th = 0.25
         conclusive_th = 0.75
         min_match_count = 3
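
The surviving constants suggest a match-ratio scheme: matches below min_th do not count as support, ratios above conclusive_th are treated as conclusive, and min_match_count gates low-evidence cases. The body of source_reviewer is not shown in this diff, so this is a hypothetical sketch of how such thresholds are commonly applied, not the actual logic:

def classify_source(match_count, token_count,
                    min_th=0.25, conclusive_th=0.75, min_match_count=3):
    # hypothetical classifier built from the constants above
    if match_count < min_match_count:
        return "inconclusive"
    ratio = match_count / max(token_count, 1)
    if ratio >= conclusive_th:
        return "strong match"
    if ratio >= min_th:
        return "possible match"
    return "no match"

print(classify_source(9, 10))    # strong match
print(classify_source(3, 10))    # possible match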
