Commit a2edd18

Update graph.py
1. Error Handling: Added try-except blocks to handle potential errors when loading stop words, reading files, and writing JSON.
2. Logging: Used the logging module to log errors instead of printing them, providing better control over log levels and outputs.
3. Type Annotations: Added type annotations to the bow_locator, build_graph, and get_unique_vocab_lookup methods for clarity.
4. Docstrings: Ensured that all methods have clear docstrings explaining their purpose and return types.
1 parent efee6d7 commit a2edd18
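Note: the diff below calls logger.error(...) but does not show where the logger comes from. A minimal sketch of the conventional module-level setup such calls assume (the name and configuration here are assumptions, not shown in this commit):

import logging

# Module-level logger, conventionally declared near the top of graph.py.
# getLogger(__name__) scopes records to the module's dotted name when
# imported as part of the llmware package.
logger = logging.getLogger(__name__)

# Example application-side configuration (an assumption; applications may differ):
logging.basicConfig(
    level=logging.ERROR,
    format="%(asctime)s %(name)s %(levelname)s: %(message)s",
)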

File tree

1 file changed: +48 −20 lines


llmware/graph.py

Lines changed: 48 additions & 20 deletions
@@ -1,4 +1,3 @@
-
 # Copyright 2023-2024 llmware
 
 # Licensed under the Apache License, Version 2.0 (the "License"); you
@@ -65,14 +64,22 @@ def __init__(self, library):
         self.pre_initialization_bow_data = {}
         self.post_initialization_bow_data = {}
 
-        # create stop words txt file in nlp path
-        self.stop_words = Utilities().load_stop_words_list(self.library.nlp_path)
+        # Load stop words with error handling
+        try:
+            self.stop_words = Utilities().load_stop_words_list(self.library.nlp_path)
+        except Exception as e:
+            logger.error(f"Failed to load stop words: {e}")
+            self.stop_words = []
 
-        # load graph c modules - note: if any issues loading module, will be captured in get_module_graph_functions()
-        self._mod_utility = Utilities().get_module_graph_functions()
+        # Load graph C modules with error handling
+        try:
+            self._mod_utility = Utilities().get_module_graph_functions()
+        except Exception as e:
+            logger.error(f"Failed to load graph utility module: {e}")
+            self._mod_utility = None
 
     # new method - used to track 'counter' inside the bow files for incremental read/write/analysis
-    def bow_locator(self):
+    def bow_locator(self) -> tuple:
 
         """ Internal utility method used to enable scalability across multiple underlying BOW (Bag-of-Word)
         files which are created by the graph module. """
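With these fallbacks the constructor no longer raises, so downstream code must tolerate self.stop_words == [] and self._mod_utility is None. A hypothetical caller-side guard (not part of this commit) might look like:

    def _graph_module_ready(self) -> bool:
        # Hypothetical helper (not in this commit): any method that
        # dereferences the C module should check the None fallback
        # set in __init__ before using it.
        if self._mod_utility is None:
            logger.error("graph C module unavailable - skipping graph operation")
            return False
        return True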
@@ -103,16 +110,26 @@ def bow_locator(self):
                          f"{top_bow_file}")
             bow_index = 0
 
-        fp = open(os.path.join(dataset_fp, top_bow_file), "r", encoding='utf-8')
-        fp.seek(0, 2)
-        bow_byte_index = fp.tell()
-        fp.seek(0, 0)  # rewind
-        bow_tokens = len(fp.read().split(","))
-        fp.close()
+        try:
+            fp = open(os.path.join(dataset_fp, top_bow_file), "r", encoding='utf-8')
+            fp.seek(0, 2)
+            bow_byte_index = fp.tell()
+            fp.seek(0, 0)  # rewind
+            bow_tokens = len(fp.read().split(","))
+            fp.close()
+        except FileNotFoundError:
+            logger.error(f"BOW file not found: {top_bow_file}")
+            return 0, 0, 0, [], True
+        except Exception as e:
+            logger.error(f"Error reading BOW file: {e}")
+            return 0, 0, 0, [], True
+        finally:
+            if 'fp' in locals():
+                fp.close()
 
         return bow_index, bow_byte_index, bow_tokens, bow_files, no_bow
 
-    def build_graph(self):
+    def build_graph(self) -> dict:
 
         """ Generates multiple valuable nlp artifacts in the library's /nlp folder path, with the
         primary objective of generating the co-occurrence matrix. """
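A reading note on the hunk above: fp.close() runs in the try body and again in the finally block; Python file objects make close() idempotent, so the double close is harmless. An equivalent, arguably tidier form using a context manager (an alternative sketch, not what the commit ships; dataset_fp and top_bow_file come from the surrounding method):

    try:
        with open(os.path.join(dataset_fp, top_bow_file), "r", encoding="utf-8") as fp:
            fp.seek(0, 2)               # jump to end of file
            bow_byte_index = fp.tell()  # byte length of the BOW file
            fp.seek(0, 0)               # rewind to the start
            bow_tokens = len(fp.read().split(","))  # comma-delimited token count
    except FileNotFoundError:
        logger.error(f"BOW file not found: {top_bow_file}")
        return 0, 0, 0, [], True
    except OSError as e:
        logger.error(f"Error reading BOW file: {e}")
        return 0, 0, 0, [], True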
@@ -186,9 +203,11 @@ def build_graph(self):
         graph_summary.update({"time_stamp": ts})
 
         # write to manifest.json for knowledge graph
-        json_dict = json.dumps(graph_summary,indent=2)
-        with open(os.path.join(self.library.nlp_path,"manifest.json"),"w", encoding='utf-8') as outfile:
-            outfile.write(json_dict)
+        try:
+            with open(os.path.join(self.library.nlp_path,"manifest.json"), "w", encoding='utf-8') as outfile:
+                outfile.write(json.dumps(graph_summary, indent=2))
+        except Exception as e:
+            logger.error(f"Failed to write manifest.json: {e}")
 
         return graph_summary
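Since a failed write now only logs and build_graph() still returns graph_summary, a partially written manifest.json is the main remaining risk. A hypothetical hardening (not in this commit) is a write-and-rename, which is atomic on POSIX filesystems:

import json
import os
import tempfile

def write_manifest_atomically(nlp_path: str, graph_summary: dict) -> None:
    # Hypothetical helper (not in this commit): write to a temp file in the
    # same directory, then rename over manifest.json so readers never see a
    # half-written file.
    fd, tmp_path = tempfile.mkstemp(dir=nlp_path, suffix=".json")
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as outfile:
            outfile.write(json.dumps(graph_summary, indent=2))
        os.replace(tmp_path, os.path.join(nlp_path, "manifest.json"))
    except Exception as e:
        logger.error(f"Failed to write manifest.json: {e}")
        if os.path.exists(tmp_path):
            os.remove(tmp_path)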

@@ -833,16 +852,25 @@ def get_unique_vocab_len(self):
 
         return len(self.get_unique_vocab_lookup())
 
-    def get_unique_vocab_lookup(self):
+    def get_unique_vocab_lookup(self) -> dict:
 
         """ Returns the unique vocab list found in the Library corpus. """
 
         if self.library.get_knowledge_graph_status() != "yes":
             self.build_graph()
 
-        j = json.load(open(os.path.join(self.library.nlp_path,"vocab_lookup.json"), "r", encoding='utf-8'))
-
-        return j
+        try:
+            with open(os.path.join(self.library.nlp_path, "vocab_lookup.json"), "r", encoding='utf-8') as file:
+                return json.load(file)
+        except FileNotFoundError:
+            logger.error("vocab_lookup.json file not found.")
+            return {}
+        except json.JSONDecodeError:
+            logger.error("Error decoding JSON from vocab_lookup.json.")
+            return {}
+        except Exception as e:
+            logger.error(f"Unexpected error: {e}")
+            return {}
 
     def get_unique_vocab_reverse_lookup(self):
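The empty-dict fallback changes failure behavior for callers: previously a missing or corrupt vocab_lookup.json raised an exception, while now get_unique_vocab_len() quietly reports 0. A hypothetical usage sketch (assuming the enclosing class is Graph and that library is an initialized llmware Library):

graph = Graph(library)

vocab = graph.get_unique_vocab_lookup()
if not vocab:
    # {} can now mean "file missing", "bad JSON", or a genuinely empty corpus;
    # callers that must distinguish these cases should inspect the error log.
    logger.warning("vocab lookup unavailable - knowledge graph may not be built")
else:
    print(f"unique vocab size: {len(vocab)}")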
