
Commit bcf63f3

Merge pull request llmware-ai#991 from llmware-ai/update-model-configs
updating model config and prompt wrappers
2 parents 050c128 + e70f5d4 commit bcf63f3

3 files changed: +94 -3 lines changed

llmware/model_configs.py

Lines changed: 20 additions & 2 deletions
@@ -1737,8 +1737,26 @@
                      "system_stop": "<|eot_id|>",
                      "main_start": "<|start_header_id|>user>|end_header_id|>\n",
                      "main_stop": "<|eot_id|>",
-                     "start_llm_response": "<|start_header_id|>assistant<|end_header_id|>\n"}
-                     }
+                     "start_llm_response": "<|start_header_id|>assistant<|end_header_id|>\n"},
+
+    "tiny_llama_chat": {"system_start": "<|system|>", "system_stop": "</s>",
+                        "main_start": "<|user|>", "main_stop": "</s>",
+                        "start_llm_response": "<|assistant|>"},
+
+    "stablelm_zephyr_chat": {"system_start": "", "system_stop": "",
+                             "main_start": "<|user|>", "main_stop": "<|endoftext|>\n",
+                             "start_llm_response": "<|assistant|>"},
+
+    "google_gemma_chat": {"system_start": "", "system_stop": "",
+                          "main_start": "<bos><start_of_turn>user\n",
+                          "main_stop": "<end_of_turn>\n",
+                          "start_llm_response": "<start_of_turn>model"},
+
+    "vicuna_chat": {"system_start": "", "system_stop": "",
+                    "main_start": "USER: ", "main_stop": "",
+                    "start_llm_response": " ASSISTANT:"}
+
+    }

 """ Global default prompt catalog consists of a set of prebuilt useful prompt instructions across a wide range
 of models. Unlike prompt_wrappers, which tend to be an attribute of the model, the prompt catalog can be invoked
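For orientation, each of the new entries follows the same shape as the existing prompt wrappers: start/stop tokens for the system turn, start/stop tokens for the user turn, and the token that opens the model's reply. Below is a minimal standalone sketch (not the llmware API; compose_prompt is a hypothetical helper) of how one of the new wrappers, tiny_llama_chat, would assemble a full prompt string.

# hypothetical helper, illustrating how a prompt wrapper dict is applied
def compose_prompt(wrapper, system_message, user_message):
    prompt = ""
    # wrappers with empty system tokens (e.g., vicuna_chat) simply skip the system turn
    if system_message and wrapper["system_start"]:
        prompt += wrapper["system_start"] + system_message + wrapper["system_stop"]
    prompt += wrapper["main_start"] + user_message + wrapper["main_stop"]
    # generation is expected to begin immediately after this token
    prompt += wrapper["start_llm_response"]
    return prompt

tiny_llama_chat = {"system_start": "<|system|>", "system_stop": "</s>",
                   "main_start": "<|user|>", "main_stop": "</s>",
                   "start_llm_response": "<|assistant|>"}

print(compose_prompt(tiny_llama_chat, "You are a helpful assistant.", "What is a prompt wrapper?"))
# -> <|system|>You are a helpful assistant.</s><|user|>What is a prompt wrapper?</s><|assistant|>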

llmware/models.py

Lines changed: 5 additions & 1 deletion
@@ -89,7 +89,8 @@ class _ModelRegistry:
     # most fine-tuned models require a specific prompt wrapping that was used in the fine-tuning process
     # we are treating these "prompt_wrappers" as core attributes of the model
     prompt_wrappers = ["alpaca", "human_bot", "chatgpt", "<INST>", "open_chat", "hf_chat", "chat_ml", "phi_3",
-                       "llama_3_chat"]
+                       "llama_3_chat","tiny_llama_chat","stablelm_zephyr_chat", "google_gemma_chat",
+                       "vicuna_chat"]

     registered_wrappers = global_model_finetuning_prompt_wrappers_lookup

@@ -175,8 +176,11 @@ def validate(cls, model_card_dict):
         if "model_family" not in model_card_dict:
             return False

+        # removing this condition from validation - provides more extensibility in creating new model classes
+        """
         if model_card_dict["model_family"] not in cls.model_classes:
             return False
+        """

         if "prompt_wrapper" in model_card_dict:

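Illustration only: with the model_family check commented out of validate(), a model card that declares a custom, not-yet-built-in model class is no longer rejected at this step. The card below is a made-up example; only the "model_family" and "prompt_wrapper" keys come from the validation code in this diff, and the wrapper name is one of the entries registered above.

# hypothetical model card - values are illustrative, not part of the repo
custom_model_card = {
    "model_name": "my-org/my-finetuned-model",    # made-up identifier
    "model_family": "MyCustomGenerativeModel",    # previously had to appear in cls.model_classes
    "prompt_wrapper": "vicuna_chat"               # one of the wrappers added in this commit
}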
llmware/util.py

Lines changed: 69 additions & 0 deletions
@@ -748,6 +748,75 @@ def find_match(self, key_term, sentence):

         return matches_found

+    def locate_query_match(self,query, core_text):
+
+        """ Utility function to locate the character-level match of a query inside a core_text. """
+
+        matches_found = []
+
+        # edge case - but return empty match if query is null
+        if not query:
+            return matches_found
+
+        b = CorpTokenizer(one_letter_removal=False, remove_stop_words=False, remove_punctuation=False,
+                          remove_numbers=False)
+
+        query_tokens = b.tokenize(query)
+
+        for x in range(0, len(core_text)):
+            match = 0
+            for key_term in query_tokens:
+                if len(key_term) == 0:
+                    continue
+
+                if key_term.startswith('"'):
+                    key_term = key_term[1:-1]
+
+                if core_text[x].lower() == key_term[0].lower():
+                    match += 1
+                    if (x + len(key_term)) <= len(core_text):
+                        for y in range(1, len(key_term)):
+                            if key_term[y].lower() == core_text[x + y].lower():
+                                match += 1
+                            else:
+                                match = -1
+                                break
+
+                        if match == len(key_term):
+                            new_entry = [x, key_term]
+                            matches_found.append(new_entry)
+
+        return matches_found
+
+    def highlighter(self,matches, core_string, highlight_start_token="<b>",
+                    highlight_end_token="</b>"):
+
+        """ Utility function to 'highlight' a selected token, based on matches, typically found
+        in locate_query_match function - useful for visual display of a matching keyword. """
+
+        # assumes by default:
+        # highlight_start_token = "<b>"
+        # highlight_end_token = "</b>"
+
+        updated_string = ""
+        cursor_position = 0
+
+        for mat in matches:
+            starter = mat[0]
+            keyword = mat[1]
+
+            updated_string += core_string[cursor_position:starter]
+            updated_string += highlight_start_token
+            updated_string += keyword
+            updated_string += highlight_end_token
+
+            cursor_position = starter + len(keyword)
+
+        if cursor_position < len(core_string):
+            updated_string += core_string[cursor_position:]
+
+        return updated_string
+
     def package_answer(self, raw_query, text_core, answer_window, x):

         """ Takes a raw_query, text and answer_window as input and returns a context window around matches
