Skip to content

Commit 8c8f0ca

Browse files
DARREN OBERST
authored and committed
updating gguf context window param
1 parent b158e42 commit 8c8f0ca

File tree

2 files changed

+16
-12
lines changed

2 files changed

+16
-12
lines changed

‎llmware/model_configs.py‎

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -356,7 +356,7 @@
356356
# create dragon models
357357
{"model_name": "llmware/dragon-yi-6b-v0", "display_name": "dragon-yi-6b",
358358
"model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo",
359-
"context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot",
359+
"context_window": 4096, "instruction_following": False, "prompt_wrapper": "human_bot",
360360
"temperature": 0.3, "trailing_space": "\n", "link": "https://huggingface.co/llmware/dragon-yi-6b-v0",
361361
"custom_model_files": [], "custom_model_repo": "",
362362
"hf_repo": "llmware/dragon-yi-6b-v0"},
@@ -370,7 +370,7 @@
370370

371371
{"model_name": "llmware/dragon-mistral-7b-v0", "display_name": "dragon-mistral-7b",
372372
"model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo",
373-
"context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot",
373+
"context_window": 4096, "instruction_following": False, "prompt_wrapper": "human_bot",
374374
"temperature": 0.3, "trailing_space": "", "link": "https://huggingface.co/llmware/dragon-mistral-7b-v0",
375375
"custom_model_files": [], "custom_model_repo": "",
376376
"hf_repo": "llmware/dragon-mistral-7b-v0"},
@@ -434,7 +434,7 @@
434434
# deprecated access to dragon-mistral-7b-gguf -> replaced by dragon-mistral-answer-tool
435435
{"model_name": "llmware/dragon-mistral-7b-gguf", "display_name": "dragon-mistral-7b-gguf",
436436
"model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo",
437-
"context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot",
437+
"context_window": 4096, "instruction_following": False, "prompt_wrapper": "human_bot",
438438
"fetch": {"module": "llmware.models", "method": "pull_model_from_hf"},
439439
"validation_files": ["dragon-mistral-7b-q4_k_m.gguf"],
440440
"temperature": 0.3, "trailing_space": "",
@@ -458,7 +458,7 @@
458458
# deprecated access to dragon-yi-6b-gguf -> replaced by dragon-yi-answer-tool
459459
{"model_name": "llmware/dragon-yi-6b-gguf", "display_name": "dragon-yi-6b-gguf",
460460
"model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo",
461-
"context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot",
461+
"context_window": 4096, "instruction_following": False, "prompt_wrapper": "human_bot",
462462
"temperature": 0.3, "trailing_space": "\n",
463463
"gguf_file": "dragon-yi-6b-q4_k_m.gguf",
464464
"gguf_repo": "llmware/dragon-yi-6b-v0",
@@ -469,7 +469,7 @@
469469

470470
{"model_name": "dragon-yi-answer-tool", "display_name": "dragon-yi-6b-answer-tool",
471471
"model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo",
472-
"context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot",
472+
"context_window": 4096, "instruction_following": False, "prompt_wrapper": "human_bot",
473473
"temperature": 0.3, "trailing_space": "\n",
474474
"gguf_file": "dragon-yi.gguf",
475475
"gguf_repo": "llmware/dragon-yi-answer-tool",
@@ -491,7 +491,7 @@
491491

492492
{"model_name": "dragon-mistral-answer-tool", "display_name": "dragon-mistral-answer-tool",
493493
"model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo",
494-
"context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot",
494+
"context_window": 4096, "instruction_following": False, "prompt_wrapper": "human_bot",
495495
"temperature": 0.3, "trailing_space": "",
496496
"gguf_file": "dragon-mistral.gguf",
497497
"gguf_repo": "llmware/dragon-mistral-answer-tool",
@@ -514,7 +514,7 @@
514514

515515
{"model_name": "TheBloke/OpenHermes-2.5-Mistral-7B-GGUF", "display_name": "openhermes-mistral-7b-gguf",
516516
"model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo",
517-
"context_window": 2048, "instruction_following": True, "prompt_wrapper": "chat_ml",
517+
"context_window": 4096, "instruction_following": True, "prompt_wrapper": "chat_ml",
518518
"temperature": 0.3, "trailing_space": "",
519519
"gguf_file": "openhermes-2.5-mistral-7b.Q4_K_M.gguf",
520520
"gguf_repo": "llmware/bonchon",
@@ -525,7 +525,7 @@
525525

526526
{"model_name": "TheBloke/zephyr-7B-beta-GGUF", "display_name": "zephyr-7b-gguf",
527527
"model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo",
528-
"context_window": 2048, "instruction_following": True, "prompt_wrapper": "hf_chat",
528+
"context_window": 4096, "instruction_following": True, "prompt_wrapper": "hf_chat",
529529
"temperature": 0.3, "trailing_space": "",
530530
"gguf_file": "zephyr-7b-beta.Q4_K_M.gguf",
531531
"gguf_repo": "llmware/bonchon",
@@ -536,7 +536,7 @@
536536

537537
{"model_name": "TheBloke/Starling-LM-7B-alpha-GGUF", "display_name": "starling-7b-gguf",
538538
"model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo",
539-
"context_window": 2048, "instruction_following": True, "prompt_wrapper": "open_chat",
539+
"context_window": 8192, "instruction_following": True, "prompt_wrapper": "open_chat",
540540
"temperature": 0.3, "trailing_space": "",
541541
"gguf_file": "starling-lm-7b-alpha.Q4_K_M.gguf",
542542
"gguf_repo": "llmware/bonchon",
@@ -546,7 +546,7 @@
546546
"custom_model_files": [], "custom_model_repo": ""},
547547

548548
{"model_name": "microsoft/Phi-3-mini-4k-instruct-gguf", "display_name": "phi-3-gguf", "model_family": "GGUFGenerativeModel",
549-
"model_category": "generative_local", "model_location": "llmware_repo", "context_windows": 4096,
549+
"model_category": "generative_local", "model_location": "llmware_repo", "context_window": 4096,
550550
"instruction_following": False, "prompt_wrapper": "phi_3", "temperature": 0.3, "trailing_space": "",
551551
"gguf_file": "Phi-3-mini-4k-instruct-q4.gguf",
552552
"gguf_repo": "microsoft/Phi-3-mini-4k-instruct-gguf",

‎llmware/models.py‎

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6610,7 +6610,10 @@ def load_model_for_inference(self, model_repo_path, model_card = None, **kwargs)
66106610

66116611
# update context parameters
66126612
self.context_params = self._lib.llama_context_default_params()
6613-
self.context_params.n_ctx = 2048
6613+
6614+
# sets minimum of 2048, but will extend if context_window is larger (e.g., 4096/8192+)
6615+
self.context_params.n_ctx = max(2048, self.max_total_len)
6616+
66146617
self.context_params.n_batch = self.n_batch
66156618

66166619
if model_card:
@@ -6767,7 +6770,8 @@ def _load_llama_cpp_shared_library(self):
67676770
if fall_back_option:
67686771
try:
67696772

6770-
logger.warning("update: Not successful loading CUDA lib, so reverting to CPU driver.")
6773+
logger.warning("update: Not successful loading GPU-accelerated lib, "
6774+
"so reverting to CPU driver.")
67716775

67726776
return ctypes.CDLL(str(fall_back_option), **cdll_args)
67736777
except:

0 commit comments

Comments
 (0)