Skip to content

Commit 2202589

Browse files
authored
Merge pull request llmware-ai#474 from llmware-ai/windows-cuda-support
update windows cuda config options
2 parents 483e600 + fbda27f commit 2202589

File tree

2 files changed

+12
-3
lines changed

2 files changed

+12
-3
lines changed

llmware/gguf_configs.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -238,9 +238,9 @@ def add_ctypes_declarations (_lib):
238238
llama_new_context_with_model.argtypes = [llama_model_p_ctypes, llama_context_params]
239239
llama_new_context_with_model.restype = llama_context_p_ctypes
240240

241-
llama_numa_init = _lib.llama_numa_init
242-
llama_numa_init.argtypes = [ctypes.c_int]
243-
llama_numa_init.restype = None
241+
# llama_numa_init = _lib.llama_numa_init
242+
# llama_numa_init.argtypes = [ctypes.c_int]
243+
# llama_numa_init.restype = None
244244

245245
llama_free = _lib.llama_free
246246
llama_free.argtypes = [llama_context_p_ctypes]
@@ -810,6 +810,9 @@ class GGUFConfigs:
810810
# --Linux / Windows - checks for cuda availability
811811
"use_gpu": True,
812812

813+
# note this will be used on Windows and Linux, but not Mac
814+
"n_gpu_layers": 50,
815+
813816
"backend_initialized": False,
814817

815818
"max_output_tokens": 256,

llmware/models.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4683,6 +4683,12 @@ def load_model_for_inference(self, model_repo_path, model_card = None):
46834683
self.model_params.use_mmap = True
46844684
self.model_params.use_mlock = False
46854685

4686+
if self.use_gpu:
4687+
# on darwin, keep at 0 - on win32 and linux - set to 50 by default (e.g., shift all model layers to GPU)
4688+
if sys.platform.lower() == "win32" or sys.platform.lower().startswith("linux"):
4689+
4690+
self.model_params.n_gpu_layers = GGUFConfigs().get_config("n_gpu_layers")
4691+
46864692
# update context parameters
46874693
self.context_params = self._lib.llama_context_default_params()
46884694
self.context_params.n_ctx = 2048

0 commit comments

Comments (0)