
Commit d7fb886

Bug fixes (#331)
* Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update __init__.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update compiler.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Dannightly (#304) * gpt oss inference fix * gpt oss fix bf16 * gpt oss fix bf16 * gpt oss fix bf16 * gpt oss fix bf16 * gpt oss fix bf16 * gpt oss fix bf16 --------- Co-authored-by: DoubleMathew <mmathew23@gmail.com> * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Fix Flex Attention autotuning * Update patching_utils.py * Update patching_utils.py * Update patching_utils.py * Update mxfp4.py * Update mxfp4.py * Update gpt_oss.py * Update attention_sink.py * Update patching_utils.py * Update attention_sink.py * Update gpt_oss.py * prefer_nd_tiling * Update patching_utils.py * flex_attention_with_sink * Compile Flex Attention * Update mxfp4.py * Update mxfp4.py * Update mxfp4.py * Update mxfp4.py * Update gpt_oss.py * bitsandbytes patch * Update bitsandbytes.py * Update gpt_oss.py * Inplace ops * Update gpt_oss.py * has_static_cache * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update attention_sink.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update attention_sink.py * Update attention_sink.py * Update rl_replacements.py * Update rl_replacements.py * Update rl_replacements.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * torch compile * Update attention_sink.py * Update common.py * Update common.py * Patches * Compiled mask creation * Update attention_sink.py * Update gpt_oss.py * Update gpt_oss.py * Revert * Update gpt_oss.py * Update gpt_oss.py * Fix up * Update attention_sink.py * Update attention_sink.py * Update utils.py * Update attention_sink.py * Update attention_sink.py * Retry * Update gpt_oss.py * Update gpt_oss.py * Fix Flex * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Bug fixes * Update patching_utils.py * Update patching_utils.py * Update patching_utils.py * Update rl_replacements.py * Update patching_utils.py * Update patching_utils.py * Update patching_utils.py * flash attn * Update gpt_oss.py * Update __init__.py * Update attention_sink.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py 
* Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * dropout_p * Update gpt_oss.py * Update gpt_oss.py * Update attention_sink.py * Update gpt_oss.py * Update gpt_oss.py * fix * Update attention_sink.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update loss_utils.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update loss_utils.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Update gpt_oss.py * Versioning * Update saving_utils.py * Update saving_utils.py * Update saving_utils.py * Update saving_utils.py * Update saving_utils.py * Update saving_utils.py * Update saving_utils.py * Update saving_utils.py * Fix Gemma 3 * Update misc.py * Update rl_environments.py * Update pyproject.toml * Update rl_environments.py * Update __init__.py * Update empty_model.py * Update empty_model.py * Update empty_model.py * Update empty_model.py * Device type * Update vllm_utils.py * Update compiler.py * Update empty_model.py * Update vllm_utils.py * Update empty_model.py * Fixes * Update empty_model.py * Update empty_model.py * Update __init__.py --------- Co-authored-by: DoubleMathew <mmathew23@gmail.com>
Parent: 677086d · Commit: d7fb886

File tree: 7 files changed, +104 −26 lines

unsloth_zoo/__init__.py
Lines changed: 13 additions & 1 deletion

@@ -14,9 +14,10 @@
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.

-__version__ = "2025.10.7"
+__version__ = "2025.10.8"

 import os
+import warnings
 # Hugging Face Hub faster downloads
 if "HF_HUB_ENABLE_HF_TRANSFER" not in os.environ:
     os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
@@ -101,3 +102,14 @@
     execute_with_time_limit,
     Benchmarker,
 )
+
+# Stop some pydantic warnings
+try:
+    # pydantic/_internal/_generate_schema.py:2249: UnsupportedFieldAttributeWarning: The 'frozen' attribute with value True
+    # was provided to the `Field()` function, which has no effect in the context it was used.
+    # 'frozen' is field-specific metadata, and can only be attached to a model field using `Annotated` metadata or by assignment.
+    # This may have happened because an `Annotated` type alias using the `type` statement was used, or if the `Field()` function was attached to a single member of a union type.
+    from pydantic.warnings import UnsupportedFieldAttributeWarning
+    warnings.filterwarnings(action = "ignore", category = UnsupportedFieldAttributeWarning)
+except:
+    pass
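The tail added to __init__.py is a category-scoped warning filter. Below is a minimal, self-contained sketch of the same pattern; DummyWarning is a stand-in for pydantic's UnsupportedFieldAttributeWarning, which only exists in newer pydantic releases (hence the try/except guard in the patch).

```python
import warnings

# Stand-in for pydantic.warnings.UnsupportedFieldAttributeWarning;
# filtering by category silences only this class (and its subclasses).
class DummyWarning(UserWarning):
    pass

warnings.filterwarnings(action = "ignore", category = DummyWarning)

warnings.warn("suppressed", DummyWarning)   # silenced by the filter
warnings.warn("still shown", UserWarning)   # other categories unaffected
```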

unsloth_zoo/compiler.py
Lines changed: 12 additions & 1 deletion

@@ -2035,7 +2035,14 @@ def unsloth_compile_transformers(
     except ModuleNotFoundError:
         return
     modeling_file = eval(model_location)
-    if hasattr(modeling_file, "__UNSLOTH_PATCHED__"): return
+    if hasattr(modeling_file, "__UNSLOTH_PATCHED__"):
+        # Get __UNSLOTH_SUPPORTS_SDPA__
+        if hasattr(modeling_file, "__UNSLOTH_SUPPORTS_SDPA__"):
+            if supports_sdpa is not None:
+                assert(type(supports_sdpa) is list and len(supports_sdpa) == 1)
+                supports_sdpa[0] = modeling_file.__UNSLOTH_SUPPORTS_SDPA__
+        return
+    pass

     # Use transformers model_type logger to suppress message: Remove `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`
     exec("model_logger.addFilter(HideLoggingMessage('`use_cache`'))", globals(), locals())
@@ -2189,6 +2196,7 @@ def replaced_tqdm(*args, **kwargs):
     torch_modules = [x for x in torch_modules if x not in removal]

     # Check SDPA to load as eager or SDPA (Pixtral / Mistral 3 for eg doesn't have SDPA)
+    final_supports_sdpa = True
     if supports_sdpa is not None:
         assert(type(supports_sdpa) is list and len(supports_sdpa) == 1)
         if ("_supports_sdpa = True" in full_source) and ("_supports_sdpa = False" not in full_source):
@@ -2197,7 +2205,10 @@ def replaced_tqdm(*args, **kwargs):
             if supports_sdpa[0] != False: supports_sdpa[0] = True
         else:
             supports_sdpa[0] = False
+            final_supports_sdpa = False
         pass
+    # Save supports_sdpa to solve secondary imports
+    modeling_file.__UNSLOTH_SUPPORTS_SDPA__ = final_supports_sdpa

     # Get functions which are called
     called_functions = []
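Two idioms drive the compiler.py change: supports_sdpa is a one-element list used as a mutable out-parameter, and the final decision is cached on the patched module as __UNSLOTH_SUPPORTS_SDPA__ so a second call (a "secondary import") can return early yet still report the flag. A minimal sketch with hypothetical names, not the real patching logic:

```python
import types

def patch_module(mod, supports_sdpa = None):
    # Re-entry: recover the cached decision instead of re-scanning the source
    if hasattr(mod, "__UNSLOTH_PATCHED__"):
        if supports_sdpa is not None and hasattr(mod, "__UNSLOTH_SUPPORTS_SDPA__"):
            supports_sdpa[0] = mod.__UNSLOTH_SUPPORTS_SDPA__
        return
    mod.__UNSLOTH_PATCHED__ = True
    final_supports_sdpa = True  # in the real code this comes from a source scan
    if supports_sdpa is not None:
        supports_sdpa[0] = final_supports_sdpa
    # Persist the flag so later calls see the same answer
    mod.__UNSLOTH_SUPPORTS_SDPA__ = final_supports_sdpa

mod  = types.ModuleType("fake_modeling_file")
flag = [None]
patch_module(mod, flag)  # first call patches and fills flag
patch_module(mod, flag)  # second call short-circuits but still fills flag
print(flag[0])           # True
```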

unsloth_zoo/empty_model.py
Lines changed: 30 additions & 18 deletions

@@ -223,33 +223,45 @@ def copy_attributes(original_model, new_model):
     if dict_skipped_count > 0:
         print(f"📋 Skipped {dict_skipped_count} non-config dictionaries")
     if skipped_count > 0:
-       print(f"⏭️ Skipped {skipped_count} total attributes (tensors, modules, non-config dicts, etc.)")
+        print(f"⏭️ Skipped {skipped_count} total attributes (tensors, modules, non-config dicts, etc.)")
         if skipped_count <= 10:
-           print(f"   Skipped: {skipped_attrs}")
+            print(f"   Skipped: {skipped_attrs}")
         else:
-           print(f"   Sample: {skipped_attrs[:5]}... and {skipped_count-5} more")
+            print(f"   Sample: {skipped_attrs[:5]}... and {skipped_count-5} more")
+    pass


 @torch.inference_mode()
 def create_empty_causal_lm(config, dtype = torch.float16):
     # All Unsloth Zoo code licensed under LGPLv3
     from transformers import AutoModelForCausalLM
-    try:
-        from accelerate import init_empty_weights
-        # Suppress warning on uninited weights
-        old_warn = os.environ.get("UNSLOTH_WARN_UNINITIALIZED", "1")
-        os.environ["UNSLOTH_WARN_UNINITIALIZED"] = "0"
-        with init_empty_weights():
-            model_name = getattr(config, 'model_name')
-            kwargs = {"torch_dtype" if HAS_TORCH_DTYPE else "dtype" : dtype_from_config(config)}
-            if model_name is not None:
-                # This would persist quantization information.
+    from accelerate import init_empty_weights
+    # Suppress warning on uninited weights
+    old_warn = os.environ.get("UNSLOTH_WARN_UNINITIALIZED", "1")
+    os.environ["UNSLOTH_WARN_UNINITIALIZED"] = "0"
+    model_name = getattr(config, 'model_name')
+    kwargs = {"torch_dtype" if HAS_TORCH_DTYPE else "dtype" : dtype_from_config(config)}
+    original_meta_model = None
+    error = None
+    with init_empty_weights(include_buffers = True):
+        if model_name is not None:
+            try:
+                # This would persist quantization information for FP8 weights
                 original_meta_model = AutoModelForCausalLM.from_pretrained(model_name, **kwargs)
-            else:
+            except Exception as e:
+                error = str(e)
+                original_meta_model = None
+        if original_meta_model is None:
+            try:
+                # We must do this for 4.57.0 and above
                 original_meta_model = AutoModelForCausalLM.from_config(config)
-        # Suppress warning on uninited weights
-        os.environ["UNSLOTH_WARN_UNINITIALIZED"] = old_warn
-    except Exception as e:
+            except Exception as e:
+                error = str(e)
+                original_meta_model = None
+        pass
+    # Suppress warning on uninited weights
+    os.environ["UNSLOTH_WARN_UNINITIALIZED"] = old_warn
+    if error is not None and original_meta_model is None:
         print(f"Failed to create original_meta_model for AutoModelForCausalLM. Error {e}")
         original_meta_model = None

@@ -302,7 +314,7 @@ def _init_weights(self, module):
     try:
         # Use accelerate's init_empty_weights, not transformers.modeling_utils
         from accelerate import init_empty_weights
-        with init_empty_weights():
+        with init_empty_weights(include_buffers = True):
             original_meta_model = model_cls(config)
     except Exception as e:
         print(f"Failed to create original_meta_model for {model_cls.__name__}. Error {e}")

unsloth_zoo/fused_losses/cross_entropy_loss.py
Lines changed: 1 addition & 1 deletion

@@ -26,7 +26,7 @@
 import functools
 import math
 from ..temporary_patches.common import UNSLOTH_ENABLE_LOGGING, torch_compile_options, logger
-from unsloth import DEVICE_TYPE
+from ..device_type import DEVICE_TYPE

 @functools.cache
 def _get_mapping(autograd):

unsloth_zoo/rl_replacements.py
Lines changed: 1 addition & 1 deletion

@@ -23,7 +23,7 @@
 import os
 import numpy as np
 from typing import Union, Callable, Optional, List, Dict
-from unsloth import DEVICE_TYPE
+from .device_type import DEVICE_TYPE
 from .temporary_patches.common import torch_compile_options
 RL_REPLACEMENTS = dict()

unsloth_zoo/temporary_patches/gpt_oss.py
Lines changed: 1 addition & 1 deletion

@@ -535,7 +535,7 @@ def forward(self, hidden_states):


 # Combo kernels uses too much VRAM for low memory GPUs
-from unsloth import DEVICE_TYPE
+from ..device_type import DEVICE_TYPE
 if DEVICE_TYPE == "xpu":
     device_memory = torch.xpu.memory.mem_get_info(0)[-1]
 else:
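This and the two previous one-line fixes replace an absolute import from the top-level unsloth package with a relative import of a local device_type module, so reading DEVICE_TYPE cannot trigger a circular import of unsloth itself. The commit does not show device_type.py; the sketch below is only a guess at what such a module might contain, and the detection order is an assumption:

```python
# Hypothetical device_type.py; the real module's contents are not part
# of this diff, so treat the probing order here as an assumption.
import torch

def _detect_device_type() -> str:
    if torch.cuda.is_available():
        return "cuda"
    # Intel GPUs expose a torch.xpu namespace in recent PyTorch builds
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        return "xpu"
    return "cpu"

DEVICE_TYPE = _detect_device_type()
```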

unsloth_zoo/vllm_utils.py
Lines changed: 46 additions & 3 deletions

@@ -39,6 +39,8 @@
 import math
 import gc
 import os
+import ast
+import sys
 import torch
 import json
 import psutil
@@ -59,8 +61,7 @@
     UNSLOTH_ENABLE_LOGGING,
 )
 from .log import logger
-from unsloth import DEVICE_TYPE
-from unsloth.models.vision import VLLM_SUPPORTED_VLM
+from .device_type import DEVICE_TYPE
 global LORA_REQUEST_ID

 # Ignore logging messages
@@ -2360,9 +2361,50 @@ def _test_is_same_vlm(model, new_model, processor, test_backward=False):
             mismatches.append(layer_name)
     print(f"Backward gradient statistics match for {len(matches)} layers: {matches}")
     print(f"Backward gradient statistics mismatch for {len(mismatches)} layers: {mismatches}")
+pass


-pass
+def _read_unsloth_vision_source() -> str:
+    _VISION_TAIL = ("unsloth", "models", "vision.py")
+    from importlib.metadata import files, PackageNotFoundError, PackagePath
+    from pathlib import Path
+    # 1) Via installed distribution metadata (no import of the package)
+    try:
+        for entry in files("unsloth") or ():
+            if isinstance(entry, PackagePath):
+                parts = entry.parts
+                if len(parts) >= 3 and tuple(parts[-3:]) == _VISION_TAIL:
+                    return entry.read_text(encoding = "utf-8")
+    except PackageNotFoundError:
+        pass
+
+    # 2) Fallback: scan sys.path for a plain file
+    for base in map(Path, sys.path):
+        candidate = base.joinpath(*_VISION_TAIL)
+        if candidate.is_file():
+            return candidate.read_text(encoding = "utf-8")
+    raise FileNotFoundError("Could not locate unsloth/models/vision.py without importing it")
+pass
+
+
+def get_vllm_supported_vlm(_VAR_NAME = "VLLM_SUPPORTED_VLM"):
+    """
+    Parse VLLM_SUPPORTED_VLM from unsloth/models/vision.py as a literal.
+    """
+    src = _read_unsloth_vision_source()
+    tree = ast.parse(src)
+
+    # Support: `VLLM_SUPPORTED_VLM = [...]` and `VLLM_SUPPORTED_VLM: list[str] = [...]`
+    for node in tree.body:
+        if isinstance(node, ast.Assign):
+            if any(getattr(t, "id", None) == _VAR_NAME for t in node.targets):
+                return ast.literal_eval(node.value)
+        elif isinstance(node, ast.AnnAssign):
+            if getattr(node.target, "id", None) == _VAR_NAME:
+                return ast.literal_eval(node.value)
+    raise ValueError(f"{_VAR_NAME} not found as a literal in unsloth/models/vision.py")
+pass

 @torch.inference_mode
 def _test_get_vllm_state_dict(
@@ -2419,6 +2461,7 @@ def _test_get_vllm_state_dict(
     if not is_vision_model:
         model_class = AutoModelForCausalLM
     else:
+        VLLM_SUPPORTED_VLM = get_vllm_supported_vlm()
         if model_type in VLLM_SUPPORTED_VLM:
             import transformers
             model_class = getattr(transformers, config.architectures[0])
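get_vllm_supported_vlm() reads VLLM_SUPPORTED_VLM out of unsloth/models/vision.py as text and evaluates only the literal, so the list is available without importing unsloth (which the removed from unsloth.models.vision import line required). A self-contained illustration of the technique; the source string and its entries are stand-ins, not the real list:

```python
import ast

# Stand-in for the text of unsloth/models/vision.py
src = """
SOME_FLAG = True
VLLM_SUPPORTED_VLM: list[str] = ["model_a", "model_b"]
"""

def read_constant(source: str, name: str):
    # Walk top-level statements only; never executes the module
    for node in ast.parse(source).body:
        if isinstance(node, ast.Assign) and any(
            getattr(t, "id", None) == name for t in node.targets
        ):
            return ast.literal_eval(node.value)
        if (isinstance(node, ast.AnnAssign) and node.value is not None
                and getattr(node.target, "id", None) == name):
            return ast.literal_eval(node.value)
    raise ValueError(f"{name} not found as a literal")

print(read_constant(src, "VLLM_SUPPORTED_VLM"))  # ['model_a', 'model_b']
```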
