Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
728 commits
Select commit Hold shift + click to select a range
88b680a
Update gpt_oss.py
danielhanchen Sep 23, 2025
912e836
Update gpt_oss.py
danielhanchen Sep 23, 2025
69ae337
Update gpt_oss.py
danielhanchen Sep 23, 2025
17b78be
Update gpt_oss.py
danielhanchen Sep 23, 2025
b54f658
Update gpt_oss.py
danielhanchen Sep 23, 2025
36769ed
Update gpt_oss.py
danielhanchen Sep 23, 2025
5c5909f
Update gpt_oss.py
danielhanchen Sep 23, 2025
2f9fee2
Update gpt_oss.py
danielhanchen Sep 23, 2025
fb3ce53
Update gpt_oss.py
danielhanchen Sep 23, 2025
9425aa2
Update gpt_oss.py
danielhanchen Sep 23, 2025
8895518
Update __init__.py
danielhanchen Sep 23, 2025
14372e9
Update gpt_oss.py
danielhanchen Sep 23, 2025
fbc394b
Update gpt_oss.py
danielhanchen Sep 23, 2025
0dce904
Update gpt_oss.py
danielhanchen Sep 23, 2025
731b413
Update compiler.py
danielhanchen Sep 23, 2025
c492b70
Update gpt_oss.py
danielhanchen Sep 23, 2025
de35ca0
Update gpt_oss.py
danielhanchen Sep 23, 2025
0c3a5b1
Update gpt_oss.py
danielhanchen Sep 23, 2025
0b67494
Update gpt_oss.py
danielhanchen Sep 23, 2025
c3e6724
Dannightly (#304)
danielhanchen Sep 23, 2025
b2e590b
Update gpt_oss.py
danielhanchen Sep 24, 2025
6bffc3e
Update gpt_oss.py
danielhanchen Sep 24, 2025
a147bc8
Update gpt_oss.py
danielhanchen Sep 24, 2025
b0c88f8
Update gpt_oss.py
danielhanchen Sep 24, 2025
4ab2cfe
Update gpt_oss.py
danielhanchen Sep 24, 2025
4205b56
Update gpt_oss.py
danielhanchen Sep 24, 2025
98806b7
Update gpt_oss.py
danielhanchen Sep 24, 2025
15a9f88
Update gpt_oss.py
danielhanchen Sep 24, 2025
1ce4f9b
Update gpt_oss.py
danielhanchen Sep 24, 2025
976fcb4
Update gpt_oss.py
danielhanchen Sep 24, 2025
5b05f0d
Update gpt_oss.py
danielhanchen Sep 24, 2025
4bc3d22
Fix Flex Attention autotuning
danielhanchen Sep 24, 2025
0aaa499
Update patching_utils.py
danielhanchen Sep 24, 2025
5b27681
Update patching_utils.py
danielhanchen Sep 24, 2025
ebeb9f8
Update patching_utils.py
danielhanchen Sep 24, 2025
c2c473b
Update mxfp4.py
danielhanchen Sep 24, 2025
ed65c01
Update mxfp4.py
danielhanchen Sep 24, 2025
272a5ea
Update gpt_oss.py
danielhanchen Sep 24, 2025
95772ec
Update attention_sink.py
danielhanchen Sep 24, 2025
bacfc4d
Update patching_utils.py
danielhanchen Sep 24, 2025
8a89348
Update attention_sink.py
danielhanchen Sep 24, 2025
6d9b66b
Update gpt_oss.py
danielhanchen Sep 24, 2025
226c866
prefer_nd_tiling
danielhanchen Sep 24, 2025
2cebcc9
Update patching_utils.py
danielhanchen Sep 24, 2025
f3c5e1f
flex_attention_with_sink
danielhanchen Sep 24, 2025
e485dfc
Compile Flex Attention
danielhanchen Sep 24, 2025
6bd3f70
Update mxfp4.py
danielhanchen Sep 24, 2025
2393dfb
Update mxfp4.py
danielhanchen Sep 24, 2025
44356be
Update mxfp4.py
danielhanchen Sep 24, 2025
d5eebbc
Update mxfp4.py
danielhanchen Sep 24, 2025
93b5b88
Update gpt_oss.py
danielhanchen Sep 24, 2025
5e43a2a
bitsandbytes patch
danielhanchen Sep 24, 2025
2f0acb1
Update bitsandbytes.py
danielhanchen Sep 24, 2025
ebaf9b3
Update gpt_oss.py
danielhanchen Sep 24, 2025
2db0323
Inplace ops
danielhanchen Sep 24, 2025
031d21a
Update gpt_oss.py
danielhanchen Sep 24, 2025
61bb5aa
has_static_cache
danielhanchen Sep 24, 2025
267ab06
Update gpt_oss.py
danielhanchen Sep 24, 2025
bf825b1
Update gpt_oss.py
danielhanchen Sep 24, 2025
bf65ea5
Update gpt_oss.py
danielhanchen Sep 24, 2025
5c22d92
Update gpt_oss.py
danielhanchen Sep 24, 2025
274f7be
Update attention_sink.py
danielhanchen Sep 24, 2025
b2ec9f6
Update gpt_oss.py
danielhanchen Sep 24, 2025
ed572b1
Update gpt_oss.py
danielhanchen Sep 24, 2025
0bdaf45
Update gpt_oss.py
danielhanchen Sep 24, 2025
9fdf256
Update gpt_oss.py
danielhanchen Sep 24, 2025
56f7a73
Update gpt_oss.py
danielhanchen Sep 24, 2025
0c5437e
Update attention_sink.py
danielhanchen Sep 24, 2025
619c462
Update attention_sink.py
danielhanchen Sep 24, 2025
5d87949
Update rl_replacements.py
danielhanchen Sep 24, 2025
7ba642a
Update rl_replacements.py
danielhanchen Sep 24, 2025
040c6f2
Update rl_replacements.py
danielhanchen Sep 24, 2025
96798d8
Update gpt_oss.py
danielhanchen Sep 24, 2025
138f9f7
Update gpt_oss.py
danielhanchen Sep 24, 2025
eb19db9
Update gpt_oss.py
danielhanchen Sep 24, 2025
1f4f0c7
torch compile
danielhanchen Sep 25, 2025
b4afc0a
Update attention_sink.py
danielhanchen Sep 25, 2025
3d2083b
Update common.py
danielhanchen Sep 25, 2025
475a1fa
Update common.py
danielhanchen Sep 25, 2025
a1577f3
Patches
danielhanchen Sep 25, 2025
dc8308b
Compiled mask creation
danielhanchen Sep 25, 2025
15ae568
Update attention_sink.py
danielhanchen Sep 25, 2025
c849066
Update gpt_oss.py
danielhanchen Sep 25, 2025
eb68b54
Update gpt_oss.py
danielhanchen Sep 25, 2025
b4433b0
Revert
danielhanchen Sep 25, 2025
5f0fa7e
Update gpt_oss.py
danielhanchen Sep 25, 2025
274c830
Update gpt_oss.py
danielhanchen Sep 25, 2025
0c52d58
Fix up
danielhanchen Sep 25, 2025
3d9f498
Update attention_sink.py
danielhanchen Sep 25, 2025
dfe12c5
Update attention_sink.py
danielhanchen Sep 25, 2025
02ec222
Update utils.py
danielhanchen Sep 25, 2025
4e57162
Update attention_sink.py
danielhanchen Sep 25, 2025
17a6427
Update attention_sink.py
danielhanchen Sep 25, 2025
2002c9c
Retry
danielhanchen Sep 25, 2025
1ee8d5e
Update gpt_oss.py
danielhanchen Sep 25, 2025
3994e3c
Update gpt_oss.py
danielhanchen Sep 25, 2025
ef81921
Fix Flex
danielhanchen Sep 25, 2025
8cc0e77
Update gpt_oss.py
danielhanchen Sep 25, 2025
31f1624
Update gpt_oss.py
danielhanchen Sep 25, 2025
27fc0a9
Update gpt_oss.py
danielhanchen Sep 25, 2025
e86c541
Update gpt_oss.py
danielhanchen Sep 25, 2025
b4596cc
Update gpt_oss.py
danielhanchen Sep 25, 2025
858b962
Update gpt_oss.py
danielhanchen Sep 25, 2025
b676650
Update gpt_oss.py
danielhanchen Sep 25, 2025
dc1bd58
Update gpt_oss.py
danielhanchen Sep 25, 2025
1fe5a69
Update gpt_oss.py
danielhanchen Sep 25, 2025
524ac7f
Update gpt_oss.py
danielhanchen Sep 25, 2025
bd34939
Update gpt_oss.py
danielhanchen Sep 25, 2025
935ea71
Update gpt_oss.py
danielhanchen Sep 25, 2025
3ea5482
Update gpt_oss.py
danielhanchen Sep 25, 2025
1885f31
Update gpt_oss.py
danielhanchen Sep 25, 2025
ecd9b53
Update gpt_oss.py
danielhanchen Sep 25, 2025
d3b65af
Update gpt_oss.py
danielhanchen Sep 25, 2025
3b75bc9
Update gpt_oss.py
danielhanchen Sep 25, 2025
b43c1b5
Update gpt_oss.py
danielhanchen Sep 25, 2025
db12a8a
Update gpt_oss.py
danielhanchen Sep 25, 2025
889b4fb
Update gpt_oss.py
danielhanchen Sep 25, 2025
f481e2f
Update gpt_oss.py
danielhanchen Sep 25, 2025
c3e3a90
Update gpt_oss.py
danielhanchen Sep 25, 2025
b721c77
Update gpt_oss.py
danielhanchen Sep 25, 2025
7d81867
Update gpt_oss.py
danielhanchen Sep 25, 2025
577a2a0
Update gpt_oss.py
danielhanchen Sep 25, 2025
c0e421b
Update gpt_oss.py
danielhanchen Sep 25, 2025
2605ecb
Update gpt_oss.py
danielhanchen Sep 25, 2025
e850c7d
Update gpt_oss.py
danielhanchen Sep 25, 2025
9af4313
Update gpt_oss.py
danielhanchen Sep 25, 2025
d8a4e50
Update gpt_oss.py
danielhanchen Sep 25, 2025
1b732ba
Update gpt_oss.py
danielhanchen Sep 25, 2025
666f121
Update gpt_oss.py
danielhanchen Sep 25, 2025
b8cfebf
Update gpt_oss.py
danielhanchen Sep 25, 2025
5e88a87
Update gpt_oss.py
danielhanchen Sep 25, 2025
70dfc00
Update gpt_oss.py
danielhanchen Sep 25, 2025
9128339
Update gpt_oss.py
danielhanchen Sep 25, 2025
082cfb7
Update gpt_oss.py
danielhanchen Sep 25, 2025
0f47e5e
Update gpt_oss.py
danielhanchen Sep 25, 2025
d92e62d
Update gpt_oss.py
danielhanchen Sep 25, 2025
5646157
Update gpt_oss.py
danielhanchen Sep 25, 2025
272689b
Update gpt_oss.py
danielhanchen Sep 25, 2025
d10fc7a
Bug fixes
danielhanchen Sep 26, 2025
4396a93
Update patching_utils.py
danielhanchen Sep 26, 2025
ee50724
Update patching_utils.py
danielhanchen Sep 26, 2025
abe89f0
Update patching_utils.py
danielhanchen Sep 26, 2025
edc85ca
Update rl_replacements.py
danielhanchen Sep 26, 2025
efb18b5
Update patching_utils.py
danielhanchen Sep 26, 2025
f16a5a8
Update patching_utils.py
danielhanchen Sep 26, 2025
0dae9dd
Update patching_utils.py
danielhanchen Sep 26, 2025
435de2d
flash attn
danielhanchen Sep 26, 2025
9cd630c
Update gpt_oss.py
danielhanchen Sep 26, 2025
c510029
Update __init__.py
danielhanchen Sep 26, 2025
98080fc
Update attention_sink.py
danielhanchen Sep 26, 2025
5625cfb
Update gpt_oss.py
danielhanchen Sep 26, 2025
62756a8
Update gpt_oss.py
danielhanchen Sep 26, 2025
3f9a9a9
Update gpt_oss.py
danielhanchen Sep 26, 2025
c32eb2e
Update gpt_oss.py
danielhanchen Sep 26, 2025
63a771c
Update gpt_oss.py
danielhanchen Sep 26, 2025
194ff92
Update gpt_oss.py
danielhanchen Sep 26, 2025
be54940
Update gpt_oss.py
danielhanchen Sep 26, 2025
9ebf49f
Update gpt_oss.py
danielhanchen Sep 26, 2025
2b45d36
dropout_p
danielhanchen Sep 26, 2025
7a6941a
Update gpt_oss.py
danielhanchen Sep 26, 2025
588c4f0
Update gpt_oss.py
danielhanchen Sep 26, 2025
aded049
Update attention_sink.py
danielhanchen Sep 26, 2025
33ba6b3
Update gpt_oss.py
danielhanchen Sep 26, 2025
b08753b
Update gpt_oss.py
danielhanchen Sep 26, 2025
9fe8ec0
fix
danielhanchen Sep 26, 2025
5be9e57
Update attention_sink.py
danielhanchen Sep 26, 2025
a218bfc
Update gpt_oss.py
danielhanchen Sep 26, 2025
9fc2694
Update gpt_oss.py
danielhanchen Sep 26, 2025
769301d
Update gpt_oss.py
danielhanchen Sep 26, 2025
d59f62b
Update gpt_oss.py
danielhanchen Sep 26, 2025
92d16d4
Update gpt_oss.py
danielhanchen Sep 26, 2025
0608531
Update gpt_oss.py
danielhanchen Sep 26, 2025
24bb593
Update gpt_oss.py
danielhanchen Sep 26, 2025
c481eb8
Update gpt_oss.py
danielhanchen Sep 26, 2025
68fed93
Update gpt_oss.py
danielhanchen Sep 26, 2025
9ff936f
Update gpt_oss.py
danielhanchen Sep 26, 2025
77343fa
Update gpt_oss.py
danielhanchen Sep 26, 2025
f3e7f8c
Update gpt_oss.py
danielhanchen Sep 26, 2025
5e7e7d3
Update gpt_oss.py
danielhanchen Sep 26, 2025
a508006
Update loss_utils.py
danielhanchen Sep 26, 2025
44e1de7
Update gpt_oss.py
danielhanchen Sep 26, 2025
1079a21
Update gpt_oss.py
danielhanchen Sep 26, 2025
58e5f24
Update gpt_oss.py
danielhanchen Sep 26, 2025
3c61724
Update gpt_oss.py
danielhanchen Sep 26, 2025
bd50ca4
Update gpt_oss.py
danielhanchen Sep 26, 2025
5f8b77c
Update gpt_oss.py
danielhanchen Sep 26, 2025
f2fe3db
Update gpt_oss.py
danielhanchen Sep 26, 2025
04bbc07
Update loss_utils.py
danielhanchen Sep 26, 2025
cb16066
Update gpt_oss.py
danielhanchen Sep 26, 2025
75d7829
Update gpt_oss.py
danielhanchen Sep 26, 2025
679e882
Update gpt_oss.py
danielhanchen Sep 26, 2025
4b61795
Merge branch 'main' into nightly
danielhanchen Sep 26, 2025
c37dff1
Merge branch 'main' into nightly
danielhanchen Sep 26, 2025
b61346a
Merge branch 'main' into nightly
danielhanchen Sep 26, 2025
a8d6aa8
Merge branch 'main' into nightly
danielhanchen Sep 28, 2025
5225692
Update gpt_oss.py
danielhanchen Sep 28, 2025
02326ab
Update gpt_oss.py
danielhanchen Sep 28, 2025
2210555
Update gpt_oss.py
danielhanchen Sep 30, 2025
f7406a4
Update gpt_oss.py
danielhanchen Sep 30, 2025
7020561
Update gpt_oss.py
danielhanchen Sep 30, 2025
e316226
Update gpt_oss.py
danielhanchen Sep 30, 2025
55a0f94
Update gpt_oss.py
danielhanchen Sep 30, 2025
d241d8d
Versioning
danielhanchen Sep 30, 2025
8d752f6
Merge branch 'main' into nightly
danielhanchen Oct 1, 2025
7c40a85
Update saving_utils.py
danielhanchen Oct 5, 2025
114feed
Update saving_utils.py
danielhanchen Oct 5, 2025
5bdbffe
Update saving_utils.py
danielhanchen Oct 5, 2025
79115db
Update saving_utils.py
danielhanchen Oct 5, 2025
51e3889
Update saving_utils.py
danielhanchen Oct 5, 2025
3284083
Update saving_utils.py
danielhanchen Oct 5, 2025
289abf2
Update saving_utils.py
danielhanchen Oct 5, 2025
efe6d76
Update saving_utils.py
danielhanchen Oct 5, 2025
2f5e342
Fix Gemma 3
danielhanchen Oct 5, 2025
3237c4b
Update misc.py
danielhanchen Oct 5, 2025
dc3e28e
Merge branch 'main' into nightly
danielhanchen Oct 5, 2025
22b3cb6
Merge branch 'main' into nightly
danielhanchen Oct 14, 2025
5beb515
Merge branch 'main' into nightly
danielhanchen Oct 16, 2025
bd43a5b
Update rl_environments.py
danielhanchen Oct 17, 2025
9571b67
Update pyproject.toml
danielhanchen Oct 17, 2025
f789e3b
Update rl_environments.py
danielhanchen Oct 17, 2025
c146ca2
Update __init__.py
danielhanchen Oct 17, 2025
5012df2
Merge branch 'main' into nightly
danielhanchen Oct 17, 2025
80f4b15
Merge branch 'main' into nightly
danielhanchen Oct 17, 2025
6857125
Update empty_model.py
danielhanchen Oct 17, 2025
49f3cd0
Update empty_model.py
danielhanchen Oct 17, 2025
7642fbc
Update empty_model.py
danielhanchen Oct 17, 2025
a6a9a53
Merge branch 'main' into nightly
danielhanchen Oct 17, 2025
565d37f
Merge branch 'main' into nightly
danielhanchen Oct 17, 2025
068142c
Merge branch 'main' into nightly
danielhanchen Oct 19, 2025
9b06516
Merge branch 'main' into nightly
danielhanchen Oct 19, 2025
33a55fc
Merge branch 'main' into nightly
danielhanchen Oct 20, 2025
9f9fad5
Update empty_model.py
danielhanchen Oct 20, 2025
c62f0db
Device type
danielhanchen Oct 20, 2025
44539dc
Update vllm_utils.py
danielhanchen Oct 20, 2025
c7f1a85
Update compiler.py
danielhanchen Oct 20, 2025
d98b8dd
Update empty_model.py
danielhanchen Oct 20, 2025
7dccb4f
Update vllm_utils.py
danielhanchen Oct 20, 2025
96b12f6
Update empty_model.py
danielhanchen Oct 20, 2025
b900605
Fixes
danielhanchen Oct 20, 2025
be24a86
Update empty_model.py
danielhanchen Oct 20, 2025
09a56e1
Update empty_model.py
danielhanchen Oct 20, 2025
dd3f5a9
Update __init__.py
danielhanchen Oct 20, 2025
5e914a5
Update vllm_utils.py
danielhanchen Oct 20, 2025
d45333a
Update vllm_utils.py
danielhanchen Oct 20, 2025
aef0696
Update rl_environments.py
danielhanchen Oct 20, 2025
4bbede7
Update cross_entropy_loss.py
danielhanchen Oct 20, 2025
03adb63
Update vllm_utils.py
danielhanchen Oct 20, 2025
4e0786b
Update vllm_utils.py
danielhanchen Oct 20, 2025
21a4404
Update rl_environments.py
danielhanchen Oct 20, 2025
e63cd7b
Update vllm_utils.py
danielhanchen Oct 20, 2025
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion unsloth_zoo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

__version__ = "2025.10.7"
__version__ = "2025.10.8"

import os
import warnings
# Hugging Face Hub faster downloads
if "HF_HUB_ENABLE_HF_TRANSFER" not in os.environ:
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
Expand Down Expand Up @@ -101,3 +102,14 @@
execute_with_time_limit,
Benchmarker,
)

# Stop some pydantic warnings
try:
    # pydantic/_internal/_generate_schema.py:2249: UnsupportedFieldAttributeWarning: The 'frozen' attribute with value True
    # was provided to the `Field()` function, which has no effect in the context it was used.
    # 'frozen' is field-specific metadata, and can only be attached to a model field using `Annotated` metadata or by assignment.
    # This may have happened because an `Annotated` type alias using the `type` statement was used, or if the `Field()` function was attached to a single member of a union type.
    from pydantic.warnings import UnsupportedFieldAttributeWarning
    warnings.filterwarnings(action = "ignore", category = UnsupportedFieldAttributeWarning)
except Exception:
    # Best effort only: pydantic may be absent or too old to define this warning class.
    # Narrowed from a bare `except:` so SystemExit / KeyboardInterrupt still propagate.
    pass
13 changes: 12 additions & 1 deletion unsloth_zoo/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2035,7 +2035,14 @@ def unsloth_compile_transformers(
except ModuleNotFoundError:
return
modeling_file = eval(model_location)
if hasattr(modeling_file, "__UNSLOTH_PATCHED__"): return
if hasattr(modeling_file, "__UNSLOTH_PATCHED__"):
# Get __UNSLOTH_SUPPORTS_SDPA__
if hasattr(modeling_file, "__UNSLOTH_SUPPORTS_SDPA__"):
if supports_sdpa is not None:
assert(type(supports_sdpa) is list and len(supports_sdpa) == 1)
supports_sdpa[0] = modeling_file.__UNSLOTH_SUPPORTS_SDPA__
return
pass

# Use transformers model_type logger to suppress message: Remove `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`
exec("model_logger.addFilter(HideLoggingMessage('`use_cache`'))", globals(), locals())
Expand Down Expand Up @@ -2189,6 +2196,7 @@ def replaced_tqdm(*args, **kwargs):
torch_modules = [x for x in torch_modules if x not in removal]

# Check SDPA to load as eager or SDPA (Pixtral / Mistral 3 for eg doesn't have SDPA)
final_supports_sdpa = True
if supports_sdpa is not None:
assert(type(supports_sdpa) is list and len(supports_sdpa) == 1)
if ("_supports_sdpa = True" in full_source) and ("_supports_sdpa = False" not in full_source):
Expand All @@ -2197,7 +2205,10 @@ def replaced_tqdm(*args, **kwargs):
if supports_sdpa[0] != False: supports_sdpa[0] = True
else:
supports_sdpa[0] = False
final_supports_sdpa = False
pass
# Save supports_sdpa to solve secondary imports
modeling_file.__UNSLOTH_SUPPORTS_SDPA__ = final_supports_sdpa

# Get functions which are called
called_functions = []
Expand Down
48 changes: 30 additions & 18 deletions unsloth_zoo/empty_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,33 +223,45 @@ def copy_attributes(original_model, new_model):
if dict_skipped_count > 0:
print(f"📋 Skipped {dict_skipped_count} non-config dictionaries")
if skipped_count > 0:
print(f"⏭️ Skipped {skipped_count} total attributes (tensors, modules, non-config dicts, etc.)")
print(f"⏭️ Skipped {skipped_count} total attributes (tensors, modules, non-config dicts, etc.)")
if skipped_count <= 10:
print(f" Skipped: {skipped_attrs}")
print(f" Skipped: {skipped_attrs}")
else:
print(f" Sample: {skipped_attrs[:5]}... and {skipped_count-5} more")
print(f" Sample: {skipped_attrs[:5]}... and {skipped_count-5} more")
pass


@torch.inference_mode()
def create_empty_causal_lm(config, dtype = torch.float16):
# All Unsloth Zoo code licensed under LGPLv3
from transformers import AutoModelForCausalLM
try:
from accelerate import init_empty_weights
# Suppress warning on uninited weights
old_warn = os.environ.get("UNSLOTH_WARN_UNINITIALIZED", "1")
os.environ["UNSLOTH_WARN_UNINITIALIZED"] = "0"
with init_empty_weights():
model_name = getattr(config, 'model_name')
kwargs = {"torch_dtype" if HAS_TORCH_DTYPE else "dtype" : dtype_from_config(config)}
if model_name is not None:
# This would persist quantization information.
from accelerate import init_empty_weights
# Suppress warning on uninited weights
old_warn = os.environ.get("UNSLOTH_WARN_UNINITIALIZED", "1")
os.environ["UNSLOTH_WARN_UNINITIALIZED"] = "0"
model_name = getattr(config, 'model_name')
kwargs = {"torch_dtype" if HAS_TORCH_DTYPE else "dtype" : dtype_from_config(config)}
original_meta_model = None
error = None
with init_empty_weights(include_buffers = True):
if model_name is not None:
try:
# This would persist quantization information for FP8 weights
original_meta_model = AutoModelForCausalLM.from_pretrained(model_name, **kwargs)
else:
except Exception as e:
error = str(e)
original_meta_model = None
if original_meta_model is None:
try:
# We must do this for 4.57.0 and above
original_meta_model = AutoModelForCausalLM.from_config(config)
# Suppress warning on uninited weights
os.environ["UNSLOTH_WARN_UNINITIALIZED"] = old_warn
except Exception as e:
except Exception as e:
error = str(e)
original_meta_model = None
pass
# Suppress warning on uninited weights
os.environ["UNSLOTH_WARN_UNINITIALIZED"] = old_warn
if error is not None and original_meta_model is None:
print(f"Failed to create original_meta_model for AutoModelForCausalLM. Error {e}")
original_meta_model = None

Expand Down Expand Up @@ -302,7 +314,7 @@ def _init_weights(self, module):
try:
# Use accelerate's init_empty_weights, not transformers.modeling_utils
from accelerate import init_empty_weights
with init_empty_weights():
with init_empty_weights(include_buffers = True):
original_meta_model = model_cls(config)
except Exception as e:
print(f"Failed to create original_meta_model for {model_cls.__name__}. Error {e}")
Expand Down
4 changes: 3 additions & 1 deletion unsloth_zoo/fused_losses/cross_entropy_loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import functools
import math
from ..temporary_patches.common import UNSLOTH_ENABLE_LOGGING, torch_compile_options, logger
from unsloth import DEVICE_TYPE
from ..device_type import DEVICE_TYPE

@functools.cache
def _get_mapping(autograd):
Expand Down Expand Up @@ -198,6 +198,8 @@ def forward(
n_chunks = extra_kwargs.pop("n_chunks")
else:
n_chunks = get_chunk_size(bsz, qlen, vocab_size, target_gb = target_gb)
if UNSLOTH_ENABLE_LOGGING:
logger.info(f"Fused CE Loss [bsz={bsz}][qlen={qlen}][vocab_size={vocab_size}][n_chunks={n_chunks}]")
__shift_labels = torch.chunk(labels, n_chunks, dim = 0)
__shift_states = torch.chunk(hidden_states.view(-1, hd), n_chunks, dim = 0)
__grad_inputs = torch.chunk(grad_inputs.view(-1, hd), n_chunks, dim = 0)
Expand Down
86 changes: 61 additions & 25 deletions unsloth_zoo/rl_environments.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from contextlib import contextmanager
from functools import wraps
import threading
import errno
import time
from typing import Callable, TypeVar, Any, Tuple
T = TypeVar("T")
Expand Down Expand Up @@ -299,16 +300,26 @@ def create_locked_down_function(function):
pass


def _retry_eintr(func, *args):
while True:
try:
return func(*args)
except OSError as e:
if getattr(e, "errno", None) == errno.EINTR:
continue
raise
pass

@contextmanager
def time_limit(seconds: float):
def time_limit(seconds: float, *, strict: bool = True, leeway: float = 0.05):
"""
Enforce a wall-clock time limit using SIGALRM/ITIMER_REAL.

Key points:
- Nest-safe: earliest deadline wins.
- Restores any prior timer with remaining time corrected.
- **Interrupts** blocking syscalls so TimeoutError can be raised promptly.
- Unix-like OS, main thread only. Process-wide timer: not composable with other SIGALRM users.
- Earliest deadline wins (respects any currently armed ITIMER_REAL).
- EINTR-safe setup/teardown; resists Ctrl+C during cleanup.
- strict=True: 'fail-closed' — if body returns after the deadline and the
SIGALRM handler didn't get to run, raise TimeoutError on exit anyway.
- Unix-like OS, main thread only. Process-wide SIGALRM: not composable with other users.
"""
if seconds <= 0:
raise ValueError("seconds must be > 0")
Expand All @@ -317,39 +328,64 @@ def time_limit(seconds: float):
if threading.current_thread() is not threading.main_thread():
raise RuntimeError("time_limit must be used from the main thread")

start = time.monotonic()
deadline_at = start + seconds

old_handler = signal.getsignal(signal.SIGALRM)
prev_remaining, prev_interval = signal.getitimer(signal.ITIMER_REAL)

# Always respect any already-armed timer: take the earlier deadline.
deadline = seconds if prev_remaining <= 0.0 else min(seconds, prev_remaining)

fired = False # set by our handler

def _handler(signum, frame):
raise TimeoutError(f"Timed out after {seconds:g}s")
setattr(_handler, "__time_limit_handler__", True)
nonlocal fired
fired = True
# include the intended arming deadline for debugging
raise TimeoutError(f"Timed out after {deadline:g}s")

nested_ours = getattr(old_handler, "__time_limit_handler__", False)
start = time.monotonic()
delay_now = min(seconds, prev_remaining) if (nested_ours and prev_remaining > 0.0) else seconds
setattr(_handler, "__time_limit_handler__", True)

_retry_eintr(signal.signal, signal.SIGALRM, _handler)
try:
signal.signal(signal.SIGALRM, _handler)

# IMPORTANT: ensure blocking syscalls are INTERRUPTED (no SA_RESTART),
# so control returns to Python and we can raise TimeoutError.
# Ensure blocking syscalls are interrupted (avoid SA_RESTART)
try:
signal.siginterrupt(signal.SIGALRM, True)
except (AttributeError, OSError):
pass

signal.setitimer(signal.ITIMER_REAL, delay_now)
_retry_eintr(signal.setitimer, signal.ITIMER_REAL, deadline)
yield
finally:
# Cancel our timer and restore the previous handler.
signal.setitimer(signal.ITIMER_REAL, 0.0)
signal.signal(signal.SIGALRM, old_handler)

# Restore prior timer with corrected remaining time.
if prev_remaining != 0.0 or prev_interval != 0.0:
elapsed = max(time.monotonic() - start, 0.0)
remaining = max(prev_remaining - elapsed, 0.0)
signal.setitimer(signal.ITIMER_REAL, remaining, prev_interval)
# Make teardown atomic wrt SIGINT and robust to EINTR
old_sigint = signal.getsignal(signal.SIGINT)
try:
_retry_eintr(signal.signal, signal.SIGINT, signal.SIG_IGN)
try:
_retry_eintr(signal.setitimer, signal.ITIMER_REAL, 0.0) # cancel ours
finally:
_retry_eintr(signal.signal, signal.SIGALRM, old_handler)

# Restore prior timer with corrected remaining time.
if prev_remaining != 0.0 or prev_interval != 0.0:
elapsed = max(time.monotonic() - start, 0.0)
remaining = max(prev_remaining - elapsed, 0.0)
_retry_eintr(signal.setitimer, signal.ITIMER_REAL, remaining, prev_interval)
finally:
_retry_eintr(signal.signal, signal.SIGINT, old_sigint)

# ---- Fail-closed check (only if no TimeoutError was raised inside) ----
if strict and not fired:
now = time.monotonic()
if now > deadline_at + leeway:
# We exceeded wall time but the handler didn't get a chance to run.
# This typically means the body spent a long time in non-cooperative C code.
raise TimeoutError(
f"Exceeded time limit ({seconds:g}s) without interrupt; "
f"elapsed ≈ {now - start:.3f}s. "
"The protected code likely blocked in a C extension or another SIGALRM user clobbered the timer."
)
pass

def execute_with_time_limit(seconds: float) -> Callable[[Callable[..., T]], Callable[..., T]]:
Expand Down
2 changes: 1 addition & 1 deletion unsloth_zoo/rl_replacements.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import os
import numpy as np
from typing import Union, Callable, Optional, List, Dict
from unsloth import DEVICE_TYPE
from .device_type import DEVICE_TYPE
from .temporary_patches.common import torch_compile_options
RL_REPLACEMENTS = dict()

Expand Down
2 changes: 1 addition & 1 deletion unsloth_zoo/temporary_patches/gpt_oss.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ def forward(self, hidden_states):


# Combo kernels uses too much VRAM for low memory GPUs
from unsloth import DEVICE_TYPE
from ..device_type import DEVICE_TYPE
if DEVICE_TYPE == "xpu":
device_memory = torch.xpu.memory.mem_get_info(0)[-1]
else:
Expand Down
56 changes: 50 additions & 6 deletions unsloth_zoo/vllm_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
import math
import gc
import os
import ast
import sys
import torch
import json
import psutil
Expand All @@ -59,8 +61,7 @@
UNSLOTH_ENABLE_LOGGING,
)
from .log import logger
from unsloth import DEVICE_TYPE
from unsloth.models.vision import VLLM_SUPPORTED_VLM
from .device_type import DEVICE_TYPE
global LORA_REQUEST_ID

# Ignore logging messages
Expand Down Expand Up @@ -1219,7 +1220,7 @@ def _override_to(self, *args, **kwargs):
layer.quant_method = "fbgemm_fp8"
elif f"{layer_name}.weight_scale_inv" in quant_state_dict:
# This denotes that the model if FP8 dynamic quantized.
layer = FP8Linear(in_features = 0, out_features = 0, bias = has_bias, dtype=dtype, block_size = kwargs['block_size'], device = get_target_device(), activation_scheme = kwargs['activation_scheme'])
layer = FP8Linear(in_features = 0, out_features = 0, bias = has_bias, dtype = dtype, block_size = kwargs['block_size'], device = get_target_device(), activation_scheme = kwargs['activation_scheme'])
layer.in_features = weight.shape[1]
layer.out_features = weight.shape[0]
layer.weight = torch.nn.Parameter(weight, requires_grad = False)
Expand Down Expand Up @@ -1462,8 +1463,9 @@ def load_vllm(
assert(conservativeness >= 0.0 and conservativeness <= 1.0)

unsloth_vllm_standby = unsloth_vllm_standby or (os.getenv("UNSLOTH_VLLM_STANDBY", "0") != "0")
if unsloth_vllm_standby and gpu_memory_utilization < 0.9:
gpu_memory_utilization = 0.9
if unsloth_vllm_standby and gpu_memory_utilization < 0.8:
## [TODO] Used to allow 0.9, but now 0.85 works only
gpu_memory_utilization = 0.8
logger.info("Unsloth: Standby mode is enabled. Increasing `gpu_memory_utilization` to 0.9.")

if DEVICE_TYPE == "cuda":
Expand Down Expand Up @@ -2360,9 +2362,50 @@ def _test_is_same_vlm(model, new_model, processor, test_backward=False):
mismatches.append(layer_name)
print(f"Backward gradient statistics match for {len(matches)} layers: {matches}")
print(f"Backward gradient statistics mismatch for {len(mismatches)} layers: {mismatches}")
pass


pass
def _read_unsloth_vision_source() -> str:
_VISION_TAIL = ("unsloth", "models", "vision.py")
from importlib.metadata import files, PackageNotFoundError, PackagePath
from pathlib import Path
# 1) Via installed distribution metadata (no import of the package)
try:
for entry in files("unsloth") or ():
if isinstance(entry, PackagePath):
parts = entry.parts
if len(parts) >= 3 and tuple(parts[-3:]) == _VISION_TAIL:
return entry.read_text(encoding = "utf-8")
except PackageNotFoundError:
pass

# 2) Fallback: scan sys.path for a plain file
for base in map(Path, sys.path):
candidate = base.joinpath(*_VISION_TAIL)
if candidate.is_file():
return candidate.read_text(encoding = "utf-8")
raise FileNotFoundError("Could not locate unsloth/models/vision.py without importing it")
pass


def get_vllm_supported_vlm(_VAR_NAME = "VLLM_SUPPORTED_VLM"):
    """
    Parse VLLM_SUPPORTED_VLM from unsloth/models/vision.py as a literal.

    Handles both plain assignments (``NAME = [...]``) and annotated ones
    (``NAME: list[str] = [...]``) at module top level. Raises ``ValueError``
    if the variable is not found as a literal.
    """
    tree = ast.parse(_read_unsloth_vision_source())

    for node in tree.body:
        # Collect the assignment targets for both statement forms.
        if isinstance(node, ast.Assign):
            targets = node.targets
        elif isinstance(node, ast.AnnAssign):
            targets = (node.target,)
        else:
            continue
        if any(getattr(target, "id", None) == _VAR_NAME for target in targets):
            return ast.literal_eval(node.value)
    raise ValueError(f"{_VAR_NAME} not found as a literal in unsloth/models/vision.py")
pass


@torch.inference_mode
def _test_get_vllm_state_dict(
Expand Down Expand Up @@ -2419,6 +2462,7 @@ def _test_get_vllm_state_dict(
if not is_vision_model:
model_class = AutoModelForCausalLM
else:
VLLM_SUPPORTED_VLM = get_vllm_supported_vlm()
if model_type in VLLM_SUPPORTED_VLM:
import transformers
model_class = getattr(transformers, config.architectures[0])
Expand Down