Commit 861ef12

Authored by danielhanchen, Datta0, and mmathew23

Bug fixes (#347)
* Iterative fixes and updates across attention_sink.py, gpt_oss.py, patching_utils.py, mxfp4.py, bitsandbytes.py, rl_replacements.py, common.py, utils.py, loss_utils.py, saving_utils.py, misc.py, rl_environments.py, empty_model.py, vllm_utils.py, compiler.py, cross_entropy_loss.py, __init__.py, and pyproject.toml
* prefer_nd_tiling, flex_attention_with_sink, compile Flex Attention, compiled mask creation, torch compile, flash attn, dropout_p, inplace ops, has_static_cache, device type, bitsandbytes patch, versioning
* Fix Gemma 3
* Qwen3 VL vLLM (#324): Qwen3 VL additional layers, fused vision QKV, refactor for handling Qwen3 VL, fix backward pass issues, out hidden size change, Qwen 2.5 and Qwen 3 Conv3d -> Linear vLLM changes
* Fix CE compile
* DeepSeekOCR fix: save single model shard (#346): check for the safetensors_list shard naming convention; turned off the shard padding length check because DeepSeek's padding is different; handle the error thrown when copying index.json over an identical existing file

Co-authored-by: Datta Nimmaturi <venkatadattasainimmaturi@gmail.com>
Co-authored-by: DoubleMathew <mmathew23@gmail.com>
1 parent 6690af3 commit 861ef12

File tree

8 files changed: +71 -21 lines changed

unsloth_zoo/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -14,7 +14,7 @@
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 
-__version__ = "2025.11.1"
+__version__ = "2025.11.2"
 
 import os
 import warnings

unsloth_zoo/fused_losses/cross_entropy_loss.py

Lines changed: 1 addition & 7 deletions

@@ -95,13 +95,7 @@ def compute_fused_ce_loss(
 
     # Calculate cross entropy loss
     reduction = "sum" if n_items is not None else "mean"
-    # Since we overwrite torch.compile(torch.nn.functional.cross_entropy)
-    # We might get double compile errors, so use the uncompiled version
-    cross_entropy = \
-        torch.nn.functional._uncompiled_cross_entropy if \
-        hasattr(torch.nn.functional, "_uncompiled_cross_entropy") else \
-        torch.nn.functional.cross_entropy
-    loss = cross_entropy(
+    loss = torch.nn.functional.cross_entropy(
         input = logits.view(-1, vocab_size).float().contiguous(),
         target = labels.view(-1).to(device).contiguous(),
         reduction = reduction,
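
For context, a minimal standalone sketch of the reduction logic this hunk touches; the helper name ce_loss_sketch and the toy tensors are illustrative, not code from this file:

import torch

def ce_loss_sketch(logits, labels, n_items = None):
    # "sum" when the caller supplies a global token count (so we normalize
    # manually below), otherwise let cross_entropy take the mean itself
    reduction = "sum" if n_items is not None else "mean"
    loss = torch.nn.functional.cross_entropy(
        input = logits.view(-1, logits.shape[-1]).float(),
        target = labels.view(-1),
        reduction = reduction,
    )
    if n_items is not None:
        loss = loss / n_items
    return loss

print(ce_loss_sketch(torch.randn(2, 3, 11), torch.randint(0, 11, (2, 3))))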

unsloth_zoo/loss_utils.py

Lines changed: 5 additions & 1 deletion

@@ -106,7 +106,11 @@ def unsloth_fixed_cross_entropy(source, target, num_items_in_batch: int = None,
         ignore_index = ignore_index,
         reduction = reduction,
     )
-    if reduction == "sum": loss = loss / num_items_in_batch
+    if reduction == "sum":
+        # just in case users pass an int for num_items_in_batch, which could be the case for custom trainer
+        if torch.is_tensor(num_items_in_batch):
+            num_items_in_batch = num_items_in_batch.to(loss.device)
+        loss = loss / num_items_in_batch
     return loss
 pass
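
To see what the guard protects against: num_items_in_batch may arrive as a plain int (e.g. from a custom trainer) or as a tensor on another device, and dividing a GPU loss by a CPU tensor can raise a device-mismatch error. A small runnable sketch of the guarded division, with toy tensors:

import torch

loss = torch.nn.functional.cross_entropy(
    torch.randn(4, 10), torch.randint(0, 10, (4,)), reduction = "sum",
)

# An int divides fine as-is; a tensor is moved to the loss's device first,
# which is exactly what the patch above adds
for num_items_in_batch in (4, torch.tensor([4])):
    if torch.is_tensor(num_items_in_batch):
        num_items_in_batch = num_items_in_batch.to(loss.device)
    print(loss / num_items_in_batch)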

unsloth_zoo/patch_torch_functions.py

Lines changed: 5 additions & 3 deletions

@@ -171,9 +171,11 @@ def patch_torch_functions():
     if not hasattr(torch.nn.functional, "_uncompiled_layer_norm"):
         torch.nn.functional._uncompiled_layer_norm = torch.nn.functional.layer_norm
     torch.nn.functional.layer_norm = layer_norm
-    if not hasattr(torch.nn.functional, "_uncompiled_cross_entropy"):
-        torch.nn.functional._uncompiled_cross_entropy = torch.nn.functional.cross_entropy
-    torch.nn.functional.cross_entropy = cross_entropy
+    # Remove compiling cross_entropy since too many errors
+    # We already compile this most likely anyways
+    # if not hasattr(torch.nn.functional, "_uncompiled_cross_entropy"):
+    #     torch.nn.functional._uncompiled_cross_entropy = torch.nn.functional.cross_entropy
+    # torch.nn.functional.cross_entropy = cross_entropy
 pass
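
The layer_norm patch that stays follows a stash-then-replace pattern. A minimal sketch of that pattern, assuming a plain torch.compile wrapper stands in for this module's own layer_norm replacement:

import torch

# Stash the original exactly once so repeated patching stays idempotent,
# then swap in a compiled wrapper (a stand-in for the module's version)
if not hasattr(torch.nn.functional, "_uncompiled_layer_norm"):
    torch.nn.functional._uncompiled_layer_norm = torch.nn.functional.layer_norm
    torch.nn.functional.layer_norm = torch.compile(
        torch.nn.functional._uncompiled_layer_norm, dynamic = True,
    )

The removed cross_entropy branch used the same pattern; per the diff comments, dropping it avoids double-compile errors since cross_entropy is most likely compiled elsewhere already.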

unsloth_zoo/rl_replacements.py

Lines changed: 14 additions & 2 deletions

@@ -126,7 +126,6 @@ def left_pack_padding(tensor: torch.Tensor, pad_id: int) -> torch.Tensor:
 pass
 RL_REPLACEMENTS["left_pack_padding"] = left_pack_padding
 
-import torch
 
 def align_logprobs_with_mask(
     logprob_tensor: torch.Tensor,

@@ -176,10 +175,23 @@ def align_logprobs_with_mask(
     padded_logprobs[valid_rows, valid_cols] = valid_vals
 
     return padded_logprobs
-
+pass
 RL_REPLACEMENTS["align_logprobs_with_mask"] = align_logprobs_with_mask
 
 
+def grpo_update_SamplingParams(SamplingParams, generation_kwargs, vllm_sampling_params = None):
+    good_sampling_params_keys = inspect.signature(SamplingParams).parameters.keys()
+    if vllm_sampling_params is not None:
+        for key in good_sampling_params_keys:
+            if hasattr(vllm_sampling_params, key):
+                overwrited_key = getattr(vllm_sampling_params, key)
+                if overwrited_key is not None and (type(overwrited_key) in (list, tuple,) and len(overwrited_key) != 0):
+                    generation_kwargs[key] = overwrited_key
+    return generation_kwargs
+pass
+RL_REPLACEMENTS["grpo_update_SamplingParams"] = grpo_update_SamplingParams
+
+
 # Custom compiled GRPO loss - creates 3 Triton kernels
 def grpo_compute_loss(
     ref_logits,
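
grpo_update_SamplingParams copies a user's vLLM sampling overrides into generation_kwargs, but only for attributes in SamplingParams' signature whose values are non-empty lists or tuples. A runnable sketch with a stand-in dataclass in place of vllm.SamplingParams (the function body is copied from the hunk above):

import inspect
from dataclasses import dataclass

@dataclass
class SamplingParams:
    # Illustrative stand-in for vllm.SamplingParams
    temperature: float = 1.0
    stop: list = None

def grpo_update_SamplingParams(SamplingParams, generation_kwargs, vllm_sampling_params = None):
    good_sampling_params_keys = inspect.signature(SamplingParams).parameters.keys()
    if vllm_sampling_params is not None:
        for key in good_sampling_params_keys:
            if hasattr(vllm_sampling_params, key):
                overwrited_key = getattr(vllm_sampling_params, key)
                if overwrited_key is not None and (type(overwrited_key) in (list, tuple,) and len(overwrited_key) != 0):
                    generation_kwargs[key] = overwrited_key
    return generation_kwargs

# stop (a non-empty list) is merged in; temperature (a float) is left untouched
print(grpo_update_SamplingParams(SamplingParams, {"temperature": 0.7}, SamplingParams(stop = ["</answer>"])))
# {'temperature': 0.7, 'stop': ['</answer>']}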

unsloth_zoo/saving_utils.py

Lines changed: 31 additions & 3 deletions

@@ -920,6 +920,26 @@ def fix_tokenizer_config_json(tokenizer, saved_folder):
     return
 pass
 
+def is_hf_sharded_safetensors(filenames: list[str]) -> bool:
+    """Check if filenames follow HF sharded naming: model-00001-of-00005.safetensors"""
+    pattern = re.compile(r'^(.+?)-(\d+)-of-(\d+)\.safetensors$')
+
+    matches = [pattern.match(f) for f in filenames]
+    if not all(matches):
+        return False
+
+    # Keep strings to check padding
+    parsed = [(m.group(1), m.group(2), m.group(3)) for m in matches]
+
+    # shard and total have same padding: turned off as deepseekocr padding is different
+    # for prefix, shard_str, total_str in parsed:
+    #     if len(shard_str) != len(total_str):
+    #         return False
+
+    # same prefix and total
+    prefixes, _, totals = zip(*parsed)
+    return len(set(prefixes)) == 1 and len(set(totals)) == 1
+
 @torch.inference_mode
 def merge_and_overwrite_lora(
     get_model_name,

@@ -1170,7 +1190,8 @@ def upload_items(filename = None):
     _hf_cache_dir = _get_hf_cache_dir()
     copied_all_from_cache = False
     copied_tokenizer_model_from_cache = False
-    safe_tensor_index_files = ["model.safetensors.index.json"] if len(safetensors_list) > 1 else []
+    is_hf_sharded = is_hf_sharded_safetensors(safetensors_list)
+    safe_tensor_index_files = ["model.safetensors.index.json"] if (len(safetensors_list) > 1 or is_hf_sharded) else []
 
     # ONLY download/copy the original index if we are NOT dequantizing an MXFP4 model
     if (not (base_model_is_quantized and quant_type == "mxfp4") or (base_model_is_quantized and quant_type == "mxfp4" and save_method == "mxfp4")) and not needs_splitting:

@@ -1180,7 +1201,13 @@ def upload_items(filename = None):
     if safe_tensor_index_files:
         local_index_path = os.path.join(model_name, "model.safetensors.index.json")
         if os.path.exists(local_index_path):
-            shutil.copy2(local_index_path, os.path.join(save_directory, "model.safetensors.index.json"))
+            try:
+                shutil.copy2(local_index_path, os.path.join(save_directory, "model.safetensors.index.json"))
+            except shutil.SameFileError:
+                pass
+            except Exception as e:
+                print(f"Error copying model.safetensors.index.json: {e}")
+                raise e
     else:
         # Download from HF
         if "model.safetensors.index.json" in [f for f in safe_tensor_index_files]:

@@ -1282,7 +1309,8 @@ def upload_items(filename = None):
     if needs_splitting:
         final_safetensors_list = renumber_safetensor_files(final_safetensors_list, save_directory)
 
-    regenerate_index = ((base_model_is_quantized and quant_type == "mxfp4") or needs_splitting) and len(final_safetensors_list) > 1 and save_method != "mxfp4"
+    is_final_safetensors_list_sharded = is_hf_sharded_safetensors(final_safetensors_list)
+    regenerate_index = ((base_model_is_quantized and quant_type == "mxfp4") or needs_splitting) and (len(final_safetensors_list) > 1 or is_final_safetensors_list_sharded) and save_method != "mxfp4"
     weight_map = {}
 
     for filename in ProgressBar(final_safetensors_list, desc=f'Unsloth: Merging weights into {"mxfp4" if save_method=="mxfp4" else "16bit"}'):
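
A quick demonstration of what is_hf_sharded_safetensors accepts; the function here is condensed from the hunk above (the commented-out padding check is omitted, as it is disabled there too) and the filenames are illustrative:

import re

def is_hf_sharded_safetensors(filenames: list[str]) -> bool:
    # Condensed copy of the helper added above, for illustration only
    pattern = re.compile(r'^(.+?)-(\d+)-of-(\d+)\.safetensors$')
    matches = [pattern.match(f) for f in filenames]
    if not all(matches):
        return False
    parsed = [(m.group(1), m.group(2), m.group(3)) for m in matches]
    prefixes, _, totals = zip(*parsed)
    return len(set(prefixes)) == 1 and len(set(totals)) == 1

print(is_hf_sharded_safetensors(
    ["model-00001-of-00002.safetensors", "model-00002-of-00002.safetensors"]))  # True
print(is_hf_sharded_safetensors(["model.safetensors"]))                          # False
# A single, unpadded shard in DeepSeek-OCR style still counts as sharded,
# which is why the index file is now copied even when the list has one entry
print(is_hf_sharded_safetensors(["model-1-of-1.safetensors"]))                   # True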

unsloth_zoo/training_utils.py

Lines changed: 4 additions & 2 deletions

@@ -74,14 +74,16 @@ def fix_zero_training_loss(model, tokenizer, train_dataset):
             "Unsloth: All labels in your dataset are -100. Training losses will be all 0.\n"\
             "For example, are you sure you used `train_on_responses_only` correctly?\n"\
             "Or did you mask our tokens incorrectly? Maybe this is intended?\n"\
-            "Maybe you're using a Llama chat template on a non Llama model for example?"
+            "Maybe you're using a Llama chat template on a non Llama model for example?"\
+            "If you used `train_on_responses_only`, confirm your user and assistant parts are correct!"
         )
     elif seen_bad / (seen_bad + seen_good) >= 0.9:
         print(
             "Unsloth: Nearly all labels in your dataset are -100. Training losses will be all 0.\n"\
             "For example, are you sure you used `train_on_responses_only` correctly?\n"\
             "Or did you mask our tokens incorrectly? Maybe this is intended?\n"\
-            "Maybe you're using a Llama chat template on a non Llama model for example?"
+            "Maybe you're using a Llama chat template on a non Llama model for example?"\
+            "If you used `train_on_responses_only`, confirm your user and assistant parts are correct!"
         )
     pass
 pass
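
For reference, the warning above fires on a simple ratio of -100 labels; a tiny sketch of that threshold check, assuming per-token counts for seen_bad and seen_good (the toy labels tensor is illustrative):

import torch

labels = torch.tensor([-100, -100, -100, -100, -100, -100, -100, -100, -100, 42])
seen_bad  = int((labels == -100).sum())
seen_good = int((labels != -100).sum())
# 9 / 10 = 0.9, which crosses the >= 0.9 threshold, so the warning is printed
print(seen_bad / (seen_bad + seen_good) >= 0.9)  # True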

unsloth_zoo/vllm_utils.py

Lines changed: 10 additions & 2 deletions

@@ -28,6 +28,7 @@
     "generate_batches",
     "convert_lora_modules",
     "return_lora_modules",
+    "get_lora_supported_ranks",
 ]
 
 from typing import Optional, List, Tuple, Dict, Any

@@ -1463,8 +1464,8 @@ def approximate_vllm_memory_usage(
 pass
 
 
-def determine_max_lora_rank(lora_rank = 16):
-    """vLLM doesn't allow any LoRA rank, so we need to get the next largest"""
+@functools.cache
+def get_lora_supported_ranks():
     possible_max_ranks = [8, 16, 32, 64, 128, 256, 320, 512]
     try:
         import vllm.config.lora

@@ -1482,6 +1483,13 @@ def determine_max_lora_rank(lora_rank = 16):
     if type(possible_max_ranks) is str:
         possible_max_ranks = re.findall(r"[\d]{1,}", possible_max_ranks)
     possible_max_ranks = [int(x) for x in possible_max_ranks]
+    return possible_max_ranks
+pass
+
+
+def determine_max_lora_rank(lora_rank = 16):
+    """vLLM doesn't allow any LoRA rank, so we need to get the next largest"""
+    possible_max_ranks = get_lora_supported_ranks()
     for max_lora_rank in possible_max_ranks:
         if max_lora_rank >= lora_rank:
             return max_lora_rank
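
Splitting the probe out means the supported-rank list is computed once (functools.cache) while the rank lookup stays a cheap scan. A minimal sketch using only the hardcoded fallback list from the hunk above, without the vLLM probe:

import functools

@functools.cache
def get_lora_supported_ranks():
    # Fallback list from the diff; the real function asks vLLM when available
    return [8, 16, 32, 64, 128, 256, 320, 512]

def determine_max_lora_rank(lora_rank = 16):
    # Return the smallest supported rank that is >= the requested rank
    for max_lora_rank in get_lora_supported_ranks():
        if max_lora_rank >= lora_rank:
            return max_lora_rank

print(determine_max_lora_rank(48))  # 64, the next supported rank upwards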
