Skip to content
Open
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
6726786
Add span-based masking to DataCollatorForCompletionOnlyLM
shanghongsim Apr 14, 2026
1c2f842
Narrow TextCompletionsCollatorWithPadding template params to str
shanghongsim Apr 14, 2026
190b909
Validate end_of_turn_template is provided for span-based masking
shanghongsim Apr 14, 2026
2948700
Extract _tokenize_template helper and move _KNOWN_MASKING_METHODS to …
shanghongsim Apr 14, 2026
ec9d609
Fix docstring: mask_tool_calls is not a valid collator_kwargs param
shanghongsim Apr 14, 2026
e9ee991
Add deprecation warning for legacy instruction-based masking
shanghongsim Apr 14, 2026
ab6e081
Infer assistant_turn_no_tools when tool_call_start_template is provided
shanghongsim Apr 14, 2026
4c081d7
Extract _resolve_masking_method to simplify __init__ logic
shanghongsim Apr 14, 2026
6682e56
Remove tool_call_start_template and assistant_turn_no_tools masking
shanghongsim Apr 16, 2026
3b92ca5
Drop legacy Llama template fallback and neaten wrapper collator
shanghongsim Apr 16, 2026
e67c3db
Inline _resolve_masking_method into __init__
shanghongsim Apr 16, 2026
71b38ee
Rename masking_method to train_target and assistant_turn to all_assis…
shanghongsim Apr 16, 2026
df0d473
Restore _collate method in TextCompletionsCollatorWithPadding
shanghongsim Apr 16, 2026
414dd4c
Extract _resolve_train_target classmethod from __init__
shanghongsim Apr 16, 2026
33a5da8
Remove ASSISTANT_TURN_NO_TOOLS from MaskingMethod enum and builder
shanghongsim Apr 16, 2026
b4583e4
Rename MaskingMethod to TrainTarget with clearer value names
shanghongsim Apr 16, 2026
c6744bb
Replace vocab-based template detection with chat template rendering
shanghongsim Apr 16, 2026
780606c
docs: clarify _resolve_collator_templates docstring
shanghongsim Apr 16, 2026
b14ab66
Allow train_target and collator_kwargs to be used together
shanghongsim Apr 16, 2026
e56f872
Rename end_of_turn to end_of_turn_template in _resolve_collator_templ…
shanghongsim Apr 16, 2026
33c768e
Move train_target collator-name validation into config __post_init__
shanghongsim Apr 16, 2026
0136af9
Reject train_target when collator_name is not set or wrong
shanghongsim Apr 16, 2026
928c274
Remove redundant tokenizer None check in train_target block
shanghongsim Apr 16, 2026
fbc7087
Clean up _resolve_train_target and TrainTarget docstring
shanghongsim Apr 16, 2026
d5d93f4
Centralize train_target resolution in the builder
shanghongsim Apr 17, 2026
a06055f
Remove redundant tests for span-based masking and legacy collator paths
shanghongsim Apr 17, 2026
0cdb2bc
Add type annotations to fix pyright errors on tokenizer.decode calls
shanghongsim Apr 17, 2026
1c1dadf
Fix pyright: use isinstance assert for tokenizer.decode return type
shanghongsim Apr 17, 2026
e30dc99
Fix YAML configs: use enum name ALL_ASSISTANT_TURNS for OmegaConf par…
shanghongsim Apr 17, 2026
1a8e1d2
Validate end_of_turn_template early when auto-detection fails for all…
shanghongsim Apr 17, 2026
48b65a1
Differentiate error messages in _resolve_collator_templates
shanghongsim Apr 17, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions configs/projects/coalm/405b_train.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ data:
shuffle: True
seed: 42
collator_name: "text_completions_only_with_padding"
train_target: "all_assistant_turns"
seed: 42
validation:
datasets:
- dataset_name: "text_sft_jsonl"
dataset_path: "/path/to/validation/dataset.jsonl"
collator_name: "text_completions_only_with_padding"
train_target: "all_assistant_turns"
seed: 42

training:
Expand Down
2 changes: 2 additions & 0 deletions configs/projects/coalm/70b_train.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ data:
shuffle: True
seed: 42
collator_name: "text_completions_only_with_padding"
train_target: "all_assistant_turns"
seed: 42
validation:
datasets:
- dataset_name: "text_sft_jsonl"
dataset_path: "/path/to/validation/dataset.jsonl"
collator_name: "text_completions_only_with_padding"
train_target: "all_assistant_turns"
seed: 42

training:
Expand Down
2 changes: 2 additions & 0 deletions configs/projects/coalm/8b_train.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ data:
shuffle: True
seed: 42
collator_name: "text_completions_only_with_padding"
train_target: "all_assistant_turns"
seed: 42
validation:
datasets:
- dataset_name: "text_sft_jsonl"
dataset_path: "/path/to/validation/dataset.jsonl"
collator_name: "text_completions_only_with_padding"
train_target: "all_assistant_turns"
seed: 42

training:
Expand Down
2 changes: 2 additions & 0 deletions configs/projects/halloumi/8b_train.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ data:
seed: 42

collator_name: "text_completions_only_with_padding"
train_target: "all_assistant_turns"
seed: 42
validation:
datasets:
Expand All @@ -78,6 +79,7 @@ data:
}

collator_name: "text_completions_only_with_padding"
train_target: "all_assistant_turns"
seed: 42

training:
Expand Down
173 changes: 152 additions & 21 deletions src/oumi/builders/collators.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import warnings
from collections.abc import Callable

import oumi.core.constants as constants
Expand All @@ -27,11 +28,101 @@
from oumi.core.configs.internal.supported_models import (
find_internal_model_config,
)
from oumi.core.configs.params.data_params import TrainTarget
from oumi.core.tokenizers.base_tokenizer import BaseTokenizer
from oumi.utils.logging import logger

# This is used to set the max input length for a model with infinite size input
_VERY_LARGE_INTEGER = int(1e30)
_SENTINEL_USER = "<<__U__>>"
_SENTINEL_ASST = "<<__A__>>"
_FALLBACK_MSG = (
"Cannot auto-detect collator templates from the chat template. "
"Provide response_template (and end_of_turn_template for "
"all_assistant_turns) via collator_kwargs."
)


def _resolve_collator_templates(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Has this been tested with gpt-oss models? They use a non-standard system for marking boundaries within responses, so wondering if boundary detection will work as it does for Qwen/Llama style templates.

tokenizer: "BaseTokenizer",
) -> tuple[str, str]:
"""Auto-detect response_template and end_of_turn_template.

Applies the chat template to a known test conversation, then finds
the assistant boundary strings in the rendered output.

Returns:
(response_template, end_of_turn_template)

Raises:
ValueError: If templates cannot be extracted.
"""
msgs = [
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is really clever, but we should maybe include a system instruction here?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like modify it to extract the system instruction? We are currently no longer relying on the system instruction for masking

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No I mean ensure that when we extract the different template portions, we do so using an example conversation that also includes a system instruction

Or rather, perhaps do it twice, once on a conversation with a system instruction, and once on a conversation without.

Why? Because I'm a bit concerned about the scenario where this logic doesn't use system instructions when extracting, but the user does in their data, and the templates we extract with this methodology then fail to work once the data includes a system instruction.

{"role": "user", "content": _SENTINEL_USER},
{"role": "assistant", "content": _SENTINEL_ASST},
{"role": "user", "content": _SENTINEL_USER},
{"role": "assistant", "content": _SENTINEL_ASST},
]

try:
rendered = tokenizer.apply_chat_template(
msgs, tokenize=False, add_generation_prompt=False
)
except Exception as exc:
raise ValueError(_FALLBACK_MSG) from exc

if not isinstance(rendered, str):
raise ValueError(_FALLBACK_MSG)

# Locate boundaries around the second turn pair
# to avoid system-prompt effects on the first turn.
try:
a1 = rendered.index(_SENTINEL_ASST)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: first_asst_start, and add _start to second turns

first_asst_end = a1 + len(_SENTINEL_ASST)
second_user = rendered.index(_SENTINEL_USER, first_asst_end)
second_user_end = second_user + len(_SENTINEL_USER)
second_asst = rendered.index(_SENTINEL_ASST, second_user_end)
second_asst_end = second_asst + len(_SENTINEL_ASST)
except ValueError:
raise ValueError(_FALLBACK_MSG)

# End-of-turn: common token-ID prefix of the two strings that
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

extract this to its own method

# follow assistant content (mid-conversation vs. end-of-sequence).
after_ids = tokenizer.encode(rendered[second_asst_end:], add_special_tokens=False)
between_ids = tokenizer.encode(
rendered[first_asst_end:second_user], add_special_tokens=False
)
eot_len = 0
for a, b in zip(after_ids, between_ids):
if a != b:
break
eot_len += 1
eot_ids = after_ids[:eot_len]
end_of_turn_template: str = tokenizer.decode(eot_ids, skip_special_tokens=False)
Comment thread
shanghongsim marked this conversation as resolved.
Outdated

Comment thread
shanghongsim marked this conversation as resolved.
# Response template: strip the EOT prefix to get just the assistant header.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here, extract to its own method

resp_ids = tokenizer.encode(
rendered[second_user_end:second_asst], add_special_tokens=False
)
if eot_len > 0 and resp_ids[:eot_len] == eot_ids:
resp_ids = resp_ids[eot_len:]
response_template: str = tokenizer.decode(resp_ids, skip_special_tokens=False)

if not response_template.strip():
raise ValueError(_FALLBACK_MSG)

# Qwen3 and similar reasoning models inject <think>...</think> into
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This description scares me, I wonder if we could have a better workaround or a louder error

# every assistant turn via their chat template. If training data was
# formatted without thinking tokens the response_template won't match
# and every example will be silently masked.
if "<think>" in response_template:
logger.warning(
"The extracted response_template contains <think> tokens "
"(from the model's chat template). If you're training without "
"thinking tokens, use collator_kwargs to specify "
"response_template manually."
)

return response_template, end_of_turn_template


def build_data_collator(
Expand All @@ -51,7 +142,8 @@ def build_data_collator(

- "text_with_padding": Uses `TextCollatorWithPadding`.
- "text_completions_only_with_padding": Uses
`TextCompletionsCollatorWithPadding`.
`TextCompletionsCollatorWithPadding`. Supports optional
``end_of_turn_template`` for tool-aware span-based masking.
- "vision_language_with_padding": Uses `VisionLanguageCollatorWithPadding`.
- "vision_language_sft": Uses `VisionLanguageSftCollator`.

Expand Down Expand Up @@ -126,27 +218,20 @@ def build_data_collator(
**kwargs,
)
elif collator_name == "text_completions_only_with_padding":
# Extract instruction and response templates from kwargs if provided
instruction_template = kwargs.pop("instruction_template", None)
response_template = kwargs.pop("response_template", None)

# Default to Llama-style templates if not provided
instruction_prefix = (
instruction_template
if instruction_template
else "<|start_header_id|>user<|end_header_id|>\n\n"
)
response_prefix = (
response_template
if response_template
else "<|start_header_id|>assistant<|end_header_id|>\n\n"
)
if not kwargs.get("response_template"):
raise ValueError(
"'text_completions_only_with_padding' requires a "
"response_template. Either set train_target in your config "
"(which auto-resolves templates from the tokenizer) or "
"provide response_template via collator_kwargs."
)

return TextCompletionsCollatorWithPadding(
Comment thread
shanghongsim marked this conversation as resolved.
tokenizer=tokenizer,
instruction_prefix=instruction_prefix,
response_prefix=response_prefix,
debug=debug,
ignore_index=(
label_ignore_index if label_ignore_index is not None else -100
),
**kwargs,
)
raise ValueError(f"Unknown data collator name: '{collator_name}'")
Comment on lines 243 to 249
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: The TextCompletionsCollatorWithPadding constructor can receive ignore_index both as an explicit argument and within **kwargs, causing a TypeError if a user customizes it.
Severity: HIGH

Suggested Fix

Before calling the TextCompletionsCollatorWithPadding constructor, pop ignore_index from the kwargs dictionary. Use the popped value to determine the final ignore_index value to be passed, preventing the argument duplication.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent. Verify if this is a real issue. If it is, propose a fix; if not, explain why it's
not valid.

Location: src/oumi/builders/collators.py#L243-L249

Potential issue: When building the `text_completions_only_with_padding` collator in
`build_data_collator`, the `ignore_index` parameter is passed explicitly while also
being potentially present in the `**kwargs` passed to the
`TextCompletionsCollatorWithPadding` constructor. If a user specifies `ignore_index` in
their `collator_kwargs` configuration, this results in a `TypeError` because the
`__init__` method receives multiple values for the same keyword argument, causing a
runtime crash during training setup.

Expand Down Expand Up @@ -206,9 +291,55 @@ def build_collator_from_config(
"trust_remote_code", config.model.trust_remote_code
)

# Merge collator_kwargs from config with the existing kwargs
# Config kwargs take precedence over automatically determined kwargs
# --- Resolve train_target and templates ---
config_collator_kwargs = train_split.collator_kwargs or {}

if collator_name == "text_completions_only_with_padding":
if train_split.train_target is not None:
# Path 1: train_target is set, auto-detect templates from
# the tokenizer's chat template. Falls back to user-provided
# response_template in collator_kwargs if auto-detection fails.
collator_kwargs["train_target"] = train_split.train_target.value

try:
response_template, end_of_turn_template = _resolve_collator_templates(
tokenizer
)
collator_kwargs["response_template"] = response_template
if train_split.train_target == TrainTarget.ALL_ASSISTANT_TURNS:
collator_kwargs["end_of_turn_template"] = end_of_turn_template
except ValueError:
if config_collator_kwargs.get("response_template") is None:
raise
Comment thread
shanghongsim marked this conversation as resolved.

elif config_collator_kwargs.get("response_template") is not None:
# Path 2: train_target not set, templates provided manually
# via collator_kwargs. Infer train_target from which templates
# are present.
has_eot = config_collator_kwargs.get("end_of_turn_template") is not None
has_inst = config_collator_kwargs.get("instruction_template") is not None
if has_eot:
collator_kwargs["train_target"] = "all_assistant_turns"
elif has_inst:
warnings.warn(
"Instruction-based masking is deprecated. "
"Use train_target='all_assistant_turns' with "
"end_of_turn_template for multi-turn conversations, "
"or train_target='final_assistant_turn' "
"for single-turn completions.",
DeprecationWarning,
stacklevel=2,
)
collator_kwargs["train_target"] = "_legacy_instruction_response"
else:
collator_kwargs["train_target"] = "final_assistant_turn"
else:
raise ValueError(
"'text_completions_only_with_padding' requires either "
"train_target or response_template in collator_kwargs."
)

# User-provided collator_kwargs override auto-resolved values
collator_kwargs.update(config_collator_kwargs)

return build_data_collator(
Expand Down
26 changes: 19 additions & 7 deletions src/oumi/core/collators/text_completions_collator_with_padding.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,22 +27,34 @@ class TextCompletionsCollatorWithPadding:
def __init__(
self,
tokenizer: BaseTokenizer,
instruction_prefix: str,
response_prefix: str,
response_template: str,
train_target: str,
instruction_template: str | None = None,
debug: bool = False,
end_of_turn_template: str | None = None,
ignore_index: int = -100,
):
"""Custom collator for text LLM training.

Args:
tokenizer: The tokenizer used for encoding the data.
instruction_prefix: The prefix marking the beginning of the user instruction.
response_prefix: The prefix marking the beginning of the assistant response.
response_template: String marking assistant response start.
instruction_template: String marking user instruction start.
debug: If True, enables debug mode for logging.
train_target: Training target — ``"all_assistant_turns"``
or ``"final_assistant_turn"``.
end_of_turn_template: String marking the end of a turn.
Required for ``all_assistant_turns``.
ignore_index: Value used for masked labels. Must match the ignore_index
of the loss function (default: -100).
"""
self._default_collator = DataCollatorForCompletionOnlyLM(
tokenizer=tokenizer,
instruction_template=instruction_prefix,
response_template=response_prefix,
instruction_template=instruction_template,
response_template=response_template,
train_target=train_target,
end_of_turn_template=end_of_turn_template,
ignore_index=ignore_index,
)

if not hasattr(tokenizer, "pad_token_id") or tokenizer.pad_token_id is None:
Expand All @@ -55,7 +67,7 @@ def _collate(self, inputs: list[Any]) -> dict[str, Any]:
result = self._default_collator(inputs)
return result

def __call__(self, batch) -> dict[str, Any]:
def __call__(self, batch: list[dict[str, Any]]) -> dict[str, Any]:
"""Pads to the longest length present in the batch.

Args:
Expand Down
Loading
Loading