From 1e915f1abec2a9a57787d99c774852ced7666b54 Mon Sep 17 00:00:00 2001 From: waqas Date: Thu, 2 Jul 2026 03:49:11 +0100 Subject: [PATCH] Honor the model's generation_config.eos_token_id in the transformers backend For generative tasks the transformers backend overrode eos_token_id with tokenizer.eos_token_id, discarding the terminators the model itself declares. Chat models whose turn terminator is not the tokenizer eos (e.g. Gemma ends turns with token 106 while tokenizer.eos is 1, generation_config declares [1, 106, 50]) therefore never stopped and padded every generation to max_new_tokens, wasting up to ~95% of generated tokens and corrupting generation-length measurements. Prefer model.generation_config.eos_token_id when set, falling back to the tokenizer's. Measured on Gemma MMLU CoT: 7168 -> 2654 tokens, 145s -> 55s per item, extracted answer unchanged. Fixes #1278 --- src/lighteval/models/transformers/transformers_model.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/lighteval/models/transformers/transformers_model.py b/src/lighteval/models/transformers/transformers_model.py index 64e790a2f..bcd0797a2 100644 --- a/src/lighteval/models/transformers/transformers_model.py +++ b/src/lighteval/models/transformers/transformers_model.py @@ -809,7 +809,14 @@ def _generate_padded( generation_config.update( max_new_tokens=max_new_tokens, pad_token_id=self.tokenizer.pad_token_id if self.tokenizer.pad_token_id else self.tokenizer.eos_token_id, - eos_token_id=self.tokenizer.eos_token_id, + # Prefer the model's declared terminators: chat models can end a turn with a token that is + # not the tokenizer's eos (e.g. Gemma ends turns with token 106 while tokenizer.eos is 1); + # overriding with tokenizer.eos_token_id alone makes such models generate until max_new_tokens. + eos_token_id=( + self.model.generation_config.eos_token_id + if self.model.generation_config.eos_token_id is not None + else self.tokenizer.eos_token_id + ), num_return_sequences=num_samples, output_logits=returns_logits, renormalize_logits=True,