From fac137eae51ac9ba0ba8ccb944a9a91fa3f9154c Mon Sep 17 00:00:00 2001 From: "T. Duy Nguyen-Hien" Date: Tue, 6 May 2025 19:40:04 +0800 Subject: [PATCH 01/13] - removed htrack_block in src/lighteval/main_nanotron.py - fixed import path for NanotronLightevalModel src/lighteval/pipeline.py --- src/lighteval/main_nanotron.py | 31 ++++++++++++++----------------- src/lighteval/pipeline.py | 2 +- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/src/lighteval/main_nanotron.py b/src/lighteval/main_nanotron.py index 94004c065..345be1fe9 100644 --- a/src/lighteval/main_nanotron.py +++ b/src/lighteval/main_nanotron.py @@ -52,7 +52,6 @@ def nanotron( from lighteval.config.lighteval_config import FullNanotronConfig, LightEvalConfig from lighteval.logging.evaluation_tracker import EvaluationTracker - from lighteval.logging.hierarchical_logger import htrack_block from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters from lighteval.utils.imports import NO_NANOTRON_ERROR_MSG, is_nanotron_available from lighteval.utils.utils import EnvConfig @@ -62,22 +61,20 @@ def nanotron( if not is_nanotron_available(): raise ImportError(NO_NANOTRON_ERROR_MSG) - with htrack_block("Load nanotron config"): - # Create nanotron config - if not checkpoint_config_path.endswith(".yaml"): - raise ValueError("The checkpoint path should point to a YAML file") - - model_config = get_config_from_file( - checkpoint_config_path, - config_class=Config, - model_config_class=None, - skip_unused_config_keys=True, - skip_null_keys=True, - ) - - # We are getting an type error, because the get_config_from_file is not correctly typed, - lighteval_config: LightEvalConfig = get_config_from_file(lighteval_config_path, config_class=LightEvalConfig) # type: ignore - nanotron_config = FullNanotronConfig(lighteval_config, model_config) + if not checkpoint_config_path.endswith(".yaml"): + raise ValueError("The checkpoint path should point to a YAML file") + + model_config = get_config_from_file( + checkpoint_config_path, + config_class=Config, + model_config_class=None, + skip_unused_config_keys=True, + skip_null_keys=True, + ) + + # We are getting an type error, because the get_config_from_file is not correctly typed, + lighteval_config: LightEvalConfig = get_config_from_file(lighteval_config_path, config_class=LightEvalConfig) # type: ignore + nanotron_config = FullNanotronConfig(lighteval_config, model_config) evaluation_tracker = EvaluationTracker( output_dir=lighteval_config.logging.output_dir, diff --git a/src/lighteval/pipeline.py b/src/lighteval/pipeline.py index 39e007b33..439286f10 100644 --- a/src/lighteval/pipeline.py +++ b/src/lighteval/pipeline.py @@ -72,7 +72,7 @@ from nanotron.parallel.context import ParallelContext from nanotron.utils import local_ranks_zero_first - from lighteval.models.nanotron_model import NanotronLightevalModel + from lighteval.models.nanotron.nanotron_model import NanotronLightevalModel import logging From 044c86534cfc55f626840d7b34f5ed0b1ee55a73 Mon Sep 17 00:00:00 2001 From: nouamanetazi Date: Sat, 22 Mar 2025 10:59:37 +0000 Subject: [PATCH 02/13] . 
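This patch moves NanotronLightevalModel to
src/lighteval/models/nanotron/nanotron_model.py (keeping a temporary
re-export shim at the old path), passes explicit position_ids to the
nanotron forward call, reshapes the TP-gathered logits with
out.view(*batch_model.input_ids.shape, -1) instead of a transpose, makes
--lighteval-config-path optional with built-in defaults, and gives
FullNanotronConfig a generation_parameters property that falls back to
default GenerationArgs when the lighteval config defines no generation
section. A minimal sketch of that fallback, assuming GenerationArgs is
importable from nanotron.config:

    from nanotron.config import GenerationArgs
    from lighteval.config.lighteval_config import FullNanotronConfig

    # lighteval_config and model_config as loaded in main_nanotron.nanotron()
    config = FullNanotronConfig(lighteval_config, model_config)
    # Returns lighteval_config.generation if set, else a default GenerationArgs()
    params = config.generation_parameters
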
rebased pr #656 --- src/lighteval/config/lighteval_config.py | 8 +++ src/lighteval/main_nanotron.py | 51 ++++++++++++------- src/lighteval/models/__init__.py | 21 ++++++++ src/lighteval/models/nanotron/__init__.py | 21 ++++++++ .../models/nanotron/nanotron_model.py | 22 +++++--- src/lighteval/models/nanotron_model.py | 26 ++++++++++ src/lighteval/pipeline.py | 7 ++- 7 files changed, 129 insertions(+), 27 deletions(-) create mode 100644 src/lighteval/models/__init__.py create mode 100644 src/lighteval/models/nanotron/__init__.py create mode 100644 src/lighteval/models/nanotron_model.py diff --git a/src/lighteval/config/lighteval_config.py b/src/lighteval/config/lighteval_config.py index f24a15184..0e8217afe 100644 --- a/src/lighteval/config/lighteval_config.py +++ b/src/lighteval/config/lighteval_config.py @@ -101,3 +101,11 @@ class LightEvalConfig: class FullNanotronConfig: lighteval_config: LightEvalConfig nanotron_config: "Config" + + @property + def generation_parameters(self): + # Return the generation parameters from the lighteval config + # or create default generation parameters if none are set + if self.lighteval_config.generation: + return self.lighteval_config.generation + return GenerationArgs() diff --git a/src/lighteval/main_nanotron.py b/src/lighteval/main_nanotron.py index 94004c065..1b973a112 100644 --- a/src/lighteval/main_nanotron.py +++ b/src/lighteval/main_nanotron.py @@ -42,17 +42,17 @@ def nanotron( checkpoint_config_path: Annotated[ str, Option(help="Path to the nanotron checkpoint YAML or python config file, potentially on s3.") ], - lighteval_config_path: Annotated[str, Option(help="Path to a YAML config to be used for the evaluation.")], + lighteval_config_path: Annotated[str, Option(help="Path to a YAML config to be used for the evaluation.")] = None, cache_dir: Annotated[str, Option(help="Cache directory for datasets and models.")] = CACHE_DIR, ): """ Evaluate models using nanotron as backend. 
""" from nanotron.config import Config, get_config_from_file + from nanotron.config.parallelism_config import ParallelismArgs - from lighteval.config.lighteval_config import FullNanotronConfig, LightEvalConfig + from lighteval.config.lighteval_config import FullNanotronConfig, LightEvalConfig, LightEvalLoggingArgs, LightEvalTasksArgs from lighteval.logging.evaluation_tracker import EvaluationTracker - from lighteval.logging.hierarchical_logger import htrack_block from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters from lighteval.utils.imports import NO_NANOTRON_ERROR_MSG, is_nanotron_available from lighteval.utils.utils import EnvConfig @@ -61,23 +61,38 @@ def nanotron( if not is_nanotron_available(): raise ImportError(NO_NANOTRON_ERROR_MSG) + + # Create nanotron config + if not checkpoint_config_path.endswith(".yaml"): + raise ValueError("The checkpoint path should point to a YAML file") + + model_config = get_config_from_file( + checkpoint_config_path, + config_class=Config, + model_config_class=None, + skip_unused_config_keys=True, + skip_null_keys=True, + ) - with htrack_block("Load nanotron config"): - # Create nanotron config - if not checkpoint_config_path.endswith(".yaml"): - raise ValueError("The checkpoint path should point to a YAML file") - - model_config = get_config_from_file( - checkpoint_config_path, - config_class=Config, - model_config_class=None, - skip_unused_config_keys=True, - skip_null_keys=True, - ) - - # We are getting an type error, because the get_config_from_file is not correctly typed, + # Create or use default lighteval config + if lighteval_config_path is not None: lighteval_config: LightEvalConfig = get_config_from_file(lighteval_config_path, config_class=LightEvalConfig) # type: ignore - nanotron_config = FullNanotronConfig(lighteval_config, model_config) + else: + # Create default config with minimal required parameters + default_logging = LightEvalLoggingArgs( + output_dir="./eval_results" + ) + default_tasks = LightEvalTasksArgs( + tasks="lighteval|agieval:aqua-rat|5|0" + ) + default_parallelism = ParallelismArgs(dp=1, pp=1, tp=1) + lighteval_config = LightEvalConfig( + logging=default_logging, + tasks=default_tasks, + parallelism=default_parallelism + ) + + nanotron_config = FullNanotronConfig(lighteval_config, model_config) evaluation_tracker = EvaluationTracker( output_dir=lighteval_config.logging.output_dir, diff --git a/src/lighteval/models/__init__.py b/src/lighteval/models/__init__.py new file mode 100644 index 000000000..064e2842d --- /dev/null +++ b/src/lighteval/models/__init__.py @@ -0,0 +1,21 @@ +# MIT License + +# Copyright (c) 2024 The HuggingFace Team + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. \ No newline at end of file diff --git a/src/lighteval/models/nanotron/__init__.py b/src/lighteval/models/nanotron/__init__.py new file mode 100644 index 000000000..064e2842d --- /dev/null +++ b/src/lighteval/models/nanotron/__init__.py @@ -0,0 +1,21 @@ +# MIT License + +# Copyright (c) 2024 The HuggingFace Team + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. \ No newline at end of file diff --git a/src/lighteval/models/nanotron/nanotron_model.py b/src/lighteval/models/nanotron/nanotron_model.py index 5d5bb934c..b785eeb0b 100644 --- a/src/lighteval/models/nanotron/nanotron_model.py +++ b/src/lighteval/models/nanotron/nanotron_model.py @@ -343,7 +343,14 @@ def tok_decode(self, tokens: torch.LongTensor) -> List[str]: return self.tokenizer.batch_decode(tokens, skip_special_tokens=True) def _model_call(self, inputs: torch.Tensor) -> torch.Tensor: - return self.model(inputs) + position_ids = ( + torch.arange( + inputs.shape[1], device=inputs.device, dtype=torch.int32 + ) + .unsqueeze(0) + .repeat(inputs.shape[0], 1) + ) + return self.model(inputs, position_ids) def homogeneize_ending_conditions(self, ending_condition: tuple | dict | list | str) -> tuple[list, int]: """Ending conditions are submitted in several possible formats. 
@@ -711,14 +718,14 @@ def _loglikelihood_single_token(
                 inputs, padding_length=max_context, max_context=max_context, full_attention_masks=True
             )  # batched_inputs, batch_attention, input_lengths, truncated, padded
 
-
-            out = self.model(input_ids=batch_model.input_ids, input_mask=batch_model.input_mask)
+            position_ids = torch.arange(batch_model.input_ids.shape[1], device=self.device, dtype=torch.int32).unsqueeze(0).repeat(batch_model.input_ids.shape[0], 1)
+            out = self.model(input_ids=batch_model.input_ids, position_ids=position_ids)
 
             if dist.get_rank(self.parallel_context.pp_pg) == self.output_pp_rank:
                 # This process got outputs
 
-                # Gather all the output across TP
-                out = out.transpose(0, 1).contiguous()  # [batch, seq_length, vocab]
+                # Gather all the output across TP
+                out = out.view(*batch_model.input_ids.shape, -1).contiguous()  # [batch, seq_length, vocab]
 
                 gathered_out = [torch.zeros_like(out) for _ in range(self.parallel_context.tp_pg.size())]
                 dist.all_gather(gathered_out, out, group=self.parallel_context.tp_pg, async_op=False)
@@ -944,7 +951,8 @@ def _loglikelihood_tokens(
             )  # batched_inputs, batch_attention, input_lengths, truncated, padded
 
             with torch.no_grad():
-                out = self.model(input_ids=batch_model.input_ids, input_mask=batch_model.input_mask)
+                position_ids = torch.arange(batch_model.input_ids.shape[1], device=self.device, dtype=torch.int32).unsqueeze(0).repeat(batch_model.input_ids.shape[0], 1)
+                out = self.model(input_ids=batch_model.input_ids, position_ids=position_ids)
 
             if dist.get_rank(self.parallel_context.pp_pg) == self.output_pp_rank:
                 # This process got outputs
@@ -954,7 +962,7 @@
                 dist.all_gather(gathered_out, out, group=self.parallel_context.tp_pg, async_op=False)
 
                 out = torch.cat(gathered_out, dim=-1)
-                out = out.transpose(0, 1)  # [batch, seq_length, vocab]
+                out = out.view(*batch_model.input_ids.shape, -1)  # [batch, seq_length, vocab]
                 multi_logits = F.log_softmax(out, dim=-1)  # [batch, padding_length, vocab]
 
                 logits_sum = []
diff --git a/src/lighteval/models/nanotron_model.py b/src/lighteval/models/nanotron_model.py
new file mode 100644
index 000000000..4a1ed72c6
--- /dev/null
+++ b/src/lighteval/models/nanotron_model.py
@@ -0,0 +1,26 @@
+# MIT License
+
+# Copyright (c) 2024 The HuggingFace Team
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+ +# Import and re-export the NanotronLightevalModel class from the nanotron module +from lighteval.models.nanotron.nanotron_model import NanotronLightevalModel + +__all__ = ["NanotronLightevalModel"] \ No newline at end of file diff --git a/src/lighteval/pipeline.py b/src/lighteval/pipeline.py index 39e007b33..68ae71920 100644 --- a/src/lighteval/pipeline.py +++ b/src/lighteval/pipeline.py @@ -72,7 +72,7 @@ from nanotron.parallel.context import ParallelContext from nanotron.utils import local_ranks_zero_first - from lighteval.models.nanotron_model import NanotronLightevalModel + # from lighteval.models.nanotron import NanotronLightevalModel import logging @@ -187,15 +187,18 @@ def _init_model(self, model_config, model): logger.info("--- LOADING MODEL ---") if model_config is not None: if self.parallel_context: + from lighteval.models.nanotron_model import NanotronLightevalModel + return NanotronLightevalModel( checkpoint_path=os.path.dirname(self.pipeline_parameters.nanotron_checkpoint_path) if self.pipeline_parameters.nanotron_checkpoint_path else "", - nanotron_config=self.model_config, + nanotron_config=model_config, parallel_context=self.parallel_context, debug_one_layer_model=False, model_class=None, ) + # return None else: return load_model(config=model_config) if isinstance(model, TransformersModel): From aad905096ee7e9cf076fdc9c118bc3009bfe8bc0 Mon Sep 17 00:00:00 2001 From: nouamanetazi Date: Sat, 22 Mar 2025 11:31:06 +0000 Subject: [PATCH 03/13] . --- src/lighteval/main_nanotron.py | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/src/lighteval/main_nanotron.py b/src/lighteval/main_nanotron.py index 1b973a112..22755997a 100644 --- a/src/lighteval/main_nanotron.py +++ b/src/lighteval/main_nanotron.py @@ -42,7 +42,7 @@ def nanotron( checkpoint_config_path: Annotated[ str, Option(help="Path to the nanotron checkpoint YAML or python config file, potentially on s3.") ], - lighteval_config_path: Annotated[str, Option(help="Path to a YAML config to be used for the evaluation.")] = None, + lighteval_config_path: Annotated[str, Option(help="Path to a YAML config to be used for the evaluation.")], cache_dir: Annotated[str, Option(help="Cache directory for datasets and models.")] = CACHE_DIR, ): """ @@ -74,23 +74,8 @@ def nanotron( skip_null_keys=True, ) - # Create or use default lighteval config - if lighteval_config_path is not None: - lighteval_config: LightEvalConfig = get_config_from_file(lighteval_config_path, config_class=LightEvalConfig) # type: ignore - else: - # Create default config with minimal required parameters - default_logging = LightEvalLoggingArgs( - output_dir="./eval_results" - ) - default_tasks = LightEvalTasksArgs( - tasks="lighteval|agieval:aqua-rat|5|0" - ) - default_parallelism = ParallelismArgs(dp=1, pp=1, tp=1) - lighteval_config = LightEvalConfig( - logging=default_logging, - tasks=default_tasks, - parallelism=default_parallelism - ) + # Load lighteval config + lighteval_config: LightEvalConfig = get_config_from_file(lighteval_config_path, config_class=LightEvalConfig) # type: ignore nanotron_config = FullNanotronConfig(lighteval_config, model_config) From 7995fa63a92146d5796bf9cb73c5da41bddcb447 Mon Sep 17 00:00:00 2001 From: nouamanetazi Date: Sat, 22 Mar 2025 11:37:11 +0000 Subject: [PATCH 04/13] . 
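Clean up the temporary modules added in PATCH 02/13: delete
src/lighteval/models/__init__.py, src/lighteval/models/nanotron/__init__.py
and the src/lighteval/models/nanotron_model.py re-export shim, and restore
the direct module-level import in pipeline.py:

    from lighteval.models.nanotron.nanotron_model import NanotronLightevalModel
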
--- src/lighteval/models/__init__.py | 21 ------------------ src/lighteval/models/nanotron/__init__.py | 21 ------------------ src/lighteval/models/nanotron_model.py | 26 ----------------------- src/lighteval/pipeline.py | 7 ++---- 4 files changed, 2 insertions(+), 73 deletions(-) delete mode 100644 src/lighteval/models/__init__.py delete mode 100644 src/lighteval/models/nanotron/__init__.py delete mode 100644 src/lighteval/models/nanotron_model.py diff --git a/src/lighteval/models/__init__.py b/src/lighteval/models/__init__.py deleted file mode 100644 index 064e2842d..000000000 --- a/src/lighteval/models/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -# MIT License - -# Copyright (c) 2024 The HuggingFace Team - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. \ No newline at end of file diff --git a/src/lighteval/models/nanotron/__init__.py b/src/lighteval/models/nanotron/__init__.py deleted file mode 100644 index 064e2842d..000000000 --- a/src/lighteval/models/nanotron/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -# MIT License - -# Copyright (c) 2024 The HuggingFace Team - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
\ No newline at end of file diff --git a/src/lighteval/models/nanotron_model.py b/src/lighteval/models/nanotron_model.py deleted file mode 100644 index 4a1ed72c6..000000000 --- a/src/lighteval/models/nanotron_model.py +++ /dev/null @@ -1,26 +0,0 @@ -# MIT License - -# Copyright (c) 2024 The HuggingFace Team - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -# Import and re-export the NanotronLightevalModel class from the nanotron module -from lighteval.models.nanotron.nanotron_model import NanotronLightevalModel - -__all__ = ["NanotronLightevalModel"] \ No newline at end of file diff --git a/src/lighteval/pipeline.py b/src/lighteval/pipeline.py index 68ae71920..bc0f5c819 100644 --- a/src/lighteval/pipeline.py +++ b/src/lighteval/pipeline.py @@ -72,7 +72,7 @@ from nanotron.parallel.context import ParallelContext from nanotron.utils import local_ranks_zero_first - # from lighteval.models.nanotron import NanotronLightevalModel + from lighteval.models.nanotron.nanotron_model import NanotronLightevalModel import logging @@ -186,9 +186,7 @@ def _init_parallelism_manager(self): def _init_model(self, model_config, model): logger.info("--- LOADING MODEL ---") if model_config is not None: - if self.parallel_context: - from lighteval.models.nanotron_model import NanotronLightevalModel - + if self.parallel_context: return NanotronLightevalModel( checkpoint_path=os.path.dirname(self.pipeline_parameters.nanotron_checkpoint_path) if self.pipeline_parameters.nanotron_checkpoint_path @@ -198,7 +196,6 @@ def _init_model(self, model_config, model): debug_one_layer_model=False, model_class=None, ) - # return None else: return load_model(config=model_config) if isinstance(model, TransformersModel): From eca97ebba0c6c91c4df33c399536547584496fae Mon Sep 17 00:00:00 2001 From: Jason Stillerman Date: Wed, 26 Mar 2025 00:53:13 +0000 Subject: [PATCH 05/13] allow extra keywords in LightevalTaskConfig --- src/lighteval/tasks/lighteval_task.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lighteval/tasks/lighteval_task.py b/src/lighteval/tasks/lighteval_task.py index da09ec000..2681e779d 100644 --- a/src/lighteval/tasks/lighteval_task.py +++ b/src/lighteval/tasks/lighteval_task.py @@ -107,6 +107,7 @@ class LightevalTaskConfig: few_shots_select: Optional[str] = None # Generation args + output_regex: Optional[str] = None generation_size: Optional[int] = None generation_grammar: Optional[TextGenerationInputGrammarType] = None stop_sequence: Optional[ListLike[str]] = None @@ -120,6 +121,7 @@ class 
LightevalTaskConfig: must_remove_duplicate_docs: bool = False version: int = 0 + frozen: bool = False def __post_init__(self): # If we got a Metrics enums instead of a Metric, we convert From 70f7f9ee81aba20a80d77ac20b82f2a618f3b75d Mon Sep 17 00:00:00 2001 From: "T. Duy Nguyen-Hien" Date: Tue, 6 May 2025 23:59:40 +0800 Subject: [PATCH 06/13] removed EnvConfig for nanotron --- src/lighteval/main_nanotron.py | 14 +++--------- .../models/nanotron/nanotron_model.py | 22 +++++++------------ src/lighteval/pipeline.py | 2 +- 3 files changed, 12 insertions(+), 26 deletions(-) diff --git a/src/lighteval/main_nanotron.py b/src/lighteval/main_nanotron.py index 345be1fe9..2bf951549 100644 --- a/src/lighteval/main_nanotron.py +++ b/src/lighteval/main_nanotron.py @@ -26,9 +26,6 @@ from typer import Option from typing_extensions import Annotated - -CACHE_DIR: str = os.getenv("HF_HOME", "/scratch") - HELP_PANEL_NAME_1 = "Common Parameters" HELP_PANEL_NAME_2 = "Logging Parameters" HELP_PANEL_NAME_3 = "Debug Parameters" @@ -42,8 +39,7 @@ def nanotron( checkpoint_config_path: Annotated[ str, Option(help="Path to the nanotron checkpoint YAML or python config file, potentially on s3.") ], - lighteval_config_path: Annotated[str, Option(help="Path to a YAML config to be used for the evaluation.")], - cache_dir: Annotated[str, Option(help="Cache directory for datasets and models.")] = CACHE_DIR, + lighteval_config_path: Annotated[str, Option(help="Path to a YAML config to be used for the evaluation.")] ): """ Evaluate models using nanotron as backend. @@ -54,9 +50,6 @@ def nanotron( from lighteval.logging.evaluation_tracker import EvaluationTracker from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters from lighteval.utils.imports import NO_NANOTRON_ERROR_MSG, is_nanotron_available - from lighteval.utils.utils import EnvConfig - - env_config = EnvConfig(token=os.getenv("HF_TOKEN"), cache_dir=cache_dir) if not is_nanotron_available(): raise ImportError(NO_NANOTRON_ERROR_MSG) @@ -75,7 +68,7 @@ def nanotron( # We are getting an type error, because the get_config_from_file is not correctly typed, lighteval_config: LightEvalConfig = get_config_from_file(lighteval_config_path, config_class=LightEvalConfig) # type: ignore nanotron_config = FullNanotronConfig(lighteval_config, model_config) - + evaluation_tracker = EvaluationTracker( output_dir=lighteval_config.logging.output_dir, hub_results_org=lighteval_config.logging.results_org, @@ -89,12 +82,11 @@ def nanotron( pipeline_parameters = PipelineParameters( launcher_type=ParallelismManager.NANOTRON, - env_config=env_config, job_id=os.environ.get("SLURM_JOB_ID", 0), nanotron_checkpoint_path=checkpoint_config_path, dataset_loading_processes=lighteval_config.tasks.dataset_loading_processes, custom_tasks_directory=lighteval_config.tasks.custom_tasks, - override_batch_size=lighteval_config.batch_size, + # override_batch_size=lighteval_config.batch_size, num_fewshot_seeds=1, max_samples=lighteval_config.tasks.max_samples, use_chat_template=False, diff --git a/src/lighteval/models/nanotron/nanotron_model.py b/src/lighteval/models/nanotron/nanotron_model.py index 5d5bb934c..70d37122a 100644 --- a/src/lighteval/models/nanotron/nanotron_model.py +++ b/src/lighteval/models/nanotron/nanotron_model.py @@ -56,7 +56,7 @@ ) from lighteval.utils.imports import is_nanotron_available from lighteval.utils.parallelism import find_executable_batch_size -from lighteval.utils.utils import EnvConfig, as_list +from lighteval.utils.utils import as_list logger = 
logging.getLogger(__name__)
@@ -101,7 +101,6 @@ def __init__(
         trust_remote_code: bool = False,
         debug_one_layer_model: bool = False,
         model_class: Optional[Type] = None,
-        env_config: EnvConfig = None,
     ):
         """Initializes a nanotron model for evaluation.
         Args:
@@ -138,7 +137,6 @@ def __init__(
         self._add_special_tokens = add_special_tokens
         self._tokenizer = self._create_auto_tokenizer(
             pretrained=tokenizer.tokenizer_name_or_path,
-            env_config=env_config,
             trust_remote_code=trust_remote_code,
         )
         self._tokenizer.model_max_length = self.max_length
@@ -230,7 +228,6 @@ def _create_auto_tokenizer(
         *,
         pretrained: str,
         tokenizer: Optional[str] = None,
-        env_config: EnvConfig = None,
         trust_remote_code: bool = False,
     ) -> transformers.PreTrainedTokenizer:
         """Returns a pre-trained tokenizer from a pre-trained tokenizer configuration."""
@@ -238,15 +235,11 @@
         try:
             tokenizer = AutoTokenizer.from_pretrained(
                 pretrained if tokenizer is None else tokenizer,
-                cache_dir=env_config.cache_dir,
-                token=env_config.token,
                 trust_remote_code=trust_remote_code,
             )
         except RecursionError:
             tokenizer = AutoTokenizer.from_pretrained(
                 pretrained if tokenizer is None else tokenizer,
-                cache_dir=env_config.cache_dir,
-                token=env_config.token,
                 unk_token="",
                 trust_remote_code=trust_remote_code,
             )
@@ -711,14 +704,14 @@ def _loglikelihood_single_token(
                 inputs, padding_length=max_context, max_context=max_context, full_attention_masks=True
             )  # batched_inputs, batch_attention, input_lengths, truncated, padded
 
-
-            out = self.model(input_ids=batch_model.input_ids, input_mask=batch_model.input_mask)
+            position_ids = torch.arange(batch_model.input_ids.shape[1], device=self.device, dtype=torch.int32).unsqueeze(0).repeat(batch_model.input_ids.shape[0], 1)
+            out = self.model(input_ids=batch_model.input_ids, position_ids=position_ids)
 
             if dist.get_rank(self.parallel_context.pp_pg) == self.output_pp_rank:
                 # This process got outputs
 
-                # Gather all the output across TP
-                out = out.transpose(0, 1).contiguous()  # [batch, seq_length, vocab]
+                # Gather all the output across TP
+                out = out.view(*batch_model.input_ids.shape, -1).contiguous()  # [batch, seq_length, vocab]
 
                 gathered_out = [torch.zeros_like(out) for _ in range(self.parallel_context.tp_pg.size())]
                 dist.all_gather(gathered_out, out, group=self.parallel_context.tp_pg, async_op=False)
@@ -944,7 +937,8 @@ def _loglikelihood_tokens(
             )  # batched_inputs, batch_attention, input_lengths, truncated, padded
 
             with torch.no_grad():
-                out = self.model(input_ids=batch_model.input_ids, input_mask=batch_model.input_mask)
+                position_ids = torch.arange(batch_model.input_ids.shape[1], device=self.device, dtype=torch.int32).unsqueeze(0).repeat(batch_model.input_ids.shape[0], 1)
+                out = self.model(input_ids=batch_model.input_ids, position_ids=position_ids)
 
             if dist.get_rank(self.parallel_context.pp_pg) == self.output_pp_rank:
                 # This process got outputs
@@ -954,7 +948,7 @@
                 dist.all_gather(gathered_out, out, group=self.parallel_context.tp_pg, async_op=False)
 
                 out = torch.cat(gathered_out, dim=-1)
-                out = out.transpose(0, 1)  # [batch, seq_length, vocab]
+                out = out.view(*batch_model.input_ids.shape, -1)  # [batch, seq_length, vocab]
                 multi_logits = F.log_softmax(out, dim=-1)  # [batch, padding_length, vocab]
 
                 logits_sum = []
diff --git a/src/lighteval/pipeline.py b/src/lighteval/pipeline.py
index 439286f10..f24021b99 100644
--- a/src/lighteval/pipeline.py
+++ b/src/lighteval/pipeline.py
@@ -155,7 +155,7 @@ def __init__(
         self.accelerator, self.parallel_context =
self._init_parallelism_manager() self.model = self._init_model(model_config, model) - generation_parameters = model_config.generation_parameters.model_dump() if model_config else {} + generation_parameters = model_config.generation_parameters.model_dump() if model_config and hasattr(model_config, "generation_parameters") else {} self.evaluation_tracker.general_config_logger.log_model_info(generation_parameters, self.model.model_info) self._init_random_seeds() From c4c264c9480670110356727a111f4a350a6b3371 Mon Sep 17 00:00:00 2001 From: "T. Duy Nguyen-Hien" Date: Wed, 7 May 2025 00:30:29 +0800 Subject: [PATCH 07/13] used asdict instead of model_dump --- src/lighteval/pipeline.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/lighteval/pipeline.py b/src/lighteval/pipeline.py index 222210ded..a55405f44 100644 --- a/src/lighteval/pipeline.py +++ b/src/lighteval/pipeline.py @@ -27,7 +27,7 @@ import re import shutil from contextlib import nullcontext -from dataclasses import dataclass +from dataclasses import asdict, dataclass from datetime import timedelta from enum import Enum, auto @@ -154,8 +154,7 @@ def __init__( self._metric_options = metric_options or {} self.accelerator, self.parallel_context = self._init_parallelism_manager() self.model = self._init_model(model_config, model) - - generation_parameters = model_config.generation_parameters.model_dump() if model_config and hasattr(model_config, "generation_parameters") else {} + generation_parameters = asdict(model_config.generation_parameters) if model_config and hasattr(model_config, "generation_parameters") else {} self.evaluation_tracker.general_config_logger.log_model_info(generation_parameters, self.model.model_info) self._init_random_seeds() From 4c7a1e7458776971fa38dc895d76e530cf7ab81f Mon Sep 17 00:00:00 2001 From: "T. Duy Nguyen-Hien" Date: Wed, 7 May 2025 00:31:42 +0800 Subject: [PATCH 08/13] added input_mask for nanotron models' forward --- src/lighteval/models/nanotron/nanotron_model.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/src/lighteval/models/nanotron/nanotron_model.py b/src/lighteval/models/nanotron/nanotron_model.py index d0b45c7da..dc285e353 100644 --- a/src/lighteval/models/nanotron/nanotron_model.py +++ b/src/lighteval/models/nanotron/nanotron_model.py @@ -336,14 +336,9 @@ def tok_decode(self, tokens: torch.LongTensor) -> List[str]: return self.tokenizer.batch_decode(tokens, skip_special_tokens=True) def _model_call(self, inputs: torch.Tensor) -> torch.Tensor: - position_ids = ( - torch.arange( - inputs.shape[1], device=inputs.device, dtype=torch.int32 - ) - .unsqueeze(0) - .repeat(inputs.shape[0], 1) - ) - return self.model(inputs, position_ids) + # This is only called for detecting the batch size so we just need a mock input_mask + input_mask = torch.ones_like(inputs) + return self.model(inputs, input_mask) def homogeneize_ending_conditions(self, ending_condition: tuple | dict | list | str) -> tuple[list, int]: """Ending conditions are submitted in several possible formats. 
@@ -711,8 +706,7 @@ def _loglikelihood_single_token( inputs, padding_length=max_context, max_context=max_context, full_attention_masks=True ) # batched_inputs, batch_attention, input_lengths, truncated, padded - position_ids = torch.arange(batch_model.input_ids.shape[1], device=self.device, dtype=torch.int32).unsqueeze(0).repeat(batch_model.input_ids.shape[0], 1) - out = self.model(input_ids=batch_model.input_ids, position_ids=position_ids) + out = self.model(input_ids=batch_model.input_ids, input_mask=batch_model.input_mask) if dist.get_rank(self.parallel_context.pp_pg) == self.output_pp_rank: # This process got outputs @@ -944,8 +938,7 @@ def _loglikelihood_tokens( ) # batched_inputs, batch_attention, input_lengths, truncated, padded with torch.no_grad(): - position_ids = torch.arange(batch_model.input_ids.shape[1], device=self.device, dtype=torch.int32).unsqueeze(0).repeat(batch_model.input_ids.shape[0], 1) - out = self.model(input_ids=batch_model.input_ids, position_ids=position_ids) + out = self.model(input_ids=batch_model.input_ids, input_mask=batch_model.input_mask) if dist.get_rank(self.parallel_context.pp_pg) == self.output_pp_rank: # This process got outputs From cb63773d1df756301650359c5b83003c44cbe3cd Mon Sep 17 00:00:00 2001 From: "T. Duy Nguyen-Hien" Date: Wed, 7 May 2025 06:45:33 +0800 Subject: [PATCH 09/13] removed override_bs, use batch_size from lighteval_config --- .../models/nanotron/nanotron_model.py | 28 +++++++++---------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/src/lighteval/models/nanotron/nanotron_model.py b/src/lighteval/models/nanotron/nanotron_model.py index dc285e353..c03c24793 100644 --- a/src/lighteval/models/nanotron/nanotron_model.py +++ b/src/lighteval/models/nanotron/nanotron_model.py @@ -114,6 +114,10 @@ def __init__( self._max_length = max_length self.parallel_config = parallel_config self.parallel_context = parallel_context + if hasattr(lighteval_config, "batch_size"): + self.batch_size = lighteval_config.batch_size + else: + self.batch_size = None if parallel_config.pp > 1: # To implement PP parallelism we need to think about how we want to sync the output for the PP ranks without outputs @@ -298,9 +302,9 @@ def max_length(self) -> int: def device(self) -> Union[int, str, torch.device]: return "cuda" - def _get_batch_size(self, max_input_length: int, override_bs: int = 0, starting_batch_size: int = 512) -> int: - if override_bs: - return override_bs + def _get_batch_size(self, max_input_length: int, starting_batch_size: int = 512) -> int: + if self.batch_size is not None: + return self.batch_size logger.warning("Detecting largest batch size") @find_executable_batch_size( @@ -395,7 +399,7 @@ def _check_continuations_start_space(self, continuation: str) -> str: return continuation def loglikelihood_single_token( - self, requests: List[Tuple[str, dict]], override_bs=0 + self, requests: List[Tuple[str, dict]], ) -> List[LoglikelihoodSingleTokenResponse]: """Tokenize the context and continuation and compute the log likelihood of those tokenized sequences. 
@@ -428,11 +432,10 @@ def loglikelihood_single_token( return self._loglikelihood_single_token( requests, - override_bs=override_bs, disable_tqdm=bool(dist.get_rank(self.parallel_context.world_pg) != 0), ) - def loglikelihood(self, requests: List[LoglikelihoodRequest], override_bs=None) -> List[LoglikelihoodResponse]: + def loglikelihood(self, requests: List[LoglikelihoodRequest]) -> List[LoglikelihoodResponse]: """Tokenize the context and continuation and compute the log likelihood of those tokenized sequences. """ @@ -450,12 +453,11 @@ def loglikelihood(self, requests: List[LoglikelihoodRequest], override_bs=None) return self._loglikelihood_tokens( requests, - override_bs=override_bs, disable_tqdm=bool(dist.get_rank(self.parallel_context.world_pg) != 0), ) def loglikelihood_rolling( - self, requests: List[LoglikelihoodRollingRequest], override_bs: int = 0 + self, requests: List[LoglikelihoodRollingRequest], ) -> List[LoglikelihoodResponse]: """This function is used to compute the log likelihood of the context for perplexity metrics.""" for request in tqdm( @@ -466,7 +468,6 @@ def loglikelihood_rolling( results = self._loglikelihood_tokens( requests, - override_bs=override_bs, disable_tqdm=bool(dist.get_rank(self.parallel_context.world_pg) != 0), return_bool_score=False, ) @@ -632,7 +633,7 @@ def _get_subsets(self, dataset, num_dataset_splits): @torch.inference_mode() def _loglikelihood_single_token( - self, requests, disable_tqdm: bool = False, override_bs: int = 0, num_dataset_splits: int = 1 + self, requests, disable_tqdm: bool = False, num_dataset_splits: int = 1 ) -> List[LoglikelihoodSingleTokenResponse]: dataset = LoglikelihoodSingleTokenDataset(requests=requests) res = [] @@ -660,7 +661,7 @@ def _loglikelihood_single_token( context_enc = dataset[0].tokenized_context max_context = len(context_enc[-self.max_length :]) batch_size = self._get_batch_size( - override_bs=override_bs, max_input_length=max_context, starting_batch_size=starting_batch_size + max_input_length=max_context, starting_batch_size=starting_batch_size ) starting_batch_size = batch_size * 2 # for the next round @@ -860,7 +861,6 @@ def _loglikelihood_tokens( self, requests, disable_tqdm: bool = False, - override_bs: int = -1, num_dataset_splits: int = 1, return_bool_score: bool = True, ) -> List[LoglikelihoodResponse]: @@ -892,7 +892,7 @@ def _loglikelihood_tokens( max_context = len((context_enc + continuation_enc)[-(self.max_length + 1) :][:-1]) batch_size = self._get_batch_size( - override_bs=override_bs, max_input_length=max_context, starting_batch_size=starting_batch_size + max_input_length=max_context, starting_batch_size=starting_batch_size ) starting_batch_size = batch_size * 2 # for the next round @@ -1094,7 +1094,6 @@ def greedy_until( self, requests: List[GreedyUntilRequest], disable_tqdm: bool = False, - override_bs: int = -1, num_dataset_splits: int = 1, ) -> List[GenerativeResponse]: """Greedy generation until a stop token is generated.""" @@ -1134,7 +1133,6 @@ def greedy_until( max_input_length = min(len(context_enc) + max_gen, self.max_length) batch_size = self._get_batch_size( - override_bs=override_bs, max_input_length=max_input_length, starting_batch_size=starting_batch_size, ) From b7feb729f2b04e14dc97faa573ab4481afd9599c Mon Sep 17 00:00:00 2001 From: "T. 
Duy Nguyen-Hien" Date: Tue, 20 May 2025 11:03:03 +0800 Subject: [PATCH 10/13] fixed dataclass & pydantic dual compat in pipeline.py --- src/lighteval/config/lighteval_config.py | 2 +- src/lighteval/main_nanotron.py | 11 +++++++---- src/lighteval/models/nanotron/nanotron_model.py | 14 ++++++-------- src/lighteval/pipeline.py | 14 +++++++++++--- 4 files changed, 25 insertions(+), 16 deletions(-) diff --git a/src/lighteval/config/lighteval_config.py b/src/lighteval/config/lighteval_config.py index 0e8217afe..100ab5431 100644 --- a/src/lighteval/config/lighteval_config.py +++ b/src/lighteval/config/lighteval_config.py @@ -101,7 +101,7 @@ class LightEvalConfig: class FullNanotronConfig: lighteval_config: LightEvalConfig nanotron_config: "Config" - + @property def generation_parameters(self): # Return the generation parameters from the lighteval config diff --git a/src/lighteval/main_nanotron.py b/src/lighteval/main_nanotron.py index d10483131..d263090db 100644 --- a/src/lighteval/main_nanotron.py +++ b/src/lighteval/main_nanotron.py @@ -26,6 +26,7 @@ from typer import Option from typing_extensions import Annotated + HELP_PANEL_NAME_1 = "Common Parameters" HELP_PANEL_NAME_2 = "Logging Parameters" HELP_PANEL_NAME_3 = "Debug Parameters" @@ -39,15 +40,17 @@ def nanotron( checkpoint_config_path: Annotated[ str, Option(help="Path to the nanotron checkpoint YAML or python config file, potentially on s3.") ], - lighteval_config_path: Annotated[str, Option(help="Path to a YAML config to be used for the evaluation.")] + lighteval_config_path: Annotated[str, Option(help="Path to a YAML config to be used for the evaluation.")], ): """ Evaluate models using nanotron as backend. """ from nanotron.config import Config, get_config_from_file - from nanotron.config.parallelism_config import ParallelismArgs - from lighteval.config.lighteval_config import FullNanotronConfig, LightEvalConfig, LightEvalLoggingArgs, LightEvalTasksArgs + from lighteval.config.lighteval_config import ( + FullNanotronConfig, + LightEvalConfig, + ) from lighteval.logging.evaluation_tracker import EvaluationTracker from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters from lighteval.utils.imports import NO_NANOTRON_ERROR_MSG, is_nanotron_available @@ -76,7 +79,7 @@ def nanotron( # We are getting an type error, because the get_config_from_file is not correctly typed, lighteval_config: LightEvalConfig = get_config_from_file(lighteval_config_path, config_class=LightEvalConfig) # type: ignore nanotron_config = FullNanotronConfig(lighteval_config, model_config) - + evaluation_tracker = EvaluationTracker( output_dir=lighteval_config.logging.output_dir, hub_results_org=lighteval_config.logging.results_org, diff --git a/src/lighteval/models/nanotron/nanotron_model.py b/src/lighteval/models/nanotron/nanotron_model.py index c03c24793..137caa8a9 100644 --- a/src/lighteval/models/nanotron/nanotron_model.py +++ b/src/lighteval/models/nanotron/nanotron_model.py @@ -399,7 +399,8 @@ def _check_continuations_start_space(self, continuation: str) -> str: return continuation def loglikelihood_single_token( - self, requests: List[Tuple[str, dict]], + self, + requests: List[Tuple[str, dict]], ) -> List[LoglikelihoodSingleTokenResponse]: """Tokenize the context and continuation and compute the log likelihood of those tokenized sequences. 
@@ -457,7 +458,8 @@ def loglikelihood(self, requests: List[LoglikelihoodRequest]) -> List[Loglikelih ) def loglikelihood_rolling( - self, requests: List[LoglikelihoodRollingRequest], + self, + requests: List[LoglikelihoodRollingRequest], ) -> List[LoglikelihoodResponse]: """This function is used to compute the log likelihood of the context for perplexity metrics.""" for request in tqdm( @@ -660,9 +662,7 @@ def _loglikelihood_single_token( # pull longest context sample from request context_enc = dataset[0].tokenized_context max_context = len(context_enc[-self.max_length :]) - batch_size = self._get_batch_size( - max_input_length=max_context, starting_batch_size=starting_batch_size - ) + batch_size = self._get_batch_size(max_input_length=max_context, starting_batch_size=starting_batch_size) starting_batch_size = batch_size * 2 # for the next round @@ -891,9 +891,7 @@ def _loglikelihood_tokens( max_context = len((context_enc + continuation_enc)[-(self.max_length + 1) :][:-1]) - batch_size = self._get_batch_size( - max_input_length=max_context, starting_batch_size=starting_batch_size - ) + batch_size = self._get_batch_size(max_input_length=max_context, starting_batch_size=starting_batch_size) starting_batch_size = batch_size * 2 # for the next round # For the DP replicas diff --git a/src/lighteval/pipeline.py b/src/lighteval/pipeline.py index a55405f44..503e2dd57 100644 --- a/src/lighteval/pipeline.py +++ b/src/lighteval/pipeline.py @@ -27,7 +27,7 @@ import re import shutil from contextlib import nullcontext -from dataclasses import asdict, dataclass +from dataclasses import asdict, dataclass, is_dataclass from datetime import timedelta from enum import Enum, auto @@ -154,7 +154,15 @@ def __init__( self._metric_options = metric_options or {} self.accelerator, self.parallel_context = self._init_parallelism_manager() self.model = self._init_model(model_config, model) - generation_parameters = asdict(model_config.generation_parameters) if model_config and hasattr(model_config, "generation_parameters") else {} + + if model_config and hasattr(model_config, "generation_parameters"): + generation_parameters = ( + asdict(model_config.generation_parameters) + if is_dataclass(model_config.generation_parameters) + else model_config.generation_parameters.model_dump() + ) + else: + generation_parameters = {} self.evaluation_tracker.general_config_logger.log_model_info(generation_parameters, self.model.model_info) self._init_random_seeds() @@ -185,7 +193,7 @@ def _init_parallelism_manager(self): def _init_model(self, model_config, model): logger.info("--- LOADING MODEL ---") if model_config is not None: - if self.parallel_context: + if self.parallel_context: return NanotronLightevalModel( checkpoint_path=os.path.dirname(self.pipeline_parameters.nanotron_checkpoint_path) if self.pipeline_parameters.nanotron_checkpoint_path From 309d2b2e78fe5015aea9bbb503f64411faa919f2 Mon Sep 17 00:00:00 2001 From: "T. 
Duy Nguyen-Hien" Date: Wed, 21 May 2025 00:03:03 +0800 Subject: [PATCH 11/13] let nanotron return results --- src/lighteval/main_nanotron.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/lighteval/main_nanotron.py b/src/lighteval/main_nanotron.py index d263090db..7eed1284b 100644 --- a/src/lighteval/main_nanotron.py +++ b/src/lighteval/main_nanotron.py @@ -68,13 +68,6 @@ def nanotron( skip_unused_config_keys=True, skip_null_keys=True, ) - model_config = get_config_from_file( - checkpoint_config_path, - config_class=Config, - model_config_class=None, - skip_unused_config_keys=True, - skip_null_keys=True, - ) # We are getting an type error, because the get_config_from_file is not correctly typed, lighteval_config: LightEvalConfig = get_config_from_file(lighteval_config_path, config_class=LightEvalConfig) # type: ignore @@ -115,4 +108,8 @@ def nanotron( pipeline.show_results() + results = pipeline.get_results() + pipeline.save_and_push_results() + + return results From 4ca30193b87848ffc66800d5c128edf4c5366600 Mon Sep 17 00:00:00 2001 From: "T. Duy Nguyen-Hien" Date: Wed, 21 May 2025 00:04:30 +0800 Subject: [PATCH 12/13] added nanotron deps, addd [nanotron] option to dev --- pyproject.toml | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4b2c4c768..7e3a18e9f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,9 @@ build-backend = "setuptools.build_meta" [tool.setuptools.packages.find] where = ["src"] +[tool.uv] +no-build-isolation-package = ['flash-attn'] + [project] name = "lighteval" version = "0.9.1.dev0" @@ -88,14 +91,18 @@ optimum = ["optimum==1.12.0"] quantization = ["bitsandbytes>=0.41.0", "auto-gptq>=0.4.2"] adapters = ["peft==0.3.0"] nanotron = [ - "nanotron", - "tensorboardX" + "nanotron@git+https://github.com/huggingface/nanotron@v0.5", + "tensorboardX", + "ninja", + "triton", + "flash-attn>=2.5.0,<2.7.0", + "datatrove[io]" ] tensorboardX = ["tensorboardX"] vllm = ["vllm>=0.7.0", "ray", "more_itertools"] quality = ["ruff==v0.2.2","pre-commit"] tests = ["pytest==7.4.0","deepdiff"] -dev = ["lighteval[accelerate,quality,tests,multilingual,math,extended_tasks,vllm]"] +dev = ["lighteval[accelerate,quality,tests,multilingual,math,extended_tasks,vllm,nanotron]"] docs = ["hf-doc-builder", "watchdog"] extended_tasks = [ "langdetect", # ifeval From c2dd504d846fed71d733c1b1c5b0bff2a8089bfe Mon Sep 17 00:00:00 2001 From: "T. 
Duy Nguyen-Hien" Date: Wed, 21 May 2025 00:09:23 +0800 Subject: [PATCH 13/13] fixed styling --- ...hteval_config_override_nanotron_tests.yaml | 24 +++ src/lighteval/config/lighteval_config.py | 6 + .../models/nanotron/nanotron_model.py | 1 + ...molLM2-1.7B-Instruct-results-nanotron.json | 3 + tests/slow_tests/test_nanotron_model.py | 159 ++++++++++++++++++ 5 files changed, 193 insertions(+) create mode 100644 examples/lighteval_config_override_nanotron_tests.yaml create mode 100644 tests/reference_scores/SmolLM2-1.7B-Instruct-results-nanotron.json create mode 100644 tests/slow_tests/test_nanotron_model.py diff --git a/examples/lighteval_config_override_nanotron_tests.yaml b/examples/lighteval_config_override_nanotron_tests.yaml new file mode 100644 index 000000000..8fdd8227a --- /dev/null +++ b/examples/lighteval_config_override_nanotron_tests.yaml @@ -0,0 +1,24 @@ +# As of right now auto batch size doesn't work, so we use some default +batch_size: 8 +generation: null +logging: + output_dir: "tests/nanotron_logs" + save_details: false + push_to_hub: false + public_run: false + results_org: null + tensorboard_metric_prefix: "eval" +parallelism: + dp: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: false + tp_mode: ALL_REDUCE +tasks: + dataset_loading_processes: 8 + max_samples: 10 + multichoice_continuations_start_space: null + num_fewshot_seeds: null + tasks: leaderboard|arc:challenge|25|0,leaderboard|truthfulqa:mc|0|0,leaderboard|hellaswag|10|0,leaderboard|mmlu:college_chemistry|5|0,leaderboard|mmlu:us_foreign_policy|5|0,lighteval|agieval:aqua-rat|0|0,lighteval|agieval:logiqa-en|0|0,lighteval|agieval:lsat-ar|0|0,lighteval|agieval:lsat-lr|0|0,lighteval|agieval:lsat-rc|0|0,lighteval|agieval:sat-en-without-passage|0|0,lighteval|agieval:sat-en|0|0,lighteval|bigbench:causal_judgment|3|0,lighteval|bigbench:date_understanding|3|0,lighteval|bigbench:disambiguation_qa|3|0,lighteval|bigbench:geometric_shapes|3|0,lighteval|bigbench:logical_deduction_five_objects|3|0,lighteval|bigbench:logical_deduction_seven_objects|3|0,lighteval|bigbench:movie_recommendation|3|0,lighteval|bigbench:navigate|3|0,lighteval|bigbench:ruin_names|3|0,lighteval|bigbench:salient_translation_error_detection|3|0,lighteval|bigbench:snarks|3|0,lighteval|bigbench:temporal_sequences|3|0,lighteval|bigbench:tracking_shuffled_objects_five_objects|3|0,lighteval|bigbench:tracking_shuffled_objects_seven_objects|3|0,test|gsm8k|0|1 + custom_tasks: examples/custom_tasks_tests.py diff --git a/src/lighteval/config/lighteval_config.py b/src/lighteval/config/lighteval_config.py index 100ab5431..9e3fafd68 100644 --- a/src/lighteval/config/lighteval_config.py +++ b/src/lighteval/config/lighteval_config.py @@ -109,3 +109,9 @@ def generation_parameters(self): if self.lighteval_config.generation: return self.lighteval_config.generation return GenerationArgs() + + def __getattr__(self, name): + # Delegate attribute access to nanotron_config if not found in FullNanotronConfig + if hasattr(self.nanotron_config, name): + return getattr(self.nanotron_config, name) + raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'") diff --git a/src/lighteval/models/nanotron/nanotron_model.py b/src/lighteval/models/nanotron/nanotron_model.py index 137caa8a9..dda515311 100644 --- a/src/lighteval/models/nanotron/nanotron_model.py +++ b/src/lighteval/models/nanotron/nanotron_model.py @@ -1236,6 +1236,7 @@ def greedy_until( max_micro_batch_size=batch_size, # ok for PP=1 for PP>1 we'll need to split the batch 
                returns_logits=returns_logits,
                generation_config=self.generation_config,
+                # tokenizer=self.tokenizer  # NOTE[duynht]: This is needed for the current nanotron@main, but that is not compatible with HuggingFaceTB/SmolLM2-nanotron-ckpt
             )
             dist.barrier()  # Got everyone to send their stuff
             outputs = list(outputs)
diff --git a/tests/reference_scores/SmolLM2-1.7B-Instruct-results-nanotron.json b/tests/reference_scores/SmolLM2-1.7B-Instruct-results-nanotron.json
new file mode 100644
index 000000000..3546d526c
--- /dev/null
+++ b/tests/reference_scores/SmolLM2-1.7B-Instruct-results-nanotron.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fdaf63946ad703af4cefaba86769b91b61847abbbc19fe48abfb68fcfb6e023e
+size 50151
diff --git a/tests/slow_tests/test_nanotron_model.py b/tests/slow_tests/test_nanotron_model.py
new file mode 100644
index 000000000..fe4e9d39a
--- /dev/null
+++ b/tests/slow_tests/test_nanotron_model.py
@@ -0,0 +1,159 @@
+# MIT License
+
+# Copyright (c) 2024 The HuggingFace Team
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+import json +import os +from functools import lru_cache, partial +from typing import Callable, Tuple + +import nanotron.constants as nanotron_constants # Add this import +import pytest +import yaml +from deepdiff import DeepDiff +from huggingface_hub import snapshot_download +from packaging.version import Version + +from lighteval.main_nanotron import nanotron # noqa: E402 + + +# Set env var for deterministic run of models +os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" + + +# Download the model checkpoint +@pytest.fixture(scope="session", autouse=True) +def download_model(): + snapshot_download( + repo_id="HuggingFaceTB/SmolLM2-nanotron-ckpt", + allow_patterns=["1700M/final/*"], + local_dir="./SmolLM2-nanotron-ckpt/", + ) + + +MODELS_ARGS = [ + # {"model_name": "gpt2", "use_chat_template": False, "revision": "main", "results_file": "tests/reference_scores/gpt2-results.json"}, + { + "model_name": "SmolLM2-nanotron-ckpt/1700M/final/config.yaml", + "lighteval_config_path": "examples/lighteval_config_override_nanotron_tests.yaml", + "results_file": "tests/reference_scores/SmolLM2-1.7B-Instruct-results-nanotron.json", + } +] +TASKS_PATH = "examples/test_tasks.txt" +CUSTOM_TASKS_PATH = "examples/custom_tasks_tests.py" + +ModelInput = Tuple[str, Callable[[], dict]] + + +# Set data_stages to null in config.yaml before running tests +def set_data_stages_to_null(config_path): + with open(config_path, "r") as f: + config = yaml.safe_load(f) + keys_to_keep = ["model", "tokenizer", "general", "parallelism"] + keys_to_delete = [key for key in config.keys() if key not in keys_to_keep] + for key in keys_to_delete: + del config[key] + if "parallelism" in config and config["parallelism"] is not None: + if "tp_recompute_allgather" in config["parallelism"]: + del config["parallelism"]["tp_recompute_allgather"] + if "recompute_layer" in config["parallelism"]: + del config["parallelism"]["recompute_layer"] + if "model" in config and config["model"] is not None: + if "model_config" in config["model"]: + if "rope_theta" in config["model"]["model_config"]: + del config["model"]["model_config"]["rope_theta"] + if "rope_interleaved" in config["model"]["model_config"]: + del config["model"]["model_config"]["rope_interleaved"] + # config["data_stages"] = None + # if "checkpoints" in config and config["checkpoints"] is not None: + # if "save_final_state" in config["checkpoints"]: + # del config["checkpoints"]["save_final_state"] + # if "optimizer" in config and config["optimizer"] is not None: + # if "optimizer_factory" in config["optimizer"]: + # del config["optimizer"]["optimizer_factory"] + with open(config_path, "w") as f: + yaml.safe_dump(config, f) + + +@lru_cache(maxsize=len(MODELS_ARGS)) +def run_model(checkpoint_config_path: str, lighteval_config_path: str): + """Runs the full main as a black box, using the input model and tasks, on 10 samples without parallelism""" + # Emulate torchrun launch + if "MASTER_ADDR" not in os.environ: + os.environ["MASTER_ADDR"] = "localhost" + if "MASTER_PORT" not in os.environ: + os.environ["MASTER_PORT"] = "60000" # Or any other free port + if "WORLD_SIZE" not in os.environ: + os.environ["WORLD_SIZE"] = "1" + if "RANK" not in os.environ: + os.environ["RANK"] = "0" + if "LOCAL_RANK" not in os.environ: + os.environ["LOCAL_RANK"] = "0" + + results = nanotron( + checkpoint_config_path=checkpoint_config_path, + lighteval_config_path=lighteval_config_path, + ) + return results + + +def generate_tests() -> list[ModelInput]: + """Generate test parameters for all models and 
tasks.""" + + tests = [] + for model_args in MODELS_ARGS: + predictions_lite = partial(run_model, model_args["model_name"], model_args["lighteval_config_path"]) + tests.append((model_args, predictions_lite)) + + return tests + + +# generates the model predictions parameters at test collection time +tests: list[ModelInput] = generate_tests() +ids = [f"{model_input[0]['model_name']}" for model_input in tests] + + +@pytest.mark.slow +@pytest.mark.parametrize("tests", tests, ids=ids) +def test_nanotron_model(tests: list[ModelInput], monkeypatch): # Add monkeypatch fixture + """Evaluates a model on a full task - is parametrized using pytest_generate_test""" + model_args, get_predictions = tests + + # Set data_stages to null in config.yaml before running tests + set_data_stages_to_null(model_args["model_name"]) + + # Monkeypatch CHECKPOINT_VERSION to bypass version check + monkeypatch.setattr(nanotron_constants, "CHECKPOINT_VERSION", Version("1.4")) + + predictions = get_predictions()["results"] + + # Load the reference results + with open(model_args["results_file"], "r") as f: + reference_results = json.load(f)["results"] + + # Change the key names, replace '|' with ':' + reference_results = {k.replace("|", ":"): v for k, v in reference_results.items()} + + # Convert defaultdict values to regular dict for comparison + predictions_dict = {k: dict(v) if hasattr(v, "default_factory") else v for k, v in predictions.items()} + + diff = DeepDiff(reference_results, predictions_dict, ignore_numeric_type_changes=True, math_epsilon=0.05) + + assert diff == {}, f"Differences found: {diff}"