Refactor some core stuff #36539

ArthurZucker · 2025-03-04T14:04:30Z

What does this PR do?

Update the base model prefix things, and make sure the config diff is aligned with the transformers philosophy: explicit.

Before:

LlavaConfig {
  "image_seq_length": 576,
  "image_token_index": 32000,
  "model_type": "llava",
  "multimodal_projector_bias": true,
  "projector_hidden_act": "gelu",
  "text_config": {
    "_attn_implementation_autoset": false,
    "_name_or_path": "",
    "add_cross_attention": false,
    "architectures": null,
    "attention_bias": false,
    "attention_dropout": 0.0,
    "bad_words_ids": null,
    "begin_suppress_tokens": null,
    "bos_token_id": 1,
    "chunk_size_feed_forward": 0,
    "cross_attention_hidden_size": null,
    "decoder_start_token_id": null,
    "diversity_penalty": 0.0,
    "do_sample": false,
    "early_stopping": false,
    "encoder_no_repeat_ngram_size": 0,
    "eos_token_id": 2,
    "exponential_decay_length_penalty": null,
    "finetuning_task": null,
    "forced_bos_token_id": null,
    "forced_eos_token_id": null,
    "head_dim": 128,
    "hidden_act": "silu",
    "hidden_size": 4096,
    "id2label": {
      "0": "LABEL_0",
      "1": "LABEL_1"
    },
    "initializer_range": 0.02,
    "intermediate_size": 11008,
    "is_decoder": false,
    "is_encoder_decoder": false,
    "label2id": {
      "LABEL_0": 0,
      "LABEL_1": 1
    },
    "length_penalty": 1.0,
    "max_length": 20,
    "max_position_embeddings": 2048,
    "min_length": 0,
    "mlp_bias": false,
    "model_type": "llama",
    "no_repeat_ngram_size": 0,
    "num_attention_heads": 32,
    "num_beam_groups": 1,
    "num_beams": 1,
    "num_hidden_layers": 32,
    "num_key_value_heads": 32,
    "num_return_sequences": 1,
    "output_attentions": false,
    "output_hidden_states": false,
    "output_scores": false,
    "pad_token_id": null,
    "prefix": null,
    "pretraining_tp": 1,
    "problem_type": null,
    "pruned_heads": {},
    "remove_invalid_values": false,
    "repetition_penalty": 1.0,
    "return_dict": true,
    "return_dict_in_generate": false,
    "rms_norm_eps": 1e-06,
    "rope_scaling": null,
    "rope_theta": 10000.0,
    "sep_token_id": null,
    "suppress_tokens": null,
    "task_specific_params": null,
    "temperature": 1.0,
    "tf_legacy_loss": false,
    "tie_encoder_decoder": false,
    "tie_word_embeddings": false,
    "tokenizer_class": null,
    "top_k": 50,
    "top_p": 1.0,
    "torch_dtype": null,
    "torchscript": false,
    "typical_p": 1.0,
    "use_bfloat16": false,
    "use_cache": true,
    "vocab_size": 32000
  },
  "transformers_version": "4.50.0.dev0",
  "vision_config": {
    "_attn_implementation_autoset": false,
    "_name_or_path": "",
    "add_cross_attention": false,
    "architectures": null,
    "attention_dropout": 0.0,
    "bad_words_ids": null,
    "begin_suppress_tokens": null,
    "bos_token_id": null,
    "chunk_size_feed_forward": 0,
    "cross_attention_hidden_size": null,
    "decoder_start_token_id": null,
    "diversity_penalty": 0.0,
    "do_sample": false,
    "early_stopping": false,
    "encoder_no_repeat_ngram_size": 0,
    "eos_token_id": null,
    "exponential_decay_length_penalty": null,
    "finetuning_task": null,
    "forced_bos_token_id": null,
    "forced_eos_token_id": null,
    "hidden_act": "quick_gelu",
    "hidden_size": 1024,
    "id2label": {
      "0": "LABEL_0",
      "1": "LABEL_1"
    },
    "image_size": 336,
    "initializer_factor": 1.0,
    "initializer_range": 0.02,
    "intermediate_size": 4096,
    "is_decoder": false,
    "is_encoder_decoder": false,
    "label2id": {
      "LABEL_0": 0,
      "LABEL_1": 1
    },
    "layer_norm_eps": 1e-05,
    "length_penalty": 1.0,
    "max_length": 20,
    "min_length": 0,
    "model_type": "clip_vision_model",
    "no_repeat_ngram_size": 0,
    "num_attention_heads": 16,
    "num_beam_groups": 1,
    "num_beams": 1,
    "num_channels": 3,
    "num_hidden_layers": 24,
    "num_return_sequences": 1,
    "output_attentions": false,
    "output_hidden_states": false,
    "output_scores": false,
    "pad_token_id": null,
    "patch_size": 14,
    "prefix": null,
    "problem_type": null,
    "projection_dim": 768,
    "pruned_heads": {},
    "remove_invalid_values": false,
    "repetition_penalty": 1.0,
    "return_dict": true,
    "return_dict_in_generate": false,
    "sep_token_id": null,
    "suppress_tokens": null,
    "task_specific_params": null,
    "temperature": 1.0,
    "tf_legacy_loss": false,
    "tie_encoder_decoder": false,
    "tie_word_embeddings": true,
    "tokenizer_class": null,
    "top_k": 50,
    "top_p": 1.0,
    "torch_dtype": null,
    "torchscript": false,
    "typical_p": 1.0,
    "use_bfloat16": false,
    "vocab_size": 32000
  },
  "vision_feature_layer": -2,
  "vision_feature_select_strategy": "default"
}

After:

LlavaConfig {
  "image_seq_length": 576,
  "image_token_index": 32000,
  "model_type": "llava",
  "multimodal_projector_bias": true,
  "projector_hidden_act": "gelu",
  "text_config": {
    "attention_bias": false,
    "attention_dropout": 0.0,
    "head_dim": 128,
    "hidden_act": "silu",
    "hidden_size": 4096,
    "initializer_range": 0.02,
    "intermediate_size": 11008,
    "max_position_embeddings": 2048,
    "mlp_bias": false,
    "model_type": "llama",
    "num_attention_heads": 32,
    "num_hidden_layers": 32,
    "num_key_value_heads": 32,
    "pretraining_tp": 1,
    "rms_norm_eps": 1e-06,
    "rope_scaling": null,
    "rope_theta": 10000.0,
    "use_cache": true,
    "vocab_size": 32000
  },
  "transformers_version": "4.50.0.dev0",
  "vision_config": {
    "attention_dropout": 0.0,
    "hidden_act": "quick_gelu",
    "hidden_size": 1024,
    "image_size": 336,
    "initializer_factor": 1.0,
    "initializer_range": 0.02,
    "intermediate_size": 4096,
    "layer_norm_eps": 1e-05,
    "model_type": "clip_vision_model",
    "num_attention_heads": 16,
    "num_channels": 3,
    "num_hidden_layers": 24,
    "patch_size": 14,
    "projection_dim": 768,
    "vocab_size": 32000
  },
  "vision_feature_layer": -2,
  "vision_feature_select_strategy": "default"
}

…e-core

HuggingFaceDocBuilderDev · 2025-03-06T11:13:34Z

The docs for this PR live here. All of your documentation changes will be reflected on that endpoint. The docs are available until 30 days after the last update.

LysandreJik

Nice, clean, and understandable! I appreciate the long explanatory docs even for shorter methods that are key

src/transformers/integrations/tensor_parallel.py

Cyrilvallez

Super nice, very happy to see all the cleanup and the new interface! 🤗
Just added a few nits

src/transformers/integrations/tensor_parallel.py

src/transformers/modeling_utils.py

src/transformers/integrations/tensor_parallel.py

Co-authored-by: Lysandre Debut <[email protected]>

…o update-core

ArthurZucker added 7 commits March 4, 2025 15:03

some config changes

674b78c

update

e787371

Merge branch 'main' of github.com:huggingface/transformers into updat…

5f40f60

…e-core

current state

9950a8b

Merge branch 'main' of github.com:huggingface/transformers into updat…

dd04b1e

…e-core

update

dcb13cc

update

5123975

ArthurZucker and others added 6 commits March 6, 2025 13:29

updates and cleanup

82d72e4

something that works

154c97b

fixup

b19b3d9

fixes

01052e0

nits

3a31092

Merge branch 'main' into update-core

9bf1efc

ArthurZucker marked this pull request as ready for review March 6, 2025 15:03

LysandreJik reviewed Mar 6, 2025

View reviewed changes

ArthurZucker added 2 commits March 6, 2025 18:30

nit

4fa7dcc

nits and fix

7b1bc3c

Cyrilvallez approved these changes Mar 7, 2025

View reviewed changes

src/transformers/integrations/tensor_parallel.py Show resolved Hide resolved

src/transformers/modeling_utils.py Outdated Show resolved Hide resolved

src/transformers/integrations/tensor_parallel.py Outdated Show resolved Hide resolved

ArthurZucker and others added 11 commits March 10, 2025 14:10

Update src/transformers/integrations/tensor_parallel.py

ce2e9e2

Co-authored-by: Lysandre Debut <[email protected]>

Update src/transformers/integrations/tensor_parallel.py

531ef15

Co-authored-by: Lysandre Debut <[email protected]>

cleanup

ce7528d

style

3137b8f

Merge branch 'update-core' of github.com:huggingface/transformers int…

95af148

…o update-core

safe import

90684f1

fix

e71d7e2

updates

11270d0

rename stuff an clean

ebfff6d

style

16d48d8

small updates

6f19123

ArthurZucker and others added 27 commits March 10, 2025 16:59

ups

ca3506a

oups

7aa8d61

nit

ac09587

protect imports

349c860

update tp

2ffbaa0

rodfl

bbc51f8

Merge branch 'main' into update-core

a4dbf02

arf

3bc5cbe

turbo nit on init

b94aed6

fix import error

cd66681

frumble gumbgle

4ae0a70

try to fix the import error

ad7e971

should fix the non model test

16f140f

update keep in float32

feda2b7

update

d40aadf

fix

3344c49

nits

f18219c

fix subvconfigs

387a4a8

test was weird

e723006

nit

cb3142a

fix failing test

442689a

fix instruct blip

9676701

fixes

e78e74f

style

6471670

x.com

cae586b

fix overwrite

25df30e

ok last bit of failing test

de31d97

ArthurZucker merged commit 1c4b62b into main Mar 11, 2025
22 of 24 checks passed

ArthurZucker deleted the update-core branch March 11, 2025 08:26

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Refactor some core stuff #36539

Refactor some core stuff #36539

Uh oh!

ArthurZucker commented Mar 4, 2025 •

edited

Loading

Uh oh!

HuggingFaceDocBuilderDev commented Mar 6, 2025

Uh oh!

LysandreJik left a comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Cyrilvallez left a comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants

Refactor some core stuff #36539

Refactor some core stuff #36539

Uh oh!

Conversation

ArthurZucker commented Mar 4, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

What does this PR do?

Before:

After

Uh oh!

HuggingFaceDocBuilderDev commented Mar 6, 2025

Uh oh!

LysandreJik left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Cyrilvallez left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants

ArthurZucker commented Mar 4, 2025 •

edited

Loading