Skip to content

Commit b0765be

Browse files
authored
Fix DeepSeek-OCR for Transformers v4 (#41460)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
1 parent 0a201b6 commit b0765be

1 file changed

Lines changed: 8 additions & 15 deletions

File tree

vllm/transformers_utils/configs/deepseek_vl2.py

Lines changed: 8 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -101,7 +101,6 @@ class DeepseekVLV2TextConfig(DeepseekV2Config):
101101

102102
class DeepseekVLV2Config(PretrainedConfig):
103103
model_type = "deepseek_vl_v2"
104-
architectures: list[str] | None = None
105104

106105
tile_tag: str = "2D"
107106
global_view_pos: str = "head"
@@ -114,26 +113,20 @@ def __init__(
114113
candidate_resolutions: tuple[tuple[int, int]] = ((384, 384),),
115114
**kwargs,
116115
):
117-
if "architectures" not in kwargs:
118-
kwargs["architectures"] = ["DeepseekVLV2ForCausalLM"]
116+
architectures = kwargs.setdefault("architectures", ["DeepseekVLV2ForCausalLM"])
119117

120-
vision_config = kwargs.pop("vision_config", {})
121-
self.vision_config = VisionEncoderConfig(**vision_config)
122-
123-
projector_config = kwargs.pop("projector_config", {})
124-
self.projector_config = MlpProjectorConfig(**projector_config)
125-
126-
language_config = kwargs.pop("language_config", {})
127-
self.text_config = DeepseekVLV2TextConfig(**language_config)
118+
self.vision_config = VisionEncoderConfig(**kwargs.pop("vision_config", {}))
119+
self.projector_config = MlpProjectorConfig(**kwargs.pop("projector_config", {}))
120+
self.text_config = DeepseekVLV2TextConfig(**kwargs.pop("language_config", {}))
128121

129122
self.tile_tag = tile_tag
130123
self.global_view_pos = global_view_pos
131124
self.candidate_resolutions = candidate_resolutions
132125
self.vocab_size = self.text_config.vocab_size
133126

134127
# update model_type for OCR models
135-
if "DeepseekOCRForCausalLM" in kwargs["architectures"]:
136-
self.model_type = "deepseek_ocr"
137-
elif "DeepseekOCR2ForCausalLM" in kwargs["architectures"]:
138-
self.model_type = "deepseek_ocr2"
128+
if "DeepseekOCRForCausalLM" in architectures:
129+
kwargs["model_type"] = "deepseek_ocr"
130+
elif "DeepseekOCR2ForCausalLM" in architectures:
131+
kwargs["model_type"] = "deepseek_ocr2"
139132
super().__init__(**kwargs)

0 commit comments

Comments
 (0)