Skip to content
Merged
Show file tree
Hide file tree
Changes from 37 commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
a093dcc
compatibility with transformers 4.52
echarlaix May 15, 2025
da4df29
Merge branch 'main' into transformers4.52
echarlaix May 16, 2025
0f44c76
fix
echarlaix May 19, 2025
45705b8
add back default
echarlaix May 19, 2025
2f0a409
update setup
echarlaix May 21, 2025
ba78808
fix
echarlaix May 21, 2025
3e4c799
Merge branch 'main' into transformers4.52
echarlaix May 21, 2025
4edafac
fix perceiver
echarlaix May 21, 2025
8ff8ec7
fix perceiver test
echarlaix May 21, 2025
e5f1fb7
fix style
echarlaix May 21, 2025
046d0f1
add min max tokens pipeline test
echarlaix May 22, 2025
af9dc87
fix
echarlaix May 22, 2025
fc5e4cc
comment
echarlaix May 22, 2025
22023d2
upgrade runner
echarlaix May 22, 2025
0d5a098
increase batch size for test
echarlaix May 22, 2025
83b551b
fix test
echarlaix May 22, 2025
42cd6d0
style
echarlaix May 22, 2025
c33578d
update model
echarlaix May 22, 2025
e35b1e3
fix
echarlaix May 22, 2025
e7dceb5
add model
echarlaix May 23, 2025
2f57fcc
add fix
echarlaix May 23, 2025
a63dbe7
to rm
echarlaix May 23, 2025
565f3c6
revert
echarlaix May 23, 2025
65541a6
style
echarlaix May 23, 2025
d089a7b
run tests in series
echarlaix May 23, 2025
b14aa7c
fix tests
echarlaix May 23, 2025
0819a99
fix
echarlaix May 23, 2025
e418ae7
tmp
echarlaix May 26, 2025
dddf3d3
update model
echarlaix May 26, 2025
3d2068f
trigger test
echarlaix May 26, 2025
31e9b67
revert
echarlaix May 26, 2025
45292a5
fix
echarlaix May 26, 2025
b550b0c
fix default model id for pipelines
echarlaix May 26, 2025
fb2eabf
style
echarlaix May 26, 2025
36dd759
fix
echarlaix May 26, 2025
c8917bf
fix
echarlaix May 26, 2025
04b2898
style
echarlaix May 26, 2025
350c8f9
fix loading seq2seq models
echarlaix May 27, 2025
aa4623e
add test
echarlaix May 27, 2025
df8f7bb
style
echarlaix May 27, 2025
28bba3c
apply comments
echarlaix May 27, 2025
a4e5d34
remove unused
echarlaix May 27, 2025
463fb81
rename
echarlaix May 27, 2025
834d2b2
style
echarlaix May 27, 2025
4738765
add more tests
echarlaix May 27, 2025
88518df
add test
echarlaix May 27, 2025
24147f5
fix
echarlaix May 27, 2025
b9e5065
style
echarlaix May 27, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test_common.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
fail-fast: false
matrix:
python-version: [3.9]
runs-on: [ubuntu-22.04, windows-2019, macos-13]
runs-on: [ubuntu-22.04, windows-2019, macos-14]

runs-on: ${{ matrix.runs-on }}

Expand Down
90 changes: 68 additions & 22 deletions optimum/pipelines/pipelines_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@
from transformers import (
AudioClassificationPipeline,
AutoConfig,
AutoFeatureExtractor,
AutoImageProcessor,
AutomaticSpeechRecognitionPipeline,
AutoTokenizer,
FeatureExtractionPipeline,
FillMaskPipeline,
ImageClassificationPipeline,
Expand All @@ -41,9 +44,16 @@
)
from transformers import pipeline as transformers_pipeline
from transformers.feature_extraction_utils import PreTrainedFeatureExtractor
from transformers.onnx.utils import get_preprocessor
from transformers.image_processing_utils import BaseImageProcessor
from transformers.pipelines import (
FEATURE_EXTRACTOR_MAPPING,
IMAGE_PROCESSOR_MAPPING,
TOKENIZER_MAPPING,
check_task,
get_default_model_and_revision,
infer_framework_load_model,
)
from transformers.pipelines import SUPPORTED_TASKS as TRANSFORMERS_SUPPORTED_TASKS
from transformers.pipelines import infer_framework_load_model

from ..utils import is_onnxruntime_available, is_transformers_version

Expand Down Expand Up @@ -87,7 +97,7 @@
},
"image-segmentation": {
"impl": ImageSegmentationPipeline,
"class": (ORTModelForSemanticSegmentation,) if is_onnxruntime_available() else (),
"class": (ORTModelForSemanticSegmentation,),
"default": "nvidia/segformer-b0-finetuned-ade-512-512",
"type": "image",
},
Expand Down Expand Up @@ -175,6 +185,8 @@ def load_bettertransformer(
tokenizer=None,
feature_extractor=None,
load_feature_extractor=None,
image_processor=None,
load_image_processor=None,
SUPPORTED_TASKS=None,
subfolder: str = "",
token: Optional[Union[bool, str]] = None,
Expand All @@ -194,9 +206,7 @@ def load_bettertransformer(
else:
model_kwargs = {}

if model is None:
model_id = SUPPORTED_TASKS[targeted_task]["default"]
elif isinstance(model, str):
if isinstance(model, str):
model_id = model
else:
model_id = None
Expand All @@ -219,7 +229,7 @@ def load_bettertransformer(

model = BetterTransformer.transform(model, **kwargs)

return model, model_id, tokenizer, feature_extractor
return model, model_id, tokenizer, feature_extractor, image_processor


def load_ort_pipeline(
Expand All @@ -229,6 +239,8 @@ def load_ort_pipeline(
tokenizer,
feature_extractor,
load_feature_extractor,
image_processor,
load_image_processor,
SUPPORTED_TASKS,
subfolder: str = "",
token: Optional[Union[bool, str]] = None,
Expand All @@ -240,10 +252,7 @@ def load_ort_pipeline(
if model_kwargs is None:
model_kwargs = {}

if model is None:
model_id = SUPPORTED_TASKS[targeted_task]["default"]
model = SUPPORTED_TASKS[targeted_task]["class"][0].from_pretrained(model_id, export=True)
elif isinstance(model, str):
if isinstance(model, str):
model_id = model
model = SUPPORTED_TASKS[targeted_task]["class"][0].from_pretrained(
model, revision=revision, subfolder=subfolder, token=token, **model_kwargs
Expand All @@ -268,13 +277,24 @@ def load_ort_pipeline(
"Could not automatically find a feature extractor for the ORTModel, you must pass a "
"feature_extractor explictly"
)
if image_processor is None and load_image_processor:
for preprocessor in model.preprocessors:
if isinstance(preprocessor, BaseImageProcessor):
image_processor = preprocessor
break
if image_processor is None:
raise ValueError(
"Could not automatically find a feature extractor for the ORTModel, you must pass a "
"image_processor explictly"
)

model_id = None
else:
raise ValueError(
f"""Model {model} is not supported. Please provide a valid model either as string or ORTModel.
You can also provide non model then a default one will be used"""
)
return model, model_id, tokenizer, feature_extractor
return model, model_id, tokenizer, feature_extractor, image_processor


MAPPING_LOADING_FUNC = {
Expand All @@ -288,6 +308,7 @@ def pipeline(
model: Optional[Any] = None,
tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None,
feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None,
image_processor: Optional[Union[str, BaseImageProcessor]] = None,
use_fast: bool = True,
token: Optional[Union[str, bool]] = None,
accelerator: Optional[str] = "ort",
Expand All @@ -309,7 +330,16 @@ def pipeline(
f'Accelerator {accelerator} is not supported. Supported accelerators are "ort" and "bettertransformer".'
)

# copied from transformers.pipelines.__init__.py
supported_tasks = ORT_SUPPORTED_TASKS if accelerator == "ort" else TRANSFORMERS_SUPPORTED_TASKS

if model is None:
if accelerator != "ort":
_, target_task, task_options = check_task(task)
model, default_revision = get_default_model_and_revision(target_task, "pt", task_options)
revision = revision or default_revision
else:
model = supported_tasks[targeted_task]["default"]

hub_kwargs = {
"revision": revision,
"token": token,
Expand All @@ -322,42 +352,52 @@ def pipeline(
config = AutoConfig.from_pretrained(model, _from_pipeline=task, **hub_kwargs, **kwargs)
hub_kwargs["_commit_hash"] = config._commit_hash

supported_tasks = ORT_SUPPORTED_TASKS if accelerator == "ort" else TRANSFORMERS_SUPPORTED_TASKS

no_feature_extractor_tasks = set()
no_tokenizer_tasks = set()
no_image_processor_tasks = set()
for _task, values in supported_tasks.items():
if values["type"] == "text":
no_feature_extractor_tasks.add(_task)
no_image_processor_tasks.add(_task)
elif values["type"] in {"image", "video"}:
no_tokenizer_tasks.add(_task)
elif values["type"] in {"audio"}:
no_tokenizer_tasks.add(_task)
no_image_processor_tasks.add(_task)
elif values["type"] not in ["multimodal", "audio", "video"]:
raise ValueError(f"SUPPORTED_TASK {_task} contains invalid type {values['type']}")

model_config = config or model.config
load_tokenizer = type(model_config) in TOKENIZER_MAPPING or model_config.tokenizer_class is not None
load_feature_extractor = type(model_config) in FEATURE_EXTRACTOR_MAPPING or feature_extractor is not None
load_image_processor = type(model_config) in IMAGE_PROCESSOR_MAPPING or image_processor is not None

# copied from transformers.pipelines.__init__.py l.609
if targeted_task in no_tokenizer_tasks:
# These will never require a tokenizer.
# the model on the other hand might have a tokenizer, but
# the files could be missing from the hub, instead of failing
# on such repos, we just force to not load it.
load_tokenizer = False
else:
load_tokenizer = True

if targeted_task in no_feature_extractor_tasks:
load_feature_extractor = False
else:
load_feature_extractor = True

model, model_id, tokenizer, feature_extractor = MAPPING_LOADING_FUNC[accelerator](
if targeted_task in no_image_processor_tasks:
load_image_processor = False

if load_image_processor and load_feature_extractor:
load_feature_extractor = False

model, model_id, tokenizer, feature_extractor, image_processor = MAPPING_LOADING_FUNC[accelerator](
model,
targeted_task,
load_tokenizer,
tokenizer,
feature_extractor,
load_feature_extractor,
image_processor,
load_image_processor,
SUPPORTED_TASKS=supported_tasks,
config=config,
hub_kwargs=hub_kwargs,
Expand All @@ -366,16 +406,22 @@ def pipeline(
**kwargs,
)

use_fast = kwargs.get(use_fast, "True")
if tokenizer is None and load_tokenizer:
tokenizer = get_preprocessor(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=use_fast, _from_pipeline=task, **hub_kwargs)
Comment thread
echarlaix marked this conversation as resolved.
Outdated
if feature_extractor is None and load_feature_extractor:
feature_extractor = get_preprocessor(model_id)
feature_extractor = AutoFeatureExtractor.from_pretrained(
model_id, use_fast=use_fast, _from_pipeline=task, **hub_kwargs
)
if image_processor is None and load_image_processor:
image_processor = AutoImageProcessor.from_pretrained(model_id, _from_pipeline=task, **hub_kwargs)

return transformers_pipeline(
task,
model=model,
tokenizer=tokenizer,
feature_extractor=feature_extractor,
image_processor=image_processor,
use_fast=use_fast,
**kwargs,
)
9 changes: 3 additions & 6 deletions optimum/utils/input_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import numpy as np

from ..utils import is_diffusers_version, is_tf_available, is_torch_available, is_transformers_version
from ..utils.save_utils import maybe_load_preprocessors
from .normalized_config import (
NormalizedConfig,
NormalizedEncoderDecoderConfig,
Expand Down Expand Up @@ -1614,10 +1615,8 @@ def __init__(
height=height,
**kwargs,
)
preprocessor = maybe_load_preprocessors(normalized_config._name_or_path)[-1]

from transformers.onnx.utils import get_preprocessor

preprocessor = get_preprocessor(normalized_config._name_or_path)
if preprocessor is not None and hasattr(preprocessor, "crop_size"):
self.height = preprocessor.crop_size.get("height", self.height)
self.width = preprocessor.crop_size.get("width", self.width)
Expand All @@ -1644,9 +1643,7 @@ def __init__(
**kwargs,
)

from transformers.onnx.utils import get_preprocessor

preprocessor = get_preprocessor(normalized_config._name_or_path)
preprocessor = maybe_load_preprocessors(normalized_config._name_or_path)[-1]
if preprocessor is not None and hasattr(preprocessor, "size"):
self.height = preprocessor.size.get("height", self.height)
self.width = preprocessor.size.get("width", self.width)
Expand Down
10 changes: 5 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,37 +51,37 @@
"datasets>=1.2.1",
"protobuf>=3.20.1",
"onnxruntime>=1.11.0",
"transformers>=4.36,<4.52.0",
"transformers>=4.36,<4.53.0",
],
"onnxruntime-gpu": [
"onnx",
"datasets>=1.2.1",
"protobuf>=3.20.1",
"onnxruntime-gpu>=1.11.0",
"transformers>=4.36,<4.52.0",
"transformers>=4.36,<4.53.0",
],
"onnxruntime-training": [
"evaluate",
"torch-ort",
"accelerate",
"datasets>=1.2.1",
"protobuf>=3.20.1",
"transformers>=4.36,<4.52.0",
"transformers>=4.36,<4.53.0",
"onnxruntime-training>=1.11.0",
],
"exporters": [
"onnx",
"timm",
"onnxruntime",
"protobuf>=3.20.1",
"transformers>=4.36,<4.52.0",
"transformers>=4.36,<4.53.0",
],
"exporters-gpu": [
"onnx",
"timm",
"onnxruntime-gpu",
"protobuf>=3.20.1",
"transformers>=4.36,<4.52.0",
"transformers>=4.36,<4.53.0",
],
"exporters-tf": [
"onnx",
Expand Down
Loading
Loading