Merged
Changes from 1 commit
14 changes: 1 addition & 13 deletions src/transformers/activations.py
@@ -16,7 +16,6 @@
 from collections import OrderedDict
 
 import torch
-from packaging import version
 from torch import Tensor, nn
 
 from .utils import logging
@@ -34,14 +33,6 @@ class PytorchGELUTanh(nn.Module):
     match due to rounding errors.
     """
 
-    def __init__(self):
-        super().__init__()
-        if version.parse(torch.__version__) < version.parse("1.12.0"):
-            raise ImportError(
-                f"You are using torch=={torch.__version__}, but torch>=1.12.0 is required to use "
-                "PytorchGELUTanh. Please upgrade torch."
-            )
-
     def forward(self, input: Tensor) -> Tensor:
         return nn.functional.gelu(input, approximate="tanh")
 
@@ -145,10 +136,7 @@ class MishActivation(nn.Module):
 
     def __init__(self):
        super().__init__()
-        if version.parse(torch.__version__) < version.parse("1.9.0"):
-            self.act = self._mish_python
-        else:
-            self.act = nn.functional.mish
+        self.act = nn.functional.mish
 
     def _mish_python(self, input: Tensor) -> Tensor:
         return input * torch.tanh(nn.functional.softplus(input))
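
The two activation changes above drop version-gated fallbacks and call the native torch ops directly. A quick sanity check of the ops these classes now rely on (illustration only, not part of the PR):

import torch
from torch import nn

x = torch.randn(8)

# PytorchGELUTanh forwards to the tanh approximation of GELU; it is close to,
# but not bit-identical with, the exact GELU (hence the "rounding errors" note in the docstring).
print((nn.functional.gelu(x, approximate="tanh") - nn.functional.gelu(x)).abs().max())

# MishActivation now always uses nn.functional.mish; the dropped _mish_python fallback
# computed the same thing by hand.
print(torch.allclose(nn.functional.mish(x), x * torch.tanh(nn.functional.softplus(x)), atol=1e-6))
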
1 change: 0 additions & 1 deletion src/transformers/modeling_utils.py
@@ -1496,7 +1496,6 @@ def create_extended_attention_mask_for_decoder(input_shape, attention_mask, devi
        seq_ids = torch.arange(seq_length, device=device)
        causal_mask = seq_ids[None, None, :].repeat(batch_size, seq_length, 1) <= seq_ids[None, :, None]
        # in case past_key_values are used we need to add a prefix ones mask to the causal mask
-        # causal and attention masks must have same type with pytorch version < 1.3
        causal_mask = causal_mask.to(attention_mask.dtype)
 
        if causal_mask.shape[1] < attention_mask.shape[1]:
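
Only the stale comment is removed here; the mask construction itself is unchanged. For reference, a small standalone run of the same expression (illustration only) shows the lower-triangular causal mask it builds:

import torch

batch_size, seq_length = 1, 4
seq_ids = torch.arange(seq_length)
causal_mask = seq_ids[None, None, :].repeat(batch_size, seq_length, 1) <= seq_ids[None, :, None]
print(causal_mask[0].long())
# tensor([[1, 0, 0, 0],
#         [1, 1, 0, 0],
#         [1, 1, 1, 0],
#         [1, 1, 1, 1]])
causal_mask = causal_mask.to(torch.float32)  # the dtype cast stays; it is just no longer tied to torch < 1.3
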
1 change: 0 additions & 1 deletion src/transformers/models/blip/modeling_blip_text.py
@@ -633,7 +633,6 @@ def get_extended_attention_mask(
                seq_ids = torch.arange(seq_length, device=device)
                causal_mask = seq_ids[None, None, :].repeat(batch_size, seq_length, 1) <= seq_ids[None, :, None]
                # in case past_key_values are used we need to add a prefix ones mask to the causal mask
-                # causal and attention masks must have same type with pytorch version < 1.3
                causal_mask = causal_mask.to(attention_mask.dtype)
 
                if causal_mask.shape[1] < attention_mask.shape[1]:
3 changes: 0 additions & 3 deletions src/transformers/models/code_llama/tokenization_code_llama_fast.py
@@ -20,11 +20,8 @@
 
 from ...tokenization_utils_fast import PreTrainedTokenizerFast
 from ...utils import is_sentencepiece_available, logging
-from ...utils.versions import require_version
 
 
-require_version("tokenizers>=0.13.3")
-
 if is_sentencepiece_available():
     from .tokenization_code_llama import CodeLlamaTokenizer
 else:
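
This and the following tokenizer files drop the `require_version("tokenizers>=0.13.3")` guard; the minimum is presumably enforced through the package requirements instead. As a rough sketch of what such a guard does (assumed behavior; the real helper lives in src/transformers/utils/versions.py and handles more operators):

import importlib.metadata

from packaging import version


def require_version_sketch(requirement: str) -> None:
    # hypothetical minimal re-implementation for illustration; only handles ">=" requirements
    pkg, wanted = requirement.split(">=")
    installed = version.parse(importlib.metadata.version(pkg))
    if installed < version.parse(wanted):
        raise ImportError(f"{pkg}>={wanted} is required, but {installed} is installed")


require_version_sketch("tokenizers>=0.13.3")
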
3 changes: 0 additions & 3 deletions src/transformers/models/cohere/tokenization_cohere_fast.py
@@ -23,11 +23,8 @@
 from ...tokenization_utils_base import BatchEncoding
 from ...tokenization_utils_fast import PreTrainedTokenizerFast
 from ...utils import logging
-from ...utils.versions import require_version
 
 
-require_version("tokenizers>=0.13.3")
-
 logger = logging.get_logger(__name__)
 VOCAB_FILES_NAMES = {"tokenizer_file": "tokenizer.json"}
 
3 changes: 0 additions & 3 deletions src/transformers/models/gemma/tokenization_gemma_fast.py
@@ -20,11 +20,8 @@
 
 from ...tokenization_utils_fast import PreTrainedTokenizerFast
 from ...utils import is_sentencepiece_available, logging
-from ...utils.versions import require_version
 
 
-require_version("tokenizers>=0.13.3")
-
 if is_sentencepiece_available():
     from .tokenization_gemma import GemmaTokenizer
 else:
4 changes: 1 addition & 3 deletions src/transformers/models/gpt_neo/modeling_gpt_neo.py
@@ -42,7 +42,6 @@
     add_start_docstrings,
     add_start_docstrings_to_model_forward,
     is_torch_flex_attn_available,
-    is_torch_fx_available,
     logging,
 )
 from .configuration_gpt_neo import GPTNeoConfig
@@ -60,8 +59,7 @@
 
 # This makes `_prepare_4d_causal_attention_mask` a leaf function in the FX graph.
 # It means that the function will not be traced through and simply appear as a node in the graph.
-if is_torch_fx_available():
-    _prepare_4d_causal_attention_mask = torch.fx.wrap(_prepare_4d_causal_attention_mask)
+_prepare_4d_causal_attention_mask = torch.fx.wrap(_prepare_4d_causal_attention_mask)
 
 
 logger = logging.get_logger(__name__)
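
`torch.fx.wrap` is now applied unconditionally, since every torch version the library still supports ships FX. A hedged, self-contained illustration of what wrapping buys (a toy function, not the actual `_prepare_4d_causal_attention_mask`):

import torch
import torch.fx


def build_causal_mask(length):
    ids = torch.arange(length)
    return ids[None, :] <= ids[:, None]


# registered at module level so symbolic tracing records a single call_function node
# instead of tracing through the data-dependent torch.arange
build_causal_mask = torch.fx.wrap(build_causal_mask)


class Toy(torch.nn.Module):
    def forward(self, scores):
        mask = build_causal_mask(scores.shape[-1])
        return scores.masked_fill(~mask, float("-inf"))


print(torch.fx.symbolic_trace(Toy()).graph)  # build_causal_mask appears as one leaf node
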
3 changes: 0 additions & 3 deletions src/transformers/models/llama/tokenization_llama_fast.py
@@ -20,11 +20,8 @@
 
 from ...tokenization_utils_fast import PreTrainedTokenizerFast
 from ...utils import is_sentencepiece_available, logging
-from ...utils.versions import require_version
 
 
-require_version("tokenizers>=0.13.3")
-
 if is_sentencepiece_available():
     from .tokenization_llama import LlamaTokenizer
 else:
4 changes: 1 addition & 3 deletions src/transformers/models/phimoe/modeling_phimoe.py
@@ -42,7 +42,6 @@
     replace_return_docstrings,
 )
 from ...utils.deprecation import deprecate_kwarg
-from ...utils.import_utils import is_torch_fx_available
 from .configuration_phimoe import PhimoeConfig
 
 
@@ -51,8 +50,7 @@
 
 # This makes `_prepare_4d_causal_attention_mask` a leaf function in the FX graph.
 # It means that the function will not be traced through and simply appear as a node in the graph.
-if is_torch_fx_available():
-    _prepare_4d_causal_attention_mask = torch.fx.wrap(_prepare_4d_causal_attention_mask)
+_prepare_4d_causal_attention_mask = torch.fx.wrap(_prepare_4d_causal_attention_mask)
 
 
 logger = logging.get_logger(__name__)
2 changes: 1 addition & 1 deletion src/transformers/models/vilt/modeling_vilt.py
@@ -171,7 +171,7 @@ def visual_embed(self, pixel_values, pixel_mask, max_image_length=200):
        select = torch.cat(select, dim=0)
        x = x[select[:, 0], select[:, 1]].view(batch_size, -1, num_channels)
        x_mask = x_mask[select[:, 0], select[:, 1]].view(batch_size, -1)
-        # `patch_index` should be on the same device as `select` (for torch>=1.13), which is ensured at definition time.
+        # `patch_index` should be on the same device as `select`, which is ensured at definition time.
        patch_index = patch_index[select[:, 0], select[:, 1]].view(batch_size, -1, 2)
        pos_embed = pos_embed[select[:, 0], select[:, 1]].view(batch_size, -1, num_channels)
 
2 changes: 0 additions & 2 deletions src/transformers/optimization.py
@@ -25,7 +25,6 @@
 from .trainer_pt_utils import LayerWiseDummyOptimizer, LayerWiseDummyScheduler
 from .trainer_utils import SchedulerType
 from .utils import logging
-from .utils.versions import require_version
 
 
 logger = logging.get_logger(__name__)
@@ -701,7 +700,6 @@ def __init__(
        relative_step=True,
        warmup_init=False,
    ):
-        require_version("torch>=1.5.0")  # add_ with alpha
        if lr is not None and relative_step:
            raise ValueError("Cannot combine manual `lr` and `relative_step=True` options")
        if warmup_init and not relative_step:
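
The dropped guard in `Adafactor.__init__` only protected the `Tensor.add_(..., alpha=...)` signature, which every supported torch provides. For illustration:

import torch

param = torch.zeros(3)
update = torch.ones(3)
param.add_(update, alpha=-0.1)  # in-place param += alpha * update; the call the old torch>=1.5 guard protected
print(param)  # tensor([-0.1000, -0.1000, -0.1000])
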
9 changes: 0 additions & 9 deletions src/transformers/testing_utils.py
@@ -138,7 +138,6 @@
     is_tokenizers_available,
     is_torch_available,
     is_torch_bf16_available_on_device,
-    is_torch_bf16_cpu_available,
     is_torch_bf16_gpu_available,
     is_torch_deterministic,
     is_torch_fp16_available_on_device,
@@ -1073,14 +1072,6 @@ def require_torch_bf16_gpu(test_case):
     )(test_case)
 
 
-def require_torch_bf16_cpu(test_case):
-    """Decorator marking a test that requires torch>=1.10, using CPU."""
-    return unittest.skipUnless(
-        is_torch_bf16_cpu_available(),
-        "test requires torch>=1.10, using CPU",
-    )(test_case)
-
-
 def require_deterministic_for_xpu(test_case):
     if is_torch_xpu_available():
         return unittest.skipUnless(is_torch_deterministic(), "test requires torch to use deterministic algorithms")(
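
The removed `require_torch_bf16_cpu` decorator becomes redundant once `is_torch_bf16_cpu_available()` reduces to plain torch availability. A hedged sketch of the shared `unittest.skipUnless` pattern these helpers follow (the decorator name below is hypothetical):

import unittest

from transformers.utils import is_torch_available


def require_torch_sketch(test_case):
    """Skip the wrapped test when torch is unavailable, mirroring what the removed decorator now amounts to."""
    return unittest.skipUnless(is_torch_available(), "test requires torch")(test_case)
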
10 changes: 5 additions & 5 deletions src/transformers/trainer.py
@@ -164,7 +164,7 @@
     is_sagemaker_dp_enabled,
     is_sagemaker_mp_enabled,
     is_schedulefree_available,
-    is_torch_compile_available,
+    is_torch_available,
     is_torch_hpu_available,
     is_torch_mlu_available,
     is_torch_mps_available,
@@ -257,7 +257,7 @@
 
 def _is_peft_model(model):
     if is_peft_available():
-        classes_to_check = (PeftModel,) if is_peft_available() else ()
+        classes_to_check = (PeftModel,)
         # Here we also check if the model is an instance of `PeftMixedModel` introduced in peft>=0.7.0: https://github.com/huggingface/transformers/pull/28321
         if version.parse(importlib.metadata.version("peft")) >= version.parse("0.7.0"):
             from peft import PeftMixedModel
@@ -798,7 +798,7 @@ def __init__(
        self._memory_tracker.stop_and_update_metrics()
 
        # torch.compile
-        if args.torch_compile and not is_torch_compile_available():
+        if args.torch_compile and not is_torch_available():
            raise RuntimeError("Using torch.compile requires PyTorch 2.0 or higher.")
 
        self.is_fsdp_xla_v2_enabled = args.fsdp_config.get("xla_fsdp_v2", False)
@@ -1987,7 +1987,7 @@ def _wrap_model(self, model, training=True, dataloader=None):
        if self.accelerator.unwrap_model(model) is not model:
            return model
 
-        # Mixed precision training with apex (torch < 1.6)
+        # Mixed precision training with apex
        if self.use_apex and training:
            model, self.optimizer = amp.initialize(model, self.optimizer, opt_level=self.args.fp16_opt_level)
 
@@ -3740,7 +3740,7 @@ def training_step(
                torch.musa.empty_cache()
            elif is_torch_npu_available():
                torch.npu.empty_cache()
-            elif is_torch_mps_available(min_version="2.0"):
+            elif is_torch_mps_available():
                torch.mps.empty_cache()
            elif is_torch_hpu_available():
                logger.warning(
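
The runtime version probes that remain in `_is_peft_model` target optional third-party packages rather than torch itself. The pattern, shown standalone (sketch only, requires `peft` to be installed):

import importlib.metadata

from packaging import version

if version.parse(importlib.metadata.version("peft")) >= version.parse("0.7.0"):
    from peft import PeftMixedModel  # only present in newer peft releases

    print(PeftMixedModel)
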
4 changes: 1 addition & 3 deletions src/transformers/training_args.py
@@ -44,7 +44,6 @@
     is_sagemaker_dp_enabled,
     is_sagemaker_mp_enabled,
     is_torch_available,
-    is_torch_bf16_cpu_available,
     is_torch_bf16_gpu_available,
     is_torch_hpu_available,
     is_torch_mlu_available,
@@ -1165,7 +1164,6 @@ class TrainingArguments:
            "help": (
                "Number of batches loaded in advance by each worker. "
                "2 means there will be a total of 2 * num_workers batches prefetched across all workers. "
-                "Default is 2 for PyTorch < 2.0.0 and otherwise None."
            )
        },
    )
@@ -1697,7 +1695,7 @@ def __post_init__(self):
            self.half_precision_backend = self.fp16_backend
 
        if self.bf16 or self.bf16_full_eval:
-            if self.use_cpu and not is_torch_bf16_cpu_available() and not is_torch_xla_available():
+            if self.use_cpu and not is_torch_available() and not is_torch_xla_available():
                # cpu
                raise ValueError("Your setup doesn't support bf16/(cpu, tpu, neuroncore). You need torch>=1.10")
            elif not self.use_cpu:
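
The bf16 branch in `__post_init__` now only requires that torch is importable, because CPU bf16 autocast is present on every supported version. A minimal check of that capability (assumes a torch >= 2.1 install):

import torch

with torch.autocast(device_type="cpu", dtype=torch.bfloat16):
    out = torch.randn(4, 4) @ torch.randn(4, 4)
print(out.dtype)  # torch.bfloat16 (matmul runs under bf16 autocast on CPU)
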
9 changes: 0 additions & 9 deletions src/transformers/utils/fx.py
@@ -61,10 +61,7 @@
 )
 from .import_utils import (
     ENV_VARS_TRUE_VALUES,
-    TORCH_FX_REQUIRED_VERSION,
-    get_torch_version,
     is_peft_available,
-    is_torch_fx_available,
 )
 
 
@@ -891,12 +888,6 @@ class HFTracer(Tracer):
     def __init__(self, autowrap_modules=(math,), autowrap_functions=()):
         super().__init__(autowrap_modules=autowrap_modules, autowrap_functions=autowrap_functions)
 
-        if not is_torch_fx_available():
-            raise ImportError(
-                f"Found an incompatible version of torch. Found version {get_torch_version()}, but only version "
-                f"{TORCH_FX_REQUIRED_VERSION} is supported."
-            )
-
     def _generate_dummy_input(
         self, model: "PreTrainedModel", input_name: str, shape: list[int], input_names: list[str]
     ) -> dict[str, torch.Tensor]:
51 changes: 10 additions & 41 deletions src/transformers/utils/import_utils.py
@@ -221,6 +221,10 @@ def _is_package_available(pkg_name: str, return_version: bool = False) -> Union[
 _torch_available = False
 if USE_TORCH in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TF not in ENV_VARS_TRUE_VALUES:
     _torch_available, _torch_version = _is_package_available("torch", return_version=True)
+    if _torch_available:
+        _torch_available = version.parse(_torch_version) >= version.parse("2.1.0")
+        if not _torch_available:
+            logger.warning(f"Disabling PyTorch because PyTorch >= 2.1 is required but found {_torch_version}")
 else:
     logger.info("Disabling PyTorch because USE_TF is set")
     _torch_available = False
@@ -309,15 +313,6 @@ def _is_package_available(pkg_name: str, return_version: bool = False) -> Union[
     _jax_version = _flax_version = "N/A"
 
 
-_torch_fx_available = False
-if _torch_available:
-    torch_version = version.parse(_torch_version)
-    _torch_fx_available = (torch_version.major, torch_version.minor) >= (
-        TORCH_FX_REQUIRED_VERSION.major,
-        TORCH_FX_REQUIRED_VERSION.minor,
-    )
-
-
 _torch_xla_available = False
 if USE_TORCH_XLA in ENV_VARS_TRUE_VALUES:
     _torch_xla_available, _torch_xla_version = _is_package_available("torch_xla", return_version=True)
@@ -521,19 +516,8 @@ def is_torch_bf16_gpu_available():
     return torch.cuda.is_available() and torch.cuda.is_bf16_supported()
 
 
-def is_torch_bf16_cpu_available():
-    if not is_torch_available():
-        return False
-
-    import torch
-
-    try:
-        # multiple levels of AttributeError depending on the pytorch version so do them all in one check
-        _ = torch.cpu.amp.autocast
-    except AttributeError:
-        return False
-
-    return True
+def is_torch_bf16_cpu_available() -> bool:
+    return is_torch_available()
 
 
 def is_torch_bf16_available():
@@ -609,20 +593,15 @@ def is_torch_tf32_available():
 
     import torch
 
-    if not torch.cuda.is_available() or torch.version.cuda is None:
+    if not torch.cuda.is_available():
         return False
     if torch.cuda.get_device_properties(torch.cuda.current_device()).major < 8:
         return False
-    if int(torch.version.cuda.split(".")[0]) < 11:
-        return False
-    if version.parse(version.parse(torch.__version__).base_version) < version.parse("1.7"):
-        return False
 
     return True
 
 
 def is_torch_fx_available():
-    return _torch_fx_available
+    return is_torch_available()
 
 
 def is_peft_available():
@@ -827,21 +806,11 @@ def is_habana_gaudi1():
 
 
 def is_torchdynamo_available():
-    if not is_torch_available():
-        return False
-
-    return True
+    return is_torch_available()
 
 
 def is_torch_compile_available():
-    if not is_torch_available():
-        return False
-
-    import torch
-
-    # We don't do any version check here to support nighlies marked as 1.14. Ultimately needs to check version against
-    # 2.0 but let's do it later.
-    return hasattr(torch, "compile")
+    return is_torch_available()
 
 
 def is_torchdynamo_compiling():
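
The import_utils.py changes are the heart of the PR: `_torch_available` is now gated on torch >= 2.1, and the remaining capability helpers collapse to that single check. A standalone sketch of the same gate, for quickly probing an environment (not the library's code):

import importlib.metadata

from packaging import version

TORCH_FLOOR = version.parse("2.1.0")

try:
    torch_version = version.parse(importlib.metadata.version("torch"))
    torch_ok = torch_version >= TORCH_FLOOR
except importlib.metadata.PackageNotFoundError:
    torch_version, torch_ok = None, False

print(f"torch={torch_version}, meets the new >=2.1 floor: {torch_ok}")
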
1 change: 0 additions & 1 deletion tests/fsdp/test_fsdp.py
@@ -323,7 +323,6 @@ def test_fsdp_cpu_offloading(self):
 
     @require_torch_multi_accelerator
     @slow
-    @require_fsdp
     @require_fsdp_v2_version
     @require_accelerate_fsdp2
     def test_accelerate_fsdp2_integration(self):
1 change: 0 additions & 1 deletion tests/models/bert/test_modeling_bert.py
@@ -510,7 +510,6 @@ def test_model_as_decoder(self):
         self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)
 
     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,
1 change: 0 additions & 1 deletion (file path not captured in this view)
@@ -273,7 +273,6 @@ def test_decoder_model_past_with_large_inputs(self):
         self.model_tester.create_and_check_decoder_model_past_large_inputs(*config_and_inputs)
 
     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,