diff --git a/flair/embeddings/transformer.py b/flair/embeddings/transformer.py index b8c6637db3..19972a4100 100644 --- a/flair/embeddings/transformer.py +++ b/flair/embeddings/transformer.py @@ -17,13 +17,13 @@ from transformers import ( CONFIG_MAPPING, AutoConfig, - AutoFeatureExtractor, + AutoImageProcessor, AutoModel, AutoTokenizer, - FeatureExtractionMixin, + ImageProcessingMixin, LayoutLMTokenizer, LayoutLMTokenizerFast, - LayoutLMv2FeatureExtractor, + LayoutLMv2ImageProcessor, PretrainedConfig, PreTrainedTokenizer, T5Config, @@ -351,7 +351,7 @@ def __init__( is_token_embedding: bool = False, force_device: Optional[torch.device] = None, force_max_length: bool = False, - feature_extractor: Optional[FeatureExtractionMixin] = None, + feature_extractor: Optional[ImageProcessingMixin] = None, needs_manual_ocr: Optional[bool] = None, use_context_separator: bool = True, ) -> None: @@ -453,13 +453,13 @@ def _tokenizer_from_bytes(cls, zip_data: BytesIO) -> PreTrainedTokenizer: return AutoTokenizer.from_pretrained(temp_dir) @classmethod - def _feature_extractor_from_bytes(cls, zip_data: Optional[BytesIO]) -> Optional[FeatureExtractionMixin]: + def _feature_extractor_from_bytes(cls, zip_data: Optional[BytesIO]) -> Optional[ImageProcessingMixin]: if zip_data is None: return None zip_obj = zipfile.ZipFile(zip_data) with tempfile.TemporaryDirectory() as temp_dir: zip_obj.extractall(temp_dir) - return AutoFeatureExtractor.from_pretrained(temp_dir, apply_ocr=False) + return AutoImageProcessor.from_pretrained(temp_dir, apply_ocr=False) def __tokenizer_bytes(self): with tempfile.TemporaryDirectory() as temp_dir: @@ -682,7 +682,7 @@ def __build_transformer_model_inputs( batched_image_encodings = [image_encodings[i] for i in cpu_overflow_to_sample_mapping] image_encodings = torch.stack(batched_image_encodings) image_encodings = image_encodings.to(flair.device) - if isinstance(self.feature_extractor, LayoutLMv2FeatureExtractor): + if isinstance(self.feature_extractor, LayoutLMv2ImageProcessor): model_kwargs["image"] = image_encodings else: model_kwargs["pixel_values"] = image_encodings @@ -1099,7 +1099,7 @@ def __init__( logging.set_verbosity_error() self.tokenizer: PreTrainedTokenizer - self.feature_extractor: Optional[FeatureExtractionMixin] + self.feature_extractor: Optional[ImageProcessingMixin] if tokenizer_data is None: # load tokenizer and transformer model @@ -1107,7 +1107,7 @@ def __init__( model, add_prefix_space=True, **transformers_tokenizer_kwargs, **kwargs ) try: - self.feature_extractor = AutoFeatureExtractor.from_pretrained(model, apply_ocr=False, **kwargs) + self.feature_extractor = AutoImageProcessor.from_pretrained(model, apply_ocr=False, **kwargs) except OSError: self.feature_extractor = None else: diff --git a/pyproject.toml b/pyproject.toml index 9711794abb..f3bc4be6d6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,10 +22,8 @@ filterwarnings = [ 'ignore:Please use `triu` from the `scipy.linalg` namespace, the `scipy.linalg.special_matrices` namespace is deprecated.', # ignore gensim using deprecated scipy 'ignore:bilinear is deprecated and will be removed in Pillow 10', # huggingface layoutlmv2 has deprecated calls. 'ignore:nearest is deprecated and will be removed in Pillow 10', # huggingface layoutlmv2 has deprecated calls. - 'ignore:The `device` argument is deprecated and will be removed in v5 of Transformers.', # hf layoutlmv3 calls deprecated hf. "ignore:the imp module is deprecated:DeprecationWarning:past", # ignore DeprecationWarning from hyperopt dependency "ignore:.*imp module.*:DeprecationWarning", # ignore DeprecationWarnings that involve imp module - "ignore:The class LayoutLMv3FeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please use LayoutLMv3ImageProcessor instead.", # huggingface layoutlmv3 has deprecated calls. "ignore:pkg_resources", # huggingface has deprecated calls. 'ignore:Deprecated call to `pkg_resources', # huggingface has deprecated calls. 'ignore:distutils Version classes are deprecated.', # faiss uses deprecated distutils. diff --git a/requirements-dev.txt b/requirements-dev.txt index 8053d231b8..1dc78c1f7e 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -12,4 +12,4 @@ types-Deprecated>=1.2.9.2 types-requests>=2.28.11.17 types-tabulate>=0.9.0.2 pyab3p -transformers!=4.40.1,!=4.40.0 +transformers>=5.0.0,<6.0.0 diff --git a/requirements.txt b/requirements.txt index 39cf750c66..6d8aa774ea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,6 +20,6 @@ tabulate>=0.8.10 torch>=1.13.1 tqdm>=4.63.0 transformer-smaller-training-vocab>=0.2.3 -transformers[sentencepiece]>=4.25.0,<5.0.0 +transformers[sentencepiece]>=4.25.0,<6.0.0 wikipedia-api>=0.5.7 bioc<3.0.0,>=2.0.0