flairNLP · denniszag · Apr 8, 2026 · Apr 8, 2026
diff --git a/flair/embeddings/transformer.py b/flair/embeddings/transformer.py
@@ -17,13 +17,13 @@
 from transformers import (
     CONFIG_MAPPING,
     AutoConfig,
-    AutoFeatureExtractor,
+    AutoImageProcessor,
     AutoModel,
     AutoTokenizer,
-    FeatureExtractionMixin,
+    ImageProcessingMixin,
     LayoutLMTokenizer,
     LayoutLMTokenizerFast,
-    LayoutLMv2FeatureExtractor,
+    LayoutLMv2ImageProcessor,
     PretrainedConfig,
     PreTrainedTokenizer,
     T5Config,
@@ -351,7 +351,7 @@ def __init__(
         is_token_embedding: bool = False,
         force_device: Optional[torch.device] = None,
         force_max_length: bool = False,
-        feature_extractor: Optional[FeatureExtractionMixin] = None,
+        feature_extractor: Optional[ImageProcessingMixin] = None,
         needs_manual_ocr: Optional[bool] = None,
         use_context_separator: bool = True,
     ) -> None:
@@ -453,13 +453,13 @@ def _tokenizer_from_bytes(cls, zip_data: BytesIO) -> PreTrainedTokenizer:
             return AutoTokenizer.from_pretrained(temp_dir)
 
     @classmethod
-    def _feature_extractor_from_bytes(cls, zip_data: Optional[BytesIO]) -> Optional[FeatureExtractionMixin]:
+    def _feature_extractor_from_bytes(cls, zip_data: Optional[BytesIO]) -> Optional[ImageProcessingMixin]:
         if zip_data is None:
             return None
         zip_obj = zipfile.ZipFile(zip_data)
         with tempfile.TemporaryDirectory() as temp_dir:
             zip_obj.extractall(temp_dir)
-            return AutoFeatureExtractor.from_pretrained(temp_dir, apply_ocr=False)
+            return AutoImageProcessor.from_pretrained(temp_dir, apply_ocr=False)
 
     def __tokenizer_bytes(self):
         with tempfile.TemporaryDirectory() as temp_dir:
@@ -682,7 +682,7 @@ def __build_transformer_model_inputs(
                 batched_image_encodings = [image_encodings[i] for i in cpu_overflow_to_sample_mapping]
                 image_encodings = torch.stack(batched_image_encodings)
             image_encodings = image_encodings.to(flair.device)
-            if isinstance(self.feature_extractor, LayoutLMv2FeatureExtractor):
+            if isinstance(self.feature_extractor, LayoutLMv2ImageProcessor):
                 model_kwargs["image"] = image_encodings
             else:
                 model_kwargs["pixel_values"] = image_encodings
@@ -1099,15 +1099,15 @@ def __init__(
         logging.set_verbosity_error()
 
         self.tokenizer: PreTrainedTokenizer
-        self.feature_extractor: Optional[FeatureExtractionMixin]
+        self.feature_extractor: Optional[ImageProcessingMixin]
 
         if tokenizer_data is None:
             # load tokenizer and transformer model
             self.tokenizer = AutoTokenizer.from_pretrained(
                 model, add_prefix_space=True, **transformers_tokenizer_kwargs, **kwargs
             )
             try:
-                self.feature_extractor = AutoFeatureExtractor.from_pretrained(model, apply_ocr=False, **kwargs)
+                self.feature_extractor = AutoImageProcessor.from_pretrained(model, apply_ocr=False, **kwargs)
             except OSError:
                 self.feature_extractor = None
         else:

diff --git a/pyproject.toml b/pyproject.toml
@@ -22,10 +22,8 @@ filterwarnings = [
     'ignore:Please use `triu` from the `scipy.linalg` namespace, the `scipy.linalg.special_matrices` namespace is deprecated.',  # ignore gensim using deprecated scipy
     'ignore:bilinear is deprecated and will be removed in Pillow 10',  # huggingface layoutlmv2 has deprecated calls.
     'ignore:nearest is deprecated and will be removed in Pillow 10',  # huggingface layoutlmv2 has deprecated calls.
-    'ignore:The `device` argument is deprecated and will be removed in v5 of Transformers.',  # hf layoutlmv3 calls deprecated hf.
     "ignore:the imp module is deprecated:DeprecationWarning:past",  # ignore DeprecationWarning from hyperopt dependency
     "ignore:.*imp module.*:DeprecationWarning",  # ignore DeprecationWarnings that involve imp module
-    "ignore:The class LayoutLMv3FeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please use LayoutLMv3ImageProcessor instead.",  # huggingface layoutlmv3 has deprecated calls.
     "ignore:pkg_resources",  # huggingface has deprecated calls.
     'ignore:Deprecated call to `pkg_resources',  # huggingface has deprecated calls.
     'ignore:distutils Version classes are deprecated.',  # faiss uses deprecated distutils.

diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -12,4 +12,4 @@ types-Deprecated>=1.2.9.2
 types-requests>=2.28.11.17
 types-tabulate>=0.9.0.2
 pyab3p
-transformers!=4.40.1,!=4.40.0
+transformers>=5.0.0
diff --git a/requirements.txt b/requirements.txt
@@ -20,6 +20,6 @@ tabulate>=0.8.10
 torch>=1.13.1
 tqdm>=4.63.0
 transformer-smaller-training-vocab>=0.2.3
-transformers[sentencepiece]>=4.25.0,<5.0.0
+transformers[sentencepiece]>=4.25.0,<6.0.0
 wikipedia-api>=0.5.7
 bioc<3.0.0,>=2.0.0