Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions flair/embeddings/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@
from transformers import (
CONFIG_MAPPING,
AutoConfig,
- AutoFeatureExtractor,
+ AutoImageProcessor,
AutoModel,
AutoTokenizer,
- FeatureExtractionMixin,
+ ImageProcessingMixin,
LayoutLMTokenizer,
LayoutLMTokenizerFast,
- LayoutLMv2FeatureExtractor,
+ LayoutLMv2ImageProcessor,
PretrainedConfig,
PreTrainedTokenizer,
T5Config,
Expand Down Expand Up @@ -351,7 +351,7 @@ def __init__(
is_token_embedding: bool = False,
force_device: Optional[torch.device] = None,
force_max_length: bool = False,
- feature_extractor: Optional[FeatureExtractionMixin] = None,
+ feature_extractor: Optional[ImageProcessingMixin] = None,
needs_manual_ocr: Optional[bool] = None,
use_context_separator: bool = True,
) -> None:
Expand Down Expand Up @@ -453,13 +453,13 @@ def _tokenizer_from_bytes(cls, zip_data: BytesIO) -> PreTrainedTokenizer:
return AutoTokenizer.from_pretrained(temp_dir)

@classmethod
- def _feature_extractor_from_bytes(cls, zip_data: Optional[BytesIO]) -> Optional[FeatureExtractionMixin]:
+ def _feature_extractor_from_bytes(cls, zip_data: Optional[BytesIO]) -> Optional[ImageProcessingMixin]:
if zip_data is None:
return None
zip_obj = zipfile.ZipFile(zip_data)
with tempfile.TemporaryDirectory() as temp_dir:
zip_obj.extractall(temp_dir)
- return AutoFeatureExtractor.from_pretrained(temp_dir, apply_ocr=False)
+ return AutoImageProcessor.from_pretrained(temp_dir, apply_ocr=False)

Comment thread
denniszag marked this conversation as resolved.
def __tokenizer_bytes(self):
with tempfile.TemporaryDirectory() as temp_dir:
Expand Down Expand Up @@ -682,7 +682,7 @@ def __build_transformer_model_inputs(
batched_image_encodings = [image_encodings[i] for i in cpu_overflow_to_sample_mapping]
image_encodings = torch.stack(batched_image_encodings)
image_encodings = image_encodings.to(flair.device)
- if isinstance(self.feature_extractor, LayoutLMv2FeatureExtractor):
+ if isinstance(self.feature_extractor, LayoutLMv2ImageProcessor):
model_kwargs["image"] = image_encodings
else:
model_kwargs["pixel_values"] = image_encodings
Expand Down Expand Up @@ -1099,15 +1099,15 @@ def __init__(
logging.set_verbosity_error()

self.tokenizer: PreTrainedTokenizer
- self.feature_extractor: Optional[FeatureExtractionMixin]
+ self.feature_extractor: Optional[ImageProcessingMixin]

if tokenizer_data is None:
# load tokenizer and transformer model
self.tokenizer = AutoTokenizer.from_pretrained(
model, add_prefix_space=True, **transformers_tokenizer_kwargs, **kwargs
)
try:
- self.feature_extractor = AutoFeatureExtractor.from_pretrained(model, apply_ocr=False, **kwargs)
+ self.feature_extractor = AutoImageProcessor.from_pretrained(model, apply_ocr=False, **kwargs)
except OSError:
self.feature_extractor = None
else:
Expand Down
2 changes: 0 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,8 @@ filterwarnings = [
'ignore:Please use `triu` from the `scipy.linalg` namespace, the `scipy.linalg.special_matrices` namespace is deprecated.', # ignore gensim using deprecated scipy
'ignore:bilinear is deprecated and will be removed in Pillow 10', # huggingface layoutlmv2 has deprecated calls.
'ignore:nearest is deprecated and will be removed in Pillow 10', # huggingface layoutlmv2 has deprecated calls.
- 'ignore:The `device` argument is deprecated and will be removed in v5 of Transformers.', # hf layoutlmv3 calls deprecated hf.
"ignore:the imp module is deprecated:DeprecationWarning:past", # ignore DeprecationWarning from hyperopt dependency
"ignore:.*imp module.*:DeprecationWarning", # ignore DeprecationWarnings that involve imp module
- "ignore:The class LayoutLMv3FeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please use LayoutLMv3ImageProcessor instead.", # huggingface layoutlmv3 has deprecated calls.
"ignore:pkg_resources", # huggingface has deprecated calls.
'ignore:Deprecated call to `pkg_resources', # huggingface has deprecated calls.
'ignore:distutils Version classes are deprecated.', # faiss uses deprecated distutils.
Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ types-Deprecated>=1.2.9.2
types-requests>=2.28.11.17
types-tabulate>=0.9.0.2
pyab3p
- transformers!=4.40.1,!=4.40.0
+ transformers>=5.0.0,!=4.40.1,!=4.40.0
Comment thread
denniszag marked this conversation as resolved.
Outdated
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,6 @@ tabulate>=0.8.10
torch>=1.13.1
tqdm>=4.63.0
transformer-smaller-training-vocab>=0.2.3
- transformers[sentencepiece]>=4.25.0,<5.0.0
+ transformers[sentencepiece]>=4.25.0,<6.0.0
wikipedia-api>=0.5.7
bioc<3.0.0,>=2.0.0
Loading