mirror of
https://github.com/immich-app/immich.git
synced 2025-12-12 15:50:43 -08:00
feat(ml): multilingual ocr (#23527)
* handle other languages in ml server
* add variants to model selector
* no need to override path
* unused import
This commit is contained in:
@@ -78,6 +78,14 @@ _INSIGHTFACE_MODELS = {
|
|||||||
_PADDLE_MODELS = {
|
_PADDLE_MODELS = {
|
||||||
"PP-OCRv5_server",
|
"PP-OCRv5_server",
|
||||||
"PP-OCRv5_mobile",
|
"PP-OCRv5_mobile",
|
||||||
|
"CH__PP-OCRv5_server",
|
||||||
|
"CH__PP-OCRv5_mobile",
|
||||||
|
"EL__PP-OCRv5_mobile",
|
||||||
|
"EN__PP-OCRv5_mobile",
|
||||||
|
"ESLAV__PP-OCRv5_mobile",
|
||||||
|
"KOREAN__PP-OCRv5_mobile",
|
||||||
|
"LATIN__PP-OCRv5_mobile",
|
||||||
|
"TH__PP-OCRv5_mobile",
|
||||||
}
|
}
|
||||||
|
|
||||||
SUPPORTED_PROVIDERS = [
|
SUPPORTED_PROVIDERS = [
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ class TextDetector(InferenceModel):
|
|||||||
identity = (ModelType.DETECTION, ModelTask.OCR)
|
identity = (ModelType.DETECTION, ModelTask.OCR)
|
||||||
|
|
||||||
def __init__(self, model_name: str, **model_kwargs: Any) -> None:
|
def __init__(self, model_name: str, **model_kwargs: Any) -> None:
|
||||||
super().__init__(model_name, **model_kwargs, model_format=ModelFormat.ONNX)
|
super().__init__(model_name.split("__")[-1], **model_kwargs, model_format=ModelFormat.ONNX)
|
||||||
self.max_resolution = 736
|
self.max_resolution = 736
|
||||||
self.mean = np.array([0.5, 0.5, 0.5], dtype=np.float32)
|
self.mean = np.array([0.5, 0.5, 0.5], dtype=np.float32)
|
||||||
self.std_inv = np.float32(1.0) / (np.array([0.5, 0.5, 0.5], dtype=np.float32) * 255.0)
|
self.std_inv = np.float32(1.0) / (np.array([0.5, 0.5, 0.5], dtype=np.float32) * 255.0)
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ class TextRecognizer(InferenceModel):
|
|||||||
identity = (ModelType.RECOGNITION, ModelTask.OCR)
|
identity = (ModelType.RECOGNITION, ModelTask.OCR)
|
||||||
|
|
||||||
def __init__(self, model_name: str, **model_kwargs: Any) -> None:
|
def __init__(self, model_name: str, **model_kwargs: Any) -> None:
|
||||||
|
self.language = LangRec[model_name.split("__")[0]] if "__" in model_name else LangRec.CH
|
||||||
self.min_score = model_kwargs.get("minScore", 0.9)
|
self.min_score = model_kwargs.get("minScore", 0.9)
|
||||||
self._empty: TextRecognitionOutput = {
|
self._empty: TextRecognitionOutput = {
|
||||||
"box": np.empty(0, dtype=np.float32),
|
"box": np.empty(0, dtype=np.float32),
|
||||||
@@ -41,7 +42,7 @@ class TextRecognizer(InferenceModel):
|
|||||||
engine_type=EngineType.ONNXRUNTIME,
|
engine_type=EngineType.ONNXRUNTIME,
|
||||||
ocr_version=OCRVersion.PPOCRV5,
|
ocr_version=OCRVersion.PPOCRV5,
|
||||||
task_type=TaskType.REC,
|
task_type=TaskType.REC,
|
||||||
lang_type=LangRec.CH,
|
lang_type=self.language,
|
||||||
model_type=RapidModelType.MOBILE if "mobile" in self.model_name else RapidModelType.SERVER,
|
model_type=RapidModelType.MOBILE if "mobile" in self.model_name else RapidModelType.SERVER,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@@ -61,6 +62,7 @@ class TextRecognizer(InferenceModel):
|
|||||||
session=session.session,
|
session=session.session,
|
||||||
rec_batch_num=settings.max_batch_size.text_recognition if settings.max_batch_size is not None else 6,
|
rec_batch_num=settings.max_batch_size.text_recognition if settings.max_batch_size is not None else 6,
|
||||||
rec_img_shape=(3, 48, 320),
|
rec_img_shape=(3, 48, 320),
|
||||||
|
lang_type=self.language,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
return session
|
return session
|
||||||
|
|||||||
@@ -20,8 +20,8 @@ class TextRecognitionOutput(TypedDict):
|
|||||||
|
|
||||||
# RapidOCR expects `engine_type`, `lang_type`, and `font_path` to be attributes
|
# RapidOCR expects `engine_type`, `lang_type`, and `font_path` to be attributes
|
||||||
class OcrOptions(dict[str, Any]):
|
class OcrOptions(dict[str, Any]):
|
||||||
def __init__(self, **options: Any) -> None:
|
def __init__(self, lang_type: LangRec | None = None, **options: Any) -> None:
|
||||||
super().__init__(**options)
|
super().__init__(**options)
|
||||||
self.engine_type = EngineType.ONNXRUNTIME
|
self.engine_type = EngineType.ONNXRUNTIME
|
||||||
self.lang_type = LangRec.CH
|
self.lang_type = lang_type
|
||||||
self.font_path = None
|
self.font_path = None
|
||||||
|
|||||||
@@ -275,8 +275,14 @@
|
|||||||
name="ocr-model"
|
name="ocr-model"
|
||||||
bind:value={config.machineLearning.ocr.modelName}
|
bind:value={config.machineLearning.ocr.modelName}
|
||||||
options={[
|
options={[
|
||||||
{ value: 'PP-OCRv5_server', text: 'PP-OCRv5_server' },
|
{ text: 'PP-OCRv5_server (Chinese, Japanese and English)', value: 'PP-OCRv5_server' },
|
||||||
{ value: 'PP-OCRv5_mobile', text: 'PP-OCRv5_mobile' },
|
{ text: 'PP-OCRv5_mobile (Chinese, Japanese and English)', value: 'PP-OCRv5_mobile' },
|
||||||
|
{ text: 'PP-OCRv5_mobile (English-only)', value: 'EN__PP-OCRv5_mobile' },
|
||||||
|
{ text: 'PP-OCRv5_mobile (Greek and English)', value: 'EL__PP-OCRv5_mobile' },
|
||||||
|
{ text: 'PP-OCRv5_mobile (Korean and English)', value: 'KOREAN__PP-OCRv5_mobile' },
|
||||||
|
{ text: 'PP-OCRv5_mobile (Latin script languages)', value: 'LATIN__PP-OCRv5_mobile' },
|
||||||
|
{ text: 'PP-OCRv5_mobile (Russian, Belarusian, Ukrainian and English)', value: 'ESLAV__PP-OCRv5_mobile' },
|
||||||
|
{ text: 'PP-OCRv5_mobile (Thai and English)', value: 'TH__PP-OCRv5_mobile' },
|
||||||
]}
|
]}
|
||||||
disabled={disabled || !config.machineLearning.enabled || !config.machineLearning.ocr.enabled}
|
disabled={disabled || !config.machineLearning.enabled || !config.machineLearning.ocr.enabled}
|
||||||
isEdited={config.machineLearning.ocr.modelName !== savedConfig.machineLearning.ocr.modelName}
|
isEdited={config.machineLearning.ocr.modelName !== savedConfig.machineLearning.ocr.modelName}
|
||||||
|
|||||||
Reference in New Issue
Block a user