feat(normalizer): add user normalizer json

This commit is contained in:
Benexl
2025-08-12 00:39:43 +03:00
parent 03fd8c0bf8
commit 088d232bfd
3 changed files with 64 additions and 28 deletions

View File

@@ -7,7 +7,7 @@ from ...state import InternalDirective, MenuName, ProviderState, State
@session.menu
def provider_search(ctx: Context, state: State) -> State | InternalDirective:
from .....core.utils.fuzzy import fuzz
from .....core.utils.normalizer import normalize_title
from .....core.utils.normalizer import normalize_title, update_user_normalizer_json
feedback = ctx.feedback
media_item = state.media_api.media_item
@@ -71,6 +71,12 @@ def provider_search(ctx: Context, state: State) -> State | InternalDirective:
if not chosen_title or chosen_title == "Back":
return InternalDirective.BACK
if selector.confirm(
f"Would you like to update your local normalizer json with: {chosen_title} for {media_title}"
):
update_user_normalizer_json(
chosen_title, media_title, config.general.provider.value
)
selected_provider_anime = provider_results_map[chosen_title]
with feedback.progress(

View File

@@ -34,7 +34,7 @@ class DownloadService:
media_api_service: "BaseApiClient",
provider_service: "BaseAnimeProvider",
):
self.config = config
self.app_config = config
self.registry = registry_service
self.media_api = media_api_service
self.provider = provider_service
@@ -157,7 +157,7 @@ class DownloadService:
continue
if (
episode.download_attempts
<= self.config.downloads.max_retry_attempts
<= self.app_config.downloads.max_retry_attempts
):
logger.info(
f"Retrying {episode_number} of {record.media_item.title.english}"
@@ -187,12 +187,17 @@ class DownloadService:
# 1. Search the provider to get the provider-specific ID
provider_search_results = self.provider.search(
SearchParams(query=media_title)
SearchParams(
query=normalize_title(
media_title, self.app_config.general.provider.value, True
),
translation_type=self.app_config.stream.translation_type,
)
)
if not provider_search_results or not provider_search_results.results:
raise ValueError(
f"Could not find '{media_title}' on provider '{self.config.general.provider.value}'"
f"Could not find '{media_title}' on provider '{self.app_config.general.provider.value}'"
)
# 2. Find the best match using fuzzy logic (like auto-select)
@@ -203,7 +208,7 @@ class DownloadService:
provider_results_map.keys(),
key=lambda p_title: fuzz.ratio(
normalize_title(
p_title, self.config.general.provider.value
p_title, self.app_config.general.provider.value
).lower(),
media_title.lower(),
),
@@ -225,7 +230,7 @@ class DownloadService:
anime_id=provider_anime.id,
query=media_title,
episode=episode_number,
translation_type=self.config.stream.translation_type,
translation_type=self.app_config.stream.translation_type,
)
)
if not streams_iterator:
@@ -235,11 +240,11 @@ class DownloadService:
if not server or not server.links:
raise ValueError(f"No stream links found for Episode {episode_number}")
if server.name != self.config.downloads.server.value:
if server.name != self.app_config.downloads.server.value:
while True:
try:
_server = next(streams_iterator)
if _server.name == self.config.downloads.server.value:
if _server.name == self.app_config.downloads.server.value:
server = _server
break
except StopIteration:
@@ -259,9 +264,9 @@ class DownloadService:
silent=False,
headers=server.headers,
subtitles=[sub.url for sub in server.subtitles],
merge=self.config.downloads.merge_subtitles,
clean=self.config.downloads.cleanup_after_merge,
no_check_certificate=self.config.downloads.no_check_certificate,
merge=self.app_config.downloads.merge_subtitles,
clean=self.app_config.downloads.cleanup_after_merge,
no_check_certificate=self.app_config.downloads.no_check_certificate,
)
result = self.downloader.download(download_params)
@@ -280,7 +285,7 @@ class DownloadService:
file_path=result.merged_path or result.video_path,
file_size=file_size,
quality=stream_link.quality,
provider_name=self.config.general.provider.value,
provider_name=self.app_config.general.provider.value,
server_name=server.name,
subtitle_paths=result.subtitle_paths,
)

View File

@@ -46,14 +46,19 @@ import json
import logging
from typing import Dict, Optional
from ..constants import ASSETS_DIR
from ..constants import APP_DATA_DIR, ASSETS_DIR
logger = logging.getLogger(__name__)
# Cache for the normalizer data to avoid repeated file reads
_normalizer_cache: Optional[Dict[str, Dict[str, str]]] = None
USER_NORMALIZER_JSON = APP_DATA_DIR / "normalizer.json"
DEFAULT_NORMALIZER_JSON = ASSETS_DIR / "normalizer.json"
# Also loads a user-provided normalizer.json from the app data dir, if present, and merges it over the defaults
def _load_normalizer_data() -> Dict[str, Dict[str, str]]:
"""
Load the normalizer.json file and cache it.
@@ -70,21 +75,41 @@ def _load_normalizer_data() -> Dict[str, Dict[str, str]]:
if _normalizer_cache is not None:
return _normalizer_cache
normalizer_path = ASSETS_DIR / "normalizer.json"
default_normalizer = {}
user_normalizer = {}
with open(DEFAULT_NORMALIZER_JSON, "r", encoding="utf-8") as f:
default_normalizer: dict = json.load(f)
if USER_NORMALIZER_JSON.exists():
with open(USER_NORMALIZER_JSON, "r", encoding="utf-8") as f:
user_normalizer: dict = json.load(f)
try:
with open(normalizer_path, "r", encoding="utf-8") as f:
_normalizer_cache = json.load(f)
logger.debug("Loaded normalizer data from %s", normalizer_path)
# Type checker now knows _normalizer_cache is not None
assert _normalizer_cache is not None
return _normalizer_cache
except FileNotFoundError:
logger.error("Normalizer file not found at %s", normalizer_path)
raise
except json.JSONDecodeError as e:
logger.error("Invalid JSON in normalizer file: %s", e)
raise
_normalizer_cache = default_normalizer
for key in default_normalizer:
if key in user_normalizer:
_normalizer_cache[key].update(user_normalizer[key])
return _normalizer_cache
def update_user_normalizer_json(
    provider_title: str, media_api_title: str, provider_name: str
):
    """
    Persist a user-chosen title mapping into the local normalizer json.

    Records that ``provider_title`` (as returned by provider
    ``provider_name``) maps to ``media_api_title``, updates the in-memory
    cache, and atomically rewrites the user's normalizer file at
    ``USER_NORMALIZER_JSON``.

    Args:
        provider_title: Title as reported by the provider.
        media_api_title: Canonical title from the media API.
        provider_name: Name of the provider this mapping belongs to.

    Raises:
        RuntimeError: If the normalizer cache has not been loaded yet.
    """
    import time

    from .file import AtomicWriter

    print(
        "UPDATING USER NORMALIZER JSON. PLEASE CONTRIBUTE TO THE PROJECT BY OPENING A PR ON GITHUB TO MERGE YOUR NORMALIZER JSON TO MAIN. MAEMOTTE KANSHA SHIMASU :)"
    )
    print(f"NORMALIZER JSON PATH IS: {USER_NORMALIZER_JSON}")
    # Give the user a moment to read the contribution notice above.
    time.sleep(5)
    if not _normalizer_cache:
        raise RuntimeError(
            "Fatal _normalizer_cache missing this should not be the case : (. Please report"
        )
    # setdefault avoids a KeyError when this provider has no entries yet
    # (the original indexed _normalizer_cache[provider_name] directly and
    # would crash for providers absent from the bundled normalizer.json).
    _normalizer_cache.setdefault(provider_name, {})[provider_title] = media_api_title
    with AtomicWriter(USER_NORMALIZER_JSON) as f:
        json.dump(_normalizer_cache, f, indent=2)
def provider_title_to_media_api_title(provider_title: str, provider_name: str) -> str: