feat(normalizer): add user normalizer json

This commit is contained in:
Benexl
2025-08-12 00:39:43 +03:00
parent 03fd8c0bf8
commit 088d232bfd
3 changed files with 64 additions and 28 deletions

View File

@@ -7,7 +7,7 @@ from ...state import InternalDirective, MenuName, ProviderState, State
@session.menu
def provider_search(ctx: Context, state: State) -> State | InternalDirective:
from .....core.utils.fuzzy import fuzz
from .....core.utils.normalizer import normalize_title
from .....core.utils.normalizer import normalize_title, update_user_normalizer_json
feedback = ctx.feedback
media_item = state.media_api.media_item
@@ -71,6 +71,12 @@ def provider_search(ctx: Context, state: State) -> State | InternalDirective:
if not chosen_title or chosen_title == "Back":
return InternalDirective.BACK
if selector.confirm(
f"Would you like to update your local normalizer json with: {chosen_title} for {media_title}"
):
update_user_normalizer_json(
chosen_title, media_title, config.general.provider.value
)
selected_provider_anime = provider_results_map[chosen_title]
with feedback.progress(

View File

@@ -34,7 +34,7 @@ class DownloadService:
media_api_service: "BaseApiClient",
provider_service: "BaseAnimeProvider",
):
self.config = config
self.app_config = config
self.registry = registry_service
self.media_api = media_api_service
self.provider = provider_service
@@ -157,7 +157,7 @@ class DownloadService:
continue
if (
episode.download_attempts
<= self.config.downloads.max_retry_attempts
<= self.app_config.downloads.max_retry_attempts
):
logger.info(
f"Retrying {episode_number} of {record.media_item.title.english}"
@@ -187,12 +187,17 @@ class DownloadService:
# 1. Search the provider to get the provider-specific ID
provider_search_results = self.provider.search(
SearchParams(query=media_title)
SearchParams(
query=normalize_title(
media_title, self.app_config.general.provider.value, True
),
translation_type=self.app_config.stream.translation_type,
)
)
if not provider_search_results or not provider_search_results.results:
raise ValueError(
f"Could not find '{media_title}' on provider '{self.config.general.provider.value}'"
f"Could not find '{media_title}' on provider '{self.app_config.general.provider.value}'"
)
# 2. Find the best match using fuzzy logic (like auto-select)
@@ -203,7 +208,7 @@ class DownloadService:
provider_results_map.keys(),
key=lambda p_title: fuzz.ratio(
normalize_title(
p_title, self.config.general.provider.value
p_title, self.app_config.general.provider.value
).lower(),
media_title.lower(),
),
@@ -225,7 +230,7 @@ class DownloadService:
anime_id=provider_anime.id,
query=media_title,
episode=episode_number,
translation_type=self.config.stream.translation_type,
translation_type=self.app_config.stream.translation_type,
)
)
if not streams_iterator:
@@ -235,11 +240,11 @@ class DownloadService:
if not server or not server.links:
raise ValueError(f"No stream links found for Episode {episode_number}")
if server.name != self.config.downloads.server.value:
if server.name != self.app_config.downloads.server.value:
while True:
try:
_server = next(streams_iterator)
if _server.name == self.config.downloads.server.value:
if _server.name == self.app_config.downloads.server.value:
server = _server
break
except StopIteration:
@@ -259,9 +264,9 @@ class DownloadService:
silent=False,
headers=server.headers,
subtitles=[sub.url for sub in server.subtitles],
merge=self.config.downloads.merge_subtitles,
clean=self.config.downloads.cleanup_after_merge,
no_check_certificate=self.config.downloads.no_check_certificate,
merge=self.app_config.downloads.merge_subtitles,
clean=self.app_config.downloads.cleanup_after_merge,
no_check_certificate=self.app_config.downloads.no_check_certificate,
)
result = self.downloader.download(download_params)
@@ -280,7 +285,7 @@ class DownloadService:
file_path=result.merged_path or result.video_path,
file_size=file_size,
quality=stream_link.quality,
provider_name=self.config.general.provider.value,
provider_name=self.app_config.general.provider.value,
server_name=server.name,
subtitle_paths=result.subtitle_paths,
)

View File

@@ -46,14 +46,19 @@ import json
import logging
from typing import Dict, Optional
from ..constants import ASSETS_DIR
from ..constants import APP_DATA_DIR, ASSETS_DIR
logger = logging.getLogger(__name__)
# Cache for the normalizer data to avoid repeated file reads
_normalizer_cache: Optional[Dict[str, Dict[str, str]]] = None
USER_NORMALIZER_JSON = APP_DATA_DIR / "normalizer.json"
DEFAULT_NORMALIZER_JSON = ASSETS_DIR / "normalizer.json"
# Also loads a user-provided normalizer.json from the app data dir, if present, and merges it over the defaults
def _load_normalizer_data() -> Dict[str, Dict[str, str]]:
"""
Load the normalizer.json file and cache it.
@@ -70,21 +75,41 @@ def _load_normalizer_data() -> Dict[str, Dict[str, str]]:
if _normalizer_cache is not None:
return _normalizer_cache
normalizer_path = ASSETS_DIR / "normalizer.json"
default_normalizer = {}
user_normalizer = {}
with open(DEFAULT_NORMALIZER_JSON, "r", encoding="utf-8") as f:
default_normalizer: dict = json.load(f)
if USER_NORMALIZER_JSON.exists():
with open(USER_NORMALIZER_JSON, "r", encoding="utf-8") as f:
user_normalizer: dict = json.load(f)
try:
with open(normalizer_path, "r", encoding="utf-8") as f:
_normalizer_cache = json.load(f)
logger.debug("Loaded normalizer data from %s", normalizer_path)
# Type checker now knows _normalizer_cache is not None
assert _normalizer_cache is not None
return _normalizer_cache
except FileNotFoundError:
logger.error("Normalizer file not found at %s", normalizer_path)
raise
except json.JSONDecodeError as e:
logger.error("Invalid JSON in normalizer file: %s", e)
raise
_normalizer_cache = default_normalizer
for key in default_normalizer:
if key in user_normalizer:
_normalizer_cache[key].update(user_normalizer[key])
return _normalizer_cache
def update_user_normalizer_json(
    provider_title: str, media_api_title: str, provider_name: str
):
    """
    Persist a user-chosen title mapping into the local normalizer json.

    Records that ``provider_title`` (as returned by provider
    ``provider_name``) maps to ``media_api_title``, updates the in-memory
    cache, and atomically rewrites the user's normalizer file at
    ``USER_NORMALIZER_JSON``.

    Args:
        provider_title: Title as reported by the provider.
        media_api_title: Canonical title from the media API.
        provider_name: Name of the provider this mapping belongs to.

    Raises:
        RuntimeError: If the normalizer cache has not been loaded yet.
    """
    import time

    from .file import AtomicWriter

    print(
        "UPDATING USER NORMALIZER JSON. PLEASE CONTRIBUTE TO THE PROJECT BY OPENING A PR ON GITHUB TO MERGE YOUR NORMALIZER JSON TO MAIN. MAEMOTTE KANSHA SHIMASU :)"
    )
    print(f"NORMALIZER JSON PATH IS: {USER_NORMALIZER_JSON}")
    # Give the user a moment to read the contribution notice above.
    time.sleep(5)
    if not _normalizer_cache:
        raise RuntimeError(
            "Fatal _normalizer_cache missing this should not be the case : (. Please report"
        )
    # setdefault avoids a KeyError when this provider has no entries yet
    # (the original indexed _normalizer_cache[provider_name] directly and
    # would crash for providers absent from the bundled normalizer.json).
    _normalizer_cache.setdefault(provider_name, {})[provider_title] = media_api_title
    with AtomicWriter(USER_NORMALIZER_JSON) as f:
        json.dump(_normalizer_cache, f, indent=2)
def provider_title_to_media_api_title(provider_title: str, provider_name: str) -> str: