feat: add title normalization utilities and integrate into provider search

This commit is contained in:
Benexl
2025-07-26 10:37:49 +03:00
parent b18e419831
commit 3ea37c4079
2 changed files with 369 additions and 3 deletions

View File

@@ -1,5 +1,6 @@
from rich.progress import Progress
from .....core.utils.fuzzy import fuzz
from .....core.utils.normalizer import normalize_title
from .....libs.provider.anime.params import SearchParams
from .....libs.provider.anime.types import SearchResult
@@ -29,9 +30,9 @@ def provider_search(ctx: Context, state: State) -> State | InternalDirective:
return InternalDirective.BACK
provider_search_results = provider.search(
SearchParams(query=media_title, translation_type=config.stream.translation_type)
SearchParams(query=normalize_title(media_title, config.general.provider.value,True), translation_type=config.stream.translation_type)
)
if not provider_search_results or not provider_search_results.results:
feedback.warning(
f"Could not find '{media_title}' on {provider.__class__.__name__}",
@@ -50,7 +51,7 @@ def provider_search(ctx: Context, state: State) -> State | InternalDirective:
# Use fuzzy matching to find the best title
best_match_title = max(
provider_results_map.keys(),
key=lambda p_title: fuzz.ratio(p_title.lower(), media_title.lower()),
key=lambda p_title: fuzz.ratio(normalize_title(p_title,config.general.provider.value).lower(), media_title.lower()),
)
feedback.info("Auto-selecting best match: {best_match_title}")
selected_provider_anime = provider_results_map[best_match_title]

View File

@@ -0,0 +1,365 @@
"""
Title normalization utilities for converting between provider and media API titles.
This module provides functions to normalize anime titles between different providers
(allanime, hianime, animepahe) and media APIs (AniList) using the normalizer.json
mapping file located in the assets directory.
The normalizer.json file contains mappings in the following format:
{
"provider_name": {
"provider_title": "media_api_title",
...
},
...
}
Key Features:
- Bidirectional title conversion (provider ↔ media API)
- Caching for performance optimization
- Runtime mapping support for dynamic additions
- Comprehensive error handling and logging
- Type hints for better IDE support
Example Usage:
>>> from fastanime.core.utils.normalizer import (
... provider_title_to_media_api_title,
... media_api_title_to_provider_title
... )
# Convert provider title to media API title
>>> provider_title_to_media_api_title("1P", "allanime")
'one piece'
# Convert media API title to provider title
>>> media_api_title_to_provider_title("one piece", "allanime")
'1P'
# Check available providers
>>> get_available_providers()
['allanime', 'hianime', 'animepahe']
Author: FastAnime Contributors
"""
import json
import logging
from pathlib import Path
from typing import Dict, Optional
from ..constants import ASSETS_DIR
logger = logging.getLogger(__name__)
# Cache for the normalizer data to avoid repeated file reads
_normalizer_cache: Optional[Dict[str, Dict[str, str]]] = None
def _load_normalizer_data() -> Dict[str, Dict[str, str]]:
"""
Load the normalizer.json file and cache it.
Returns:
Dictionary containing provider mappings from normalizer.json
Raises:
FileNotFoundError: If normalizer.json is not found
json.JSONDecodeError: If normalizer.json is malformed
"""
global _normalizer_cache
if _normalizer_cache is not None:
return _normalizer_cache
normalizer_path = ASSETS_DIR / "normalizer.json"
try:
with open(normalizer_path, 'r', encoding='utf-8') as f:
_normalizer_cache = json.load(f)
logger.debug("Loaded normalizer data from %s", normalizer_path)
# Type checker now knows _normalizer_cache is not None
assert _normalizer_cache is not None
return _normalizer_cache
except FileNotFoundError:
logger.error("Normalizer file not found at %s", normalizer_path)
raise
except json.JSONDecodeError as e:
logger.error("Invalid JSON in normalizer file: %s", e)
raise
def provider_title_to_media_api_title(
provider_title: str,
provider_name: str
) -> str:
"""
Convert a provider title to its equivalent media API title.
This function takes a title from a specific provider (e.g., "1P" from allanime)
and converts it to the standard media API title (e.g., "one piece").
Args:
provider_title: The title as it appears on the provider
provider_name: The name of the provider (e.g., "allanime", "hianime", "animepahe")
Returns:
The normalized media API title, or the original title if no mapping exists
Example:
>>> provider_title_to_media_api_title("1P", "allanime")
"one piece"
>>> provider_title_to_media_api_title("My Star", "hianime")
"Oshi no Ko"
>>> provider_title_to_media_api_title("Unknown Title", "allanime")
"Unknown Title"
"""
try:
normalizer_data = _load_normalizer_data()
# Check if the provider exists in the normalizer data
if provider_name not in normalizer_data:
logger.debug("Provider '%s' not found in normalizer data", provider_name)
return provider_title
provider_mappings = normalizer_data[provider_name]
# Return the mapped title if it exists, otherwise return the original
normalized_title = provider_mappings.get(provider_title, provider_title)
if normalized_title != provider_title:
logger.debug(
"Normalized provider title: '%s' -> '%s' (provider: %s)",
provider_title, normalized_title, provider_name
)
return normalized_title
except (FileNotFoundError, json.JSONDecodeError) as e:
logger.warning("Failed to load normalizer data: %s", e)
return provider_title
def media_api_title_to_provider_title(
media_api_title: str,
provider_name: str
) -> str:
"""
Convert a media API title to its equivalent provider title.
This function takes a standard media API title and converts it to the title
used by a specific provider. This is the reverse operation of
provider_title_to_media_api_title().
Args:
media_api_title: The title as it appears in the media API (e.g., AniList)
provider_name: The name of the provider (e.g., "allanime", "hianime", "animepahe")
Returns:
The provider-specific title, or the original title if no mapping exists
Example:
>>> media_api_title_to_provider_title("one piece", "allanime")
"1P"
>>> media_api_title_to_provider_title("Oshi no Ko", "hianime")
"My Star"
>>> media_api_title_to_provider_title("Unknown Title", "allanime")
"Unknown Title"
"""
try:
normalizer_data = _load_normalizer_data()
# Check if the provider exists in the normalizer data
if provider_name not in normalizer_data:
logger.debug("Provider '%s' not found in normalizer data", provider_name)
return media_api_title
provider_mappings = normalizer_data[provider_name]
# Create a reverse mapping (media_api_title -> provider_title)
reverse_mappings = {v: k for k, v in provider_mappings.items()}
# Return the mapped title if it exists, otherwise return the original
provider_title = reverse_mappings.get(media_api_title, media_api_title)
if provider_title != media_api_title:
logger.debug(
"Converted media API title to provider title: '%s' -> '%s' (provider: %s)",
media_api_title, provider_title, provider_name
)
return provider_title
except (FileNotFoundError, json.JSONDecodeError) as e:
logger.warning("Failed to load normalizer data: %s", e)
return media_api_title
def normalize_title(
title: str,
provider_name: str,
use_provider_mapping: bool = False
) -> str:
"""
Normalize a title for search operations.
This convenience function determines the appropriate normalization direction
based on the use_provider_mapping parameter.
Args:
title: The title to normalize
provider_name: The name of the provider
use_provider_mapping: If True, convert media API title to provider title.
If False, convert provider title to media API title.
Returns:
The normalized title
Example:
>>> normalize_title_for_search("one piece", "allanime", use_provider_mapping=True)
"1P"
>>> normalize_title_for_search("1P", "allanime", use_provider_mapping=False)
"one piece"
"""
if use_provider_mapping:
return media_api_title_to_provider_title(title, provider_name)
else:
return provider_title_to_media_api_title(title, provider_name)
def get_available_providers() -> list[str]:
"""
Get a list of all available providers in the normalizer data.
Returns:
List of provider names that have mappings defined
Example:
>>> get_available_providers()
['allanime', 'hianime', 'animepahe']
"""
try:
normalizer_data = _load_normalizer_data()
return list(normalizer_data.keys())
except (FileNotFoundError, json.JSONDecodeError) as e:
logger.warning("Failed to load normalizer data: %s", e)
return []
def clear_cache() -> None:
"""
Clear the internal cache for normalizer data.
This is useful for testing or when the normalizer.json file has been updated
and you want to reload the data.
"""
global _normalizer_cache
_normalizer_cache = None
logger.debug("Cleared normalizer cache")
def get_provider_mappings(provider_name: str) -> Dict[str, str]:
"""
Get all title mappings for a specific provider.
Args:
provider_name: The name of the provider
Returns:
Dictionary mapping provider titles to media API titles
Example:
>>> mappings = get_provider_mappings("allanime")
>>> print(mappings["1P"])
"one piece"
"""
try:
normalizer_data = _load_normalizer_data()
return normalizer_data.get(provider_name, {})
except (FileNotFoundError, json.JSONDecodeError) as e:
logger.warning("Failed to load normalizer data: %s", e)
return {}
def has_mapping(title: str, provider_name: str, reverse: bool = False) -> bool:
"""
Check if a mapping exists for the given title and provider.
Args:
title: The title to check
provider_name: The name of the provider
reverse: If True, check for media API -> provider mapping.
If False, check for provider -> media API mapping.
Returns:
True if a mapping exists, False otherwise
Example:
>>> has_mapping("1P", "allanime", reverse=False)
True
>>> has_mapping("one piece", "allanime", reverse=True)
True
>>> has_mapping("Unknown Title", "allanime", reverse=False)
False
"""
try:
normalizer_data = _load_normalizer_data()
if provider_name not in normalizer_data:
return False
provider_mappings = normalizer_data[provider_name]
if reverse:
# Check if title exists as a value (media API title)
return title in provider_mappings.values()
else:
# Check if title exists as a key (provider title)
return title in provider_mappings
except (FileNotFoundError, json.JSONDecodeError) as e:
logger.warning("Failed to load normalizer data: %s", e)
return False
def add_runtime_mapping(
provider_title: str,
media_api_title: str,
provider_name: str
) -> None:
"""
Add a new mapping at runtime (not persisted to file).
This is useful for adding mappings discovered during runtime that
are not present in the normalizer.json file.
Args:
provider_title: The provider-specific title
media_api_title: The media API title
provider_name: The name of the provider
Note:
This mapping is only stored in memory and will be lost when
the cache is cleared or the application restarts.
Example:
>>> add_runtime_mapping("Custom Title", "Normalized Title", "allanime")
>>> provider_title_to_media_api_title("Custom Title", "allanime")
"Normalized Title"
"""
try:
normalizer_data = _load_normalizer_data()
# Initialize provider if it doesn't exist
if provider_name not in normalizer_data:
normalizer_data[provider_name] = {}
# Add the mapping
normalizer_data[provider_name][provider_title] = media_api_title
logger.info(
"Added runtime mapping: '%s' -> '%s' (provider: %s)",
provider_title, media_api_title, provider_name
)
except (FileNotFoundError, json.JSONDecodeError) as e:
logger.warning("Failed to add runtime mapping: %s", e)