import logging import re from html.parser import HTMLParser from itertools import cycle from urllib.parse import quote_plus from yt_dlp.utils import ( clean_html, extract_attributes, get_element_by_class, get_element_html_by_class, get_elements_by_class, get_elements_html_by_class, ) from ..base_provider import AnimeProvider from ..decorators import debug_provider from ..utils import give_random_quality from .constants import SERVERS_AVAILABLE from .extractors import MegaCloud from .types import HiAnimeStream logger = logging.getLogger(__name__) LINK_TO_STREAMS_REGEX = re.compile(r".*://(.*)/embed-(2|4|6)/e-([0-9])/(.*)\?.*") IMAGE_HTML_ELEMENT_REGEX = re.compile(r"") class ParseAnchorAndImgTag(HTMLParser): def __init__(self): super().__init__() self.img_tag = None self.a_tag = None def handle_starttag(self, tag, attrs): if tag == "img": self.img_tag = {attr[0]: attr[1] for attr in attrs} if tag == "a": self.a_tag = {attr[0]: attr[1] for attr in attrs} class HiAnime(AnimeProvider): # HEADERS = {"Referer": "https://hianime.to/home"} @debug_provider def search_for_anime(self, anime_title: str, translation_type, **kwargs): query = quote_plus(anime_title) url = f"https://hianime.to/search?keyword={query}" response = self.session.get(url) if not response.ok: return search_page = response.text search_results_html_items = get_elements_by_class("flw-item", search_page) results = [] for search_results_html_item in search_results_html_items: film_poster_html = get_element_by_class( "film-poster", search_results_html_item ) if not film_poster_html: continue # get availableEpisodes episodes_html = get_element_html_by_class("tick-sub", film_poster_html) episodes = clean_html(episodes_html) or 12 # get anime id and poster image url parser = ParseAnchorAndImgTag() parser.feed(film_poster_html) image_data = parser.img_tag anime_link_data = parser.a_tag if not image_data or not anime_link_data: continue episodes = int(episodes) # finally!! image_link = image_data["data-src"] anime_id = anime_link_data["data-id"] title = anime_link_data["title"] result = { "availableEpisodes": list(range(1, episodes)), "id": anime_id, "title": title, "poster": image_link, } results.append(result) self.store.set(result["id"], "search_result", result) return {"pageInfo": {}, "results": results} @debug_provider def get_anime(self, hianime_id, **kwargs): anime_result = {} if d := self.store.get(str(hianime_id), "search_result"): anime_result = d anime_url = f"https://hianime.to/ajax/v2/episode/list/{hianime_id}" response = self.session.get(anime_url, timeout=10) if response.ok: response_json = response.json() hianime_anime_page = response_json["html"] episodes_info_container_html = get_element_html_by_class( "ss-list", hianime_anime_page ) episodes_info_html_list = get_elements_html_by_class( "ep-item", episodes_info_container_html ) # keys: [ data-number: episode_number, data-id: episode_id, title: episode_title , href:episode_page_url] episodes_info_dicts = [ extract_attributes(episode_dict) for episode_dict in episodes_info_html_list ] episodes = [episode["data-number"] for episode in episodes_info_dicts] episodes_info = [ { "id": episode["data-id"], "title": ( (episode["title"] or "").replace( f"Episode {episode['data-number']}", "" ) or anime_result["title"] ) + f"; Episode {episode['data-number']}", "episode": episode["data-number"], } for episode in episodes_info_dicts ] self.store.set( str(hianime_id), "anime_info", episodes_info, ) return { "id": hianime_id, "availableEpisodesDetail": { "dub": episodes, "sub": episodes, "raw": episodes, }, "poster": anime_result["poster"], "title": anime_result["title"], "episodes_info": episodes_info, } @debug_provider def get_episode_streams(self, anime_id, episode, translation_type, **kwargs): if d := self.store.get(str(anime_id), "anime_info"): episodes_info = d episode_details = [ episode_details for episode_details in episodes_info if episode_details["episode"] == episode ] if not episode_details: return episode_details = episode_details[0] episode_url = f"https://hianime.to/ajax/v2/episode/servers?episodeId={episode_details['id']}" response = self.session.get(episode_url) if response.ok: response_json = response.json() episode_page_html = response_json["html"] servers_containers_html = get_elements_html_by_class( "ps__-list", episode_page_html ) if not servers_containers_html: return # sub servers try: servers_html_sub = get_elements_html_by_class( "server-item", servers_containers_html[0] ) except Exception: logger.warning("HiAnime: sub not found") servers_html_sub = None # dub servers try: servers_html_dub = get_elements_html_by_class( "server-item", servers_containers_html[1] ) except Exception: logger.warning("HiAnime: dub not found") servers_html_dub = None if translation_type == "dub": servers_html = servers_html_dub else: servers_html = servers_html_sub if not servers_html: return @debug_provider def _get_server(server_name, server_html): # keys: [ data-type: translation_type, data-id: embed_id, data-server-id: server_id ] servers_info = extract_attributes(server_html) server_id = servers_info["data-id"] embed_url = ( f"https://hianime.to/ajax/v2/episode/sources?id={server_id}" ) embed_response = self.session.get(embed_url) if embed_response.ok: embed_json = embed_response.json() raw_link_to_streams = embed_json["link"] match server_name: # TODO: Finish the other servers case "HD2": data = MegaCloud(self.session).extract( raw_link_to_streams ) return { "headers": {}, "subtitles": [ { "url": track["file"], "language": track["label"], } for track in data["tracks"] if track["kind"] == "captions" ], "server": server_name, "episode_title": episode_details["title"], "links": give_random_quality( [ {"link": link["url"]} for link in data["sources"] ] ), } case _: # NOTE: THIS METHOD DOES'NT WORK will get the other servers later match = LINK_TO_STREAMS_REGEX.match(raw_link_to_streams) if not match: return provider_domain = match.group(1) embed_type = match.group(2) episode_number = match.group(3) source_id = match.group(4) link_to_streams = f"https://{provider_domain}/embed-{embed_type}/ajax/e-{episode_number}/getSources?id={source_id}" link_to_streams_response = self.session.get( link_to_streams ) if link_to_streams_response.ok: juicy_streams_json: "HiAnimeStream" = ( link_to_streams_response.json() ) return { "headers": {}, "subtitles": [ { "url": track["file"], "language": track["label"], } for track in juicy_streams_json["tracks"] if track["kind"] == "captions" ], "server": server_name, "episode_title": episode_details["title"], "links": give_random_quality( [ {"link": link["file"]} for link in juicy_streams_json["tracks"] ] ), } for server_name, server_html in zip( cycle(SERVERS_AVAILABLE), servers_html ): if server_name == "HD2": if server := _get_server(server_name, server_html): yield server