From e6e8c45f5a9a4453b884bf41a1797e84b6e3dfb9 Mon Sep 17 00:00:00 2001 From: projectdx Date: Sat, 27 Dec 2025 23:27:46 +0900 Subject: [PATCH] =?UTF-8?q?=EB=98=90=20=EB=A7=8E=EC=9D=80=20=EC=88=98?= =?UTF-8?q?=EC=A0=95=EC=9D=84=20=ED=96=88=EC=8A=B4.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/ffmpeg_queue_v1.py | 128 +++- lib/ytdlp_downloader.py | 157 +++++ mod_linkkf.py | 221 +++--- mod_ohli24.py | 666 ++++++++++-------- templates/anime_downloader_linkkf_queue.html | 94 ++- .../anime_downloader_linkkf_request.html | 4 +- .../anime_downloader_linkkf_setting.html | 1 + templates/anime_downloader_ohli24_queue.html | 47 +- .../anime_downloader_ohli24_request.html | 23 +- .../anime_downloader_ohli24_setting.html | 1 + 10 files changed, 916 insertions(+), 426 deletions(-) create mode 100644 lib/ytdlp_downloader.py diff --git a/lib/ffmpeg_queue_v1.py b/lib/ffmpeg_queue_v1.py index f52fb1f..69b3942 100644 --- a/lib/ffmpeg_queue_v1.py +++ b/lib/ffmpeg_queue_v1.py @@ -205,6 +205,10 @@ class FfmpegQueue(object): logger.info(f"save_path: {dirname}, filename: {filename}") logger.info(f"headers: {_headers}") + # 자막 URL 로그 + vtt_url = getattr(entity, 'vtt', None) + logger.info(f"Subtitle URL (vtt): {vtt_url}") + # 터미널에서 수동 테스트용 ffmpeg 명령어 output_file = os.path.join(dirname, filename) referer = _headers.get("Referer", "") if _headers else "" @@ -214,33 +218,51 @@ class FfmpegQueue(object): logger.info(ffmpeg_cmd) logger.info(f"=== END COMMAND ===") - # m3u8 URL인 경우 커스텀 HLS 다운로더 사용 (ffmpeg 8.0 .jpg 확장자 문제 우회) + # m3u8 URL인 경우 다운로드 방법 설정에 따라 분기 if video_url.endswith('.m3u8'): - logger.info("Using custom HLS downloader for m3u8 URL...") - from .hls_downloader import HlsDownloader + # 다운로드 방법 설정 확인 + download_method = P.ModelSetting.get(f"{self.name}_download_method") + logger.info(f"Download method: {download_method}") # 다운로드 시작 전 카운트 증가 self.current_ffmpeg_count += 1 logger.info(f"Download started, current_ffmpeg_count: {self.current_ffmpeg_count}/{self.max_ffmpeg_count}") # 별도 스레드에서 다운로드 실행 (동시 다운로드 지원) - def run_hls_download(downloader_self, entity_ref, output_file_ref, headers_ref): + def run_download(downloader_self, entity_ref, output_file_ref, headers_ref, method): def progress_callback(percent, current, total, speed="", elapsed=""): entity_ref.ffmpeg_status = 5 # DOWNLOADING - entity_ref.ffmpeg_status_kor = f"다운로드중 ({current}/{total})" + if method == "ytdlp": + entity_ref.ffmpeg_status_kor = f"다운로드중 (yt-dlp) {percent}%" + else: + entity_ref.ffmpeg_status_kor = f"다운로드중 ({current}/{total})" entity_ref.ffmpeg_percent = percent entity_ref.current_speed = speed entity_ref.download_time = elapsed entity_ref.refresh_status() - hls_downloader = HlsDownloader( - m3u8_url=video_url, - output_path=output_file_ref, - headers=headers_ref, - callback=progress_callback - ) + if method == "ytdlp": + # yt-dlp 사용 + from .ytdlp_downloader import YtdlpDownloader + logger.info("Using yt-dlp downloader...") + downloader = YtdlpDownloader( + url=video_url, + output_path=output_file_ref, + headers=headers_ref, + callback=progress_callback + ) + else: + # 기본: HLS 다운로더 사용 + from .hls_downloader import HlsDownloader + logger.info("Using custom HLS downloader for m3u8 URL...") + downloader = HlsDownloader( + m3u8_url=video_url, + output_path=output_file_ref, + headers=headers_ref, + callback=progress_callback + ) - success, message = hls_downloader.download() + success, message = downloader.download() # 다운로드 완료 후 카운트 감소 downloader_self.current_ffmpeg_count -= 1 @@ -252,17 
+274,75 @@ class FfmpegQueue(object): entity_ref.ffmpeg_percent = 100 entity_ref.download_completed() entity_ref.refresh_status() - logger.info(f"HLS download completed: {output_file_ref}") + logger.info(f"Download completed: {output_file_ref}") + + # 자막 파일 다운로드 (vtt_url이 있는 경우) + vtt_url = getattr(entity_ref, 'vtt', None) + if vtt_url: + try: + import requests + # 자막 파일 경로 생성 (비디오 파일명.srt) + video_basename = os.path.splitext(output_file_ref)[0] + srt_path = video_basename + ".srt" + + logger.info(f"Downloading subtitle from: {vtt_url}") + sub_response = requests.get(vtt_url, headers=headers_ref, timeout=30) + + if sub_response.status_code == 200: + vtt_content = sub_response.text + + # VTT를 SRT로 변환 (간단한 변환) + srt_content = vtt_content + if vtt_content.startswith("WEBVTT"): + # WEBVTT 헤더 제거 + lines = vtt_content.split("\n") + srt_lines = [] + cue_index = 1 + i = 0 + while i < len(lines): + line = lines[i].strip() + # WEBVTT, NOTE, STYLE 등 메타데이터 스킵 + if line.startswith("WEBVTT") or line.startswith("NOTE") or line.startswith("STYLE"): + i += 1 + continue + # 빈 줄 스킵 + if not line: + i += 1 + continue + # 타임코드 라인 (00:00:00.000 --> 00:00:00.000) + if "-->" in line: + # VTT 타임코드를 SRT 형식으로 변환 (. -> ,) + srt_timecode = line.replace(".", ",") + srt_lines.append(str(cue_index)) + srt_lines.append(srt_timecode) + cue_index += 1 + i += 1 + # 자막 텍스트 읽기 + while i < len(lines) and lines[i].strip(): + srt_lines.append(lines[i].rstrip()) + i += 1 + srt_lines.append("") + else: + i += 1 + srt_content = "\n".join(srt_lines) + + with open(srt_path, "w", encoding="utf-8") as f: + f.write(srt_content) + logger.info(f"Subtitle saved: {srt_path}") + else: + logger.warning(f"Subtitle download failed: HTTP {sub_response.status_code}") + except Exception as sub_err: + logger.error(f"Subtitle download error: {sub_err}") else: entity_ref.ffmpeg_status = -1 entity_ref.ffmpeg_status_kor = f"실패: {message}" entity_ref.refresh_status() - logger.error(f"HLS download failed: {message}") + logger.error(f"Download failed: {message}") # 스레드 시작 download_thread = threading.Thread( - target=run_hls_download, - args=(self, entity, output_file, _headers) + target=run_download, + args=(self, entity, output_file, _headers, download_method) ) download_thread.daemon = True download_thread.start() @@ -443,14 +523,20 @@ class FfmpegQueue(object): def add_queue(self, entity): try: - # entity = QueueEntity.create(info) - # if entity is not None: - # LogicQueue.download_queue.put(entity) - # return True entity.entity_id = self.static_index self.static_index += 1 self.entity_list.append(entity) self.download_queue.put(entity) + + # 소켓IO로 추가 이벤트 전송 + try: + from framework import socketio + namespace = f"/{self.P.package_name}/{self.name}/queue" + socketio.emit("add", entity.as_dict(), namespace=namespace) + logger.debug(f"Emitted 'add' event for entity {entity.entity_id}") + except Exception as e: + logger.debug(f"Socket emit error (non-critical): {e}") + return True except Exception as exception: self.P.logger.error("Exception:%s", exception) @@ -528,7 +614,7 @@ class FfmpegQueue(object): def get_entity_list(self): ret = [] - P.logger.debug(self) + #P.logger.debug(self) for x in self.entity_list: tmp = x.as_dict() ret.append(tmp) diff --git a/lib/ytdlp_downloader.py b/lib/ytdlp_downloader.py new file mode 100644 index 0000000..d1b76f0 --- /dev/null +++ b/lib/ytdlp_downloader.py @@ -0,0 +1,157 @@ +""" +yt-dlp Downloader for linkkf +- Uses yt-dlp as Python module or subprocess +- Same interface as HlsDownloader for easy switching +""" +import 
os +import subprocess +import sys +import time +import re +import logging + +logger = logging.getLogger(__name__) + + +class YtdlpDownloader: + """yt-dlp 기반 다운로더""" + + def __init__(self, url, output_path, headers=None, callback=None): + self.url = url + self.output_path = output_path + self.headers = headers or {} + self.callback = callback # 진행 상황 콜백 + self.cancelled = False + self.process = None + self.error_output = [] # 에러 메시지 저장 + + # 속도 및 시간 계산용 + self.start_time = None + self.current_speed = "" + self.elapsed_time = "" + self.percent = 0 + + def format_time(self, seconds): + """시간을 읽기 좋은 형식으로 변환""" + seconds = int(seconds) + if seconds < 60: + return f"{seconds}초" + elif seconds < 3600: + mins = seconds // 60 + secs = seconds % 60 + return f"{mins}분 {secs}초" + else: + hours = seconds // 3600 + mins = (seconds % 3600) // 60 + return f"{hours}시간 {mins}분" + + def format_speed(self, bytes_per_sec): + """속도를 읽기 좋은 형식으로 변환""" + if bytes_per_sec is None: + return "" + if bytes_per_sec < 1024: + return f"{bytes_per_sec:.0f} B/s" + elif bytes_per_sec < 1024 * 1024: + return f"{bytes_per_sec / 1024:.1f} KB/s" + else: + return f"{bytes_per_sec / (1024 * 1024):.2f} MB/s" + + def download(self): + """yt-dlp Python 모듈로 다운로드 수행""" + try: + import yt_dlp + except ImportError: + return False, "yt-dlp를 찾을 수 없습니다. pip install yt-dlp 로 설치해주세요." + + try: + self.start_time = time.time() + + # 출력 디렉토리 생성 + output_dir = os.path.dirname(self.output_path) + if output_dir and not os.path.exists(output_dir): + os.makedirs(output_dir) + + # 진행률 콜백 + def progress_hook(d): + if self.cancelled: + raise Exception("Cancelled") + + if d['status'] == 'downloading': + # 진행률 추출 + total = d.get('total_bytes') or d.get('total_bytes_estimate') or 0 + downloaded = d.get('downloaded_bytes', 0) + speed = d.get('speed', 0) + + if total > 0: + self.percent = (downloaded / total) * 100 + + self.current_speed = self.format_speed(speed) if speed else "" + + if self.start_time: + elapsed = time.time() - self.start_time + self.elapsed_time = self.format_time(elapsed) + + # 콜백 호출 + if self.callback: + self.callback( + percent=int(self.percent), + current=int(self.percent), + total=100, + speed=self.current_speed, + elapsed=self.elapsed_time + ) + + elif d['status'] == 'finished': + logger.info(f"yt-dlp download finished: {d.get('filename', '')}") + + # yt-dlp 옵션 설정 + ydl_opts = { + 'outtmpl': self.output_path, + 'progress_hooks': [progress_hook], + 'quiet': False, + 'no_warnings': False, + 'noprogress': False, + } + + # 헤더 추가 + http_headers = {} + if self.headers: + if self.headers.get('Referer'): + http_headers['Referer'] = self.headers['Referer'] + if self.headers.get('User-Agent'): + http_headers['User-Agent'] = self.headers['User-Agent'] + + if http_headers: + ydl_opts['http_headers'] = http_headers + + logger.info(f"yt-dlp downloading: {self.url}") + logger.info(f"Output path: {self.output_path}") + logger.info(f"Headers: {http_headers}") + + # 다운로드 실행 + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + ydl.download([self.url]) + + # 파일 존재 확인 + if os.path.exists(self.output_path): + return True, "Download completed" + else: + # yt-dlp가 확장자를 변경했을 수 있음 + base_name = os.path.splitext(self.output_path)[0] + for ext in ['.mp4', '.mkv', '.webm', '.ts']: + possible_path = base_name + ext + if os.path.exists(possible_path): + if possible_path != self.output_path: + os.rename(possible_path, self.output_path) + return True, "Download completed" + + return False, "Output file not found" + + except Exception as e: + error_msg = str(e) + 
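# NOTE (editor): a user-initiated cancel() also ends up in this except block,
# because progress_hook raises Exception("Cancelled"); the cancel is therefore
# reported as "yt-dlp 실패: Cancelled" rather than as a distinct status.
# Hedged usage sketch for this class (hypothetical URL/paths; the callback
# signature mirrors FfmpegQueue's progress_callback):
#   dl = YtdlpDownloader("https://example.com/index.m3u8", "/tmp/out.mp4",
#                        headers={"Referer": "https://example.com"},
#                        callback=lambda percent, current, total, speed="", elapsed="": None)
#   ok, msg = dl.download()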
logger.error(f"yt-dlp download error: {error_msg}") + return False, f"yt-dlp 실패: {error_msg}" + + def cancel(self): + """다운로드 취소""" + self.cancelled = True diff --git a/mod_linkkf.py b/mod_linkkf.py index 9b2cbaf..575dd85 100644 --- a/mod_linkkf.py +++ b/mod_linkkf.py @@ -30,25 +30,8 @@ from lxml import html from plugin import PluginModuleBase from requests_cache import CachedSession -packages = ["beautifulsoup4", "requests-cache", "cloudscraper"] - -for package in packages: - try: - import package - - except ModuleNotFoundError: - if package == "playwright": - pass - # os.system(f"pip3 install playwright") - # os.system(f"playwright install") - except ImportError: - # main(["install", package]) - if package == "playwright": - pass - # os.system(f"pip3 install {package}") - # os.system(f"playwright install") - else: - os.system(f"pip3 install {package}") +# cloudscraper는 lazy import로 처리 +import cloudscraper from anime_downloader.lib.ffmpeg_queue_v1 import FfmpegQueue, FfmpegQueueEntity from anime_downloader.lib.util import Util @@ -75,6 +58,7 @@ class LogicLinkkf(PluginModuleBase): download_queue = None download_thread = None current_download_count = 0 + _scraper = None # cloudscraper 싱글톤 cache_path = os.path.dirname(__file__) @@ -119,6 +103,7 @@ class LogicLinkkf(PluginModuleBase): "linkkf_image_url_prefix_series": "", "linkkf_image_url_prefix_episode": "", "linkkf_discord_notify": "True", + "linkkf_download_method": "ffmpeg", # ffmpeg or ytdlp } # default_route_socketio(P, self) default_route_socketio_module(self, attach="/setting") @@ -230,7 +215,60 @@ class LogicLinkkf(PluginModuleBase): ret = {"ret": "error", "log": "Queue not initialized"} return jsonify(ret) elif sub == "add_queue_checked_list": - return jsonify({"ret": "not_implemented"}) + # 선택된 에피소드 일괄 추가 (백그라운드 스레드로 처리) + import threading + from flask import current_app + + logger.info("========= add_queue_checked_list START =========") + ret = {"ret": "success", "message": "백그라운드에서 추가 중..."} + try: + form_data = request.form.get("data") + if not form_data: + ret["ret"] = "error" + ret["log"] = "No data received" + return jsonify(ret) + + episode_list = json.loads(form_data) + logger.info(f"Received {len(episode_list)} episodes to add in background") + + # Flask app 참조 저장 (스레드에서 사용) + app = current_app._get_current_object() + + # 백그라운드 스레드에서 추가 작업 수행 + def add_episodes_background(flask_app, downloader_self, episodes): + added = 0 + skipped = 0 + with flask_app.app_context(): + for episode_info in episodes: + try: + result = downloader_self.add(episode_info) + if result in ["enqueue_db_append", "enqueue_db_exist"]: + added += 1 + logger.debug(f"Added episode {episode_info.get('_id')}") + else: + skipped += 1 + logger.debug(f"Skipped episode {episode_info.get('_id')}: {result}") + except Exception as ep_err: + logger.error(f"Error adding episode: {ep_err}") + skipped += 1 + + logger.info(f"add_queue_checked_list completed: added={added}, skipped={skipped}") + + thread = threading.Thread( + target=add_episodes_background, + args=(app, self, episode_list) + ) + thread.daemon = True + thread.start() + + ret["count"] = len(episode_list) + + except Exception as e: + logger.error(f"add_queue_checked_list error: {e}") + logger.error(traceback.format_exc()) + ret["ret"] = "error" + ret["log"] = str(e) + return jsonify(ret) elif sub == "web_list": return jsonify({"ret": "not_implemented"}) elif sub == "db_remove": @@ -336,67 +374,50 @@ class LogicLinkkf(PluginModuleBase): logger.error(f"socketio_callback error: {e}") @staticmethod - def 
get_html(url, cached=False): + def _extract_cat1_urls(html_content): + """cat1 = [...] 패턴에서 URL 목록 추출 (중복 코드 제거용 헬퍼)""" + regex = r"cat1 = [^\[]*([^\]]*)" + cat_match = re.findall(regex, html_content) + if not cat_match: + return [] + url_regex = r"\"([^\"]*)\"" + return re.findall(url_regex, cat_match[0]) + + @staticmethod + def get_html(url, cached=False, timeout=10): try: if LogicLinkkf.referer is None: - LogicLinkkf.referer = f"{ModelSetting.get('linkkf_url')}" + LogicLinkkf.referer = f"{P.ModelSetting.get('linkkf_url')}" - # return LogicLinkkfYommi.get_html_requests(url) - return LogicLinkkf.get_html_cloudflare(url) + return LogicLinkkf.get_html_cloudflare(url, timeout=timeout) except Exception as e: logger.error("Exception:%s", e) logger.error(traceback.format_exc()) @staticmethod - def get_html_cloudflare(url, cached=False): - logger.debug(f"cloudflare protection bypass {'=' * 30}") - + def get_html_cloudflare(url, cached=False, timeout=10): + """Cloudflare 보호 우회를 위한 HTTP 요청 (싱글톤 패턴)""" user_agents_list = [ "Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.83 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36", ] - # ua = UserAgent(verify_ssl=False) LogicLinkkf.headers["User-Agent"] = random.choice(user_agents_list) + LogicLinkkf.headers["Referer"] = LogicLinkkf.referer or "" - LogicLinkkf.headers["Referer"] = LogicLinkkf.referer + # cloudscraper 싱글톤 패턴 - 매 요청마다 생성하지 않음 + if LogicLinkkf._scraper is None: + LogicLinkkf._scraper = cloudscraper.create_scraper( + delay=10, + browser={"custom": "linkkf"}, + ) - # logger.debug(f"headers:: {LogicLinkkfYommi.headers}") - - if LogicLinkkf.session is None: - LogicLinkkf.session = requests.Session() - - # LogicLinkkfYommi.session = requests.Session() - # re_sess = requests.Session() - # logger.debug(LogicLinkkfYommi.session) - - # sess = cloudscraper.create_scraper( - # # browser={"browser": "firefox", "mobile": False}, - # browser={"browser": "chrome", "mobile": False}, - # debug=True, - # sess=LogicLinkkfYommi.session, - # delay=10, - # ) - # scraper = cloudscraper.create_scraper(sess=re_sess) - scraper = cloudscraper.create_scraper( - # debug=True, - delay=10, - sess=LogicLinkkf.session, - browser={ - "custom": "linkkf", - }, - ) - - # print(scraper.get(url, headers=LogicLinkkfYommi.headers).content) - # print(scraper.get(url).content) - # return scraper.get(url, headers=LogicLinkkfYommi.headers).content - # logger.debug(LogicLinkkfYommi.headers) - return scraper.get( + return LogicLinkkf._scraper.get( url, headers=LogicLinkkf.headers, - timeout=10, + timeout=timeout, ).content.decode("utf8", errors="replace") @staticmethod @@ -410,7 +431,7 @@ class LogicLinkkf(PluginModuleBase): else: code = str(args[0]) - print(code) + logger.debug(f"add_whitelist code: {code}") whitelist_program = P.ModelSetting.get("linkkf_auto_code_list") # whitelist_programs = [ @@ -462,15 +483,19 @@ class LogicLinkkf(PluginModuleBase): @staticmethod def extract_video_url_from_playid(playid_url): """ - linkkf.live의 playid URL에서 실제 비디오 URL(m3u8)을 추출합니다. + linkkf.live의 playid URL에서 실제 비디오 URL(m3u8)과 자막 URL(vtt)을 추출합니다. 
    예시:
    - playid_url: https://linkkf.live/playid/403116/?server=12&slug=11
    - iframe: https://play.sub3.top/r2/play.php?id=n8&url=403116s11
    - m3u8: https://n8.hlz3.top/403116s11/index.m3u8
+
+        Returns:
+            (video_url, referer_url, vtt_url)
         """
         video_url = None
         referer_url = None
+        vtt_url = None
 
         try:
             logger.info(f"Extracting video URL from: {playid_url}")
@@ -497,7 +522,7 @@ class LogicLinkkf(PluginModuleBase):
                 iframe_src = iframe.get("src")
                 logger.info(f"Found iframe: {iframe_src}")
 
-                # Step 2: iframe 페이지에서 m3u8 URL 추출
+                # Step 2: iframe 페이지에서 m3u8 URL과 vtt URL 추출
                 iframe_headers = {
                     "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
                     "Referer": playid_url
                 }
@@ -522,6 +547,21 @@ class LogicLinkkf(PluginModuleBase):
                     video_url = source_match.group(1)
                     logger.info(f"Found source URL: {video_url}")
+                # VTT 자막 URL 추출
+                # 예: <track src="....vtt">
+                vtt_pattern = re.compile(r"<track[^>]+src=['\"]([^'\"]*\.vtt)['\"]")
+                vtt_match = vtt_pattern.search(iframe_content)
+                if vtt_match:
+                    vtt_url = vtt_match.group(1)
+                    logger.info(f"Found VTT subtitle URL: {vtt_url}")
+                else:
+                    # 대안 패턴: url: '...vtt'
+                    vtt_pattern2 = re.compile(r"url:\s*['\"]([^'\"]*\.vtt)['\"]")
+                    vtt_match2 = vtt_pattern2.search(iframe_content)
+                    if vtt_match2:
+                        vtt_url = vtt_match2.group(1)
+                        logger.info(f"Found VTT subtitle URL (alt pattern): {vtt_url}")
+
                 referer_url = iframe_src
             else:
                 logger.warning("No iframe found in playid page")
@@ -530,7 +570,7 @@ class LogicLinkkf(PluginModuleBase):
             logger.error(f"Error extracting video URL: {e}")
             logger.error(traceback.format_exc())
 
-        return video_url, referer_url
+        return video_url, referer_url, vtt_url
 
     def get_video_url_from_url(url, url2):
         video_url = None
@@ -657,7 +697,7 @@ class LogicLinkkf(PluginModuleBase):
             referer_url = url2
 
         elif "linkkf" in url2:
-            logger.deubg("linkkf routine")
+            logger.debug("linkkf routine")
            # linkkf 계열 처리 => URL 리스트를 받아오고, 하나 골라 방문 해서 m3u8을 받아온다.
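# NOTE (editor): this branch fetches url2 (data2 below) and then follows one
# candidate URL to reach the final m3u8. The candidate list presumably comes
# from the "cat1 = [...]" script block - the pattern the _extract_cat1_urls
# helper above was added to parse. A minimal sketch, assuming that pattern:
#   candidates = LogicLinkkf._extract_cat1_urls(data2)
#   url3 = candidates[0] if candidates else None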
referer_url = url2 data2 = LogicLinkkf.get_html(url2) @@ -674,7 +714,7 @@ class LogicLinkkf(PluginModuleBase): return LogicLinkkf.get_video_url_from_url(url2, url3) elif url3.startswith("/"): url3 = urlparse.urljoin(url2, url3) - print("url3 = ", url3) + logger.debug(f"url3 = {url3}") LogicLinkkf.referer = url2 data3 = LogicLinkkf.get_html(url3) # logger.info('data3: %s', data3) @@ -706,7 +746,7 @@ class LogicLinkkf(PluginModuleBase): # logger.info("download url2 : %s , url3 : %s" % (url2, url3)) video_url = url3 elif "#V" in url2: # V 패턴 추가 - print("#v routine") + logger.debug("#v routine") data2 = LogicLinkkf.get_html(url2) @@ -1223,38 +1263,6 @@ class LogicLinkkf(PluginModuleBase): logger.error("Exception:%s", e) logger.error(traceback.format_exc()) - @staticmethod - def get_html( - url: str, - referer: str = None, - cached: bool = False, - stream: bool = False, - timeout: int = 5, - ): - data = "" - headers = { - "referer": "https://linkkf.live", - "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) " - "Chrome/96.0.4664.110 Whale/3.12.129.46 Safari/537.36" - "Mozilla/5.0 (Macintosh; Intel " - "Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 " - "Whale/3.12.129.46 Safari/537.36", - "X-Requested-With": "XMLHttpRequest", - } - try: - if LogicOhli24.session is None: - LogicOhli24.session = requests.session() - - # logger.debug('get_html :%s', url) - headers["Referer"] = "" if referer is None else referer - page_content = LogicOhli24.session.get( - url, headers=headers, timeout=timeout - ) - data = page_content.text - except Exception as e: - logger.error("Exception:%s", e) - logger.error(traceback.format_exc()) - return data def get_html_requests(self, url, cached=False): if LogicLinkkf.session is None: @@ -1486,9 +1494,9 @@ class LinkkfQueueEntity(FfmpegQueueEntity): self.filepath = os.path.join(self.savepath, self.filename) if self.filename else self.savepath logger.info(f"[DEBUG] filepath set to: '{self.filepath}'") - # playid URL에서 실제 비디오 URL 추출 + # playid URL에서 실제 비디오 URL과 자막 URL 추출 try: - video_url, referer_url = LogicLinkkf.extract_video_url_from_playid(playid_url) + video_url, referer_url, vtt_url = LogicLinkkf.extract_video_url_from_playid(playid_url) if video_url: self.url = video_url @@ -1498,6 +1506,11 @@ class LinkkfQueueEntity(FfmpegQueueEntity): "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36" } logger.info(f"Video URL extracted: {self.url}") + + # 자막 URL 저장 + if vtt_url: + self.vtt = vtt_url + logger.info(f"Subtitle URL saved: {self.vtt}") else: # 추출 실패 시 원본 URL 사용 (fallback) self.url = playid_url @@ -1564,7 +1577,7 @@ class LinkkfQueueEntity(FfmpegQueueEntity): if len(tree.xpath(xpath_select_query)) > 0: # by k45734 - print("ok") + logger.debug("make_episode_info: select found") xpath_select_query = '//select[@class="switcher"]/option' for tag in tree.xpath(xpath_select_query): url2s2 = tag.attrib["value"] @@ -1575,7 +1588,7 @@ class LinkkfQueueEntity(FfmpegQueueEntity): else: url2s.append(url2s2) else: - print(":: else ::") + logger.debug("make_episode_info: else branch") tt = re.search(r"var player_data=(.*?)<", data, re.S) json_string = tt.group(1) diff --git a/mod_ohli24.py b/mod_ohli24.py index ce48727..9d3bfc1 100644 --- a/mod_ohli24.py +++ b/mod_ohli24.py @@ -28,16 +28,8 @@ from flask import request, render_template, jsonify from lxml import html from sqlalchemy import or_, desc -pkgs = ["bs4", "jsbeautifier", "aiohttp", "lxml", "loguru"] -for pkg in 
pkgs: - try: - importlib.import_module(pkg) - # except ImportError: - except ImportError: - subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "pip"]) - # main(["install", pkg]) - subprocess.check_call([sys.executable, "-m", "pip", "install", pkg]) - importlib.import_module(pkg) +# third-party +import requests # third party package import aiohttp @@ -101,13 +93,14 @@ class LogicOhli24(PluginModuleBase): self.db_default = { "ohli24_db_version": "1", - "ohli24_url": "https://ohli24.org", + "ohli24_url": "https://ani.ohli24.com", "ohli24_download_path": os.path.join(path_data, P.package_name, "ohli24"), "ohli24_auto_make_folder": "True", f"{self.name}_recent_code": "", "ohli24_auto_make_season_folder": "True", "ohli24_finished_insert": "[완결]", "ohli24_max_ffmpeg_process_count": "1", + f"{self.name}_download_method": "ffmpeg", # ffmpeg or ytdlp "ohli24_order_desc": "False", "ohli24_auto_start": "False", "ohli24_interval": "* 5 * * *", @@ -469,13 +462,6 @@ class LogicOhli24(PluginModuleBase): return self.current_data if code.startswith("http"): - - # if code.split('c/')[1] is not None: - # code = code.split('c/')[1] - # code_type = 'c' - # elif code.split('e/')[1] is not None: - # code_type = 'e' - # code = code.split('e/')[1] if "/c/" in code: code = code.split("c/")[1] code_type = "c" @@ -485,43 +471,84 @@ class LogicOhli24(PluginModuleBase): logger.info(f"code:::: {code}") + base_url = P.ModelSetting.get("ohli24_url").rstrip("/") # 뒤에 슬래시 제거 + if code_type == "c": - url = P.ModelSetting.get("ohli24_url") + "/c/" + code + url = base_url + "/c/" + code elif code_type == "e": - url = P.ModelSetting.get("ohli24_url") + "/e/" + code + url = base_url + "/e/" + code else: - url = P.ModelSetting.get("ohli24_url") + "/e/" + code + url = base_url + "/e/" + code if wr_id is not None: - # print(len(wr_id)) if len(wr_id) > 0: - url = P.ModelSetting.get("ohli24_url") + "/bbs/board.php?bo_table=" + bo_table + "&wr_id=" + wr_id - else: - pass + url = base_url + "/bbs/board.php?bo_table=" + bo_table + "&wr_id=" + wr_id logger.debug("url:::> %s", url) response_data = LogicOhli24.get_html(url, timeout=10) + logger.debug(f"HTML length: {len(response_data)}") + # 디버깅: HTML 일부 출력 + if len(response_data) < 1000: + logger.warning(f"Short HTML response: {response_data[:500]}") + else: + # item-subject 있는지 확인 + if "item-subject" in response_data: + logger.info("Found item-subject in HTML") + else: + logger.warning("item-subject NOT found in HTML") + if "itemprop=\"image\"" in response_data: + logger.info("Found itemprop=image in HTML") + else: + logger.warning("itemprop=image NOT found in HTML") + tree = html.fromstring(response_data) - title = tree.xpath('//div[@class="view-title"]/h1/text()')[0] - # image = tree.xpath('//div[@class="view-info"]/div[@class="image"]/div/img')[0]['src'] - image = tree.xpath('//div[@class="image"]/div/img/@src')[0] - image = image.replace("..", P.ModelSetting.get("ohli24_url")) - des_items = tree.xpath('//div[@class="list"]/p') - des = {} - des_key = [ - "_otit", - "_dir", - "_pub", - "_tag", - "_classifi", - "_country", - "_grade", - "_total_chapter", - "_show_time", - "_release_year", - "_drawing", + + # 제목 추출 - h1[itemprop="headline"] 또는 기타 h1 + title = "" + title_xpaths = [ + '//h1[@itemprop="headline"]/text()', + '//h1[@itemprop="headline"]//text()', + '//div[@class="view-wrap"]//h1/text()', + '//h1/text()', ] + for xpath in title_xpaths: + result = tree.xpath(xpath) + if result: + title = "".join(result).strip() + if title and title != "OHLI24": + break + 
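# NOTE (editor): fallback below - if no xpath candidate yields a usable title
# (or only the site name "OHLI24" matches), the title is recovered by
# URL-decoding the content code, e.g.:
#   urllib.parse.unquote("%EC%9B%90%ED%8E%80%EB%A7%A8")  # -> "원펀맨"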
+ if not title or "OHLI24" in title: + title = urllib.parse.unquote(code) + + logger.info(f"title:: {title}") + + # 이미지 추출 - img[itemprop="image"] 또는 img.img-tag + image = "" + image_xpaths = [ + '//img[@itemprop="image"]/@src', + '//img[@class="img-tag"]/@src', + '//div[@class="view-wrap"]//img/@src', + '//div[contains(@class, "view-img")]//img/@src', + ] + for xpath in image_xpaths: + result = tree.xpath(xpath) + if result: + image = result[0] + if image and not "logo" in image.lower(): + break + + if image: + if image.startswith(".."): + image = image.replace("..", P.ModelSetting.get("ohli24_url")) + elif not image.startswith("http"): + image = P.ModelSetting.get("ohli24_url") + image + + logger.info(f"image:: {image}") + + # 설명 정보 추출 + des = {} description_dict = { "원제": "_otit", "원작": "_org", @@ -543,70 +570,88 @@ class LogicOhli24(PluginModuleBase): "런타임": "_run_time", "작화": "_drawing", } + + # view-fields에서 메타데이터 추출 시도 + des_items = tree.xpath('//div[@class="list"]/p') + if not des_items: + des_items = tree.xpath('//div[contains(@class, "view-field")]') + + for item in des_items: + try: + span = item.xpath(".//span//text()") + if span and span[0] in description_dict: + key = description_dict[span[0]] + value = item.xpath(".//span/text()") + des[key] = value[1] if len(value) > 1 else "" + except Exception: + pass - list_body_li = tree.xpath('//ul[@class="list-body"]/li') - # logger.debug(f"list_body_li:: {list_body_li}") + # 에피소드 목록 추출 - a.item-subject episodes = [] - vi = None - for li in list_body_li: - # logger.debug(li) - title = li.xpath(".//a/text()")[0].strip() - thumbnail = image - # logger.info(li.xpath('//a[@class="item-subject"]/@href')) - link = P.ModelSetting.get("ohli24_url") + li.xpath('.//a[@class="item-subject"]/@href')[0] - # logger.debug(f"link:: {link}") - _date = li.xpath('.//div[@class="wr-date"]/text()')[0] - m = hashlib.md5(title.encode("utf-8")) - # _vi = hashlib.md5(title.encode('utf-8').hexdigest()) - # logger.info(m.hexdigest()) - _vi = m.hexdigest() - episodes.append( - { - "title": title, - "link": link, + episode_links = tree.xpath('//a[@class="item-subject"]') + + for a_elem in episode_links: + try: + ep_title = "".join(a_elem.xpath(".//text()")).strip() + href = a_elem.get("href", "") + + if not href.startswith("http"): + href = P.ModelSetting.get("ohli24_url").rstrip("/") + href + + # 부모에서 날짜 찾기 + parent = a_elem.getparent() + _date = "" + if parent is not None: + grandparent = parent.getparent() + if grandparent is not None: + date_result = grandparent.xpath('.//div[@class="wr-date"]/text()') + if not date_result: + date_result = grandparent.xpath('.//*[contains(@class, "date")]/text()') + _date = date_result[0].strip() if date_result else "" + + m = hashlib.md5(ep_title.encode("utf-8")) + _vi = m.hexdigest() + + episodes.append({ + "title": ep_title, + "link": href, "thumbnail": image, "date": _date, "day": _date, - "_id": title, - "va": link, + "_id": ep_title, + "va": href, "_vi": _vi, "content_code": code, - } - ) + }) + except Exception as ep_err: + logger.warning(f"Episode parse error: {ep_err}") + continue + + logger.info(f"Found {len(episodes)} episodes") - # logger.info("des_items length:: %s", len(des_items)) - for idx, item in enumerate(des_items): - # key = des_key[idx] - span = item.xpath(".//span//text()") - # logger.info(span) - key = description_dict[span[0]] - try: - des[key] = item.xpath(".//span/text()")[1] - except IndexError: - des[key] = "" - - # logger.info(f"des::>> {des}") - image = image.replace("..", 
P.ModelSetting.get("ohli24_url"))
-            # logger.info("images:: %s", image)
-            logger.info("title:: %s", title)
-
-            ser_description = tree.xpath('//div[@class="view-stocon"]/div[@class="c"]/text()')
+            # 줄거리 추출
+            ser_description_result = tree.xpath('//div[@class="view-stocon"]/div[@class="c"]/text()')
+            if not ser_description_result:
+                ser_description_result = tree.xpath('//div[contains(@class, "view-story")]//text()')
+            ser_description = ser_description_result if ser_description_result else []
 
             data = {
                 "title": title,
                 "image": image,
-                "date": "2022.01.11 00:30 (화)",
+                "date": "",
+                "day": "",
                 "ser_description": ser_description,
                 "des": des,
                 "episode": episodes,
+                "code": code,
             }
 
             if not P.ModelSetting.get_bool("ohli24_order_desc"):
                 data["episode"] = list(reversed(data["episode"]))
                 data["list_order"] = "desc"
+
+            self.current_data = data
             return data
-            # logger.info(response_text)
 
         except Exception as e:
             P.logger.error("Exception:%s", e)
@@ -775,50 +820,88 @@
         return True
 
     @staticmethod
-    def get_html(url, headers=None, referer=None, stream=False, timeout=10, stealth=False):
-        data = ""
-        if headers is None:
-            headers = {
-                "referer": f"https://ohli24.org",
-                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) "
-                "Chrome/96.0.4664.110 Whale/3.12.129.46 Safari/537.36"
-                "Mozilla/5.0 (Macintosh; Intel "
-                "Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 "
-                "Whale/3.12.129.46 Safari/537.36",
-                "X-Requested-With": "XMLHttpRequest",
-            }
-
-        try:
-
-            print("cloudflare protection bypass ==================P")
-            response_date = ""
-            if headers is not None:
-                LogicOhli24.headers = headers
-            if LogicOhli24.session is None:
-                LogicOhli24.session = requests.session()
-
-            LogicOhli24.session.verify = False
-            # logger.debug('get_html :%s', url)
-            # LogicOhli24.headers["Referer"] = "" if referer is None else referer
-            # logger.debug(f"referer:: {referer}")
-            if referer:
-                LogicOhli24.headers["Referer"] = referer
-
-            # logger.info(headers)
-            # logger.debug(f"LogicOhli24.headers:: {LogicOhli24.headers}")
+    def get_html(url, headers=None, referer=None, stream=False, timeout=60, stealth=False, data=None, method='GET'):
+        """별도 스레드에서 cloudscraper 실행하여 gevent SSL 충돌 및 Cloudflare 우회"""
+        from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError
+        import time
+        from urllib import parse
+
+        # URL 인코딩 (한글 주소 대응)
+        if '://' in url:
+            try:
+                scheme, netloc, path, params, query, fragment = parse.urlparse(url)
+                # 이미 인코딩된 경우를 대비해 unquote 후 다시 quote
+                path = parse.quote(parse.unquote(path), safe='/')
+                query = parse.quote(parse.unquote(query), safe='=&%')
+                url = parse.urlunparse((scheme, netloc, path, params, query, fragment))
+            except:
+                pass
+        def fetch_url_with_cloudscraper(url, headers, timeout, data, method):
+            """별도 스레드에서 cloudscraper로 실행"""
+            import cloudscraper
+            scraper = cloudscraper.create_scraper(
+                browser={'browser': 'chrome', 'platform': 'darwin', 'mobile': False},
+                delay=10
+            )
+            # 프록시 설정 (주의: 아래 proxies는 현재 모든 요청에 항상 적용됨 - 환경에 맞게 수정하거나 제거 필요)
             proxies = {
                 "http": "http://192.168.0.2:3138",
                 "https": "http://192.168.0.2:3138",
             }
-
-            page_content = LogicOhli24.session.get(url, headers=LogicOhli24.headers, timeout=timeout, proxies=proxies)
-            response_data = page_content.text
-            # logger.debug(response_data)
-            return response_data
-        except Exception as e:
-            logger.error("Exception:%s", e)
-            logger.error(traceback.format_exc())
+            if method.upper() == 'POST':
+                response = scraper.post(url, headers=headers, data=data, 
timeout=timeout, proxies=proxies) + else: + response = scraper.get(url, headers=headers, timeout=timeout, proxies=proxies) + return response.text + + response_data = "" + + if headers is None: + headers = { + "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", + "accept-language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7", + } + + if referer: + # Referer 인코딩 + if '://' in referer: + try: + scheme, netloc, path, params, query, fragment = parse.urlparse(referer) + path = parse.quote(parse.unquote(path), safe='/') + query = parse.quote(parse.unquote(query), safe='=&%') + referer = parse.urlunparse((scheme, netloc, path, params, query, fragment)) + except: + pass + headers["referer"] = referer + elif "referer" not in headers: + headers["referer"] = "https://ani.ohli24.com" + + max_retries = 3 + for attempt in range(max_retries): + try: + logger.debug(f"get_html (cloudscraper in thread) {method} attempt {attempt + 1}: {url}") + + # ThreadPoolExecutor로 별도 스레드에서 cloudscraper 실행 + with ThreadPoolExecutor(max_workers=1) as executor: + future = executor.submit(fetch_url_with_cloudscraper, url, headers, timeout, data, method) + response_data = future.result(timeout=timeout + 10) + + if response_data and (len(response_data) > 10 or method.upper() == 'POST'): + logger.debug(f"get_html success, length: {len(response_data)}") + return response_data + else: + logger.warning(f"Short response (len={len(response_data) if response_data else 0})") + + except FuturesTimeoutError: + logger.warning(f"get_html attempt {attempt + 1} timed out") + except Exception as e: + logger.warning(f"get_html attempt {attempt + 1} failed: {e}") + + if attempt < max_retries - 1: + time.sleep(3) + return response_data ######################################################### @@ -1025,166 +1108,97 @@ class Ohli24QueueEntity(FfmpegQueueEntity): # Get episode info from OHLI24 site def make_episode_info(self): try: - base_url = "https://a21.ohli24.com" base_url = P.ModelSetting.get("ohli24_url") - iframe_url = "" - - # https://ohli24.org/e/%EB%85%B9%EC%9D%84%20%EB%A8%B9%EB%8A%94%20%EB%B9%84%EC%8A%A4%EC%BD%94%206%ED%99%94 + + # 에피소드 페이지 URL (예: https://ani.ohli24.com/e/원펀맨 3기 1화) url = self.info["va"] - + if "//e/" in url: + url = url.replace("//e/", "/e/") + ourls = parse.urlparse(url) - + headers = { "Referer": f"{ourls.scheme}://{ourls.netloc}", - "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) " - "Chrome/96.0.4664.110 Whale/3.12.129.46 Safari/537.36", + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", } - logger.debug(headers) - logger.debug("make_episode_info()::url==> %s", url) + logger.debug(f"make_episode_info()::url==> {url}") logger.info(f"self.info:::> {self.info}") - - # text = requests.get(url, headers=headers).text + + # Step 1: 에피소드 페이지에서 cdndania.com iframe 찾기 text = LogicOhli24.get_html(url, headers=headers, referer=f"{ourls.scheme}://{ourls.netloc}") - # logger.debug(text) - soup1 = BeautifulSoup(text, "lxml") - pattern = re.compile(r"url : \"\.\.(.*)\"") - script = soup1.find("script", text=pattern) - - if script: - match = pattern.search(script.text) - if match: - iframe_url = match.group(1) - logger.info("iframe_url::> %s", iframe_url) - - # logger.debug(soup1.find("iframe")) - - # iframe_url = 
soup1.find("iframe")["src"] - # logger.info("iframe_url::> %s", iframe_url) - - print(base_url) - print(iframe_url) - # exit() - iframe_src = f'{P.ModelSetting.get("ohli24_url")}{iframe_url}' - - iframe_html = LogicOhli24.get_html(iframe_src, headers=headers, timeout=600) - - # print(iframe_html) - pattern = r"