#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2022/02/08 3:44 PM
# @Author : yommi
# @Site :
# @File : logic_ohli24
# @Software: PyCharm
import asyncio
import hashlib
import importlib
import json
import os
import re
import subprocess
import sys
import threading
import traceback
import urllib
from datetime import datetime, date
from urllib import parse
# third-party
import requests
# third-party
from flask import request, render_template, jsonify
from lxml import html
from sqlalchemy import or_, desc
# third-party
import requests
# third party package
import aiohttp
from bs4 import BeautifulSoup
import jsbeautifier
# sjva 공용
from framework import db, scheduler, path_data, socketio
from framework.util import Util
# from framework.common.util import headers
from framework import F
from plugin import PluginModuleBase
from .lib.ffmpeg_queue_v1 import FfmpegQueueEntity, FfmpegQueue
from support.expand.ffmpeg import SupportFfmpeg
from .lib.util import Util
# from support_site import SupportKakaotv
from .setup import *
# Module-wide logger taken from the plugin object `P`.
# `P` is not imported by name in this file; presumably it arrives through
# `from .setup import *` above (TODO confirm).
logger = P.logger
# Writes a separator banner to stdout as a side effect of importing this module.
print("*=" * 50)
# Module name; used below as the prefix of setting keys and in template names.
name = "ohli24"
# Plugin module for the "ohli24" site: page rendering, AJAX handlers, site
# scraping helpers and the download queue glue all live on this class.
class LogicOhli24(PluginModuleBase):
# Class-level state shared by every instance (and read through the class
# itself by the static helpers below, e.g. add_whitelist()).
current_headers = None
# Result of the most recent successful get_series_info() call.
current_data = None
referer = None
origin_url = None
episode_url = None
cookies = None
# One requests session created when the class body is executed.
session = requests.Session()
# Default request headers aimed at the ndoodle.xyz player host.
headers = {
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.5249.114 Whale/3.17.145.12 Safari/537.36",
"authority": "ndoodle.xyz",
"accept": "*/*",
"accept-language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
"cache-control": "no-cache",
"pragma": "no-cache",
"referer": "https://ndoodle.xyz/video/e6e31529675d0ef99d777d729c423382",
}
# Minimal header set that only carries a browser User-Agent.
useragent = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, "
"like Gecko) Chrome/96.0.4664.110 Whale/3.12.129.46 Safari/537.36"
}
# NOTE(review): nothing in the visible part of this file ever assigns
# download_queue; process_command() reads it — confirm where it is created.
download_queue = None
download_thread = None
current_download_count = 0
def __init__(self, P):
    """Initialise the module and declare the default value of every setting key.

    Args:
        P: plugin object; forwarded to the base class and used for
            ``package_name`` when building the default download path.
    """
    super(LogicOhli24, self).__init__(P, "setting", scheduler_desc="ohli24 자동 다운로드")
    self.name = name
    key_prefix = self.name
    download_root = os.path.join(path_data, P.package_name, "ohli24")
    # Every default is stored as a string, including booleans and numbers.
    self.db_default = {
        "ohli24_db_version": "1",
        "ohli24_url": "https://ani.ohli24.com",
        "ohli24_download_path": download_root,
        "ohli24_auto_make_folder": "True",
        f"{key_prefix}_recent_code": "",
        "ohli24_auto_make_season_folder": "True",
        "ohli24_finished_insert": "[완결]",
        "ohli24_max_ffmpeg_process_count": "1",
        f"{key_prefix}_download_method": "ffmpeg",  # ffmpeg or ytdlp
        "ohli24_order_desc": "False",
        "ohli24_auto_start": "False",
        "ohli24_interval": "* 5 * * *",
        "ohli24_auto_mode_all": "False",
        "ohli24_auto_code_list": "",
        "ohli24_current_code": "",
        "ohli24_uncompleted_auto_enqueue": "False",
        "ohli24_image_url_prefix_series": "https://www.jetcloud.cc/series/",
        "ohli24_image_url_prefix_episode": "https://www.jetcloud-list.cc/thumbnail/",
        "ohli24_discord_notify": "True",
    }
    # The download queue itself is built later, in plugin_load().
    self.queue = None
    default_route_socketio_module(self, attach="/queue")
@staticmethod
def db_init():
pass
# try:
# for key, value in P.Logic.db_default.items():
# if db.session.query(ModelSetting).filter_by(key=key).count() == 0:
# db.session.add(ModelSetting(key, value))
# db.session.commit()
# except Exception as e:
# logger.error('Exception:%s', e)
# logger.error(traceback.format_exc())
def process_menu(self, sub, req):
    """Render the HTML page for menu entry ``sub``.

    Args:
        sub: page name taken from the URL.
        req: current request; only ``req.args`` is read.

    Returns:
        The rendered ``<package>_<module>_<sub>.html`` template for known
        pages, otherwise the generic ``sample.html`` page.
    """
    arg = P.ModelSetting.to_dict()
    arg["sub"] = self.name

    known_pages = ("setting", "queue", "list", "category", "request", "search")
    if sub not in known_pages:
        return render_template("sample.html", title=f"{P.package_name} - {sub}")

    if sub == "request":
        # A content code passed in the query string overrides the stored one.
        content_code = req.args.get("content_code")
        if content_code is not None:
            arg["ohli24_current_code"] = content_code
    elif sub == "setting":
        # Expose the scheduler state of this module's job to the settings page.
        job_id = f"{self.P.package_name}_{self.name}"
        arg["scheduler"] = str(scheduler.is_include(job_id))
        arg["is_running"] = str(scheduler.is_running(job_id))

    template_name = f"{P.package_name}_{self.name}_{sub}.html"
    return render_template(template_name, arg=arg)
# @staticmethod
def process_ajax(self, sub, req):
    """Dispatch an AJAX sub-command and always return a JSON response.

    Args:
        sub: sub-command name (``analysis``, ``anime_list``, ``search``,
            ``add_queue``, ``entity_list``, ...).
        req: request object handed in by the router. Some branches read it,
            others read the global flask ``request``; that split is kept
            exactly as it was.

    Returns:
        A ``jsonify`` response. Unknown sub-commands and unexpected errors
        yield ``{"ret": "exception", "log": ...}`` instead of ``None``,
        which Flask cannot convert into a response.
    """
    try:
        cate = request.form.get("type", None)
        page = request.form.get("page", None)

        if sub == "analysis":
            code = request.form["code"]
            wr_id = request.form.get("wr_id", None)
            bo_table = request.form.get("bo_table", None)
            P.ModelSetting.set("ohli24_current_code", code)
            data = self.get_series_info(code, wr_id, bo_table)
            P.ModelSetting.set(f"{self.name}_recent_code", code)
            self.current_data = data
            return jsonify({"ret": "success", "data": data, "code": code})

        elif sub == "anime_list":
            data = self.get_anime_info(cate, page)
            return jsonify({"ret": "success", "cate": cate, "page": page, "data": data})

        elif sub == "complete_list":
            logger.debug("cate:: %s", cate)
            page = request.form["page"]
            data = self.get_anime_info(cate, page)
            return jsonify({"ret": "success", "cate": cate, "page": page, "data": data})

        elif sub == "search":
            query = request.form["query"]
            page = request.form["page"]
            data = self.get_search_result(query, page, cate)
            return jsonify(
                {
                    "ret": "success",
                    "cate": cate,
                    "page": page,
                    "query": query,
                    "data": data,
                }
            )

        elif sub == "add_queue":
            info = json.loads(request.form["data"])
            logger.info(f"info:: {info}")
            return jsonify({"ret": self.add(info)})

        elif sub == "entity_list":
            return jsonify(self.queue.get_entity_list())

        elif sub == "queue_list":
            # Previously returned the set literal {"test"}, which is not a
            # valid Flask response; serve the queue content instead.
            return jsonify(self.queue.get_entity_list())

        elif sub == "queue_command":
            ret = self.queue.command(req.form["command"], int(req.form["entity_id"]))
            return jsonify(ret)

        elif sub == "add_queue_checked_list":
            data = json.loads(request.form["data"])

            def func():
                # Runs in a background thread so the request returns at once.
                count = 0
                for tmp in data:
                    add_ret = self.add(tmp)
                    if add_ret.startswith("enqueue"):
                        self.socketio_callback("list_refresh", "")
                        count += 1
                notify = {
                    "type": "success",
                    "msg": "%s 개의 에피소드를 큐에 추가 하였습니다." % count,
                }
                socketio.emit("notify", notify, namespace="/framework", broadcast=True)

            thread = threading.Thread(target=func, args=())
            thread.daemon = True
            thread.start()
            return jsonify("")

        elif sub == "web_list3":
            P.logger.debug(req)
            P.logger.debug("web_list3")
            # The branch used to fall through without returning the result.
            return jsonify(ModelOhli24Item.web_list(req))

        elif sub == "web_list2":
            logger.debug("web_list2")
            return jsonify(ModelOhli24Item.web_list(request))

        elif sub == "db_remove":
            return jsonify(ModelOhli24Item.delete_by_id(req.form["id"]))

        elif sub == "add_whitelist":
            try:
                params = request.get_json()
                logger.debug(f"params:: {params}")
                if params is not None:
                    code = params["data_code"]
                    logger.debug(f"params: {code}")
                    ret = LogicOhli24.add_whitelist(code)
                else:
                    ret = LogicOhli24.add_whitelist()
                return jsonify(ret)
            except Exception as e:
                logger.error(f"Exception: {e}")
                logger.error(traceback.format_exc())
                return jsonify({"ret": False, "log": str(e)})

        return jsonify({"ret": "exception", "log": f"unknown sub: {sub}"})
    except Exception as e:
        P.logger.error(f"Exception: {e}")
        P.logger.error(traceback.format_exc())
        return jsonify({"ret": "exception", "log": str(e)})
def get_episode(self, clip_id):
    """Return the cached episode whose ``"title"`` equals ``clip_id``.

    Looks only at ``self.current_data["episode"]``. Returns ``None`` when
    nothing matches, and also when no series has been analysed yet or the
    cached value has no ``"episode"`` list (for example an error dict);
    those cases used to raise.
    """
    cached = self.current_data or {}
    for episode in cached.get("episode") or []:
        if episode.get("title") == clip_id:
            return episode
    return None
def process_command(self, command, arg1, arg2, arg3, req):
    """Execute a UI command and return a JSON response.

    Args:
        command: ``queue_list``, ``download_program``, ``list`` or
            ``queue_command``.
        arg1: program id for ``download_program``; ``"cancel"`` or
            ``"reset"`` for ``queue_command``.
        arg2: ``"true"``/``"false"`` flag for ``download_program`` telling
            whether an item already present in the DB may be queued again.
        arg3: unused.
        req: unused.

    Returns:
        ``jsonify`` response on every path (``queue_list`` used to return a
        bare list while all other paths returned ``jsonify``).
    """
    ret = {"ret": "success"}

    if command == "queue_list":
        logger.debug("queue_list")
        entity_list = self.queue.get_entity_list()
        logger.debug(f"self.queue.get_entity_list():: {entity_list}")
        return jsonify(list(entity_list))

    if command == "download_program":
        allow_requeue = arg2
        db_item = ModelOhli24Program.get(arg1)
        if allow_requeue == "false" and db_item is not None:
            ret["ret"] = "warning"
            ret["msg"] = "이미 DB에 있는 항목 입니다."
        elif (
            allow_requeue == "true"
            and db_item is not None
            and ModelOhli24Program.get_by_id_in_queue(db_item.id) is not None
        ):
            ret["ret"] = "warning"
            ret["msg"] = "이미 큐에 있는 항목 입니다."
        elif self.download_queue is None:
            # download_queue defaults to None on the class; report that
            # instead of failing with AttributeError on .put().
            ret["ret"] = "warning"
            ret["msg"] = "다운로드 큐가 초기화되지 않았습니다."
        else:
            if db_item is None:
                db_item = ModelOhli24Program(arg1, self.get_episode(arg1))
                db_item.save()
            db_item.init_for_queue()
            self.download_queue.put(db_item)
            ret["msg"] = "다운로드를 추가 하였습니다."
    elif command == "list":
        ret = [ins.get_data() for ins in SupportFfmpeg.get_list()]
    elif command == "queue_command":
        # "cancel" is accepted but intentionally does nothing.
        if arg1 == "reset":
            logger.debug("reset")
            if self.download_queue is not None:
                with self.download_queue.mutex:
                    self.download_queue.queue.clear()

    return jsonify(ret)
@staticmethod
def add_whitelist(*args):
    """Append a content code to the ``ohli24_auto_code_list`` setting.

    Args:
        *args: optional single content code. Without it, the code of
            ``LogicOhli24.current_data`` is used.

    Returns:
        ``{"ret": True, "code": code}`` after adding an explicit code;
        ``LogicOhli24.current_data`` after adding the implicit one;
        ``{"ret": False, "log": ...}`` when the code is already listed or
        an error occurred.
    """
    ret = {}
    logger.debug(f"args: {args}")
    try:
        use_current = len(args) == 0
        code = str(LogicOhli24.current_data["code"]) if use_current else str(args[0])
        print(code)

        stored = P.ModelSetting.get("ohli24_auto_code_list")
        # Codes may be separated by newlines or by "|".
        known_codes = [str(part.strip()) for part in stored.replace("\n", "|").split("|")]

        if code in known_codes:
            ret["ret"] = False
            ret["log"] = "이미 추가되어 있습니다."
        else:
            known_codes.append(code)
            # Blank entries are dropped before the list is written back.
            joined = "|".join(item for item in known_codes if item != "")
            entity = (
                db.session.query(P.ModelSetting).filter_by(key="ohli24_auto_code_list").with_for_update().first()
            )
            entity.value = joined
            db.session.commit()
            ret["ret"] = True
            ret["code"] = code
            if use_current:
                return LogicOhli24.current_data
            return ret
    except Exception as e:
        logger.error(f"Exception: {str(e)}")
        logger.error(traceback.format_exc())
        ret["ret"] = False
        ret["log"] = str(e)
    return ret
def setting_save_after(self, change_list):
    """Push a changed ``ohli24_max_ffmpeg_process_count`` setting to the queue.

    ``change_list`` is accepted but not read.
    """
    current_limit = self.queue.get_max_ffmpeg_count()
    configured_limit = P.ModelSetting.get_int("ohli24_max_ffmpeg_process_count")
    if current_limit != configured_limit:
        self.queue.set_max_ffmpeg_count(configured_limit)
def scheduler_function(self):
    """Scheduled job: enqueue episodes of every whitelisted series.

    Reads ``ohli24_auto_code_list`` (``|``-separated). When it contains
    ``"all"``, today's airing list is fetched and logged only; enqueueing
    for that mode is not implemented. Otherwise each code is analysed and
    every episode is passed to ``add()``. A series whose page could not be
    parsed is skipped instead of aborting the run with ``KeyError``.
    """
    logger.debug("ohli24 scheduler_function::=========================")
    content_code_list = P.ModelSetting.get_list("ohli24_auto_code_list", "|")
    logger.debug(f"content_code_list::: {content_code_list}")

    if "all" in content_code_list:
        # The site's airing board is filtered by Korean weekday name.
        week = ["월요일", "화요일", "수요일", "목요일", "금요일", "토요일", "일요일"]
        today = date.today()
        url = f'{P.ModelSetting.get("ohli24_url")}/bbs/board.php?bo_table=ing&sca={week[today.weekday()]}'
        ret_data = self.get_auto_anime_info(url=url)
        logger.debug(f"today_info:: {ret_data}")
        # TODO: per-series analysis and enqueueing for "all" mode.
        return

    for item in content_code_list:
        url = P.ModelSetting.get("ohli24_url") + "/c/" + item
        logger.debug(f"scheduling url: {url}")
        content_info = self.get_series_info(item, "", "")
        episodes = content_info.get("episode") if isinstance(content_info, dict) else None
        if episodes is None:
            # get_series_info() returns {"ret": "exception", ...} on failure.
            logger.warning(f"scheduler: no episode list for code {item}: {content_info}")
            continue
        for episode_info in episodes:
            add_ret = self.add(episode_info)
            if add_ret.startswith("enqueue"):
                self.socketio_callback("list_refresh", "")
@staticmethod
async def get_data(url) -> str:
    """Fetch ``url`` with a fresh aiohttp session and return the body as text."""
    async with aiohttp.ClientSession() as client:
        async with client.get(url) as reply:
            body = await reply.text()
    return body
@staticmethod
async def main(url_list: list):
    """Fetch every URL in ``url_list`` concurrently.

    Returns the response bodies in the same order as ``url_list``.
    """
    pending = [LogicOhli24.get_data(target) for target in url_list]
    return await asyncio.gather(*pending)
def get_series_info(self, code, wr_id, bo_table):
    """Scrape a series page and return its metadata and episode list.

    Args:
        code: series/episode code, or a full URL containing ``/c/<code>``
            or ``/e/<code>``.
        wr_id: optional board post id; when non-empty the page is fetched
            through ``/bbs/board.php`` instead of the ``/c/`` or ``/e/`` URL.
        bo_table: board name used together with ``wr_id``.

    Returns:
        dict with ``title``, ``image``, ``date``, ``day``,
        ``ser_description``, ``des``, ``episode`` and ``code`` (plus
        ``list_order`` when the list was reversed). On any failure returns
        ``{"ret": "exception", "log": <message>}``. A successful result is
        cached in ``self.current_data`` and returned directly when the same
        code is requested again.
    """
    code_type = "c"
    code = urllib.parse.quote(code)
    try:
        cached = self.current_data
        if cached is not None and "code" in cached and cached["code"] == code:
            return cached

        if code.startswith("http"):
            # Split on the full "/c/" or "/e/" marker, once. Splitting on
            # "c/" or "e/" could cut inside the host name or the title.
            if "/c/" in code:
                code = code.split("/c/", 1)[1]
                code_type = "c"
            elif "/e/" in code:
                code = code.split("/e/", 1)[1]
                code_type = "e"
        logger.info(f"code:::: {code}")

        base_url = P.ModelSetting.get("ohli24_url").rstrip("/")  # drop trailing slash
        url = base_url + ("/c/" if code_type == "c" else "/e/") + code
        if wr_id is not None and len(wr_id) > 0:
            url = base_url + "/bbs/board.php?bo_table=" + bo_table + "&wr_id=" + wr_id
        logger.debug("url:::> %s", url)

        response_data = LogicOhli24.get_html(url, timeout=10)
        if not response_data:
            # Report this directly rather than through an lxml parser error.
            logger.error(f"Empty response for {url}")
            return {"ret": "exception", "log": "empty response"}
        logger.debug(f"HTML length: {len(response_data)}")

        # Diagnostics: show short responses, and whether the expected markers exist.
        if len(response_data) < 1000:
            logger.warning(f"Short HTML response: {response_data[:500]}")
        else:
            if "item-subject" in response_data:
                logger.info("Found item-subject in HTML")
            else:
                logger.warning("item-subject NOT found in HTML")
            if "itemprop=\"image\"" in response_data:
                logger.info("Found itemprop=image in HTML")
            else:
                logger.warning("itemprop=image NOT found in HTML")

        tree = html.fromstring(response_data)

        # Title: h1[itemprop="headline"] first, then progressively looser h1 lookups.
        title = ""
        title_xpaths = [
            '//h1[@itemprop="headline"]/text()',
            '//h1[@itemprop="headline"]//text()',
            '//div[@class="view-wrap"]//h1/text()',
            '//h1/text()',
        ]
        for xpath in title_xpaths:
            result = tree.xpath(xpath)
            if result:
                title = "".join(result).strip()
                if title and title != "OHLI24":
                    break
        if not title or "OHLI24" in title:
            # Only the site name was found; fall back to the decoded code.
            title = urllib.parse.unquote(code)
        logger.info(f"title:: {title}")

        # Poster image: first candidate whose URL does not look like the site logo.
        image = ""
        image_xpaths = [
            '//img[@itemprop="image"]/@src',
            '//img[@class="img-tag"]/@src',
            '//div[@class="view-wrap"]//img/@src',
            '//div[contains(@class, "view-img")]//img/@src',
        ]
        for xpath in image_xpaths:
            result = tree.xpath(xpath)
            if result:
                image = result[0]
                if image and "logo" not in image.lower():
                    break
        if image:
            # base_url has no trailing slash, so "/path" joins without "//".
            if image.startswith(".."):
                image = image.replace("..", base_url)
            elif not image.startswith("http"):
                image = base_url + image
        logger.info(f"image:: {image}")

        # Metadata rows: Korean label -> key used in the result's "des" dict.
        des = {}
        description_dict = {
            "원제": "_otit",
            "원작": "_org",
            "감독": "_dir",
            "각본": "_scr",
            "캐릭터 디자인": "_character_design",
            "음악": "_sound",
            "제작사": "_pub",
            "장르": "_tag",
            "분류": "_classifi",
            "제작국가": "_country",
            "방영일": "_date",
            "등급": "_grade",
            "총화수": "_total_chapter",
            "상영시간": "_show_time",
            "상영일": "_release_date",
            "개봉년도": "_release_year",
            "개봉일": "_opening_date",
            "런타임": "_run_time",
            "작화": "_drawing",
        }
        des_items = tree.xpath('//div[@class="list"]/p')
        if not des_items:
            des_items = tree.xpath('//div[contains(@class, "view-field")]')
        for item in des_items:
            try:
                span = item.xpath(".//span//text()")
                if span and span[0] in description_dict:
                    key = description_dict[span[0]]
                    value = item.xpath(".//span/text()")
                    # First span text is the label, second is the value.
                    des[key] = value[1] if len(value) > 1 else ""
            except Exception as des_err:
                logger.debug(f"Description parse error: {des_err}")

        # Episode list: every a.item-subject link.
        episodes = []
        for a_elem in tree.xpath('//a[@class="item-subject"]'):
            try:
                ep_title = "".join(a_elem.xpath(".//text()")).strip()
                href = a_elem.get("href", "")
                if not href.startswith("http"):
                    href = base_url + href

                # The date sits two levels above the link.
                _date = ""
                parent = a_elem.getparent()
                grandparent = parent.getparent() if parent is not None else None
                if grandparent is not None:
                    date_result = grandparent.xpath('.//div[@class="wr-date"]/text()')
                    if not date_result:
                        date_result = grandparent.xpath('.//*[contains(@class, "date")]/text()')
                    _date = date_result[0].strip() if date_result else ""

                # "_vi" is the MD5 hex digest of the episode title.
                _vi = hashlib.md5(ep_title.encode("utf-8")).hexdigest()
                episodes.append({
                    "title": ep_title,
                    "link": href,
                    "thumbnail": image,
                    "date": _date,
                    "day": _date,
                    "_id": ep_title,
                    "va": href,
                    "_vi": _vi,
                    "content_code": code,
                })
            except Exception as ep_err:
                logger.warning(f"Episode parse error: {ep_err}")
                continue
        logger.info(f"Found {len(episodes)} episodes")

        # Synopsis.
        ser_description = tree.xpath('//div[@class="view-stocon"]/div[@class="c"]/text()')
        if not ser_description:
            ser_description = tree.xpath('//div[contains(@class, "view-story")]//text()')
        ser_description = ser_description or []

        data = {
            "title": title,
            "image": image,
            "date": "",
            "day": "",
            "ser_description": ser_description,
            "des": des,
            "episode": episodes,
            "code": code,
        }
        if not P.ModelSetting.get_bool("ohli24_order_desc"):
            data["episode"] = list(reversed(data["episode"]))
            data["list_order"] = "desc"

        self.current_data = data
        return data
    except Exception as e:
        P.logger.error("Exception:%s", e)
        P.logger.error(traceback.format_exc())
        return {"ret": "exception", "log": str(e)}
def get_anime_info(self, cate, page):
    """Scrape one page of a board listing (``ing``, ``movie``, ...).

    Args:
        cate: board table name placed in ``bo_table``.
        page: page number; ``None`` is treated as page 1.

    Returns:
        ``{"ret": "success", "anime_count": <rows found>, "anime_list":
        [{"link", "code", "title", "image_link"}, ...]}``. ``ret`` is set
        even when the page has no rows. Rows that cannot be parsed are
        skipped. On failure returns ``{"ret": "exception", "log": ...}``.
    """
    logger.debug("get_anime_info():: cate=%s, page=%s", cate, page)
    try:
        base_url = P.ModelSetting.get("ohli24_url")
        page = "1" if page is None else str(page)
        # Every category uses the same URL shape.
        url = base_url + "/bbs/board.php?bo_table=" + cate + "&page=" + page
        logger.info("url:::> %s", url)

        response_data = LogicOhli24.get_html(url, timeout=10)
        tree = html.fromstring(response_data)
        rows = tree.xpath('//div[@class="list-row"]')

        data = {"ret": "success", "anime_count": len(rows), "anime_list": []}
        for row in rows:
            try:
                entity = {}
                entity["link"] = row.xpath(".//a/@href")[0]
                entity["code"] = entity["link"].split("/")[-1]
                entity["title"] = row.xpath(".//div[@class='post-title']/text()")[0].strip()
                src = row.xpath(".//div[@class='img-item']/img/@src")
                if src:
                    entity["image_link"] = src[0].replace("..", base_url)
                else:
                    # Some rows carry the image only in data-ezsrc.
                    entity["image_link"] = row.xpath(".//div[@class='img-item']/img/@data-ezsrc")[0]
            except IndexError as row_err:
                # One malformed row must not discard the rest of the page.
                logger.warning(f"get_anime_info: skipping malformed row: {row_err}")
                continue
            data["anime_list"].append(entity)
        return data
    except Exception as e:
        P.logger.error("Exception:%s", e)
        P.logger.error(traceback.format_exc())
        return {"ret": "exception", "log": str(e)}
def get_auto_anime_info(self, url: str = ""):
    """Scrape the listing at ``url`` (used by the scheduler).

    Returns:
        ``{"ret": "success", "anime_count": <rows found>, "anime_list":
        [{"link", "code", "title", "image_link"}, ...]}``. ``ret`` is set
        even for an empty listing. Rows that cannot be parsed are skipped.
        On failure returns ``{"ret": "exception", "log": ...}``.
    """
    try:
        logger.info("url:::> %s", url)
        base_url = P.ModelSetting.get("ohli24_url")

        response_data = LogicOhli24.get_html(url, timeout=10)
        tree = html.fromstring(response_data)
        rows = tree.xpath('//div[@class="list-row"]')

        data = {"ret": "success", "anime_count": len(rows), "anime_list": []}
        for row in rows:
            try:
                entity = {}
                entity["link"] = row.xpath(".//a/@href")[0]
                entity["code"] = entity["link"].split("/")[-1]
                entity["title"] = row.xpath(".//div[@class='post-title']/text()")[0].strip()
                entity["image_link"] = row.xpath(".//div[@class='img-item']/img/@src")[0].replace(
                    "..", base_url
                )
            except IndexError as row_err:
                # One malformed row must not discard the rest of the listing.
                logger.warning(f"get_auto_anime_info: skipping malformed row: {row_err}")
                continue
            data["anime_list"].append(entity)
        return data
    except Exception as e:
        P.logger.error("Exception:%s", e)
        P.logger.error(traceback.format_exc())
        return {"ret": "exception", "log": str(e)}
# @staticmethod
def get_search_result(self, query, page, cate):
    """Search the site by title and scrape one result page.

    Args:
        query: search text; percent-encoded before being put in the URL.
        page: result page number (string or int).
        cate: accepted but not used when building the URL.

    Returns:
        ``{"ret": "success", "anime_count": <rows found>, "anime_list":
        [{"link", "wr_id", "title", "image_link", "code"}, ...]}``. ``ret``
        is set even when nothing matched. Rows that cannot be parsed are
        skipped. On failure returns ``{"ret": "exception", "log": ...}``.
    """
    try:
        base_url = P.ModelSetting.get("ohli24_url")
        _query = urllib.parse.quote(query)
        url = (
            base_url
            + "/bbs/search.php?srows=24&gr_id=&sfl=wr_subject&stx="
            + _query
            + "&page="
            + str(page)
        )
        logger.info("get_search_result()::url> %s", url)

        response_data = LogicOhli24.get_html(url, timeout=10)
        tree = html.fromstring(response_data)
        rows = tree.xpath('//div[@class="list-row"]')

        data = {"ret": "success", "anime_count": len(rows), "anime_list": []}
        for row in rows:
            try:
                entity = {}
                entity["link"] = row.xpath(".//a/@href")[0]
                # The post id is whatever follows the last "=" in the link.
                entity["wr_id"] = entity["link"].split("=")[-1]
                entity["title"] = "".join(row.xpath(".//div[@class='post-title']/text()")).strip()
                entity["image_link"] = row.xpath(".//div[@class='img-item']/img/@src")[0].replace(
                    "..", base_url
                )
                # The content code is taken from the thumbnail's alt text.
                entity["code"] = row.xpath(".//div[@class='img-item']/img/@alt")[0]
            except IndexError as row_err:
                # One malformed row must not discard the other results.
                logger.warning(f"get_search_result: skipping malformed row: {row_err}")
                continue
            data["anime_list"].append(entity)
        return data
    except Exception as e:
        P.logger.error(f"Exception: {str(e)}")
        P.logger.error(traceback.format_exc())
        return {"ret": "exception", "log": str(e)}
# @staticmethod
def plugin_load(self):
    """Initialise ffmpeg support and start the download queue.

    Any exception is logged and swallowed.
    """
    try:
        temp_dir = os.path.join(F.config["path_data"], "tmp")
        limit_key = f"{name}_max_ffmpeg_process_count"
        # NOTE(review): the limit is passed here as the raw setting value
        # (get) but as an int (get_int) to FfmpegQueue below — confirm
        # which type SupportFfmpeg.initialize expects.
        SupportFfmpeg.initialize("ffmpeg", temp_dir, self.callback_function, P.ModelSetting.get(limit_key))
        logger.debug("%s plugin_load", P.package_name)

        self.queue = FfmpegQueue(P, P.ModelSetting.get_int(limit_key), name, self)
        # Forget any series analysed before the (re)load.
        self.current_data = None
        self.queue.queue_start()
    except Exception as exc:
        logger.error("Exception:%s", exc)
        logger.error(traceback.format_exc())
# @staticmethod
def plugin_unload(self):
    """Remove the ``<package>_recent`` scheduler job; errors are logged only."""
    try:
        logger.debug("%s plugin_unload", P.package_name)
        # NOTE(review): process_menu() uses the job id "<package>_<module>";
        # confirm that "<package>_recent" is the id actually registered.
        job_id = "%s_recent" % P.package_name
        scheduler.remove_job(job_id)
    except Exception as exc:
        logger.error("Exception:%s", exc)
        logger.error(traceback.format_exc())
@staticmethod
def reset_db() -> bool:
    """Delete every ``ModelOhli24Item`` row, commit, and return ``True``."""
    item_query = db.session.query(ModelOhli24Item)
    item_query.delete()
    db.session.commit()
    return True
@staticmethod
def get_html(url, headers=None, referer=None, stream=False, timeout=60, stealth=False, data=None, method='GET'):
    """Fetch ``url`` with cloudscraper in a worker thread and return the body text.

    The request runs in a separate thread (the original note gives gevent
    SSL conflicts and Cloudflare as the reason). Up to three attempts are
    made, three seconds apart.

    Args:
        url: target URL; its path and query are percent-encoded first so
            non-ASCII addresses work.
        headers: optional request headers. The dict is copied, so the
            caller's object is never modified.
        referer: optional referer; encoded like ``url``. When absent and
            ``headers`` has no ``"referer"`` key, the site root is used.
        stream: accepted for compatibility; not used.
        timeout: per-request timeout in seconds; the worker is awaited for
            ``timeout + 10`` seconds.
        stealth: accepted for compatibility; not used.
        data: body for POST requests.
        method: ``'GET'`` (default) or ``'POST'``, case-insensitive.

    Returns:
        The response text. When every attempt fails, the last (short)
        response text if there was one, otherwise ``""``.
    """
    from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError
    import time
    from urllib import parse

    def encode_url(raw):
        """Percent-encode path and query of ``raw``; return it unchanged on failure."""
        if '://' not in raw:
            return raw
        try:
            scheme, netloc, path, params, query, fragment = parse.urlparse(raw)
            # unquote first so an already-encoded URL is not double-encoded
            path = parse.quote(parse.unquote(path), safe='/')
            query = parse.quote(parse.unquote(query), safe='=&%')
            return parse.urlunparse((scheme, netloc, path, params, query, fragment))
        except Exception as enc_err:
            # Best effort: keep the original URL, but leave a trace.
            logger.debug(f"get_html: could not encode {raw!r}: {enc_err}")
            return raw

    def fetch_url_with_cloudscraper(url, headers, timeout, data, method):
        """Perform the request with cloudscraper; runs in the worker thread."""
        import cloudscraper
        scraper = cloudscraper.create_scraper(
            browser={'browser': 'chrome', 'platform': 'darwin', 'mobile': False},
            delay=10
        )
        # NOTE(review): every request goes through this hard-coded private
        # proxy. Kept to preserve behavior — confirm it is intended.
        proxies = {
            "http": "http://192.168.0.2:3138",
            "https": "http://192.168.0.2:3138",
        }
        if method.upper() == 'POST':
            response = scraper.post(url, headers=headers, data=data, timeout=timeout, proxies=proxies)
        else:
            response = scraper.get(url, headers=headers, timeout=timeout, proxies=proxies)
        return response.text

    url = encode_url(url)

    if headers is None:
        headers = {
            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "accept-language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
        }
    else:
        # Work on a copy; the referer is added below.
        headers = dict(headers)

    if referer:
        headers["referer"] = encode_url(referer)
    elif "referer" not in headers:
        headers["referer"] = "https://ani.ohli24.com"

    is_post = method.upper() == 'POST'
    response_data = ""
    max_retries = 3
    for attempt in range(max_retries):
        # No `with` block here: leaving it would call shutdown(wait=True)
        # and block on a hung worker, defeating the timeout below.
        executor = ThreadPoolExecutor(max_workers=1)
        try:
            logger.debug(f"get_html (cloudscraper in thread) {method} attempt {attempt + 1}: {url}")
            future = executor.submit(fetch_url_with_cloudscraper, url, headers, timeout, data, method)
            response_data = future.result(timeout=timeout + 10)
            if response_data and (len(response_data) > 10 or is_post):
                logger.debug(f"get_html success, length: {len(response_data)}")
                return response_data
            logger.warning(f"Short response (len={len(response_data) if response_data else 0})")
        except FuturesTimeoutError:
            logger.warning(f"get_html attempt {attempt + 1} timed out")
        except Exception as e:
            logger.warning(f"get_html attempt {attempt + 1} failed: {e}")
        finally:
            executor.shutdown(wait=False)

        if attempt < max_retries - 1:
            time.sleep(3)

    return response_data
#########################################################
def add(self, episode_info):
    """Queue one episode for download.

    Returns:
        ``"queue_exist"`` when the episode is already in the in-memory queue;
        ``"db_completed"`` when its DB row has status ``"completed"``;
        ``"enqueue_db_append"`` when a new DB row was created and queued;
        ``"enqueue_db_exist"`` when an unfinished DB row was queued again.
    """
    if self.is_exist(episode_info):
        return "queue_exist"

    logger.debug(f"episode_info:: {episode_info}")
    db_entity = ModelOhli24Item.get_by_ohli24_id(episode_info["_id"])
    logger.debug("db_entity:::> %s", db_entity)

    already_recorded = db_entity is not None
    if already_recorded and db_entity.status == "completed":
        return "db_completed"

    # Both remaining outcomes build a queue entity and queue it; only a
    # brand-new episode is also written to the DB first.
    entity = Ohli24QueueEntity(P, self, episode_info)
    logger.debug("entity:::> %s", entity.as_dict())
    if not already_recorded:
        ModelOhli24Item.append(entity.as_dict())
        self.queue.add_queue(entity)
        return "enqueue_db_append"

    self.queue.add_queue(entity)
    return "enqueue_db_exist"
def is_exist(self, info):
    """Return ``True`` when an entity with ``info["_id"]`` is already queued.

    Scans ``self.queue.entity_list``. Always returns a bool; the previous
    version returned ``None`` when nothing matched.
    """
    wanted_id = info["_id"]
    return any(en.info["_id"] == wanted_id for en in self.queue.entity_list)
def callback_function(self, **args):
    """Handle an ffmpeg progress callback: notify the UI and refresh the list.

    Args:
        **args: callback payload. Reads ``args["type"]`` (``status_change``,
            ``last`` or ``normal``), ``args["status"]`` (a
            ``SupportFfmpeg.Status`` value) and ``args["data"]``.

    For ``last`` events a ``notify`` socket.io message is broadcast on the
    ``/framework`` namespace. In every case ``self.socketio_callback`` is
    called last with the computed refresh type (``None`` when nothing
    matched) and ``args["data"]``.
    """
    logger.debug("callback_function============")
    logger.debug(args)

    status_enum = SupportFfmpeg.Status
    list_url = "/ffmpeg/download/list"
    refresh_type = None
    event_type = args["type"]

    if event_type == "status_change":
        status = args["status"]
        if status == status_enum.DOWNLOADING:
            refresh_type = "status_change"
        elif status == status_enum.COMPLETED:
            refresh_type = "status_change"
            logger.debug("mod_ohli24.py:: download completed........")
        elif status == status_enum.READY:
            # The "download started" toast is disabled; only refresh the list.
            refresh_type = "add"

    elif event_type == "last":
        status = args["status"]
        # (statuses, toast type, message, args["data"] key appended after a
        # line break or None, attach list url?, refresh type)
        # The literals were broken across lines in the source; "<br>" is
        # restored as the separator — presumably the toast renders HTML
        # (TODO confirm).
        rules = (
            ((status_enum.WRONG_URL,), "warning", "잘못된 URL입니다", None, False, "add"),
            ((status_enum.WRONG_DIRECTORY,), "warning", "잘못된 디렉토리입니다.", "save_fullpath", False, "add"),
            (
                (status_enum.ERROR, status_enum.EXCEPTION),
                "warning",
                "다운로드 시작 실패.",
                "save_fullpath",
                False,
                "add",
            ),
            ((status_enum.USER_STOP,), "warning", "다운로드가 중지 되었습니다.", "save_fullpath", True, "last"),
            ((status_enum.COMPLETED,), "success", "다운로드가 완료 되었습니다.", "save_fullpath", True, "last"),
            ((status_enum.TIME_OVER,), "warning", "시간초과로 중단 되었습니다.", "save_fullpath", True, "last"),
            ((status_enum.PF_STOP,), "warning", "PF초과로 중단 되었습니다.", "save_fullpath", True, "last"),
            ((status_enum.FORCE_STOP,), "warning", "강제 중단 되었습니다.", "save_fullpath", True, "last"),
            ((status_enum.HTTP_FORBIDDEN,), "warning", "403에러로 중단 되었습니다.", "save_fullpath", True, "last"),
            (
                (status_enum.ALREADY_DOWNLOADING,),
                "warning",
                "임시파일폴더에 파일이 있습니다.",
                "temp_fullpath",
                True,
                "last",
            ),
        )
        for statuses, level, message, path_key, with_url, next_refresh in rules:
            if not any(status == candidate for candidate in statuses):
                continue
            if status == status_enum.COMPLETED:
                logger.debug("download completed........")
            if path_key is not None:
                message = message + "<br>" + args["data"][path_key]
            data = {"type": level, "msg": message}
            if with_url:
                data["url"] = list_url
            socketio.emit("notify", data, namespace="/framework", broadcast=True)
            refresh_type = next_refresh
            break

    elif event_type == "normal":
        if args["status"] == status_enum.DOWNLOADING:
            refresh_type = "status"

    self.socketio_callback(refresh_type, args["data"])
class Ohli24QueueEntity(FfmpegQueueEntity):
def __init__(self, P, module_logic, info):
    """Create a queue entity for one episode and resolve its download info.

    All download-related attributes start empty (``season`` starts at 1)
    and ``make_episode_info()`` is then called immediately.
    """
    super(Ohli24QueueEntity, self).__init__(P, module_logic, info)
    # Chained assignments bind left to right, so the attributes are created
    # in the same order as before.
    self._vi = self.url = self.epi_queue = self.filepath = None
    self.savepath = self.quality = self.filename = self.vtt = None
    self.season = 1
    self.content_title = self.srt_url = self.headers = None
    # Todo::: the original marks this call as temporarily under review.
    self.make_episode_info()
def refresh_status(self):
    """Send this entity's current state to the UI as a ``"status"`` event."""
    snapshot = self.as_dict()
    self.module_logic.socketio_callback("status", snapshot)
def info_dict(self, tmp):
    """Merge this entity's episode info and download fields into ``tmp``.

    ``tmp`` is modified in place and returned. Every key of ``self.info``
    is copied first, then ``vtt``, ``season``, ``content_title``,
    ``ohli24_info`` (the ``self.info`` dict itself) and ``epi_queue``.
    """
    tmp.update(self.info)
    tmp.update(
        {
            "vtt": self.vtt,
            "season": self.season,
            "content_title": self.content_title,
            "ohli24_info": self.info,
            "epi_queue": self.epi_queue,
        }
    )
    return tmp
def download_completed(self):
    """Mark this episode's DB row as completed, if such a row exists."""
    logger.debug("download completed.......!!")
    db_entity = ModelOhli24Item.get_by_ohli24_id(self.info["_id"])
    if db_entity is None:
        return
    db_entity.status = "completed"
    # The attribute name "complated_time" is spelled this way in the source.
    db_entity.complated_time = datetime.now()
    db_entity.save()
# Get episode info from OHLI24 site
def make_episode_info(self):
try:
base_url = P.ModelSetting.get("ohli24_url")
# 에피소드 페이지 URL (예: https://ani.ohli24.com/e/원펀맨 3기 1화)
url = self.info["va"]
if "//e/" in url:
url = url.replace("//e/", "/e/")
ourls = parse.urlparse(url)
headers = {
"Referer": f"{ourls.scheme}://{ourls.netloc}",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
}
logger.debug(f"make_episode_info()::url==> {url}")
logger.info(f"self.info:::> {self.info}")
# Step 1: 에피소드 페이지에서 cdndania.com iframe 찾기
text = LogicOhli24.get_html(url, headers=headers, referer=f"{ourls.scheme}://{ourls.netloc}")
# 디버깅: HTML에 cdndania 있는지 확인
if "cdndania" in text:
logger.info("cdndania found in HTML")
else:
logger.warning("cdndania NOT found in HTML - page may be dynamically loaded")
logger.debug(f"HTML snippet: {text[:1000]}")
soup = BeautifulSoup(text, "lxml")
# mcpalyer 클래스 내의 iframe 찾기
player_div = soup.find("div", class_="mcpalyer")
logger.debug(f"player_div (mcpalyer): {player_div is not None}")
if not player_div:
player_div = soup.find("div", class_="embed-responsive")
logger.debug(f"player_div (embed-responsive): {player_div is not None}")
iframe = None
if player_div:
iframe = player_div.find("iframe")
logger.debug(f"iframe in player_div: {iframe is not None}")
if not iframe:
iframe = soup.find("iframe", src=re.compile(r"cdndania\.com"))
logger.debug(f"iframe with cdndania src: {iframe is not None}")
if not iframe:
# 모든 iframe 찾기
all_iframes = soup.find_all("iframe")
logger.debug(f"Total iframes found: {len(all_iframes)}")
for i, f in enumerate(all_iframes):
logger.debug(f"iframe {i}: src={f.get('src', 'no src')}")
if all_iframes:
iframe = all_iframes[0]
if not iframe or not iframe.get("src"):
logger.error("No iframe found on episode page")
return
iframe_src = iframe.get("src")
logger.info(f"Found cdndania iframe: {iframe_src}")
# Step 2: cdndania.com 페이지에서 m3u8 URL 추출
video_url, vtt_url = self.extract_video_from_cdndania(iframe_src, url)
if not video_url:
logger.error("Failed to extract video URL from cdndania")
return
self.url = video_url
self.srt_url = vtt_url
logger.info(f"Video URL: {self.url}")
if self.srt_url:
logger.info(f"Subtitle URL: {self.srt_url}")
# 헤더 설정
self.headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
"Referer": iframe_src,
}
# 파일명 생성
match = re.compile(r"(?P