diff --git a/logic_anilife.py b/logic_anilife.py
index 306b2cd..68a5c09 100644
--- a/logic_anilife.py
+++ b/logic_anilife.py
@@ -5,6 +5,7 @@ import json
 from datetime import datetime
 import hashlib
 import re
+import asyncio
 
 import lxml.etree
@@ -23,10 +24,12 @@ for package in packages:
     # main(["install", package])
     os.system(f"pip install {package}")
 
+from bs4 import BeautifulSoup
 import cloudscraper
 
 # third-party
 from flask import request, render_template, jsonify
+from sqlalchemy import or_, and_, func, not_, desc
 
 # sjva common
 from framework import db, scheduler, path_data, socketio
@@ -73,16 +76,16 @@ class LogicAniLife(LogicModuleBase):
     current_headers = None
     current_data = None
     referer = None
+    origin_url = None
     session = requests.Session()
 
     headers = {
-        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)"
-        "Chrome/71.0.3578.98 Safari/537.36",
-        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
         "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
         "Referer": "",
-        "Cookie": ""
-        # "Cookie": "_ga=GA1.1.578607927.1660813724; __gads=ID=10abb8b98b6828ae-2281c943a9d500fd:T=1660813741:RT=1660813741:S=ALNI_MYU_iB2lBgSrEQUBwhKpNsToaqQ8A; sbtsck=javuwDzcOJqUyweM1OQeNGzHbjoHp7Cgw44XnPdM738c3E=; SPSI=e48379959d54a6a62cc7abdcafdb2761; SPSE=h5HfMGLJzLqzNafMD3YaOvHSC9xfh77CcWdKvexp/z5N5OsTkIiYSCudQhFffEfk/0pcOTVf0DpeV0RoNopzig==; anilife_csrf=b93b9f25a12a51cf185805ec4de7cf9d; UTGv2=h46b326af644f4ac5d0eb1502881136b3750; __gpi=UID=000008ba227e99e0:T=1660813741:RT=1660912282:S=ALNI_MaJHIVJIGpQ5nTE9lvypKQxJnn10A; DSR=SXPX8ELcRgh6N/9rNgjpQoNfaX2DRceeKYR0/ul7qTI9gApWQpZxr8jgymf/r0HsUT551vtOv2CMWpIn0Hd26A==; DCSS=89508000A76BBD939F6DDACE5BD9EB902D2212A; DGCC=Wdm; adOtr=7L4Xe58995d; spcsrf=6554fa003bf6a46dd9b7417acfacc20a; _ga_56VYJJ7FTM=GS1.1.1660912281.10.1.1660912576.0.0.0; PRLST=EO",
+        # "Cookie": ""
+        # "Cookie": "_ga=GA1.1.578607927.1660813724; __gads=ID=10abb8b98b6828ae-2281c943a9d500fd:T=1660813741:RT=1660813741:S=ALNI_MYU_iB2lBgSrEQUBwhKpNsToaqQ8A; SL_G_WPT_TO=ko; SL_GWPT_Show_Hide_tmp=1; SL_wptGlobTipTmp=1; SPSI=944c237cdd8606d80e5e330a0f332d03; SPSE=itZcXMDuso0ktWnDkV2G0HVwWEctCgDjrcFMlEQ5C745wqvp1pEEddrsAsjPUBjl6/8+9Njpq1IG3wt/tVag7w==; sbtsck=jav9aILa6Ofn0dEQr5DhDq5rpbd1JUoNgKwxBpZrqYd+CM=; anilife_csrf=54ee9d15c87864ee5e2538a63d894ad6; UTGv2=h46b326af644f4ac5d0eb1502881136b3750; DCST=pE9; __gpi=UID=000008ba227e99e0:T=1660813741:RT=1661170429:S=ALNI_MaJHIVJIGpQ5nTE9lvypKQxJnn10A; DSR=GWyTLTvSMF/lQD77ojQkGyl+7JvTudkSwV1GKeNVUcWEBa/msln9zzsBj7lj+89ywSRBM34Ol73AKf+KHZ9bZA==; DCSS=9D44115EC4CE12CADB88A005DC65A3CD74A211E; DGCC=zdV; spcsrf=fba136251afc6b5283109fc920322c70; sp_lit=kw0Xkp66eQ7bV0f0tNClhg==; PRLST=gt; adOtr=2C4H9c4d78d; _ga_56VYJJ7FTM=GS1.1.1661168661.18.1.1661173389.0.0.0",
     }
     useragent = {
         "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, "
@@ -101,7 +104,7 @@ class LogicAniLife(LogicModuleBase):
         try:
             print("cloudflare protection bypass ==================")
             # return LogicAniLife.get_html_cloudflare(url)
-            return LogicAniLife.get_html_selenium(url)
+            return LogicAniLife.get_html_selenium(url, referer)
             # return LogicAniLife.get_html_playwright(url)
 
             # import browser_cookie3
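Review note: `get_html()` now forwards `referer` into `get_html_selenium()`, so the whole bypass chain can spoof a consistent page history. For reference, a minimal sketch of the cloudscraper path that stays commented out in this hunk; it only assumes the `cloudscraper` package, and the target URL is an example value:

```python
import cloudscraper

# Mirrors the commented-out block above: cloudscraper solves the Cloudflare
# JS challenge behind a requests-compatible session.
scraper = cloudscraper.create_scraper(
    browser={"browser": "chrome", "platform": "windows", "desktop": True},
    delay=10,
)

html = scraper.get(
    "https://anilife.live/", headers={"Referer": "https://anilife.live/"}
).text
# the diff's commented variant instead extracts the solved cookies for reuse:
# cookie_value, user_agent = scraper.get_cookie_string("https://anilife.live/")
```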
@@ -180,8 +183,108 @@ class LogicAniLife(LogicModuleBase):
             # logger.error(traceback.format_exc())
 
     @staticmethod
-    def get_html_playwright(url):
+    async def get_html_playwright(
+        url, headless=False, referer=None, engine="chrome", stealth=False
+    ):
         from playwright.sync_api import sync_playwright
+        from playwright.async_api import async_playwright
+        from playwright_stealth import stealth_sync, stealth_async
+
+        import time
+
+        # scraper = cloudscraper.create_scraper(
+        #     browser={"browser": "chrome", "platform": "windows", "desktop": True},
+        #     debug=False,
+        #     # sess=LogicAniLife.session,
+        #     delay=10,
+        # )
+        #
+        # cookie_value, user_agent = scraper.get_cookie_string(url)
+        #
+        # logger.debug(f"cookie_value:: {cookie_value}")
+
+        start = time.time()
+        ua = (
+            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+            "AppleWebKit/537.36 (KHTML, like Gecko) "
+            "Chrome/69.0.3497.100 Safari/537.36"
+        )
+        # from playwright_stealth import stealth_sync
+        cookie = None
+
+        def set_cookie(req):
+            nonlocal cookie
+            if "cookie" in req.headers:
+                cookie = req.headers["cookie"]
+
+        async with async_playwright() as p:
+            if engine == "chrome":
+                browser = await p.chromium.launch(channel="chrome", headless=headless)
+            elif engine == "webkit":
+                browser = await p.webkit.launch(headless=headless)
+            else:
+                browser = await p.firefox.launch(headless=headless)
+            # context = browser.new_context(
+            #     user_agent=ua,
+            # )
+
+            LogicAniLife.headers["Referer"] = "https://anilife.live/detail/id/471"
+            # print(LogicAniLife.headers)
+
+            if referer is not None:
+                LogicAniLife.headers["Referer"] = referer
+
+            print(LogicAniLife.headers)
+
+            context = await browser.new_context(extra_http_headers=LogicAniLife.headers)
+
+            # LogicAniLife.headers["Cookie"] = cookie_value
+
+            # context.set_extra_http_headers(LogicAniLife.headers)
+
+            page = await context.new_page()
+
+            # page.set_extra_http_headers(LogicAniLife.headers)
+
+            if stealth:
+                await stealth_async(page)
+
+            # page.on("request", set_cookie)
+            # stealth_sync(page)
+            print(LogicAniLife.headers["Referer"])
+
+            page.on("request", set_cookie)
+
+            print(f'Referer:: {LogicAniLife.headers["Referer"]}')
+            await page.set_extra_http_headers(LogicAniLife.headers)
+
+            await page.goto(
+                url, wait_until="load", referer=LogicAniLife.headers["Referer"]
+            )
+            # page.wait_for_timeout(10000)
+            await asyncio.sleep(3.5)
+
+            # await page.reload()
+
+            # time.sleep(10)
+            # cookies = context.cookies
+            # print(cookies)
+
+            print(f"page.url:: {page.url}")
+            LogicAniLife.origin_url = page.url
+
+            # print(page.content())
+
+            # capture the HTML, then close the browser so a Chromium process
+            # does not outlive every fetch
+            content = await page.content()
+            await browser.close()
+
+            print(f"run at {time.time() - start} sec")
+
+            return content
+
+    @staticmethod
+    def get_vod_url(url, headless=False):
+        from playwright.sync_api import sync_playwright
+        from playwright_stealth import stealth_sync
+        import html_to_json
+
         import time
 
         # scraper = cloudscraper.create_scraper(
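Review note: `get_html_playwright()` is now a coroutine, so every synchronous caller has to drive it through an event loop. A minimal usage sketch, using the call shape this diff itself adopts in `make_episode_info()` further down (URL and referer are example values from the diff):

```python
import asyncio

# asyncio.run() creates and tears down an event loop around the single fetch,
# which is how the synchronous plugin code is expected to call the coroutine.
html = asyncio.run(
    LogicAniLife.get_html_playwright(
        "https://anilife.live/detail/id/471",
        headless=True,
        referer="https://anilife.live/",
        engine="chrome",
        stealth=True,
    )
)
```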
@@ -204,39 +307,77 @@ class LogicAniLife(LogicModuleBase):
         # from playwright_stealth import stealth_sync
         with sync_playwright() as p:
-            browser = p.webkit.launch(headless=True)
-            context = browser.new_context(
-                user_agent=ua,
-            )
-            LogicAniLife.referer = "https://anilife.live/"
+            # browser = p.chromium.launch(headless=headless)
+            browser = p.webkit.launch(headless=headless)
+            # context = browser.new_context(
+            #     user_agent=ua,
+            # )
+
+            LogicAniLife.headers[
+                "Referer"
+            ] = "https://anilife.live/g/l?id=14344143-040a-4e40-9399-a7d22d94554b"
+            print(LogicAniLife.headers)
+
+            context = browser.new_context(extra_http_headers=LogicAniLife.headers)
 
-            LogicAniLife.headers["Referer"] = LogicAniLife.referer
             # LogicAniLife.headers["Cookie"] = cookie_value
-            print(LogicAniLife.headers)
-            context.set_extra_http_headers(LogicAniLife.headers)
+            # context.set_extra_http_headers(LogicAniLife.headers)
 
             page = context.new_page()
 
+            # page.set_extra_http_headers(LogicAniLife.headers)
+
+            stealth_sync(page)
+
             def set_cookie(req):
                 if "cookie" in req.headers:
                     print(req.headers["cookie"])
                     cookie = req.headers["cookie"]
 
-            page.on("request", set_cookie)
+            # page.on("request", set_cookie)
             # stealth_sync(page)
-            page.goto(url, wait_until="domcontentloaded")
+            page.goto(
+                url, wait_until="networkidle", referer=LogicAniLife.headers["Referer"]
+            )
+            # page.wait_for_timeout(10000)
+            time.sleep(1)
+            # page.reload()
+
+            # time.sleep(10)
             cookies = context.cookies
-            print(cookies)
+            # print(cookies)
 
             # print(page.content())
 
+            # vod_url = page.evaluate(
+            #     """() => {
+            #         return console.log(vodUrl_1080p) }"""
+            # )
+            # evaluate against the site's obfuscated player script to recover
+            # the VOD URL it assembles client-side
+            vod_url = page.evaluate(
+                """async () =>{
+                    return _0x55265f(0x99) + alJson[_0x55265f(0x91)]
+                }"""
+            )
+
+            print(vod_url)
 
             print(f"run at {time.time() - start} sec")
 
-            return page.content()
+            # html_content = LogicAniLife.get_html_selenium(
+            #     vod_url, "https://anilife.live"
+            # )
+
+            # get_html_playwright() is a coroutine now, so run it to completion
+            # instead of leaving a bare coroutine object
+            html_content = asyncio.run(
+                LogicAniLife.get_html_playwright(
+                    vod_url, False, referer="https://anilife.live"
+                )
+            )
+            print(f"html_content:: {html_content}")
+            output_json = html_to_json.convert(html_content)
+            print(output_json)
+
+            return vod_url
 
     @staticmethod
-    def get_html_selenium(url):
+    def get_html_selenium(url, referer):
         from selenium.webdriver.common.by import By
         from selenium import webdriver
         from selenium_stealth import stealth
@@ -269,6 +410,60 @@ class LogicAniLife(LogicModuleBase):
 
         return source_code.encode("utf-8")
 
+    # Create a request interceptor
+    @staticmethod
+    def interceptor(request):
+        del request.headers["Referer"]  # Delete the header first
+        request.headers[
+            "Referer"
+        ] = "https://anilife.live/g/l?id=0a36917f-39cc-43ea-b0c6-0c86d27c2408"
+
+    @staticmethod
+    def get_html_seleniumwire(url, referer, wired=False):
+        from selenium import webdriver
+        from selenium.webdriver.common.by import By
+        from seleniumwire import webdriver as wired_webdriver
+        from selenium_stealth import stealth
+        import time
+
+        options = webdriver.ChromeOptions()
+        # add Chrome driver options (required when running on Linux)
+        options.add_argument("start-maximized")
+        options.add_argument("--headless")
+        options.add_argument("--no-sandbox")
+        options.add_experimental_option("excludeSwitches", ["enable-automation"])
+        options.add_experimental_option("useAutomationExtension", False)
+        # Chrome driver path
+        driver_path = "./bin/Darwin/chromedriver"
+        if wired:
+            driver = wired_webdriver.Chrome(
+                executable_path=driver_path, chrome_options=options
+            )
+        else:
+            driver = webdriver.Chrome(
+                executable_path=driver_path, chrome_options=options
+            )
+
+        # stealth ======================================
+        # stealth(
+        #     driver,
+        #     languages=["en-US", "en"],
+        #     vendor="Google Inc.",
+        #     platform="Win32",
+        #     webgl_vendor="Intel Inc.",
+        #     renderer="Intel Iris OpenGL Engine",
+        #     fix_hairline=True,
+        # )
+        if wired:
+            driver.request_interceptor = LogicAniLife.interceptor
+        driver.get(url)
+        driver.refresh()
+        time.sleep(1)
+        elem = driver.find_element(By.XPATH, "//*")
+        source_code = elem.get_attribute("outerHTML")
+
+        return source_code.encode("utf-8")
+
     @staticmethod
     def get_html_cloudflare(url, cached=False):
         # scraper = cloudscraper.create_scraper(
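Review note: the selenium-wire `interceptor` above pins `Referer` to one hard-coded episode URL even though `get_html_seleniumwire()` already receives a `referer` argument. A hedged sketch of a closure-based interceptor that would honor the argument instead; same selenium-wire API as the diff uses, `make_referer_interceptor` is a hypothetical helper name:

```python
# Hypothetical helper: builds a selenium-wire request interceptor for a given
# referer instead of the fixed URL baked into LogicAniLife.interceptor.
def make_referer_interceptor(referer):
    def interceptor(request):
        del request.headers["Referer"]  # delete first; headers can repeat
        request.headers["Referer"] = referer
    return interceptor

# usage inside get_html_seleniumwire, replacing the fixed interceptor:
# driver.request_interceptor = make_referer_interceptor(referer)
```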
@@ -367,6 +562,13 @@ class LogicAniLife(LogicModuleBase):
                 logger.info(f"info:: {info}")
                 ret["ret"] = self.add(info)
                 return jsonify(ret)
+            elif sub == "entity_list":
+                return jsonify(self.queue.get_entity_list())
+            elif sub == "queue_command":
+                ret = self.queue.command(
+                    req.form["command"], int(req.form["entity_id"])
+                )
+                return jsonify(ret)
         except Exception as e:
             P.logger.error("Exception:%s", e)
@@ -612,8 +814,22 @@ class LogicAniLife(LogicModuleBase):
         logger.debug(f"db_entity():: => {db_entity}")
 
-        return "enqueue_db_append"
-        # pass
+        if db_entity is None:
+            logger.debug(f"episode_info:: {episode_info}")
+            entity = AniLifeQueueEntity(P, self, episode_info)
+            logger.debug("entity:::> %s", entity.as_dict())
+            ModelAniLifeItem.append(entity.as_dict())
+
+            self.queue.add_queue(entity)
+
+            return "enqueue_db_append"
+        elif db_entity.status != "completed":
+            entity = AniLifeQueueEntity(P, self, episode_info)
+
+            self.queue.add_queue(entity)
+            return "enqueue_db_exist"
+        else:
+            return "db_completed"
 
     def is_exist(self, info):
         for e in self.queue.entity_list:
@@ -623,7 +839,103 @@
 
 class AniLifeQueueEntity(FfmpegQueueEntity):
-    pass
+    def __init__(self, P, module_logic, info):
+        super(AniLifeQueueEntity, self).__init__(P, module_logic, info)
+        self._vi = None
+        self.url = None
+        self.epi_queue = None
+        self.filepath = None
+        self.savepath = None
+        self.quality = None
+        self.filename = None
+        self.vtt = None
+        self.season = 1
+        self.content_title = None
+        self.srt_url = None
+        self.headers = None
+        # Todo::: temporarily commented out
+        self.make_episode_info()
+
+    def refresh_status(self):
+        self.module_logic.socketio_callback("status", self.as_dict())
+
+    def info_dict(self, tmp):
+        logger.debug("self.info::> %s", self.info)
+        for key, value in self.info.items():
+            tmp[key] = value
+        tmp["vtt"] = self.vtt
+        tmp["season"] = self.season
+        tmp["content_title"] = self.content_title
+        tmp["anilife_info"] = self.info
+        tmp["epi_queue"] = self.epi_queue
+        return tmp
+
+    # (sic) the method name follows the FfmpegQueueEntity callback spelling
+    def donwload_completed(self):
+        db_entity = ModelAniLifeItem.get_by_anilife_id(self.info["_id"])
+        if db_entity is not None:
+            db_entity.status = "completed"
+            db_entity.complated_time = datetime.now()
+            db_entity.save()
+
+    def make_episode_info(self):
+        logger.debug("make_episode_info() routine ==========")
+        try:
+            # add the download
+            base_url = "https://anilife.live"
+            iframe_url = ""
+
+            url = self.info["va"]
+            logger.debug(f"url:: {url}")
+
+            ourls = parse.urlparse(url)
+
+            headers = {
+                "Referer": f"{ourls.scheme}://{ourls.netloc}",
+                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Whale/3.12.129.46 Safari/537.36",
+            }
+            headers["Referer"] = "https://anilife.live/detail/id/471"
+
+            logger.debug("make_episode_info()::url==> %s", url)
+            logger.info(f"self.info:::> {self.info}")
+
+            referer = "https://anilife.live/g/l?id=13fd4d28-ff18-4764-9968-7e7ea7347c51"
+
+            # text = requests.get(url, headers=headers).text
+            # text = LogicAniLife.get_html_seleniumwire(url, referer=referer, wired=True)
+            # https://anilife.live/ani/provider/10f60832-20d1-4918-be62-0f508bf5460c
+            referer_url = (
+                "https://anilife.live/g/l?id=d4be1e0e-301b-403b-be1b-cf19f3ccfd23"
+            )
+            text = asyncio.run(
+                LogicAniLife.get_html_playwright(
+                    url,
+                    headless=False,
+                    referer=referer_url,
+                    engine="chrome",
+                    stealth=True,
+                )
+            )
+
+            # logger.debug(text)
+            soup = BeautifulSoup(text, "lxml")
+
+            all_scripts = soup.find_all("script")
+            # print(all_scripts)
+
+            # the named group is read back below via match.group("jawcloud_url")
+            regex = r"(?P<jawcloud_url>https?:\/\/.*=jawcloud)"
+            match = re.compile(regex).search(text)
+
+            print(match)
+            if match:
+                jawcloud_url = match.group("jawcloud_url")
+                print(jawcloud_url)
+
+                vod_1080p_url = LogicAniLife.get_vod_url(jawcloud_url)
+                print(vod_1080p_url)
+        except Exception as e:
+            P.logger.error("Exception:%s", e)
+            P.logger.error(traceback.format_exc())
 
 
 class ModelAniLifeItem(db.Model):
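Review note: the jawcloud extraction hinges on the regex's named group, which had been mangled in transit and is restored above. A standalone check of the pattern against a hypothetical page snippet (the sample string is illustrative, not from the site):

```python
import re

# The pattern must expose the "jawcloud_url" named group that
# make_episode_info() reads back with match.group("jawcloud_url").
regex = r"(?P<jawcloud_url>https?:\/\/.*=jawcloud)"
sample = '<script>var src = "https://abc.example.com/v/hls?u=xyz&p=jawcloud";</script>'  # hypothetical
match = re.compile(regex).search(sample)
if match:
    print(match.group("jawcloud_url"))  # -> https://abc.example.com/v/hls?u=xyz&p=jawcloud
```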
@@ -679,3 +991,51 @@ class ModelAniLifeItem(db.Model):
     def save(self):
         db.session.add(self)
         db.session.commit()
+
+    @classmethod
+    def make_query(cls, search="", order="desc", option="all"):
+        query = db.session.query(cls)
+        if search is not None and search != "":
+            if search.find("|") != -1:
+                tmp = search.split("|")
+                conditions = []
+                for tt in tmp:
+                    if tt != "":
+                        conditions.append(cls.filename.like("%" + tt.strip() + "%"))
+                query = query.filter(or_(*conditions))
+            elif search.find(",") != -1:
+                tmp = search.split(",")
+                for tt in tmp:
+                    if tt != "":
+                        query = query.filter(cls.filename.like("%" + tt.strip() + "%"))
+            else:
+                query = query.filter(cls.filename.like("%" + search + "%"))
+        if option == "completed":
+            query = query.filter(cls.status == "completed")
+
+        query = (
+            query.order_by(desc(cls.id)) if order == "desc" else query.order_by(cls.id)
+        )
+        return query
+
+    @classmethod
+    def append(cls, q):
+        item = ModelAniLifeItem()
+        item.content_code = q["content_code"]
+        item.season = q["season"]
+        item.episode_no = q["epi_queue"]
+        item.title = q["content_title"]
+        item.episode_title = q["title"]
+        # NOTE: the ohli24_* column names are reused as-is by this model
+        item.ohli24_va = q["va"]
+        item.ohli24_vi = q["_vi"]
+        item.ohli24_id = q["_id"]
+        item.quality = q["quality"]
+        item.filepath = q["filepath"]
+        item.filename = q["filename"]
+        item.savepath = q["savepath"]
+        item.video_url = q["url"]
+        item.vtt_url = q["vtt"]
+        item.thumbnail = q["thumbnail"]
+        item.status = "wait"
+        item.ohli24_info = q["anilife_info"]
+        item.save()
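Review note: `make_query()` follows the list/search helper shape common to SJVA downloader modules: `|` fans terms out into an OR filter, `,` chains them as AND. A hedged usage sketch of how a web list route might drive it; the page size and the offset math are assumptions, not part of this diff:

```python
# Hypothetical listing call: '|' means OR across terms, ',' means AND.
query = ModelAniLifeItem.make_query(search="Overlord|Re:Zero", order="desc", option="all")
count = query.count()

page_size = 30  # assumed page size
items = query.limit(page_size).offset(0).all()
logger.debug("listed %s of %s items", len(items), count)
```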
diff --git a/templates/anime_downloader_anilife_setting.html b/templates/anime_downloader_anilife_setting.html
new file mode 100644
index 0000000..eb58acd
--- /dev/null
+++ b/templates/anime_downloader_anilife_setting.html
@@ -0,0 +1,68 @@
+{% extends "base.html" %}
+{% block content %}
+
+    {{ macros.m_button_group([['global_setting_save_btn', '설정 저장']])}}
+    {{ macros.m_row_start('5') }}
+    {{ macros.m_row_end() }}
+
+
+
+
+
+
+
+
+{% endblock %}
\ No newline at end of file