From 3102ba50ad1fc7879f1b01f3fff213e69c82a395 Mon Sep 17 00:00:00 2001 From: projectdx Date: Mon, 24 Oct 2022 03:59:23 +0900 Subject: [PATCH] anilife update 2022.10.23(01.) --- logic_anilife.py | 455 ++++++++++++++++++++++++----------------------- 1 file changed, 231 insertions(+), 224 deletions(-) diff --git a/logic_anilife.py b/logic_anilife.py index 7f88288..de87ee1 100644 --- a/logic_anilife.py +++ b/logic_anilife.py @@ -102,7 +102,7 @@ class LogicAniLife(LogicModuleBase): } useragent = { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, " - "like Gecko) Chrome/96.0.4664.110 Whale/3.12.129.46 Safari/537.36" + "like Gecko) Chrome/96.0.4664.110 Whale/3.12.129.46 Safari/537.36" } def __init__(self, P): @@ -112,7 +112,7 @@ class LogicAniLife(LogicModuleBase): default_route_socketio(P, self) @staticmethod - def get_html(url, referer=None, stream=False, timeout=5): + def get_html(url: str, referer: str = None, stream: bool = False, timeout: int = 5) -> str: data = "" try: print("cloudflare protection bypass ==================") @@ -143,7 +143,7 @@ class LogicAniLife(LogicModuleBase): return data @staticmethod - def get_html_requests(url, referer=None, stream=False, timeout=5): + def get_html_requests(url: str, referer: str = None, stream: str = False, timeout: int = 5) -> str: data = "" try: print("get_html_requests ==================") @@ -173,153 +173,172 @@ class LogicAniLife(LogicModuleBase): @staticmethod async def get_html_playwright( - url, headless=False, referer=None, engine="chrome", stealth=False - ): - from playwright.sync_api import sync_playwright - from playwright.async_api import async_playwright - from playwright_stealth import stealth_sync, stealth_async + url: str, headless: bool = False, referer: str = None, engine: str = "chrome", stealth: bool = False + ) -> str: + try: + from playwright.sync_api import sync_playwright + from playwright.async_api import async_playwright + from playwright_stealth import stealth_sync, stealth_async - import time + import time - browser_args = [ - "--window-size=1300,570", - "--window-position=000,000", - "--disable-dev-shm-usage", - "--no-sandbox", - "--disable-web-security", - "--disable-features=site-per-process", - "--disable-setuid-sandbox", - "--disable-accelerated-2d-canvas", - "--no-first-run", - "--no-zygote", - # '--single-process', - "--disable-gpu", - "--use-gl=egl", - "--disable-blink-features=AutomationControlled", - "--disable-background-networking", - "--enable-features=NetworkService,NetworkServiceInProcess", - "--disable-background-timer-throttling", - "--disable-backgrounding-occluded-windows", - "--disable-breakpad", - "--disable-client-side-phishing-detection", - "--disable-component-extensions-with-background-pages", - "--disable-default-apps", - "--disable-extensions", - "--disable-features=Translate", - "--disable-hang-monitor", - "--disable-ipc-flooding-protection", - "--disable-popup-blocking", - "--disable-prompt-on-repost", - "--disable-renderer-backgrounding", - "--disable-sync", - "--force-color-profile=srgb", - "--metrics-recording-only", - "--enable-automation", - "--password-store=basic", - "--use-mock-keychain", - "--hide-scrollbars", - "--mute-audio", - ] - # scraper = cloudscraper.create_scraper( - # browser={"browser": "chrome", "platform": "windows", "desktop": True}, - # debug=False, - # # sess=LogicAniLife.session, - # delay=10, - # ) - # - # cookie_value, user_agent = scraper.get_cookie_string(url) - # - # logger.debug(f"cookie_value:: {cookie_value}") - - start = time.time() - ua = ( - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " - "AppleWebKit/537.36 (KHTML, like Gecko) " - "Chrome/69.0.3497.100 Safari/537.36" - ) - # from playwright_stealth import stealth_sync - cookie = None - - def set_cookie(req): - nonlocal cookie - if "cookie" in req.headers: - cookie = req.headers["cookie"] - - async with async_playwright() as p: - if engine == "chrome": - browser = await p.chromium.launch( - channel="chrome", args=browser_args, headless=headless - ) - elif engine == "webkit": - browser = await p.webkit.launch( - headless=headless, - args=browser_args, - ) - else: - browser = await p.firefox.launch( - headless=headless, - args=browser_args, - ) - # context = browser.new_context( - # user_agent=ua, + cookie = None + browser_args = [ + "--window-size=1300,570", + "--window-position=000,000", + "--disable-dev-shm-usage", + "--no-sandbox", + "--disable-web-security", + "--disable-features=site-per-process", + "--disable-setuid-sandbox", + "--disable-accelerated-2d-canvas", + "--no-first-run", + "--no-zygote", + # '--single-process', + "--disable-gpu", + "--use-gl=egl", + "--disable-blink-features=AutomationControlled", + "--disable-background-networking", + "--enable-features=NetworkService,NetworkServiceInProcess", + "--disable-background-timer-throttling", + "--disable-backgrounding-occluded-windows", + "--disable-breakpad", + "--disable-client-side-phishing-detection", + "--disable-component-extensions-with-background-pages", + "--disable-default-apps", + "--disable-extensions", + "--disable-features=Translate", + "--disable-hang-monitor", + "--disable-ipc-flooding-protection", + "--disable-popup-blocking", + "--disable-prompt-on-repost", + "--disable-renderer-backgrounding", + "--disable-sync", + "--force-color-profile=srgb", + "--metrics-recording-only", + "--enable-automation", + "--password-store=basic", + "--use-mock-keychain", + "--hide-scrollbars", + "--mute-audio", + ] + # scraper = cloudscraper.create_scraper( + # browser={"browser": "chrome", "platform": "windows", "desktop": True}, + # debug=False, + # # sess=LogicAniLife.session, + # delay=10, # ) + # + # cookie_value, user_agent = scraper.get_cookie_string(url) + # + # logger.debug(f"cookie_value:: {cookie_value}") - LogicAniLife.headers["Referer"] = "https://anilife.live/detail/id/471" - # print(LogicAniLife.headers) - - LogicAniLife.headers["Referer"] = LogicAniLife.episode_url - - if referer is not None: - LogicAniLife.headers["Referer"] = referer - - logger.debug(f"LogicAniLife.headers::: {LogicAniLife.headers}") - context = await browser.new_context(extra_http_headers=LogicAniLife.headers) - await context.add_cookies(LogicAniLife.cookies) - - # LogicAniLife.headers["Cookie"] = cookie_value - - # context.set_extra_http_headers(LogicAniLife.headers) - - page = await context.new_page() - - # page.set_extra_http_headers(LogicAniLife.headers) - - if stealth: - await stealth_async(page) - - # page.on("request", set_cookie) - # stealth_sync(page) - print(LogicAniLife.headers["Referer"]) - - page.on("request", set_cookie) - - print(f'Referer:: {LogicAniLife.headers["Referer"]}') - # await page.set_extra_http_headers(LogicAniLife.headers) - - await page.goto( - url, wait_until="load", referer=LogicAniLife.headers["Referer"] + start = time.time() + ua = ( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/69.0.3497.100 Safari/537.36" ) - # page.wait_for_timeout(10000) - await asyncio.sleep(2.9) - # await page.reload() + # from playwright_stealth import stealth_sync - # time.sleep(10) - # cookies = context.cookies - # print(cookies) + def set_cookie(req): + nonlocal cookie + if "cookie" in req.headers: + cookie = req.headers["cookie"] - print(f"page.url:: {page.url}") - LogicAniLife.origin_url = page.url + async with async_playwright() as p: + try: + if engine == "chrome": + browser = await p.chromium.launch( + channel="chrome", args=browser_args, headless=headless + ) + elif engine == "webkit": + browser = await p.webkit.launch( + headless=headless, + args=browser_args, + ) + else: + browser = await p.firefox.launch( + headless=headless, + args=browser_args, + ) + # context = browser.new_context( + # user_agent=ua, + # ) - # print(page.content()) + LogicAniLife.headers[ + "Referer" + ] = "https://anilife.live/detail/id/471" + # print(LogicAniLife.headers) - print(f"run at {time.time() - start} sec") + LogicAniLife.headers["Referer"] = LogicAniLife.episode_url - return await page.content() + if referer is not None: + LogicAniLife.headers["Referer"] = referer + + logger.debug(f"LogicAniLife.headers::: {LogicAniLife.headers}") + context = await browser.new_context( + extra_http_headers=LogicAniLife.headers + ) + await context.add_cookies(LogicAniLife.cookies) + + # LogicAniLife.headers["Cookie"] = cookie_value + + # context.set_extra_http_headers(LogicAniLife.headers) + + page = await context.new_page() + + # page.set_extra_http_headers(LogicAniLife.headers) + + if stealth: + await stealth_async(page) + + # page.on("request", set_cookie) + # stealth_sync(page) + print(LogicAniLife.headers["Referer"]) + + page.on("request", set_cookie) + + print(f'Referer:: {LogicAniLife.headers["Referer"]}') + # await page.set_extra_http_headers(LogicAniLife.headers) + + await page.goto( + url, wait_until="load", referer=LogicAniLife.headers["Referer"] + ) + # page.wait_for_timeout(10000) + await asyncio.sleep(2.9) + + # await page.reload() + + # time.sleep(10) + # cookies = context.cookies + # print(cookies) + + print(f"page.url:: {page.url}") + LogicAniLife.origin_url = page.url + + # print(page.content()) + + print(f"run at {time.time() - start} sec") + + return await page.content() + except Exception as e: + logger.error("Exception:%s", e) + logger.error(traceback.format_exc()) + finally: + await browser.close() + + except Exception as e: + logger.error("Exception:%s", e) + logger.error(traceback.format_exc()) + finally: + # browser.close() + pass @staticmethod async def get_vod_url_v1( - url, headless=False, referer=None, engine="chrome", stealth=False + url, headless=False, referer=None, engine="chrome", stealth=False ): from playwright.sync_api import sync_playwright from playwright.async_api import async_playwright @@ -421,7 +440,7 @@ class LogicAniLife(LogicModuleBase): return await page.content() @staticmethod - async def get_vod_url(url, headless=False): + async def get_vod_url(url: str, headless: bool = False) -> str: from playwright.sync_api import sync_playwright from playwright.async_api import async_playwright from playwright_stealth import stealth_async @@ -488,110 +507,96 @@ class LogicAniLife(LogicModuleBase): # from playwright_stealth import stealth_sync async with async_playwright() as p: - # browser = await p.chromium.launch(headless=headless, args=browser_args) - browser = await p.chromium.launch(headless=headless, args=browser_args) + try: + # browser = await p.chromium.launch(headless=headless, args=browser_args) + browser = await p.chromium.launch(headless=headless, args=browser_args) - # browser = await p.webkit.launch(headless=headless) - # context = browser.new_context( - # user_agent=ua, - # ) + # browser = await p.webkit.launch(headless=headless) + # context = browser.new_context( + # user_agent=ua, + # ) - LogicAniLife.headers[ - "Referer" - ] = "https://anilife.live/g/l?id=14344143-040a-4e40-9399-a7d22d94554b" - # print(LogicAniLife.headers) + LogicAniLife.headers[ + "Referer" + ] = "https://anilife.live/g/l?id=14344143-040a-4e40-9399-a7d22d94554b" + # print(LogicAniLife.headers) - # context = await browser.new_context(extra_http_headers=LogicAniLife.headers) - context = await browser.new_context() - await context.set_extra_http_headers(LogicAniLife.headers) + # context = await browser.new_context(extra_http_headers=LogicAniLife.headers) + context = await browser.new_context() + await context.set_extra_http_headers(LogicAniLife.headers) - # await context.add_cookies(LogicAniLife.cookies) + # await context.add_cookies(LogicAniLife.cookies) - # tracer = HarTracer(context=context, browser_name=p.chromium.name) - tracer = HarTracer(context=context, browser_name=p.webkit.name) + # tracer = HarTracer(context=context, browser_name=p.chromium.name) + tracer = HarTracer(context=context, browser_name=p.webkit.name) - # LogicAniLife.headers["Cookie"] = cookie_value + # LogicAniLife.headers["Cookie"] = cookie_value - # context.set_extra_http_headers(LogicAniLife.headers) + # context.set_extra_http_headers(LogicAniLife.headers) - page = await context.new_page() + page = await context.new_page() - # await page.set_extra_http_headers(LogicAniLife.headers) + # await page.set_extra_http_headers(LogicAniLife.headers) - # await stealth_async(page) - # logger.debug(url) + # await stealth_async(page) + # logger.debug(url) - # page.on("request", set_cookie) - # stealth_sync(page) - # await page.goto( - # url, wait_until="load", referer=LogicAniLife.headers["Referer"] - # ) - # await page.goto(url, wait_until="load") - await page.goto(url, wait_until="domcontentloaded") + # page.on("request", set_cookie) + # stealth_sync(page) + # await page.goto( + # url, wait_until="load", referer=LogicAniLife.headers["Referer"] + # ) + # await page.goto(url, wait_until="load") + await page.goto(url, wait_until="domcontentloaded") - har = await tracer.flush() + har = await tracer.flush() - # page.wait_for_timeout(10000) - await asyncio.sleep(2) + # page.wait_for_timeout(10000) + await asyncio.sleep(2) - # logger.debug(har) - # page.reload() + # logger.debug(har) + # page.reload() - # time.sleep(10) - # cookies = context.cookies - # print(cookies) + # time.sleep(10) + # cookies = context.cookies + # print(cookies) - # print(page.content()) - # vod_url = page.evaluate( - # """() => { - # return console.log(vodUrl_1080p) }""" - # ) + # print(page.content()) + # vod_url = page.evaluate( + # """() => { + # return console.log(vodUrl_1080p) }""" + # ) - # vod_url = page.evaluate( - # """async () =>{ - # return _0x55265f(0x99) + alJson[_0x55265f(0x91)] - # }""" - # ) - result_har_json = har.to_json() - result_har_dict = har.to_dict() - # logger.debug(result_har_dict) + # vod_url = page.evaluate( + # """async () =>{ + # return _0x55265f(0x99) + alJson[_0x55265f(0x91)] + # }""" + # ) + result_har_json = har.to_json() + result_har_dict = har.to_dict() + # logger.debug(result_har_dict) - tmp_video_url = [] - for i, elem in enumerate(result_har_dict["log"]["entries"]): - if "m3u8" in elem["request"]["url"]: - logger.debug(elem["request"]["url"]) - tmp_video_url.append(elem["request"]["url"]) + tmp_video_url = [] + for i, elem in enumerate(result_har_dict["log"]["entries"]): + if "m3u8" in elem["request"]["url"]: + logger.debug(elem["request"]["url"]) + tmp_video_url.append(elem["request"]["url"]) - vod_url = tmp_video_url[-1] + vod_url = tmp_video_url[-1] - logger.debug(f"vod_url:: {vod_url}") + logger.debug(f"vod_url:: {vod_url}") - logger.debug(f"run at {time.time() - start} sec") + logger.debug(f"run at {time.time() - start} sec") - # html_content = LogicAniLife.get_html_selenium( - # vod_url, "https://anilife.live" - # ) - - # html_content = LogicAniLife.get_html_playwright( - # vod_url, False, referer="https://anilife.live" - # ) - - # html_content = LogicAniLife.get_html( - # vod_url, referer="https://anilife.live" - # ) - # html_content = LogicAniLife.get_html_requests( - # vod_url, referer="https://anilife.live" - # ) - - # print(f"html_content:: {html_content}") - # output_json = html_to_json.convert(html_content) - # resolution = output_json["html"][0]["body"][0]["_value"] - # logger.debug(f"output_json:: {resolution}") - - return vod_url + return vod_url + except Exception as e: + logger.error("Exception:%s", e) + logger.error(traceback.format_exc()) + finally: + await browser.close() @staticmethod - def get_html_selenium(url, referer): + def get_html_selenium(url: str, referer: str) -> str: from selenium.webdriver.common.by import By from selenium import webdriver from selenium_stealth import stealth @@ -654,6 +659,8 @@ class LogicAniLife(LogicModuleBase): elem = driver.find_element(By.XPATH, "//*") source_code = elem.get_attribute("outerHTML") + driver.close() + return source_code.encode("utf-8") # Create a request interceptor @@ -846,7 +853,7 @@ class LogicAniLife(LogicModuleBase): def setting_save_after(self): if self.queue.get_max_ffmpeg_count() != P.ModelSetting.get_int( - "anilife_max_ffmpeg_process_count" + "anilife_max_ffmpeg_process_count" ): self.queue.set_max_ffmpeg_count( P.ModelSetting.get_int("anilife_max_ffmpeg_process_count") @@ -1025,16 +1032,16 @@ class LogicAniLife(LogicModuleBase): ) elif cate == "theater": url = ( - P.ModelSetting.get("anilife_url") - + "/vodtype/categorize/Movie/" - + page + P.ModelSetting.get("anilife_url") + + "/vodtype/categorize/Movie/" + + page ) wrapper_xpath = '//div[@class="bsx"]' else: url = ( - P.ModelSetting.get("anilife_url") - + "/vodtype/categorize/Movie/" - + page + P.ModelSetting.get("anilife_url") + + "/vodtype/categorize/Movie/" + + page ) # cate == "complete": logger.info("url:::> %s", url)