diff --git a/logic_anilife.py b/logic_anilife.py index c52dc41..50823e9 100644 --- a/logic_anilife.py +++ b/logic_anilife.py @@ -270,10 +270,13 @@ class LogicAniLife(LogicModuleBase): return await page.content() @staticmethod - def get_vod_url(url, headless=False): + async def get_vod_url_v1( + url, headless=False, referer=None, engine="chrome", stealth=False + ): from playwright.sync_api import sync_playwright - from playwright_stealth import stealth_sync - import html_to_json + from playwright.async_api import async_playwright + from playwright_har_tracer import HarTracer + from playwright_stealth import stealth_sync, stealth_async import time @@ -295,10 +298,111 @@ class LogicAniLife(LogicModuleBase): "Chrome/69.0.3497.100 Safari/537.36" ) # from playwright_stealth import stealth_sync + cookie = None - with sync_playwright() as p: - browser = p.chromium.launch(headless=headless) - # browser = p.webkit.launch(headless=headless) + def set_cookie(req): + nonlocal cookie + if "cookie" in req.headers: + cookie = req.headers["cookie"] + + async with async_playwright() as p: + if engine == "chrome": + browser = await p.chromium.launch(channel="chrome", headless=headless) + elif engine == "webkit": + browser = await p.webkit.launch(headless=headless) + else: + browser = await p.firefox.launch(headless=headless) + # context = browser.new_context( + # user_agent=ua, + # ) + + LogicAniLife.headers["Referer"] = "https://anilife.live/detail/id/471" + # print(LogicAniLife.headers) + + LogicAniLife.headers["Referer"] = LogicAniLife.episode_url + + if referer is not None: + LogicAniLife.headers["Referer"] = referer + + logger.debug(f"LogicAniLife.headers::: {LogicAniLife.headers}") + context = await browser.new_context(extra_http_headers=LogicAniLife.headers) + await context.add_cookies(LogicAniLife.cookies) + + # LogicAniLife.headers["Cookie"] = cookie_value + + # context.set_extra_http_headers(LogicAniLife.headers) + tracer = HarTracer(context=context, browser_name=p.webkit.name) + + page = await context.new_page() + + # page.set_extra_http_headers(LogicAniLife.headers) + + if stealth: + await stealth_async(page) + + # page.on("request", set_cookie) + # stealth_sync(page) + print(LogicAniLife.headers["Referer"]) + + page.on("request", set_cookie) + + print(f'Referer:: {LogicAniLife.headers["Referer"]}') + # await page.set_extra_http_headers(LogicAniLife.headers) + + await page.goto( + url, wait_until="load", referer=LogicAniLife.headers["Referer"] + ) + + har = await tracer.flush() + # page.wait_for_timeout(10000) + await asyncio.sleep(10) + + # await page.reload() + + # time.sleep(10) + # cookies = context.cookies + # print(cookies) + + print(f"page.url:: {page.url}") + LogicAniLife.origin_url = page.url + + # print(page.content()) + + print(f"run at {time.time() - start} sec") + + return await page.content() + + @staticmethod + async def get_vod_url(url, headless=False): + from playwright.sync_api import sync_playwright + from playwright.async_api import async_playwright + from playwright_stealth import stealth_async + import html_to_json + from playwright_har_tracer import HarTracer + import time + + # scraper = cloudscraper.create_scraper( + # browser={"browser": "chrome", "platform": "windows", "desktop": True}, + # debug=False, + # # sess=LogicAniLife.session, + # delay=10, + # ) + # + # cookie_value, user_agent = scraper.get_cookie_string(url) + # + # logger.debug(f"cookie_value:: {cookie_value}") + + start = time.time() + ua = ( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/69.0.3497.100 Safari/537.36" + ) + # from playwright_stealth import stealth_sync + + async with async_playwright() as p: + # browser = await p.chromium.launch(headless=headless) + browser = await p.webkit.launch(headless=headless) # context = browser.new_context( # user_agent=ua, # ) @@ -306,38 +410,46 @@ class LogicAniLife(LogicModuleBase): LogicAniLife.headers[ "Referer" ] = "https://anilife.live/g/l?id=14344143-040a-4e40-9399-a7d22d94554b" - print(LogicAniLife.headers) + # print(LogicAniLife.headers) - context = browser.new_context(extra_http_headers=LogicAniLife.headers) + # context = await browser.new_context(extra_http_headers=LogicAniLife.headers) + context = await browser.new_context() + await context.set_extra_http_headers(LogicAniLife.headers) - context.add_cookies(LogicAniLife.cookies) + # await context.add_cookies(LogicAniLife.cookies) + + # tracer = HarTracer(context=context, browser_name=p.chromium.name) + tracer = HarTracer(context=context, browser_name=p.webkit.name) # LogicAniLife.headers["Cookie"] = cookie_value # context.set_extra_http_headers(LogicAniLife.headers) - page = context.new_page() + page = await context.new_page() - # page.set_extra_http_headers(LogicAniLife.headers) + # await page.set_extra_http_headers(LogicAniLife.headers) - stealth_sync(page) - - def set_cookie(req): - if "cookie" in req.headers: - print(req.headers["cookie"]) - cookie = req.headers["cookie"] + # await stealth_async(page) + # logger.debug(url) # page.on("request", set_cookie) # stealth_sync(page) - page.goto( - url, wait_until="networkidle", referer=LogicAniLife.headers["Referer"] - ) + # await page.goto( + # url, wait_until="load", referer=LogicAniLife.headers["Referer"] + # ) + # await page.goto(url, wait_until="load") + await page.goto(url, wait_until="domcontentloaded") + + har = await tracer.flush() + # page.wait_for_timeout(10000) - time.sleep(1) + await asyncio.sleep(2) + + # logger.debug(har) # page.reload() # time.sleep(10) - cookies = context.cookies + # cookies = context.cookies # print(cookies) # print(page.content()) @@ -345,23 +457,35 @@ class LogicAniLife(LogicModuleBase): # """() => { # return console.log(vodUrl_1080p) }""" # ) - vod_url = page.evaluate( - """async () =>{ - return _0x55265f(0x99) + alJson[_0x55265f(0x91)] - }""" - ) + + # vod_url = page.evaluate( + # """async () =>{ + # return _0x55265f(0x99) + alJson[_0x55265f(0x91)] + # }""" + # ) + result_har_json = har.to_json() + result_har_dict = har.to_dict() + # logger.debug(result_har_dict) + + tmp_video_url = [] + for i, elem in enumerate(result_har_dict["log"]["entries"]): + if "m3u8" in elem["request"]["url"]: + logger.debug(elem["request"]["url"]) + tmp_video_url.append(elem["request"]["url"]) + + vod_url = tmp_video_url[-1] logger.debug(f"vod_url:: {vod_url}") - print(f"run at {time.time() - start} sec") + logger.debug(f"run at {time.time() - start} sec") # html_content = LogicAniLife.get_html_selenium( # vod_url, "https://anilife.live" # ) - html_content = LogicAniLife.get_html_playwright( - vod_url, False, referer="https://anilife.live" - ) + # html_content = LogicAniLife.get_html_playwright( + # vod_url, False, referer="https://anilife.live" + # ) # html_content = LogicAniLife.get_html( # vod_url, referer="https://anilife.live" @@ -370,10 +494,10 @@ class LogicAniLife(LogicModuleBase): # vod_url, referer="https://anilife.live" # ) - print(f"html_content:: {html_content}") - output_json = html_to_json.convert(html_content) - resolution = output_json["html"][0]["body"][0]["_value"] - logger.debug(f"output_json:: {resolution}") + # print(f"html_content:: {html_content}") + # output_json = html_to_json.convert(html_content) + # resolution = output_json["html"][0]["body"][0]["_value"] + # logger.debug(f"output_json:: {resolution}") return vod_url @@ -571,7 +695,7 @@ class LogicAniLife(LogicModuleBase): # logger.info("code::: %s", code) P.ModelSetting.set("anilife_current_code", code) - data = self.get_series_info(code, wr_id, bo_table) + data = self.get_series_info(code) self.current_data = data return jsonify({"ret": "success", "data": data, "code": code}) elif sub == "anime_list": @@ -627,7 +751,7 @@ class LogicAniLife(LogicModuleBase): return True # 시리즈 정보를 가져오는 함수 - def get_series_info(self, code, wr_id, bo_table): + def get_series_info(self, code): try: if code.isdigit(): url = P.ModelSetting.get("anilife_url") + "/detail/id/" + code @@ -711,7 +835,7 @@ class LogicAniLife(LogicModuleBase): episodes.append( { "ep_num": ep_num, - "title": title, + "title": f"{main_title} {ep_num}화 - {title}", "link": link, "thumbnail": image, "date": date, @@ -815,7 +939,6 @@ class LogicAniLife(LogicModuleBase): print(p.match(entity["link"]) != None) if p.match(entity["link"]) is None: - entity["link"] = P.ModelSetting.get("anilife_url") + entity["link"] # real_url = LogicAniLife.get_real_link(url=entity["link"]) @@ -919,10 +1042,11 @@ class AniLifeQueueEntity(FfmpegQueueEntity): ourls = parse.urlparse(url) - headers = { + self.headers = { "Referer": f"{ourls.scheme}://{ourls.netloc}", "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Whale/3.12.129.46 Safari/537.36", } + headers["Referer"] = "https://anilife.live/detail/id/471" headers["Referer"] = LogicAniLife.episode_url @@ -944,13 +1068,15 @@ class AniLifeQueueEntity(FfmpegQueueEntity): text = asyncio.run( LogicAniLife.get_html_playwright( url, - headless=False, + headless=True, referer=referer_url, engine="chrome", stealth=True, ) ) + # vod_1080p_url = text + # logger.debug(text) soup = BeautifulSoup(text, "lxml") @@ -965,10 +1091,73 @@ class AniLifeQueueEntity(FfmpegQueueEntity): if match: jawcloud_url = match.group("jawcloud_url") - print(f"jawcloud_url:: {jawcloud_url}") + logger.debug(f"jawcloud_url:: {jawcloud_url}") - vod_1080p_url = LogicAniLife.get_vod_url(jawcloud_url) + # loop = asyncio.new_event_loop() + # asyncio.set_event_loop(loop) + # + logger.info(self.info) + + match = re.compile( + r"(?P