ohli24 api update 2023.01.13(01.)
This commit is contained in:
232
logic_ohli24.py
232
logic_ohli24.py
@@ -80,6 +80,10 @@ class LogicOhli24(LogicModuleBase):
|
||||
}
|
||||
current_headers = None
|
||||
current_data = None
|
||||
referer = None
|
||||
origin_url = None
|
||||
episode_url = None
|
||||
cookies = None
|
||||
|
||||
session = requests.Session()
|
||||
headers = {
|
||||
@@ -87,7 +91,8 @@ class LogicOhli24(LogicModuleBase):
|
||||
"Chrome/71.0.3578.98 Safari/537.36",
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
|
||||
"Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
|
||||
"Referer": "",
|
||||
# "Referer": "",
|
||||
# "Cookie": "PHPSESSID=hhhnrora8o9omv1tljq4efv216; 2a0d2363701f23f8a75028924a3af643=NDkuMTYzLjExMS4xMDk=; e1192aefb64683cc97abb83c71057733=aW5n",
|
||||
}
|
||||
useragent = {
|
||||
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, "
|
||||
@@ -100,6 +105,183 @@ class LogicOhli24(LogicModuleBase):
|
||||
self.queue = None
|
||||
default_route_socketio(P, self)
|
||||
|
||||
@staticmethod
async def get_html_playwright(
    url: str,
    headless: bool = False,
    referer: str = "",
    engine: str = "chrome",
    stealth: bool = False,
):
    """Render ``url`` in a Playwright-driven browser and return its HTML.

    The page is loaded with ``wait_until="networkidle"`` and then given a
    fixed extra delay before the DOM is serialised, so that scripts which
    run after the initial load have a chance to finish.

    Side effects on the class:
      * ``LogicOhli24.headers["Referer"]`` is overwritten with the referer
        used for this request.
      * ``LogicOhli24.origin_url`` is set to the final ``page.url`` after
        navigation.

    Args:
        url: Address to navigate to.
        headless: Passed straight to the browser ``launch()`` call.
        referer: Referer to send. The default ``""`` is sent as-is; pass
            ``None`` explicitly to fall back to ``LogicOhli24.episode_url``.
        engine: ``"chrome"`` launches Chromium with the ``chrome`` channel,
            ``"webkit"`` launches WebKit, anything else launches Firefox.
        stealth: When true, ``playwright_stealth.stealth_async`` is applied
            to the page before navigation.

    Returns:
        The page HTML as a string, or ``None`` if anything failed (the
        exception is logged, not raised).
    """
    import time

    # Extra wait after "networkidle" before reading the DOM.
    settle_seconds = 6

    # NOTE(review): these are Chromium-style switches but are also handed
    # to the webkit/firefox launchers below -- confirm those engines
    # tolerate them.
    browser_args = [
        "--window-size=1300,570",
        "--window-position=000,000",
        "--disable-dev-shm-usage",
        "--no-sandbox",
        "--disable-web-security",
        "--disable-features=site-per-process",
        "--disable-setuid-sandbox",
        "--disable-accelerated-2d-canvas",
        "--no-first-run",
        "--no-zygote",
        # '--single-process',
        "--disable-gpu",
        "--use-gl=egl",
        "--disable-blink-features=AutomationControlled",
        # "--disable-background-networking",
        "--enable-features=NetworkService,NetworkServiceInProcess",
        "--disable-background-timer-throttling",
        "--disable-backgrounding-occluded-windows",
        "--disable-breakpad",
        "--disable-client-side-phishing-detection",
        "--disable-component-extensions-with-background-pages",
        "--disable-default-apps",
        "--disable-extensions",
        "--disable-features=Translate",
        "--disable-hang-monitor",
        "--disable-ipc-flooding-protection",
        "--disable-popup-blocking",
        "--disable-prompt-on-repost",
        "--disable-renderer-backgrounding",
        "--disable-sync",
        "--force-color-profile=srgb",
        "--metrics-recording-only",
        # "--enable-automation",
        "--password-store=basic",
        "--use-mock-keychain",
        "--hide-scrollbars",
        "--mute-audio",
    ]

    try:
        from playwright.async_api import async_playwright

        # Only require playwright_stealth when the caller asked for it, so
        # a missing optional package does not break plain requests.
        if stealth:
            from playwright_stealth import stealth_async

        start = time.time()

        async with async_playwright() as p:
            # Pre-bind so the cleanup below is safe when launch() raises.
            browser = None
            try:
                if engine == "chrome":
                    browser = await p.chromium.launch(
                        channel="chrome", args=browser_args, headless=headless
                    )
                elif engine == "webkit":
                    browser = await p.webkit.launch(
                        headless=headless,
                        args=browser_args,
                    )
                else:
                    browser = await p.firefox.launch(
                        headless=headless,
                        args=browser_args,
                    )

                # An explicit None means "use the current episode page".
                # Header values must be strings, so never let None through.
                if referer is None:
                    referer = LogicOhli24.episode_url or ""
                LogicOhli24.headers["Referer"] = referer

                context = await browser.new_context(
                    extra_http_headers=LogicOhli24.headers,
                    ignore_https_errors=True,
                )
                page = await context.new_page()

                if stealth:
                    await stealth_async(page)

                logger.debug(f"Referer:: {referer}")

                # wait_until options: domcontentloaded / load / networkidle
                await page.goto(
                    url,
                    wait_until="networkidle",
                    referer=referer,
                )
                await asyncio.sleep(settle_seconds)

                logger.debug(f"page.url:: {page.url}")
                LogicOhli24.origin_url = page.url

                logger.debug(f"run at {time.time() - start} sec")

                return await page.content()
            finally:
                if browser is not None:
                    await browser.close()

    except Exception as e:
        # Best-effort fetch: log and fall through to return None.
        logger.error("Exception:%s", e)
        logger.error(traceback.format_exc())

    return None
|
||||
|
||||
@staticmethod
|
||||
def db_init():
|
||||
pass
|
||||
@@ -615,6 +797,16 @@ class LogicOhli24(LogicModuleBase):
|
||||
logger.info("url:::> %s", url)
|
||||
data = {}
|
||||
response_data = LogicOhli24.get_html(url, timeout=10)
|
||||
# response_data = asyncio.run(
|
||||
# LogicOhli24.get_html_playwright(
|
||||
# url,
|
||||
# headless=False,
|
||||
# # referer=referer_url,
|
||||
# engine="chrome",
|
||||
# # stealth=True,
|
||||
# )
|
||||
# )
|
||||
# print(response_data)
|
||||
tree = html.fromstring(response_data)
|
||||
tmp_items = tree.xpath('//div[@class="list-row"]')
|
||||
data["anime_count"] = len(tmp_items)
|
||||
@@ -743,15 +935,40 @@ class LogicOhli24(LogicModuleBase):
|
||||
return True
|
||||
|
||||
@staticmethod
|
||||
def get_html(url, referer=None, stream=False, timeout=5):
|
||||
def get_html(
|
||||
url, headers=None, referer=None, stream=False, timeout=5, stealth=False
|
||||
):
|
||||
data = ""
|
||||
try:
|
||||
|
||||
print("cloudflare protection bypass ==================P")
|
||||
|
||||
if headers is not None:
|
||||
LogicOhli24.headers = headers
|
||||
|
||||
logger.debug(f"headers: {LogicOhli24.headers}")
|
||||
response_data = asyncio.run(
|
||||
LogicOhli24.get_html_playwright(
|
||||
url,
|
||||
headless=False,
|
||||
# referer=referer_url,
|
||||
engine="chrome",
|
||||
# stealth=stealth,
|
||||
)
|
||||
)
|
||||
|
||||
logger.debug(len(response_data))
|
||||
|
||||
return response_data
|
||||
|
||||
if LogicOhli24.session is None:
|
||||
LogicOhli24.session = requests.session()
|
||||
|
||||
# logger.debug('get_html :%s', url)
|
||||
headers["Referer"] = "" if referer is None else referer
|
||||
|
||||
logger.info(headers)
|
||||
logger.debug(LogicOhli24.headers)
|
||||
page_content = LogicOhli24.session.get(
|
||||
url, headers=headers, timeout=timeout
|
||||
)
|
||||
@@ -848,7 +1065,8 @@ class Ohli24QueueEntity(FfmpegQueueEntity):
|
||||
logger.debug("make_episode_info()::url==> %s", url)
|
||||
logger.info(f"self.info:::> {self.info}")
|
||||
|
||||
text = requests.get(url, headers=headers).text
|
||||
# text = requests.get(url, headers=headers).text
|
||||
text = LogicOhli24.get_html(url, headers=headers)
|
||||
# logger.debug(text)
|
||||
soup1 = BeautifulSoup(text, "lxml")
|
||||
pattern = re.compile(r"url : \"\.\.(.*)\"")
|
||||
@@ -864,10 +1082,11 @@ class Ohli24QueueEntity(FfmpegQueueEntity):
|
||||
|
||||
iframe_src = iframe_url
|
||||
|
||||
# logger.debug(f"iframe_src:::> {iframe_src}")
|
||||
logger.debug(f"iframe_src:::> {iframe_src}")
|
||||
|
||||
resp1 = requests.get(iframe_src, headers=headers, timeout=600).text
|
||||
# logger.info("resp1::>> %s", resp1)
|
||||
# resp1 = requests.get(iframe_src, headers=headers, timeout=600).text
|
||||
resp1 = LogicOhli24.get_html(iframe_src, headers=headers, timeout=600)
|
||||
logger.info("resp1::>> %s", resp1)
|
||||
soup3 = BeautifulSoup(resp1, "lxml")
|
||||
# packed_pattern = re.compile(r'\\{*(eval.+)*\\}', re.MULTILINE | re.DOTALL)
|
||||
s_pattern = re.compile(r"(eval.+)", re.MULTILINE | re.DOTALL)
|
||||
@@ -918,6 +1137,7 @@ class Ohli24QueueEntity(FfmpegQueueEntity):
|
||||
"Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 "
|
||||
"Whale/3.12.129.46 Safari/537.36",
|
||||
"X-Requested-With": "XMLHttpRequest",
|
||||
"Cookie": "PHPSESSID=hhhnrora8o9omv1tljq4efv216; 2a0d2363701f23f8a75028924a3af643=NDkuMTYzLjExMS4xMDk=; e1192aefb64683cc97abb83c71057733=aW5n",
|
||||
}
|
||||
|
||||
payload = {
|
||||
|
||||
Reference in New Issue
Block a user