#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2022/05/14 2:20 PM
# @Author  : yommi
# @Site    :
# @File    : debugger.py
# @Software: PyCharm
"""Call-tracing decorator used while debugging the plugin."""

import functools
import logging

# Module-level logging configuration is kept exactly as the module shipped it:
# importing this file switches the root logger to DEBUG with this format.
logging.basicConfig(
    level=logging.DEBUG,
    format="[%(asctime)s] [%(levelname)s] %(name)s: %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S %z",
)
logger = logging.getLogger(__name__)


class Debugger:
    """Decorator that logs every call to the wrapped callable and its result.

    Usage::

        @Debugger
        def add(a, b):
            return a + b

    The wrapper logs the callable's name and arguments at DEBUG level before
    the call and the returned value after it, then hands the result back
    unchanged. Exceptions raised by the callable propagate untouched.
    """

    def __init__(self, func):
        """Wrap *func*, copying its name/docstring onto the wrapper."""
        # Copy metadata first so that an attribute called ``func`` in the
        # wrapped callable's ``__dict__`` can never overwrite ``self.func``.
        functools.update_wrapper(self, func)
        self.func = func
        # Callables such as ``functools.partial`` have no ``__name__``; fall
        # back to their repr instead of failing while logging.
        self._label = getattr(func, "__name__", repr(func))

    def __get__(self, instance, owner=None):
        """Bind to *instance* so the decorator also works on methods.

        Without the descriptor protocol an instance of this class stored as a
        class attribute would be called without ``self``.
        """
        if instance is None:
            return self
        return functools.partial(self, instance)

    def __call__(self, *args, **kwargs):
        """Call the wrapped callable, logging arguments and return value."""
        # Lazy %-style arguments: nothing is formatted unless DEBUG is enabled.
        logger.debug("Entering : %s", self._label)
        logger.debug("args, kwargs : %s", (args, kwargs))

        result = self.func(*args, **kwargs)

        logger.debug("%s returned : %s", self._label, result)

        return result
@staticmethod
@yommi_logger(logging_type="debug")
def get_title_info(code):
    """Scrape an Inflearn course and build the list of its lectures.

    The course page supplies the poster and title. The first curriculum link
    is then opened to obtain the ``unit-el`` lecture entries, and each entry
    is resolved through ``LogicInflearn.getM3u8_info``.

    Args:
        code: Course slug used in ``<inflearn_url>/course/<code>``.

    Returns:
        dict: Always contains ``code`` and ``ret``. On success it also holds
        ``poster_url``, ``title``, ``save_folder``, ``season`` and
        ``episode`` (one dict per lecture), and ``ret`` is left at its
        initial ``False``. On any failure ``ret`` is ``"error"`` and ``log``
        carries the exception text.

    Side effects:
        Inserts a ``ModelInflearnProgram`` row when none exists for ``code``,
        sets ``LogicInflearn.season`` and, on success, stores the result in
        ``LogicInflearn.current_data``.
    """
    # Bound before the ``try`` so the error handler can always report through
    # it, even when the very first request fails.
    data = {"code": code, "ret": False}
    try:
        url = "%s/course/%s" % (ModelSetting.get("inflearn_url"), parse.quote(code))
        logger.info(url)
        html_content = LogicInflearn.get_html(url)

        soup = BeautifulSoup(html_content, "html.parser")
        data["poster_url"] = soup.select_one(
            "div.cd-header__thumbnail-cover div img"
        )["src"]
        data["title"] = soup.select_one("div.cd-header__title").text
        data["save_folder"] = data["title"]
        data["season"] = "1"

        curriculum_content = soup.find_all("a", {"class": "cd-accordion__unit"})
        if not curriculum_content:
            # Same exception type as the bare ``[0]`` lookup, clearer message.
            raise IndexError("no curriculum entries found for course %s" % code)
        first_item = curriculum_content[0]["href"]

        # Go to the view page of the first (previewable) video; that page
        # lists every lecture unit of the course.
        base_url = "https://www.inflearn.com"
        url = base_url + parse.quote(first_item)
        logger.debug(f"url::::: {url}")

        res_data = requests.get(url, timeout=20)
        if res_data.url != url:
            # A redirect occurred; the lecture page is likely missing or
            # not accessible.
            raise requests.TooManyRedirects()
        res_data.raise_for_status()

        soup = BeautifulSoup(res_data.text, "html.parser")
        items = soup.find_all("div", attrs={"class": "unit-el"})

        program = (
            db.session.query(ModelInflearnProgram)
            .filter_by(programcode=code)
            .first()
        )
        if program is None:
            program = ModelInflearnProgram(data)
            db.session.add(program)
            db.session.commit()
        else:
            # A stored program overrides the scraped folder/season values.
            data["save_folder"] = program.save_folder
            data["season"] = program.season

        # NOTE(review): lectures are resolved one request at a time; the
        # original carried a commented-out asyncio batch variant.
        episodes = []
        for idx, item in enumerate(items):
            entry = {}
            logger.debug("idx:: %s", idx)
            data_id = item["data-id"]

            title = item.find("div", attrs={"class": "title"}).get_text()
            runtime_tag = item.find("span", {"class": "runtime"})
            run_time = runtime_tag.get_text() if runtime_tag is not None else ""
            api_url = f"{base_url}/api/course/{code}/lecture/{data_id}"

            entry["season"] = "1"
            LogicInflearn.season = "1"
            m3u8_info = LogicInflearn.getM3u8_info(
                api_url, LogicInflearn.season, idx
            )
            logger.debug(m3u8_info)

            entry["save_folder"] = Util.change_text_for_use_filename(
                data["save_folder"]
            )

            tmp_save_path = ModelSetting.get("download_path")
            if ModelSetting.get("auto_make_folder") == "True":
                entry["save_path"] = os.path.join(
                    tmp_save_path, entry["save_folder"]
                )
                # Compared against "True" like the setting above; a plain
                # truthiness test would also accept the string "False".
                if ModelSetting.get("inflearn_auto_make_season_folder") == "True":
                    entry["save_path"] = os.path.join(
                        entry["save_path"], "Season %s" % int(entry["season"])
                    )

            entry["title"] = title
            entry["data_id"] = data_id
            entry["item_id"] = m3u8_info["data_id"]
            entry["code"] = entry["item_id"]
            entry["run_time"] = run_time
            entry["api_url"] = api_url
            entry["name"] = m3u8_info["name"]
            entry["filename"] = m3u8_info["filename"]
            entry["url"] = m3u8_info["hlsUrl"]
            entry["size"] = m3u8_info["size"]
            episodes.append(entry)

        data["episode"] = episodes
        LogicInflearn.current_data = data

        return data
    except Exception as e:
        # Single handler: ``Exception`` already covers ``IndexError``, so a
        # separate clause after it could never run.
        logger.error("Exception:%s", e)
        logger.error(traceback.format_exc())
        data["log"] = str(e)
        data["ret"] = "error"
        return data
getM3u8_info(url, season, idx): data_id = "" @@ -757,8 +942,8 @@ class LogicInflearn(object): try: if res_data["course"]["id"] is not None: data_id = res_data["course"]["id"] - if res_data["course"]["id"] is not None: - title = res_data["course"]["title"] + if res_data["course"]["_"]["current_unit"]["title"] is not None: + title = res_data["course"]["_"]["current_unit"]["title"] if res_data["newBOX"]["video"]["name"] is not None: name = res_data["newBOX"]["video"]["name"] filename = f"{title}.{name.split('.')[0]}.S{season.zfill(2)}.E{str(idx).zfill(3)}.{name.split('.')[-1]}" diff --git a/templates/inflearn_request.html b/templates/inflearn_request.html index a66b9ac..74cc0a6 100755 --- a/templates/inflearn_request.html +++ b/templates/inflearn_request.html @@ -28,7 +28,6 @@ let run_time = 0 - $(document).ready(function () { // console.log ('current_code::', {{arg['current_code']}}) // {#console.log(params)#} @@ -37,25 +36,35 @@ // console.log('current_data:: ', current_data) let now = Math.floor(new Date().getTime() / 1000) - if ("{{arg['current_code']}}" !== "" && current_data.data.code === "{{arg['current_code']}}") { - code = "{{arg['current_code']}}"; - document.getElementById("code").value = "{{arg['current_code']}}"; - document.getElementById("analysis_btn").click(); - - // {#$('[data-tooltip="true"]').tooltip();#} - // {#$('.bootstrap-tooltip').tooltip();#} - } - - // console.log('params.code:::> ', params.code) - if (params.code === "") { } else { document.getElementById("code").value = params.code; document.getElementById("analysis_btn").click(); } - console.log('run_time:::' ,run_time) - console.log('after_time:::' ,now - run_time) + + if ("{{arg['current_code']}}" !== "") { + if (current_data) { + if (current_data.data.code === "{{arg['current_code']}}") { + code = "{{arg['current_code']}}"; + document.getElementById("code").value = "{{arg['current_code']}}"; + document.getElementById("analysis_btn").click(); + } + + } else { + code = 
"{{arg['current_code']}}"; + document.getElementById("code").value = "{{arg['current_code']}}"; + document.getElementById("analysis_btn").click(); + } + // {#$('[data-tooltip="true"]').tooltip();#} + // {#$('.bootstrap-tooltip').tooltip();#} + } + + // console.log('params.code:::> ', params.code) + + + console.log('run_time:::', run_time) + console.log('after_time:::', now - run_time) });