인프런 파일명 변경 테스트 for loop 병렬 처리

This commit is contained in:
2022-05-16 14:25:43 +09:00
parent 492611a15f
commit bc191caff1

View File

@@ -18,9 +18,10 @@ import asyncio
from urllib.parse import urlparse from urllib.parse import urlparse
import json import json
import aiohttp import aiohttp
packages = ["beautifulsoup4"] packages = ["beautifulsoup4", "joblib"]
for package in packages: for package in packages:
try: try:
import package import package
@@ -31,6 +32,7 @@ for package in packages:
# third-party # third-party
import requests import requests
from joblib import Parallel, delayed
from lxml import html, etree from lxml import html, etree
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from urllib import parse from urllib import parse
@@ -847,74 +849,20 @@ class LogicInflearn(object):
# #
# logger.debug(f"ret_data():: ret_data=> {ret_data}") # logger.debug(f"ret_data():: ret_data=> {ret_data}")
for idx, item in enumerate(items): temp1 = Parallel(n_jobs=4, prefer="threads")(
# delayed(LogicInflearn.parsing_info)(
temp1 = {} item, idx, base_url, code, main_title, data
print("idx::", idx)
data_id = item["data-id"]
run_time = ""
title = item.find("div", attrs={"class": "title"}).get_text()
if item.find("span", {"class": "runtime"}) is not None:
run_time = item.find("span", {"class": "runtime"}).get_text()
api_url = f"{base_url}/api/course/{code}/lecture/{data_id}"
temp1["season"] = "1"
LogicInflearn.season = "1"
# logger.debug(api_url)
m3u8_info = LogicInflearn.getM3u8_info(
api_url, LogicInflearn.season, idx, main_title
) )
# print(api_url) for idx, item in enumerate(items)
# print('type::::', type(m3u8_url))
logger.debug(m3u8_info)
# ws.append(
# [
# title,
# data_id,
# run_time,
# api_url,
# m3u8_info["name"],
# m3u8_info["hlsUrl"],
# ]
# )
# temp.append(title, data_id, run_time, api_url,m3u8_info['name'], m3u8_info['hlsUrl'])
# temp1['title'] = title
temp1["save_folder"] = Util.change_text_for_use_filename(
data["save_folder"]
) )
# logger.debug(temp1["save_folder"]) logger.debug(temp1)
tmp_save_path = ModelSetting.get("download_path")
if ModelSetting.get("auto_make_folder") == "True":
program_path = os.path.join(tmp_save_path, temp1["save_folder"])
temp1["save_path"] = program_path
if ModelSetting.get("inflearn_auto_make_season_folder"):
temp1["save_path"] = os.path.join(
temp1["save_path"], "Season %s" % int(temp1["season"])
)
temp1["title"] = title
temp1["data_id"] = data_id
temp1["item_id"] = m3u8_info["data_id"]
temp1["code"] = temp1["item_id"]
temp1["run_time"] = run_time
temp1["api_url"] = api_url
temp1["name"] = m3u8_info["name"]
temp1["filename"] = m3u8_info["filename"]
# logger.debug(temp1["name"])
# logger.debug(temp1["filename"])
temp1["url"] = m3u8_info["hlsUrl"]
# temp1["url"] = m3u8_info["hlsUrl"]
temp1["size"] = m3u8_info["size"]
temp.append(temp1) temp.append(temp1)
# print(temp) # print(temp)
# logger.info('data', data) # logger.info('data', data)
# LogicInflearn.current_data = temp # LogicInflearn.current_data = temp
data["episode"] = temp data["episode"] = temp1
LogicInflearn.current_data = data LogicInflearn.current_data = data
# logger.debug(data) # logger.debug(data)
@@ -982,6 +930,62 @@ class LogicInflearn(object):
"filename": filename, "filename": filename,
} }
@staticmethod
def parsing_info(item, idx, base_url, code, main_title, data):
    """Build the episode record for a single lecture element.

    Reads the lecture id, title and optional runtime from ``item``, asks
    ``LogicInflearn.getM3u8_info`` for the stream details of that lecture,
    and returns everything merged into one dict.

    Args:
        item: Lecture element; must support ``item["data-id"]`` and
            ``item.find(...)`` (presumably a BeautifulSoup tag -- confirm
            against the caller).
        idx: Position of the lecture in the list; forwarded to
            ``getM3u8_info``.
        base_url: Site root used to build the lecture API URL.
        code: Course code used in the lecture API URL.
        main_title: Course title; forwarded to ``getM3u8_info``.
        data: Course-level dict; only ``data["save_folder"]`` is read.

    Returns:
        dict: The episode record. The ``save_path`` key is present only
        when the ``auto_make_folder`` setting equals ``"True"``.
    """
    lecture_id = item["data-id"]
    title = item.find("div", attrs={"class": "title"}).get_text()
    # The runtime span is optional; fall back to an empty string.
    runtime_tag = item.find("span", {"class": "runtime"})
    run_time = runtime_tag.get_text() if runtime_tag is not None else ""
    api_url = f"{base_url}/api/course/{code}/lecture/{lecture_id}"

    # The season is fixed to "1" and mirrored on the class attribute,
    # which is then passed on to getM3u8_info.
    season = "1"
    LogicInflearn.season = season
    m3u8_info = LogicInflearn.getM3u8_info(
        api_url, LogicInflearn.season, idx, main_title
    )
    logger.debug(m3u8_info)

    episode = {
        "season": season,
        "save_folder": Util.change_text_for_use_filename(data["save_folder"]),
    }

    download_root = ModelSetting.get("download_path")
    if ModelSetting.get("auto_make_folder") == "True":
        episode["save_path"] = os.path.join(download_root, episode["save_folder"])
        # The season sub-folder extends the path computed just above.
        if ModelSetting.get("inflearn_auto_make_season_folder"):
            episode["save_path"] = os.path.join(
                episode["save_path"], "Season %s" % int(episode["season"])
            )

    stream_id = m3u8_info["data_id"]
    episode.update(
        {
            "title": title,
            "data_id": lecture_id,
            "item_id": stream_id,
            "code": stream_id,
            "run_time": run_time,
            "api_url": api_url,
            "name": m3u8_info["name"],
            "filename": m3u8_info["filename"],
            "url": m3u8_info["hlsUrl"],
            "size": m3u8_info["size"],
        }
    )
    return episode
@staticmethod @staticmethod
def getHtml(url, header): def getHtml(url, header):
o = parse.urlparse(url) o = parse.urlparse(url)