인프런 파일명 변경 테스트 for loop 병렬 처리
This commit is contained in:
@@ -18,9 +18,10 @@ import asyncio
|
|||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
|
|
||||||
packages = ["beautifulsoup4"]
|
packages = ["beautifulsoup4", "joblib"]
|
||||||
for package in packages:
|
for package in packages:
|
||||||
try:
|
try:
|
||||||
import package
|
import package
|
||||||
@@ -31,6 +32,7 @@ for package in packages:
|
|||||||
|
|
||||||
# third-party
|
# third-party
|
||||||
import requests
|
import requests
|
||||||
|
from joblib import Parallel, delayed
|
||||||
from lxml import html, etree
|
from lxml import html, etree
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from urllib import parse
|
from urllib import parse
|
||||||
@@ -847,74 +849,20 @@ class LogicInflearn(object):
|
|||||||
#
|
#
|
||||||
# logger.debug(f"ret_data():: ret_data=> {ret_data}")
|
# logger.debug(f"ret_data():: ret_data=> {ret_data}")
|
||||||
|
|
||||||
for idx, item in enumerate(items):
|
temp1 = Parallel(n_jobs=4, prefer="threads")(
|
||||||
#
|
delayed(LogicInflearn.parsing_info)(
|
||||||
temp1 = {}
|
item, idx, base_url, code, main_title, data
|
||||||
print("idx::", idx)
|
|
||||||
data_id = item["data-id"]
|
|
||||||
|
|
||||||
run_time = ""
|
|
||||||
title = item.find("div", attrs={"class": "title"}).get_text()
|
|
||||||
if item.find("span", {"class": "runtime"}) is not None:
|
|
||||||
run_time = item.find("span", {"class": "runtime"}).get_text()
|
|
||||||
api_url = f"{base_url}/api/course/{code}/lecture/{data_id}"
|
|
||||||
|
|
||||||
temp1["season"] = "1"
|
|
||||||
LogicInflearn.season = "1"
|
|
||||||
# logger.debug(api_url)
|
|
||||||
m3u8_info = LogicInflearn.getM3u8_info(
|
|
||||||
api_url, LogicInflearn.season, idx, main_title
|
|
||||||
)
|
)
|
||||||
# print(api_url)
|
for idx, item in enumerate(items)
|
||||||
# print('type::::', type(m3u8_url))
|
)
|
||||||
logger.debug(m3u8_info)
|
|
||||||
# ws.append(
|
|
||||||
# [
|
|
||||||
# title,
|
|
||||||
# data_id,
|
|
||||||
# run_time,
|
|
||||||
# api_url,
|
|
||||||
# m3u8_info["name"],
|
|
||||||
# m3u8_info["hlsUrl"],
|
|
||||||
# ]
|
|
||||||
# )
|
|
||||||
|
|
||||||
# temp.append(title, data_id, run_time, api_url,m3u8_info['name'], m3u8_info['hlsUrl'])
|
logger.debug(temp1)
|
||||||
# temp1['title'] = title
|
|
||||||
temp1["save_folder"] = Util.change_text_for_use_filename(
|
|
||||||
data["save_folder"]
|
|
||||||
)
|
|
||||||
|
|
||||||
# logger.debug(temp1["save_folder"])
|
|
||||||
|
|
||||||
tmp_save_path = ModelSetting.get("download_path")
|
|
||||||
if ModelSetting.get("auto_make_folder") == "True":
|
|
||||||
program_path = os.path.join(tmp_save_path, temp1["save_folder"])
|
|
||||||
temp1["save_path"] = program_path
|
|
||||||
if ModelSetting.get("inflearn_auto_make_season_folder"):
|
|
||||||
temp1["save_path"] = os.path.join(
|
|
||||||
temp1["save_path"], "Season %s" % int(temp1["season"])
|
|
||||||
)
|
|
||||||
|
|
||||||
temp1["title"] = title
|
|
||||||
temp1["data_id"] = data_id
|
|
||||||
temp1["item_id"] = m3u8_info["data_id"]
|
|
||||||
temp1["code"] = temp1["item_id"]
|
|
||||||
temp1["run_time"] = run_time
|
|
||||||
temp1["api_url"] = api_url
|
|
||||||
temp1["name"] = m3u8_info["name"]
|
|
||||||
temp1["filename"] = m3u8_info["filename"]
|
|
||||||
# logger.debug(temp1["name"])
|
|
||||||
# logger.debug(temp1["filename"])
|
|
||||||
temp1["url"] = m3u8_info["hlsUrl"]
|
|
||||||
# temp1["url"] = m3u8_info["hlsUrl"]
|
|
||||||
temp1["size"] = m3u8_info["size"]
|
|
||||||
temp.append(temp1)
|
|
||||||
|
|
||||||
|
temp.append(temp1)
|
||||||
# print(temp)
|
# print(temp)
|
||||||
# logger.info('data', data)
|
# logger.info('data', data)
|
||||||
# LogicInflearn.current_data = temp
|
# LogicInflearn.current_data = temp
|
||||||
data["episode"] = temp
|
data["episode"] = temp1
|
||||||
LogicInflearn.current_data = data
|
LogicInflearn.current_data = data
|
||||||
# logger.debug(data)
|
# logger.debug(data)
|
||||||
|
|
||||||
@@ -982,6 +930,62 @@ class LogicInflearn(object):
|
|||||||
"filename": filename,
|
"filename": filename,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@staticmethod
def parsing_info(item, idx, base_url, code, main_title, data):
    """Build the episode record for a single lecture entry.

    The method works only from its arguments and module/class-level
    helpers, so each lecture can be processed independently of the others.

    Args:
        item: Parsed HTML element for one lecture. It must carry a
            ``data-id`` attribute and contain a ``div.title`` child; a
            ``span.runtime`` child is optional.
        idx: Index of the lecture in the course listing; forwarded to
            ``LogicInflearn.getM3u8_info``.
        base_url: Site root used to build the lecture API URL.
        code: Course code used to build the lecture API URL.
        main_title: Course title; forwarded to
            ``LogicInflearn.getM3u8_info``.
        data: Course-level dict; only ``data["save_folder"]`` is read.

    Returns:
        dict: Episode record with the keys ``season``, ``save_folder``,
        ``title``, ``data_id``, ``item_id``, ``code``, ``run_time``,
        ``api_url``, ``name``, ``filename``, ``url`` and ``size``, plus
        ``save_path`` when the ``auto_make_folder`` setting is ``"True"``.

    Raises:
        AttributeError: If ``item`` has no ``div.title`` child.
        KeyError: If ``item`` has no ``data-id`` attribute, ``data`` has no
            ``save_folder`` key, or the stream info lacks an expected key.
    """
    data_id = item["data-id"]
    title = item.find("div", attrs={"class": "title"}).get_text()

    # The runtime badge is optional; query the element once and reuse it.
    runtime_tag = item.find("span", {"class": "runtime"})
    run_time = runtime_tag.get_text() if runtime_tag is not None else ""

    api_url = f"{base_url}/api/course/{code}/lecture/{data_id}"

    # Every lecture is filed under season "1". The class attribute is still
    # updated because code outside this method may read it, but the request
    # below uses the local value so it never depends on shared state.
    season = "1"
    LogicInflearn.season = season

    m3u8_info = LogicInflearn.getM3u8_info(api_url, season, idx, main_title)
    logger.debug(m3u8_info)

    save_folder = Util.change_text_for_use_filename(data["save_folder"])
    episode = {"season": season, "save_folder": save_folder}

    download_root = ModelSetting.get("download_path")
    # NOTE(review): the season sub-folder is only applied when automatic
    # folder creation is enabled, so "save_path" is always defined before it
    # is extended -- confirm this matches the intended nesting.
    if ModelSetting.get("auto_make_folder") == "True":
        save_path = os.path.join(download_root, save_folder)
        if ModelSetting.get("inflearn_auto_make_season_folder"):
            save_path = os.path.join(save_path, "Season %s" % int(season))
        episode["save_path"] = save_path

    item_id = m3u8_info["data_id"]
    episode["title"] = title
    episode["data_id"] = data_id
    episode["item_id"] = item_id
    # The record's "code" is the lecture item id, not the course ``code``
    # argument (which is only used for the API URL above).
    episode["code"] = item_id
    episode["run_time"] = run_time
    episode["api_url"] = api_url
    episode["name"] = m3u8_info["name"]
    episode["filename"] = m3u8_info["filename"]
    episode["url"] = m3u8_info["hlsUrl"]
    episode["size"] = m3u8_info["size"]

    return episode
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def getHtml(url, header):
|
def getHtml(url, header):
|
||||||
o = parse.urlparse(url)
|
o = parse.urlparse(url)
|
||||||
|
|||||||
Reference in New Issue
Block a user