anime-downloader bug fix

This commit is contained in:
2022-03-27 18:26:18 +09:00
parent 9bf9de053c
commit 3c321dd911
4 changed files with 677 additions and 88 deletions

View File

@@ -9,12 +9,25 @@
import os, sys, traceback, re, json, threading
from datetime import datetime
import copy
import hashlib
# third-party
import requests
from lxml import html
from urllib import parse
# third-party
from flask import request, render_template, jsonify
from sqlalchemy import or_, and_, func, not_, desc
from pip._internal import main
import importlib
import subprocess

# Third-party packages this module needs, by pip distribution name.
pkgs = ['beautifulsoup4', 'jsbeautifier']
# The importable module name can differ from the distribution name
# (beautifulsoup4 is imported as bs4), so probe with the module name.
_pkg_modules = {'beautifulsoup4': 'bs4', 'jsbeautifier': 'jsbeautifier'}
for pkg in pkgs:
    try:
        # `import pkg` would look for a module literally called "pkg";
        # import_module resolves the name held in the variable instead.
        importlib.import_module(_pkg_modules.get(pkg, pkg))
    except ImportError:
        # Run pip as a subprocess of the current interpreter; a failed install
        # is not raised here, the real import right below will report it.
        subprocess.call([sys.executable, '-m', 'pip', 'install', pkg])
        # Make the freshly installed package visible to the import system.
        importlib.invalidate_caches()
from bs4 import BeautifulSoup
import jsbeautifier
# sjva 공용
from framework import db, scheduler, path_data, socketio
@@ -52,9 +65,11 @@ class LogicOhli24(LogicModuleBase):
}
current_headers = None
current_data = None
session = requests.Session()
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/71.0.3578.98 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'Accept-Language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
'Referer': ''
@@ -65,6 +80,7 @@ class LogicOhli24(LogicModuleBase):
def __init__(self, P):
    """Initialise the ohli24 logic module.

    Forwards ``P``, ``'setting'`` and the scheduler description to the base
    class, names the module ``'ohli24'``, starts with no download queue, and
    finally calls ``default_route_socketio(P, self)``.
    """
    super(LogicOhli24, self).__init__(P, 'setting', scheduler_desc='ani365 자동 다운로드')
    # No queue yet; it stays None until something assigns one.
    self.queue = None
    self.name = 'ohli24'
    default_route_socketio(P, self)
@staticmethod
@@ -84,7 +100,7 @@ class LogicOhli24(LogicModuleBase):
arg['sub'] = self.name
if sub in ['setting', 'queue', 'list', 'request']:
if sub == 'request' and req.args.get('content_code') is not None:
arg['ani365_current_code'] = req.args.get('content_code')
arg['ohli24_current_code'] = req.args.get('content_code')
if sub == 'setting':
job_id = '%s_%s' % (self.P.package_name, self.name)
arg['scheduler'] = str(scheduler.is_include(job_id))
@@ -110,13 +126,41 @@ class LogicOhli24(LogicModuleBase):
elif sub == 'add_queue':
ret = {}
info = json.loads(request.form['data'])
logger.info('info:: %s', info)
ret['ret'] = self.add(info)
return jsonify(ret)
pass
elif sub == 'entity_list':
return jsonify(self.queue.get_entity_list())
elif sub == 'queue_command':
ret = self.queue.command(req.form['command'], int(req.form['entity_id']))
return jsonify(ret)
elif sub == 'add_queue_checked_list':
data = json.loads(request.form['data'])
# Worker handed to threading.Thread for the 'add_queue_checked_list' request:
# passes every entry of the posted `data` list to self.add() and then emits a
# "notify" socket.io message carrying `count`.
def func():
count = 0
for tmp in data:
add_ret = self.add(tmp)
# Results starting with 'enqueue' trigger a 'list_refresh' socket.io callback.
if add_ret.startswith('enqueue'):
self.socketio_callback('list_refresh', '')
# presumably counts only the enqueued entries - confirm against the indented source
count += 1
# The Korean message reads "<count> episodes were added to the queue".
notify = {'type':'success', 'msg' : u'%s 개의 에피소드를 큐에 추가 하였습니다.' % count}
socketio.emit("notify", notify, namespace='/framework', broadcast=True)
thread = threading.Thread(target=func, args=())
thread.daemon = True
thread.start()
return jsonify('')
elif sub == 'web_list':
return jsonify(ModelOhli24Item.web_list(request))
elif sub == 'db_remove':
return jsonify(ModelOhli24Item.delete_by_id(req.form['id']))
except Exception as e:
P.logger.error('Exception:%s', e)
P.logger.error(traceback.format_exc())
def setting_save_after(self):
    """Apply the saved ffmpeg process limit to the download queue.

    Reads the ``ohli24_max_ffmpeg_process_count`` setting once and, when it
    differs from the queue's current limit, updates the queue.

    Does nothing while ``self.queue`` is still ``None`` (it is ``None`` after
    ``__init__`` and only assigned in ``plugin_load``), instead of raising
    ``AttributeError``.
    """
    if self.queue is None:
        return
    max_count = P.ModelSetting.get_int('ohli24_max_ffmpeg_process_count')
    if self.queue.get_max_ffmpeg_count() != max_count:
        self.queue.set_max_ffmpeg_count(max_count)
def get_series_info(self, code):
try:
if self.current_data is not None and 'code' in self.current_data and self.current_data['code'] == code:
@@ -127,6 +171,7 @@ class LogicOhli24(LogicModuleBase):
logger.info(f'code:::: {code}')
url = P.ModelSetting.get('ohli24_url') + '/c/' + code
logger.debug('url:::> %s', url)
# self.current_headers = { 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)
# AppleWebKit/537.36 (KHTML, like Gecko) ' 'Chrome/96.0.4664.110 Whale/3.12.129.46 Safari/537.36',
# 'Referer': url }
@@ -136,15 +181,57 @@ class LogicOhli24(LogicModuleBase):
title = tree.xpath('//div[@class="view-title"]/h1/text()')[0]
# image = tree.xpath('//div[@class="view-info"]/div[@class="image"]/div/img')[0]['src']
image = tree.xpath('//div[@class="image"]/div/img/@src')[0]
image = image.replace('..', P.ModelSetting.get('ohli24_url'))
des_items = tree.xpath('//div[@class="list"]/p')
des = {}
des_key = ['_otit', '_dir', '_pub', '_tag', '_classifi', '_country', '_grade']
description_dict = {
'원제': '_otit',
'원작': '_org',
'감독': '_dir',
'각본': '_scr',
'캐릭터 디자인': '_character_design',
'음악': '_sound',
'제작사': '_pub',
'장르': '_tag',
'분류': '_classifi',
'제작국가': '_country',
'방영일': '_date',
'등급': '_grade'
}
# Build one entry per <li> row of the "list-body" episode list.
list_body_li = tree.xpath('//ul[@class="list-body"]/li')
logger.info(list_body_li)
episodes = []
vi = None
# Loop-invariant: the configured site root that episode hrefs are appended to.
site_url = P.ModelSetting.get('ohli24_url')
for li in list_body_li:
    # NOTE(review): this rebinds `title`, which held the series title before
    # the loop - confirm that later code does not expect the series title.
    title = li.xpath('.//a/text()')[0].strip()
    thumbnail = image
    # The path must be relative ('.//'): an XPath starting with '//' is
    # evaluated from the document root even when called on `li`, which gave
    # every episode the link of the first row in the page.
    hrefs = li.xpath('.//a[@class="item-subject"]/@href')
    logger.info(hrefs)
    link = site_url + hrefs[0]
    date = li.xpath('.//div[@class="wr-date"]/text()')[0]
    # The md5 hex digest of the episode title is stored as '_vi'.
    m = hashlib.md5(title.encode('utf-8'))
    logger.info(m.hexdigest())
    _vi = m.hexdigest()
    episodes.append({
        'title': title,
        'link': link,
        'thumbnail': image,
        'date': date,
        'day': date,
        '_id': title,
        'va': link,
        '_vi': _vi,
        'content_code': code,
    })
logger.info('des_items length:: %s', len(des_items))
for idx, item in enumerate(des_items):
key = des_key[idx]
# key = des_key[idx]
span = item.xpath('.//span//text()')
logger.info(span)
key = description_dict[span[0]]
des[key] = item.xpath('.//span/text()')[1]
logger.info(f'des::>> {des}')
@@ -160,15 +247,13 @@ class LogicOhli24(LogicModuleBase):
'date': '2022.01.11 00:30 (화)',
'ser_description': ser_description,
'des': des,
'episode': [
{
'title': '녹을 먹는 비스코 5화',
'thumbnail': 'https://ohli24.net/data/editor/2201/6ced5f453ef2fe9efb7edfa0e9e12d19_1641871470_4041.jpg',
'date': '2022-02-08'
}
]
'episode': episodes
}
if P.ModelSetting.get_bool('ohli24_order_desc'):
data['episode'] = list(reversed(data['episode']))
data['list_order'] = 'desc'
return data
# logger.info(response_text)
@@ -177,13 +262,13 @@ class LogicOhli24(LogicModuleBase):
P.logger.error(traceback.format_exc())
return {'ret': 'exception', 'log': str(e)}
@staticmethod
def plugin_load():
# @staticmethod
def plugin_load(self):
try:
logger.debug('%s plugin_load', P.package_name)
# self.queue = FfmpegQueue(P, P.ModelSetting.get_int('ani365_max_ffmpeg_process_count'))
# self.current_data = None
# self.queue.queue_start()
self.queue = FfmpegQueue(P, P.ModelSetting.get_int('ohli24_max_ffmpeg_process_count'))
self.current_data = None
self.queue.queue_start()
except Exception as e:
logger.error('Exception:%s', e)
@@ -206,7 +291,9 @@ class LogicOhli24(LogicModuleBase):
@staticmethod
def get_html(url, referer=None, stream=False, timeout=5):
data = ''
try:
if LogicOhli24.session is None:
LogicOhli24.session = requests.session()
@@ -219,18 +306,241 @@ class LogicOhli24(LogicModuleBase):
logger.error(traceback.format_exc())
return data
#########################################################
def add(self, episode_info):
    """Queue one episode for download unless it is already queued or finished.

    Returns a status string:
      'queue_exist'       - an entity with the same '_id' is already queued
      'enqueue_db_append' - no DB row existed; one was appended and the
                            episode was queued
      'enqueue_db_exist'  - a DB row exists with a status other than
                            'completed'; the episode was queued again
      'db_completed'      - the DB row is already marked 'completed'
    """
    if self.is_exist(episode_info):
        return 'queue_exist'

    db_entity = ModelOhli24Item.get_by_ohli24_id(episode_info['_id'])
    logger.debug('db_entity:::> %s', db_entity)

    if db_entity is not None:
        # Known episode: only re-queue it when it has not completed yet.
        if db_entity.status == 'completed':
            return 'db_completed'
        entity = Ohli24QueueEntity(P, self, episode_info)
        self.queue.add_queue(entity)
        return 'enqueue_db_exist'

    # First time this episode is seen: record it, then queue it.
    entity = Ohli24QueueEntity(P, self, episode_info)
    logger.debug('entity:::> %s', entity.as_dict())
    ModelOhli24Item.append(entity.as_dict())
    logger.debug('entity:: type >> %s', type(entity))
    self.queue.add_queue(entity)
    return 'enqueue_db_append'
def is_exist(self, info):
    """Return True when a queued entity carries the same '_id' as ``info``."""
    return any(
        queued.info['_id'] == info['_id']
        for queued in self.queue.entity_list
    )
class Ohli24QueueEntity(FfmpegQueueEntity):
def __init__(self, P, module_logic, info):
    """Create the queue entity for one episode and resolve its details.

    Every attribute starts at a neutral default (``None``, or ``1`` for the
    season); ``make_episode_info()`` is then called to fill them in.
    """
    super(Ohli24QueueEntity, self).__init__(P, module_logic, info)
    # Where and under which name the episode is written.
    self.savepath = None
    self.filepath = None
    self.filename = None
    # Stream details.
    self.url = None
    self.headers = None
    self.quality = None
    self._vi = None
    # Subtitle sources.
    self.vtt = None
    self.srt_url = None
    # Values parsed from the episode title.
    self.content_title = None
    self.season = 1
    self.epi_queue = None
    # The original source carries a Todo here marking this call as provisional.
    self.make_episode_info()
# episode info
def refresh_status(self):
    """Send this entity's ``as_dict()`` snapshot as a 'status' callback."""
    snapshot = self.as_dict()
    self.module_logic.socketio_callback('status', snapshot)
def info_dict(self, tmp):
    """Copy this entity's episode data into ``tmp`` and return ``tmp``.

    All keys of ``self.info`` are copied first; the entity-level fields are
    written afterwards, so they win over same-named keys from ``self.info``.
    """
    entity_fields = {
        'vtt': self.vtt,
        'season': self.season,
        'content_title': self.content_title,
        'ohli24_info': self.info,
        'epi_queue': self.epi_queue,
    }
    for source in (self.info, entity_fields):
        for key in source:
            tmp[key] = source[key]
    return tmp
def donwload_completed(self):
    """Flag this episode's database row as completed, if such a row exists."""
    row = ModelOhli24Item.get_by_ohli24_id(self.info['_id'])
    if row is None:
        return
    row.status = 'completed'
    # NOTE(review): 'complated_time' looks like a misspelling of
    # 'completed_time' - confirm the attribute name against the model.
    row.complated_time = datetime.now()
    row.save()
# Get episode info from OHLI24 site
def make_episode_info(self):
    """Resolve stream, subtitle and output-path details for this episode.

    Follows the chain episode page -> embedded player link -> player iframe ->
    player "getVideo" endpoint -> HLS playlist, and fills in ``srt_url``,
    ``_vi``, ``headers``, ``url`` and ``quality``. The file name, season and
    episode number are then derived from ``self.info['title']`` and the save
    path from the plugin settings; the subtitle file is downloaded when it is
    not on disk yet.

    Nothing is returned. Any exception is logged and swallowed, which leaves
    the attributes that were not reached at their previous values.
    """
    try:
        # e.g. https://ohli24.net/e/%EB%85%B9%EC%9D%84%20%EB%A8%B9%EB%8A%94%20%EB%B9%84%EC%8A%A4%EC%BD%94%206%ED%99%94
        url = self.info['va']
        ourls = parse.urlparse(url)
        origin = f'{ourls.scheme}://{ourls.netloc}'
        # Resolve the relative player link against the site the episode was
        # listed on; keep the historical default when 'va' carries no host.
        base_url = origin if ourls.netloc else 'https://ohli24.net'
        iframe_url = ''

        headers = {
            'referer': origin,
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Whale/3.12.129.46 Safari/537.36'
        }
        logger.debug('url:::> %s', url)
        logger.info('self.info:::> %s', self.info)

        # Step 1: the episode page holds the player link in an inline script.
        text = requests.get(url, headers=headers, timeout=20).text
        soup1 = BeautifulSoup(text, "lxml")
        pattern = re.compile(r'url : \"\.\.(.*)\"')
        script = soup1.find("script", text=pattern)
        if script:
            match = pattern.search(script.text)
            if match:
                iframe_url = match.group(1)
        logger.info('iframe_url::> %s', iframe_url)

        # Step 2: that page embeds the actual player in an <iframe>.
        resp = requests.get(base_url + iframe_url, headers=headers, timeout=20).text
        soup2 = BeautifulSoup(resp, 'lxml')
        iframe_src = soup2.find('iframe')['src']
        logger.debug('iframe_src:::> %s', iframe_src)

        # Step 3: the player page carries a packed (eval ...) script whose
        # beautified form contains the "tracks" (subtitle) definition.
        resp1 = requests.get(iframe_src, headers=headers, timeout=600).text
        soup3 = BeautifulSoup(resp1, "lxml")
        s_pattern = re.compile(r'(eval.+)', re.MULTILINE | re.DOTALL)
        packed_pattern = re.compile(r'if?.([^{}]+)\{.*(eval.+)\}.+else?.{.(eval.+)\}', re.DOTALL)
        packed_script = soup3.find('script', text=s_pattern)
        unpack_script = None
        if packed_script is not None:
            match = packed_pattern.search(packed_script.text)
            unpack_script = jsbeautifier.beautify(match.group(3))
        logger.debug(unpack_script)

        m2 = re.search(r'(\"tracks\".*\]).*\"captions\"', unpack_script, flags=re.MULTILINE | re.DOTALL)
        dict_string = '{' + m2.group(1) + '}'
        logger.info('dict_string::> %s', dict_string)
        tracks = json.loads(dict_string)
        self.srt_url = tracks['tracks'][0]['file']
        logger.debug('srt_url::: %s', tracks['tracks'][0]['file'])

        # Step 4: ask the player's getVideo endpoint for the HLS playlist URL.
        video_hash = iframe_src.split('/')
        video_hashcode = re.sub(r'index\.php\?data=', '', video_hash[-1])
        self._vi = video_hashcode
        video_info_url = f'{video_hash[0]}//{video_hash[2]}/player/index.php?data={video_hashcode}&do=getVideo'
        logger.debug('video_info_url::: %s', video_info_url)
        # NOTE(review): the user-agent literal below contains two UA strings
        # glued together; reproduced unchanged.
        headers = {
            'referer': f'{iframe_src}',
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/96.0.4664.110 Whale/3.12.129.46 Safari/537.36''Mozilla/5.0 (Macintosh; Intel '
                          'Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 '
                          'Whale/3.12.129.46 Safari/537.36',
            "X-Requested-With": 'XMLHttpRequest'
        }
        payload = {
            "hash": video_hash[-1],
        }
        resp2 = requests.post(video_info_url, headers=headers, data=payload, timeout=20).json()
        logger.debug('resp2::> %s', resp2)
        hls_url = resp2['videoSource']
        logger.debug('video_url::> %s', hls_url)

        # Step 5: the last two playlist lines are read as the stream
        # description (with NAME="<quality>") and the stream URL.
        resp3 = requests.get(hls_url, headers=headers, timeout=20).text
        stream_info = resp3.split('\n')[-2:]
        logger.debug('stream_info:: %s', stream_info)
        # NOTE(review): this Referer is pinned to one specific video hash;
        # presumably it should follow iframe_src - confirm before changing.
        self.headers = {
            'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/71.0.3554.0 Safari/537.36Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                          "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3554.0 Safari/537.36",
            'Referer': 'https://ndoodle.xyz/video/03a3655fff3e9bdea48de9f49e938e32',
        }
        self.url = stream_info[1].strip()
        match = re.compile(r'NAME="(?P<quality>.*?)"').search(stream_info[0])
        self.quality = match.group('quality')
        logger.info(self.quality)

        # Titles look like "<title> [<n>기] <m>화" (기 = season, 화 = episode).
        # The markers are required: with empty markers "12화" parses as
        # season 1 / episode 2, because the two digit groups split the number.
        match = re.compile(r'(?P<title>.*?)\s*((?P<season>\d+)%s)?\s*((?P<epi_no>\d+)%s)' % (u'기', u'화')).search(
            self.info['title'])
        # Stays None when the title has no episode marker; previously the
        # assignment to self.epi_queue below raised NameError in that case.
        epi_no = None
        if match:
            self.content_title = match.group('title').strip()
            if match.group('season') is not None:
                self.season = int(match.group('season'))
            epi_no = int(match.group('epi_no'))
            ret = '%s.S%02dE%02d.%s-OHNI24.mp4' % (self.content_title, self.season, epi_no, self.quality)
        else:
            self.content_title = self.info['title']
            P.logger.debug('NOT MATCH')
            ret = '%s.720p-OHNI24.mp4' % self.info['title']
        self.epi_queue = epi_no

        self.filename = Util.change_text_for_use_filename(ret)
        logger.info('self.filename::> %s', self.filename)
        self.savepath = P.ModelSetting.get('ohli24_download_path')
        logger.info('self.savepath::> %s', self.savepath)

        # TODO: handle finished series
        if P.ModelSetting.get_bool('ohli24_auto_make_folder'):
            # A 'day' value containing '완결' (finished) gets the configured prefix.
            if self.info['day'].find(u'완결') != -1:
                folder_name = '%s %s' % (P.ModelSetting.get('ohli24_finished_insert'), self.content_title)
            else:
                folder_name = self.content_title
            folder_name = Util.change_text_for_use_filename(folder_name.strip())
            self.savepath = os.path.join(self.savepath, folder_name)
            if P.ModelSetting.get_bool('ohli24_auto_make_season_folder'):
                self.savepath = os.path.join(self.savepath, 'Season %s' % int(self.season))
        self.filepath = os.path.join(self.savepath, self.filename)
        # exist_ok avoids the race between an exists() check and makedirs().
        os.makedirs(self.savepath, exist_ok=True)

        from framework.common.util import write_file
        srt_filepath = os.path.join(self.savepath, self.filename.replace('.mp4', '.ko.srt'))
        if self.srt_url is not None and not os.path.exists(srt_filepath):
            srt_data = requests.get(self.srt_url, headers=headers, timeout=20).text
            write_file(srt_data, srt_filepath)

    except Exception as e:
        # Best effort by design: log the failure and leave the entity as is.
        P.logger.error('Exception:%s', e)
        P.logger.error(traceback.format_exc())
@@ -247,9 +557,9 @@ class ModelOhli24Item(db.Model):
episode_no = db.Column(db.Integer)
title = db.Column(db.String)
episode_title = db.Column(db.String)
ani365_va = db.Column(db.String)
ani365_vi = db.Column(db.String)
ani365_id = db.Column(db.String)
ohli24_va = db.Column(db.String)
ohli24_vi = db.Column(db.String)
ohli24_id = db.Column(db.String)
quality = db.Column(db.String)
filepath = db.Column(db.String)
filename = db.Column(db.String)
@@ -278,16 +588,16 @@ class ModelOhli24Item(db.Model):
db.session.commit()
@classmethod
def get_by_id(cls, id):
return db.session.query(cls).filter_by(id=id).first()
def get_by_id(cls, idx):
return db.session.query(cls).filter_by(id=idx).first()
@classmethod
def get_by_ani365_id(cls, ani365_id):
return db.session.query(cls).filter_by(ani365_id=ani365_id).first()
def get_by_ohli24_id(cls, ohli24_id):
return db.session.query(cls).filter_by(ohli24_id=ohli24_id).first()
@classmethod
def delete_by_id(cls, id):
db.session.query(cls).filter_by(id=id).delete()
def delete_by_id(cls, idx):
db.session.query(cls).filter_by(id=idx).delete()
db.session.commit()
return True
@@ -344,9 +654,9 @@ class ModelOhli24Item(db.Model):
item.episode_no = q['epi_queue']
item.title = q['content_title']
item.episode_title = q['title']
item.ani365_va = q['va']
item.ani365_vi = q['_vi']
item.ani365_id = q['_id']
item.ohli24_va = q['va']
item.ohli24_vi = q['_vi']
item.ohli24_id = q['_id']
item.quality = q['quality']
item.filepath = q['filepath']
item.filename = q['filename']
@@ -355,5 +665,5 @@ class ModelOhli24Item(db.Model):
item.vtt_url = q['vtt']
item.thumbnail = q['thumbnail']
item.status = 'wait'
item.ani365_info = q['ani365_info']
item.ohli24_info = q['ohli24_info']
item.save()