#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2022/02/08 3:44 PM
# @Author  : yommi
# @Site    :
# @File    : logic_ohli24
# @Software: PyCharm
"""OHLI24 module: series analysis, download queueing and the download-history model."""
import os
import sys
import traceback
import re
import json
import threading
import importlib
from datetime import datetime
import copy
import hashlib

# third-party
import requests
from lxml import html
from urllib import parse

# third-party
from flask import request, render_template, jsonify
from sqlalchemy import or_, and_, func, not_, desc
from pip._internal import main

pkgs = ['beautifulsoup4', 'jsbeautifier']
# pip distribution name -> importable module name, for the packages where the two differ.
_pkg_import_names = {'beautifulsoup4': 'bs4'}
for pkg in pkgs:
    try:
        # ``import pkg`` would try to import a module literally called "pkg";
        # resolve the real module name at runtime instead.
        importlib.import_module(_pkg_import_names.get(pkg, pkg))
    except ImportError:
        main(['install', pkg])
        # Let the import system notice the freshly installed distribution.
        importlib.invalidate_caches()

from bs4 import BeautifulSoup
import jsbeautifier

# SJVA shared framework
from framework import db, scheduler, path_data, socketio
from framework.util import Util
from framework.common.util import headers
from plugin import LogicModuleBase, FfmpegQueueEntity, FfmpegQueue, default_route_socketio
from tool_base import d

# package
from .plugin import P

logger = P.logger


#########################################################
class LogicOhli24(LogicModuleBase):
    """Plugin module that analyses OHLI24 series pages and queues episode downloads."""

    # Default values for this module's settings.
    db_default = {
        'ohli24_db_version': '1',
        'ohli24_url': 'https://ohli24.net',
        'ohli24_download_path': os.path.join(path_data, P.package_name, 'ohli24'),
        'ohli24_auto_make_folder': 'True',
        'ohli24_auto_make_season_folder': 'True',
        'ohli24_finished_insert': u'[완결]',
        'ohli24_max_ffmpeg_process_count': '1',
        'ohli24_order_desc': 'False',
        'ohli24_auto_start': 'False',
        'ohli24_interval': '* 5 * * *',
        'ohli24_auto_mode_all': 'False',
        'ohli24_auto_code_list': 'all',
        'ohli24_current_code': '',
        'ohli24_uncompleted_auto_enqueue': 'False',
        'ohli24_image_url_prefix_series': 'https://www.jetcloud.cc/series/',
        'ohli24_image_url_prefix_episode': 'https://www.jetcloud-list.cc/thumbnail/',
    }
    current_headers = None
    # Result of the most recent get_series_info() call made through process_ajax.
    current_data = None
    # HTTP session shared by every get_html() call.
    session = requests.Session()
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/71.0.3578.98 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
        'Referer': ''
    }
    useragent = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, '
                               'like Gecko) Chrome/96.0.4664.110 Whale/3.12.129.46 Safari/537.36'}

    def __init__(self, P):
        """Register the module under the name ``ohli24`` and attach its socket.io routes."""
        super(LogicOhli24, self).__init__(P, 'setting', scheduler_desc='ani365 자동 다운로드')
        self.name = 'ohli24'
        self.queue = None
        default_route_socketio(P, self)

    @staticmethod
    def db_init():
        """No-op placeholder; nothing is initialised here."""
        pass

    def process_menu(self, sub, req):
        """Render the page for menu entry *sub*.

        Known sub pages get the module template together with the current
        settings; any other value renders ``sample.html``.
        """
        arg = P.ModelSetting.to_dict()
        arg['sub'] = self.name
        if sub in ['setting', 'queue', 'list', 'request']:
            content_code = req.args.get('content_code')
            if sub == 'request' and content_code is not None:
                arg['ohli24_current_code'] = content_code
            if sub == 'setting':
                job_id = '%s_%s' % (self.P.package_name, self.name)
                arg['scheduler'] = str(scheduler.is_include(job_id))
                arg['is_running'] = str(scheduler.is_running(job_id))
            template_name = '{package_name}_{module_name}_{sub}.html'.format(
                package_name=P.package_name, module_name=self.name, sub=sub)
            return render_template(template_name, arg=arg)
        return render_template('sample.html', title='%s - %s' % (P.package_name, sub))

    def process_ajax(self, sub, req):
        """Dispatch the AJAX command *sub* and return a JSON response.

        On any exception the error is logged and a JSON object with
        ``ret == 'exception'`` is returned, so the caller always receives a
        valid response instead of ``None``.
        """
        try:
            if sub == 'analysis':
                code = request.form['code']
                P.ModelSetting.set('ohli24_current_code', code)
                data = self.get_series_info(code)
                self.current_data = data
                return jsonify({'ret': 'success', 'data': data, 'code': code})
            elif sub == 'add_queue':
                ret = {}
                info = json.loads(request.form['data'])
                logger.info('info:: %s', info)
                ret['ret'] = self.add(info)
                return jsonify(ret)
            elif sub == 'entity_list':
                return jsonify(self.queue.get_entity_list())
            elif sub == 'queue_command':
                ret = self.queue.command(req.form['command'], int(req.form['entity_id']))
                return jsonify(ret)
            elif sub == 'add_queue_checked_list':
                data = json.loads(request.form['data'])

                # Enqueue in a background thread so the request returns at once.
                # (Named so that it does not shadow the sqlalchemy ``func`` import.)
                def enqueue_checked():
                    count = 0
                    for tmp in data:
                        add_ret = self.add(tmp)
                        if add_ret.startswith('enqueue'):
                            self.socketio_callback('list_refresh', '')
                            count += 1
                    notify = {'type': 'success', 'msg': u'%s 개의 에피소드를 큐에 추가 하였습니다.' % count}
                    socketio.emit("notify", notify, namespace='/framework', broadcast=True)

                thread = threading.Thread(target=enqueue_checked, args=())
                thread.daemon = True
                thread.start()
                return jsonify('')
            elif sub == 'web_list':
                return jsonify(ModelOhli24Item.web_list(request))
            elif sub == 'db_remove':
                return jsonify(ModelOhli24Item.delete_by_id(req.form['id']))
        except Exception as e:
            P.logger.error('Exception:%s', e)
            P.logger.error(traceback.format_exc())
            return jsonify({'ret': 'exception', 'log': str(e)})

    def setting_save_after(self):
        """Apply a changed ``ohli24_max_ffmpeg_process_count`` setting to the queue."""
        max_count = P.ModelSetting.get_int('ohli24_max_ffmpeg_process_count')
        if self.queue.get_max_ffmpeg_count() != max_count:
            self.queue.set_max_ffmpeg_count(max_count)

    def get_series_info(self, code):
        """Fetch and parse the series page identified by *code*.

        *code* is either the bare series code or a full URL containing
        ``/c/<code>``.

        Returns a dict with the keys ``title``, ``image``, ``date``,
        ``ser_description``, ``des`` and ``episode`` (plus ``list_order``
        when the descending-order setting is on). On failure returns
        ``{'ret': 'exception', 'log': <message>}``.
        """
        try:
            if self.current_data is not None and 'code' in self.current_data \
                    and self.current_data['code'] == code:
                return self.current_data

            if code.startswith('http'):
                # Split on the path marker only, so a "c/" elsewhere in the URL is ignored.
                code = code.split('/c/', 1)[1]
            logger.info(f'code:::: {code}')

            site_url = P.ModelSetting.get('ohli24_url')
            url = site_url + '/c/' + code
            logger.debug('url:::> %s', url)

            response_data = LogicOhli24.get_html(url, timeout=10)
            tree = html.fromstring(response_data)
            title = tree.xpath('//div[@class="view-title"]/h1/text()')[0]
            image = tree.xpath('//div[@class="image"]/div/img/@src')[0]
            image = image.replace('..', site_url)
            des_items = tree.xpath('//div[@class="list"]/p')
            des = {}
            # Label shown on the page -> key used in the ``des`` dict.
            description_dict = {
                '원제': '_otit',
                '원작': '_org',
                '감독': '_dir',
                '각본': '_scr',
                '캐릭터 디자인': '_character_design',
                '음악': '_sound',
                '제작사': '_pub',
                '장르': '_tag',
                '분류': '_classifi',
                '제작국가': '_country',
                '방영일': '_date',
                '등급': '_grade'
            }

            list_body_li = tree.xpath('//ul[@class="list-body"]/li')
            logger.info(list_body_li)
            episodes = []
            for li in list_body_li:
                # Keep the episode title apart from the series ``title`` above.
                epi_title = li.xpath('.//a/text()')[0].strip()
                # The XPath must be relative (".//"): an absolute "//" searches the
                # whole document and would give every episode the first link.
                hrefs = li.xpath('.//a[@class="item-subject"]/@href')
                logger.info(hrefs)
                link = site_url + hrefs[0]
                date = li.xpath('.//div[@class="wr-date"]/text()')[0]
                _vi = hashlib.md5(epi_title.encode('utf-8')).hexdigest()
                logger.info(_vi)
                episodes.append({
                    'title': epi_title,
                    'link': link,
                    'thumbnail': image,
                    'date': date,
                    'day': date,
                    '_id': epi_title,
                    'va': link,
                    '_vi': _vi,
                    'content_code': code,
                })

            logger.info('des_items length:: %s', len(des_items))
            for item in des_items:
                span = item.xpath('.//span//text()')
                logger.info(span)
                values = item.xpath('.//span/text()')
                if not span or len(values) < 2:
                    continue
                key = description_dict.get(span[0])
                if key is None:
                    # An unknown label must not abort the whole analysis.
                    continue
                des[key] = values[1]

            logger.info(f'des::>> {des}')
            logger.info('images:: %s', image)
            logger.info('title:: %s', title)

            ser_description = tree.xpath('//div[@class="view-stocon"]/div[@class="c"]/text()')

            data = {
                'title': title,
                'image': image,
                # NOTE(review): hard-coded value, looks like a placeholder — confirm.
                'date': '2022.01.11 00:30 (화)',
                'ser_description': ser_description,
                'des': des,
                'episode': episodes
            }

            if P.ModelSetting.get_bool('ohli24_order_desc'):
                data['episode'] = list(reversed(data['episode']))
                data['list_order'] = 'desc'

            return data
        except Exception as e:
            P.logger.error('Exception:%s', e)
            P.logger.error(traceback.format_exc())
            return {'ret': 'exception', 'log': str(e)}

    def plugin_load(self):
        """Create the ffmpeg queue, clear the cached analysis and start the queue."""
        try:
            logger.debug('%s plugin_load', P.package_name)
            self.queue = FfmpegQueue(P, P.ModelSetting.get_int('ohli24_max_ffmpeg_process_count'))
            self.current_data = None
            self.queue.queue_start()
        except Exception as e:
            logger.error('Exception:%s', e)
            logger.error(traceback.format_exc())

    @staticmethod
    def plugin_unload():
        """Remove the ``<package>_recent`` scheduler job; errors are only logged."""
        try:
            logger.debug('%s plugin_unload', P.package_name)
            scheduler.remove_job('%s_recent' % P.package_name)
        except Exception as e:
            logger.error('Exception:%s', e)
            logger.error(traceback.format_exc())

    @staticmethod
    def reset_db() -> bool:
        """Delete every ModelOhli24Item row and return True."""
        db.session.query(ModelOhli24Item).delete()
        db.session.commit()
        return True

    @staticmethod
    def get_html(url, referer=None, stream=False, timeout=5):
        """GET *url* through the shared session and return the response text.

        Returns an empty string when the request fails; the error is logged.
        *stream* is accepted but not used.
        """
        data = ''
        try:
            if LogicOhli24.session is None:
                LogicOhli24.session = requests.session()
            # Work on a copy so the header dict imported from the framework
            # is not mutated for every other user of it.
            request_headers = dict(headers)
            request_headers['Referer'] = '' if referer is None else referer
            page_content = LogicOhli24.session.get(url, headers=request_headers, timeout=timeout)
            data = page_content.text
        except Exception as e:
            logger.error('Exception:%s', e)
            logger.error(traceback.format_exc())
        return data

    #########################################################
    def add(self, episode_info):
        """Queue *episode_info* for download unless it is already handled.

        Returns one of ``'queue_exist'``, ``'enqueue_db_append'``,
        ``'enqueue_db_exist'`` or ``'db_completed'``.
        """
        if self.is_exist(episode_info):
            return 'queue_exist'

        db_entity = ModelOhli24Item.get_by_ohli24_id(episode_info['_id'])
        logger.debug('db_entity:::> %s', db_entity)
        if db_entity is None:
            entity = Ohli24QueueEntity(P, self, episode_info)
            logger.debug('entity:::> %s', entity.as_dict())
            ModelOhli24Item.append(entity.as_dict())
            logger.debug('entity:: type >> %s', type(entity))
            self.queue.add_queue(entity)
            return 'enqueue_db_append'
        if db_entity.status != 'completed':
            entity = Ohli24QueueEntity(P, self, episode_info)
            self.queue.add_queue(entity)
            return 'enqueue_db_exist'
        return 'db_completed'

    def is_exist(self, info):
        """Return True when an entity with the same ``_id`` is already in the queue."""
        return any(e.info['_id'] == info['_id'] for e in self.queue.entity_list)


class Ohli24QueueEntity(FfmpegQueueEntity):
    """Queue entry for a single OHLI24 episode."""

    def __init__(self, P, module_logic, info):
        """Initialise the entity and resolve stream details right away."""
        super(Ohli24QueueEntity, self).__init__(P, module_logic, info)
        self._vi = None
        self.url = None
        self.epi_queue = None
        self.filepath = None
        self.savepath = None
        self.quality = None
        self.filename = None
        self.vtt = None
        self.season = 1
        self.content_title = None
        self.srt_url = None
        self.headers = None
        # TODO: the original note here reads "temporarily commented out",
        # but the call is active.
        self.make_episode_info()

    def refresh_status(self):
        """Push the entity's current state to clients via socket.io."""
        self.module_logic.socketio_callback('status', self.as_dict())

    def info_dict(self, tmp):
        """Copy the episode info and entity fields into *tmp* and return it."""
        for key, value in self.info.items():
            tmp[key] = value
        tmp['vtt'] = self.vtt
        tmp['season'] = self.season
        tmp['content_title'] = self.content_title
        tmp['ohli24_info'] = self.info
        tmp['epi_queue'] = self.epi_queue
        return tmp

    def donwload_completed(self):
        """Mark the matching database row as completed.

        The misspelled method name is kept on purpose: it is this class's
        external interface (presumably invoked by the queue framework —
        confirm against FfmpegQueueEntity).
        """
        db_entity = ModelOhli24Item.get_by_ohli24_id(self.info['_id'])
        if db_entity is not None:
            db_entity.status = 'completed'
            # The model column is ``completed_time``; the former
            # ``complated_time`` assignment never reached the database.
            db_entity.completed_time = datetime.now()
            db_entity.save()

    # Get episode info from OHLI24 site
    def make_episode_info(self):
        """Resolve stream URL, quality, subtitle URL and target paths for this episode.

        Follows the episode page to its player iframe, reads the subtitle
        track from the packed player script (best effort), asks the player
        endpoint for the HLS playlist, and derives the file name and save
        path from the episode title and the settings. Any exception is
        logged and swallowed, leaving the unresolved attributes as ``None``.
        """
        try:
            base_url = 'https://ohli24.net'
            iframe_url = ''

            url = self.info['va']
            ourls = parse.urlparse(url)

            headers = {
                'referer': f'{ourls.scheme}://{ourls.netloc}',
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Whale/3.12.129.46 Safari/537.36'
            }
            logger.debug('url:::> %s', url)
            logger.info('self.info:::> %s', self.info)

            # A timeout keeps a stalled server from blocking the caller forever.
            text = requests.get(url, headers=headers, timeout=20).text
            soup1 = BeautifulSoup(text, "lxml")
            pattern = re.compile(r'url : \"\.\.(.*)\"')
            script = soup1.find("script", text=pattern)

            if script:
                match = pattern.search(script.text)
                if match:
                    iframe_url = match.group(1)
                    logger.info('iframe_url::> %s', iframe_url)

            resp = requests.get(base_url + iframe_url, headers=headers, timeout=20).text
            soup2 = BeautifulSoup(resp, 'lxml')
            iframe_src = soup2.find('iframe')['src']
            logger.debug('iframe_src:::> %s', iframe_src)

            resp1 = requests.get(iframe_src, headers=headers, timeout=600).text
            soup3 = BeautifulSoup(resp1, "lxml")
            s_pattern = re.compile(r'(eval.+)', re.MULTILINE | re.DOTALL)
            packed_pattern = re.compile(r'if?.([^{}]+)\{.*(eval.+)\}.+else?.{.(eval.+)\}', re.DOTALL)
            packed_script = soup3.find('script', text=s_pattern)

            unpack_script = None
            if packed_script is not None:
                match = packed_pattern.search(packed_script.text)
                if match:
                    unpack_script = jsbeautifier.beautify(match.group(3))
            logger.debug(unpack_script)

            # Subtitles are optional (see the ``srt_url is not None`` check
            # below), so a missing track must not abort the video lookup.
            if unpack_script is not None:
                m2 = re.search(r'(\"tracks\".*\]).*\"captions\"', unpack_script,
                               flags=re.MULTILINE | re.DOTALL)
                if m2:
                    dict_string = '{' + m2.group(1) + '}'
                    logger.info('dict_string::> %s', dict_string)
                    tracks = json.loads(dict_string)
                    self.srt_url = tracks['tracks'][0]['file']
                    logger.debug('srt_url::: %s', tracks['tracks'][0]['file'])

            video_hash = iframe_src.split('/')
            video_hashcode = re.sub(r'index\.php\?data=', '', video_hash[-1])
            self._vi = video_hashcode
            video_info_url = f'{video_hash[0]}//{video_hash[2]}/player/index.php?data={video_hashcode}&do=getVideo'
            logger.debug('video_info_url::: %s', video_info_url)

            headers = {
                'referer': f'{iframe_src}',
                'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) '
                              'Chrome/96.0.4664.110 Whale/3.12.129.46 Safari/537.36''Mozilla/5.0 (Macintosh; Intel '
                              'Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 '
                              'Whale/3.12.129.46 Safari/537.36',
                "X-Requested-With": 'XMLHttpRequest'
            }
            payload = {
                "hash": video_hash[-1],
            }
            resp2 = requests.post(video_info_url, headers=headers, data=payload, timeout=20).json()
            logger.debug('resp2::> %s', resp2)

            hls_url = resp2['videoSource']
            logger.debug('video_url::> %s', hls_url)

            resp3 = requests.get(hls_url, headers=headers, timeout=20).text
            # Last two playlist lines: the stream description and the stream URL.
            stream_info = resp3.split('\n')[-2:]
            logger.debug('stream_info:: %s', stream_info)

            # NOTE(review): the Referer below is a fixed URL, not derived from
            # this episode — confirm this is intended.
            self.headers = {
                'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                              "Chrome/71.0.3554.0 Safari/537.36Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                              "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3554.0 Safari/537.36",
                'Referer': 'https://ndoodle.xyz/video/03a3655fff3e9bdea48de9f49e938e32',
            }

            self.url = stream_info[1].strip()
            match = re.compile(r'NAME="(?P<quality>.*?)"').search(stream_info[0])
            self.quality = match.group('quality')
            logger.info(self.quality)

            # "<title> [<season>기] <episode>화"
            match = re.compile(
                r'(?P<title>.*?)\s*((?P<season>\d+)%s)?\s*((?P<epi_no>\d+)%s)' % (u'기', u'화')
            ).search(self.info['title'])

            # Stays None when the title carries no episode number.
            epi_no = None
            if match:
                self.content_title = match.group('title').strip()
                if match.group('season') is not None:
                    self.season = int(match.group('season'))
                epi_no = int(match.group('epi_no'))
                ret = '%s.S%sE%s.%s-OHNI24.mp4' % (
                    self.content_title,
                    '0%s' % self.season if self.season < 10 else self.season,
                    '0%s' % epi_no if epi_no < 10 else epi_no,
                    self.quality)
            else:
                self.content_title = self.info['title']
                P.logger.debug('NOT MATCH')
                ret = '%s.720p-OHNI24.mp4' % self.info['title']

            self.epi_queue = epi_no
            self.filename = Util.change_text_for_use_filename(ret)
            logger.info('self.filename::> %s', self.filename)
            self.savepath = P.ModelSetting.get('ohli24_download_path')
            logger.info('self.savepath::> %s', self.savepath)

            # TODO: handle finished series
            if P.ModelSetting.get_bool('ohli24_auto_make_folder'):
                if u'완결' in self.info['day']:
                    folder_name = '%s %s' % (P.ModelSetting.get('ohli24_finished_insert'), self.content_title)
                else:
                    folder_name = self.content_title
                folder_name = Util.change_text_for_use_filename(folder_name.strip())
                self.savepath = os.path.join(self.savepath, folder_name)
                if P.ModelSetting.get_bool('ohli24_auto_make_season_folder'):
                    self.savepath = os.path.join(self.savepath, 'Season %s' % int(self.season))
            self.filepath = os.path.join(self.savepath, self.filename)
            os.makedirs(self.savepath, exist_ok=True)

            from framework.common.util import write_file, convert_vtt_to_srt
            srt_filepath = os.path.join(self.savepath, self.filename.replace('.mp4', '.ko.srt'))

            if self.srt_url is not None and not os.path.exists(srt_filepath):
                srt_data = requests.get(self.srt_url, headers=headers, timeout=20).text
                write_file(srt_data, srt_filepath)

        except Exception as e:
            P.logger.error('Exception:%s', e)
            P.logger.error(traceback.format_exc())


class ModelOhli24Item(db.Model):
    """Database row describing one OHLI24 episode download."""

    __tablename__ = '{package_name}_ohli24_item'.format(package_name=P.package_name)
    __table_args__ = {'mysql_collate': 'utf8_general_ci'}
    __bind_key__ = P.package_name
    id = db.Column(db.Integer, primary_key=True)
    created_time = db.Column(db.DateTime)
    completed_time = db.Column(db.DateTime)
    reserved = db.Column(db.JSON)
    content_code = db.Column(db.String)
    season = db.Column(db.Integer)
    episode_no = db.Column(db.Integer)
    title = db.Column(db.String)
    episode_title = db.Column(db.String)
    ohli24_va = db.Column(db.String)
    ohli24_vi = db.Column(db.String)
    ohli24_id = db.Column(db.String)
    quality = db.Column(db.String)
    filepath = db.Column(db.String)
    filename = db.Column(db.String)
    savepath = db.Column(db.String)
    video_url = db.Column(db.String)
    vtt_url = db.Column(db.String)
    thumbnail = db.Column(db.String)
    status = db.Column(db.String)
    ohli24_info = db.Column(db.JSON)

    def __init__(self):
        """Stamp the row with its creation time."""
        self.created_time = datetime.now()

    def __repr__(self):
        return repr(self.as_dict())

    def as_dict(self):
        """Return every column as a dict, with both timestamps formatted as strings."""
        time_format = '%Y-%m-%d %H:%M:%S'
        ret = {x.name: getattr(self, x.name) for x in self.__table__.columns}
        ret['created_time'] = self.created_time.strftime(time_format)
        ret['completed_time'] = self.completed_time.strftime(time_format) \
            if self.completed_time is not None else None
        return ret

    def save(self):
        """Add this row to the session and commit."""
        db.session.add(self)
        db.session.commit()

    @classmethod
    def get_by_id(cls, idx):
        """Return the row whose primary key is *idx*, or None."""
        return db.session.query(cls).filter_by(id=idx).first()

    @classmethod
    def get_by_ohli24_id(cls, ohli24_id):
        """Return the first row whose ``ohli24_id`` equals *ohli24_id*, or None."""
        return db.session.query(cls).filter_by(ohli24_id=ohli24_id).first()

    @classmethod
    def delete_by_id(cls, idx):
        """Delete the row whose primary key is *idx* and return True."""
        db.session.query(cls).filter_by(id=idx).delete()
        db.session.commit()
        return True

    @classmethod
    def web_list(cls, req):
        """Return one page (30 rows) of items plus paging info.

        Reads the optional form fields ``page``, ``search_word``, ``option``
        and ``order`` from *req*.
        """
        ret = {}
        page = int(req.form['page']) if 'page' in req.form else 1
        page_size = 30
        search = req.form['search_word'] if 'search_word' in req.form else ''
        option = req.form['option'] if 'option' in req.form else 'all'
        order = req.form['order'] if 'order' in req.form else 'desc'
        query = cls.make_query(search=search, order=order, option=option)
        count = query.count()
        query = query.limit(page_size).offset((page - 1) * page_size)
        lists = query.all()
        ret['list'] = [item.as_dict() for item in lists]
        ret['paging'] = Util.get_paging_info(count, page, page_size)
        return ret

    @classmethod
    def make_query(cls, search='', order='desc', option='all'):
        """Build the list query.

        *search* matches against ``filename``: terms separated by ``|`` are
        OR-ed, terms separated by ``,`` are AND-ed, anything else is a single
        substring match. ``option == 'completed'`` keeps only completed rows;
        *order* sorts by id, descending unless it is not ``'desc'``.
        """
        query = db.session.query(cls)
        if search is not None and search != '':
            if search.find('|') != -1:
                conditions = [
                    cls.filename.like('%' + tt.strip() + '%')
                    for tt in search.split('|') if tt != ''
                ]
                # A search made only of "|" yields no terms; skip the filter
                # rather than calling or_() with no arguments.
                if conditions:
                    query = query.filter(or_(*conditions))
            elif search.find(',') != -1:
                for tt in search.split(','):
                    if tt != '':
                        query = query.filter(cls.filename.like('%' + tt.strip() + '%'))
            else:
                query = query.filter(cls.filename.like('%' + search + '%'))
        if option == 'completed':
            query = query.filter(cls.status == 'completed')

        query = query.order_by(desc(cls.id)) if order == 'desc' else query.order_by(cls.id)
        return query

    @classmethod
    def get_list_uncompleted(cls):
        """Return every row whose status is not ``'completed'``."""
        return db.session.query(cls).filter(cls.status != 'completed').all()

    @classmethod
    def append(cls, q):
        """Insert a new row with status ``'wait'`` built from the entity dict *q*."""
        item = ModelOhli24Item()
        item.content_code = q['content_code']
        item.season = q['season']
        item.episode_no = q['epi_queue']
        item.title = q['content_title']
        item.episode_title = q['title']
        item.ohli24_va = q['va']
        item.ohli24_vi = q['_vi']
        item.ohli24_id = q['_id']
        item.quality = q['quality']
        item.filepath = q['filepath']
        item.filename = q['filename']
        item.savepath = q['savepath']
        item.video_url = q['url']
        item.vtt_url = q['vtt']
        item.thumbnail = q['thumbnail']
        item.status = 'wait'
        item.ohli24_info = q['ohli24_info']
        item.save()