Enigma2 plugin to play various online streams (mostly Latvian).

util.py 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579
  1. # -*- coding: UTF-8 -*-
  2. # /*
  3. # * Copyright (C) 2011 Libor Zoubek,ivars777
  4. # *
  5. # *
  6. # * This Program is free software; you can redistribute it and/or modify
  7. # * it under the terms of the GNU General Public License as published by
  8. # * the Free Software Foundation; either version 2, or (at your option)
  9. # * any later version.
  10. # *
  11. # * This Program is distributed in the hope that it will be useful,
  12. # * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # * GNU General Public License for more details.
  15. # *
  16. # * You should have received a copy of the GNU General Public License
  17. # * along with this program; see the file COPYING. If not, write to
  18. # * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  19. # * http://www.gnu.org/copyleft/gpl.html
  20. # *
  21. # */
  22. import os
  23. import re
  24. import sys
  25. import urllib
  26. import urllib2
  27. import traceback
  28. import cookielib
  29. import requests
  30. from htmlentitydefs import name2codepoint as n2cp
  31. import HTMLParser
  32. import StringIO
  33. #import threading
  34. #import Queue
  35. import pickle
  36. import string
  37. import simplejson as json
  38. #from demjson import demjson
  39. #import demjson
  40. import json
  41. #from bs4 import BeautifulSoup
# User-Agent header value used by the HTTP helper functions in this module.
UA = 'Mozilla/6.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.5) Gecko/2008092417 Firefox/3.0.3'
# Verbosity threshold: debug() prints when LOG > 1, info() prints when LOG > 0.
LOG = 2
# Module-wide cookie jar; stays None until init_urllib() assigns one.
_cookie_jar = None
# Cache key under which init_urllib()/cache_cookies() load and store cookies.
CACHE_COOKIES = 'cookies'
  46. def play_video(streams):
  47. if len(streams)>1:
  48. for i,s in enumerate(streams):
  49. print "%s: [%s,%s,%s] %s"%(i,s["quality"],s["lang"].encode("utf8"),s["type"],s["name"])
  50. a = raw_input("Select stram to play: ")
  51. try:
  52. n = int(a)
  53. except:
  54. n = 0
  55. if n>=len(streams):
  56. stream = streams[-1]
  57. else:
  58. stream = streams[n]
  59. else:
  60. stream = streams[0]
  61. stream = stream_change(stream)
  62. title = stream["name"]
  63. url = stream["url"]
  64. suburl = ""
  65. print url
  66. if "subs" in stream and stream["subs"]:
  67. suburl = stream["subs"][0]["url"]
  68. print "\n**Download subtitles %s - %s"%(title,suburl)
  69. subs = urllib2.urlopen(suburl).read()
  70. if subs:
  71. fname0 = re.sub("[/\n\r\t,]","_",title)
  72. subext = ".srt"
  73. subfile = os.path.join("",fname0+subext)
  74. if ".xml" in suburl:
  75. subs = ttaf2srt(subs)
  76. with open(subfile,"w") as f:
  77. f.write(subs)
  78. else:
  79. print "\n Error downloading subtitle %s"%suburl
  80. return player(url,stream["name"],suburl,stream["headers"])
  81. def player(url,title="",suburl="",headers={}):
  82. from subprocess import call
  83. print "\n**Play stream %s\n%s"%(title,url.encode("utf8"))
  84. cmd1 = [r"c:\Program Files\VideoLAN\VLC\vlc.exe",url,
  85. "--meta-title",title.decode("utf8").encode(sys.getfilesystemencoding()),
  86. "--http-user-agent","Enigma2"
  87. ]
  88. # gst-launch-1.0 -v souphttpsrc ssl-strict=false proxy=127.0.0.1:8888 extra-headers="Origin:adadadasd" location="http://bitdash-a.akamaihd.net/content/sintel/sintel.mpd" ! decodebin! autovideosink
  89. cmd2 = [
  90. r"C:\gstreamer\1.0\x86_64\bin\gst-launch-1.0","-v",
  91. "playbin", 'uri="%s"'%url,
  92. #"souphttpsrc", "ssl-strict=false",
  93. #"proxy=127.0.0.1:8888",
  94. #'location="%s"'%url,
  95. #'!decodebin!autovideosink'
  96. ]
  97. cmd = cmd1 if url.startswith("https") else cmd2
  98. ret = call(cmd)
  99. #if ret:
  100. #a = raw_input("*** Error, continue")
  101. return
# Separator between the proxied URL and each "name=value" header appended to it.
SPLIT_CHAR = "~"
# Quoted replacements that streamproxy_encode() substitutes for the separator,
# "=", ":" and space so they cannot be confused with the proxy URL syntax.
SPLIT_CODE = urllib.quote(SPLIT_CHAR)
EQ_CODE = urllib.quote("=")
COL_CODE = urllib.quote(":")
SPACE_CODE = urllib.quote(" ")
# Prefix that streamproxy_encode() puts in front of the encoded URL.
PROXY_URL = "http://localhost:88/"
  108. def stream_change(stream):
  109. #return stream # TODO
  110. if stream["surl"]:
  111. if not re.search("https*://(hqq|goo.\gl)",stream["surl"]):
  112. return stream
  113. stream["url"] = streamproxy_encode(stream["url"],stream["headers"])
  114. stream["headers"] = {}
  115. return stream
  116. else:
  117. return stream
  118. def streamproxy_encode(url,headers=[]):
  119. if not "?" in url:
  120. url = url+"?"
  121. url2 = url.replace(SPLIT_CHAR,SPLIT_CODE).replace(":",COL_CODE).replace(" ",SPACE_CODE)
  122. url2 = PROXY_URL + url2
  123. if headers:
  124. headers2 = []
  125. for h in headers:
  126. headers2.append("%s=%s"%(h,headers[h].replace("=",EQ_CODE).replace(SPLIT_CHAR,SPLIT_CODE).replace(" ",SPACE_CODE)))
  127. headers2 = SPLIT_CHAR.join(headers2)
  128. url2 = url2+SPLIT_CHAR+headers2
  129. return url2
  130. def streamproxy_decode(urlp):
  131. import urlparse
  132. path = urlp.replace(re.search("http://[^/]+",urlp).group(0),"")
  133. p = path.split(SPLIT_CHAR)
  134. url = urllib.unquote(p[0][1:])
  135. #headers = {"User-Agent":"Mozilla/5.0 (iPhone; CPU iPhone OS 9_2 like Mac OS X) AppleWebKit/601.1 (KHTML, like Gecko) CriOS/47.0.2526.70 Mobile/13C71 Safari/601.1.46"}
  136. headers={}
  137. if len(p)>1:
  138. for h in p[1:]:
  139. #h = urllib.unquote()
  140. headers[h.split("=")[0]]=urllib.unquote(h.split("=")[1])
  141. return url,headers
  142. class Captions(object):
  143. def __init__(self,uri):
  144. self.subs = []
  145. self.styles = {}
  146. if uri.startswith("http"):
  147. r = requests.get(uri)
  148. if r.status_code == 200:
  149. self.loads(r.content)
  150. def loads(self,s):
  151. if "WEBVTT" in s[:s.find("\n")]: # vtt captions
  152. self.load_vtt(s)
  153. elif "<?xml" in s[:s.find("\n")]:
  154. self.load_ttaf(s)
  155. else:
  156. self.load_vtt(s) # TODO
  157. def load_ttaf(self,s):
  158. for r2 in re.findall("<style .+?/>", s):
  159. st = {}
  160. for a in re.findall(r'(\w+)="([^ "]+)"', r2):
  161. st[a[0]] = a[1]
  162. if a[0] == "id":
  163. sid = a[1]
  164. self.styles[sid] = st
  165. for r2 in re.findall("<p .+?</p>", s):
  166. sub = {}
  167. sub["begin"] = str2sec(re.search('begin="([^"]+)"', r2).group(1)) if re.search('begin="([^"]+)"', r2) else -1
  168. sub["end"] = str2sec(re.search('end="([^"]+)"', r2).group(1)) if re.search('end="([^"]+)"', r2) else -1
  169. sub["style"] = re.search('style="([^"]+)"', r2).group(1) if re.search('style="([^"]+)"', r2) else None
  170. sub["text"] = re.search("<p[^>]+>(.+)</p>", r2).group(1).replace("\n","")
  171. sub["text"] = re.sub("<br\s*?/>","\n",sub["text"])
  172. sub["text"] = re.sub("<.+?>"," ",sub["text"])
  173. self.subs.append(sub)
  174. pass
  175. def load_vtt(self,s):
  176. f = StringIO.StringIO(s)
  177. while True:
  178. line = f.readline()
  179. if not line:
  180. break
  181. m = re.search(r"([\d\.\,:]+)\s*-->\s*([\d\.\,\:]+)",line)
  182. if m:
  183. sub = {}
  184. sub["begin"] = str2sec(m.group(1))
  185. sub["end"] = str2sec(m.group(2))
  186. sub["style"] = None
  187. sub["text"] = []
  188. line = f.readline()
  189. while line.strip():
  190. txt = line.strip()
  191. if isinstance(txt,unicode):
  192. txt = txt.encode("utf8")
  193. sub["text"].append(txt)
  194. line = f.readline()
  195. sub["text"] = "\n".join(sub["text"])
  196. self.subs.append(sub)
  197. else:
  198. continue
  199. pass
  200. def str2sec(r):
  201. # Convert str time to miliseconds
  202. r= r.replace(",",".")
  203. m = re.search(r"(\d+\:)*(\d+)\:(\d+\.\d+)", r)
  204. if m:
  205. sec = int(m.group(1)[:-1])*60*60*1000 if m.group(1) else 0
  206. sec += int(m.group(2))*60*1000 + int(float(m.group(3))*1000)
  207. return sec
  208. else:
  209. return -1
  210. #c = Captions("http://195.13.216.2/mobile-vod/mp4:lb_barbecue_fr_lq.mp4/lb_barbecue_lv.vtt")
  211. #c = Captions("http://www.bbc.co.uk/iplayer/subtitles/ng/modav/bUnknown-0edd6227-0f38-411c-8d46-fa033c4c61c1_b05ql1s3_1479853893356.xml")
  212. #url = "http://195.13.216.2/mobile-vod/mp4:ac_now_you_see_me_2_en_lq.mp4/ac_now_you_see_me_2_lv.vtt"
  213. #c = Captions(url)
  214. #pass
  215. def ttaf2srt(s):
  216. out = u""
  217. i = 0
  218. for p,txt in re.findall("<p ([^>]+)>(.+?)</p>", s, re.DOTALL):
  219. i +=1
  220. begin = re.search('begin="(.+?)"',p).group(1)
  221. begin = begin.replace(".",",")
  222. end = re.search('end="(.+?)"',p).group(1)
  223. end = end.replace(".",",")
  224. txt2 = re.sub("<br */>","\n",txt)
  225. out += "%s\n%s --> %s\n%s\n\n"%(i,begin,end,txt2)
  226. return out
  227. def item():
  228. stream0 = {
  229. 'name': '',
  230. 'url': '',
  231. 'quality': '?',
  232. 'surl': '',
  233. 'subs': [],
  234. 'headers': {},
  235. "desc":"","img":"",
  236. "lang":"",
  237. "type":"",
  238. "resolver":"",
  239. "order":0
  240. }
  241. return stream0
  242. class _StringCookieJar(cookielib.LWPCookieJar):
  243. def __init__(self, string=None, filename=None, delayload=False, policy=None):
  244. cookielib.LWPCookieJar.__init__(self, filename, delayload, policy)
  245. if string and len(string) > 0:
  246. self._cookies = pickle.loads(str(string))
  247. def dump(self):
  248. return pickle.dumps(self._cookies)
  249. def init_urllib(cache=None):
  250. """
  251. Initializes urllib cookie handler
  252. """
  253. global _cookie_jar
  254. data = None
  255. if cache is not None:
  256. data = cache.get(CACHE_COOKIES)
  257. _cookie_jar = _StringCookieJar(data)
  258. opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(_cookie_jar))
  259. urllib2.install_opener(opener)
  260. def cache_cookies(cache):
  261. """
  262. Saves cookies to cache
  263. """
  264. global _cookie_jar
  265. if _cookie_jar:
  266. cache.set(CACHE_COOKIES, _cookie_jar.dump())
  267. def request0(url, headers={}):
  268. debug('request: %s' % url)
  269. req = urllib2.Request(url, headers=headers)
  270. req.add_header('User-Agent', UA)
  271. try:
  272. response = urllib2.urlopen(req)
  273. data = response.read()
  274. response.close()
  275. except urllib2.HTTPError, error:
  276. data = error.read()
  277. debug('len(data) %s' % len(data))
  278. return data
  279. def request(url, headers={}):
  280. debug('request: %s' % url)
  281. #req = urllib2.Request(url, headers=headers)
  282. #req.add_header('User-Agent', UA)
  283. if 'User-Agent' not in headers:
  284. headers['User-Agent']= UA
  285. try:
  286. r = requests.get(url, headers=headers)
  287. data = r.content
  288. except:
  289. data = r.content
  290. debug('len(data) %s' % len(data))
  291. return data
  292. def post(url, data, headers={}):
  293. postdata = urllib.urlencode(data)
  294. #req = urllib2.Request(url, postdata, headers)
  295. #req.add_header('User-Agent', UA)
  296. import requests
  297. if 'User-Agent' not in headers:
  298. headers['User-Agent']= UA
  299. try:
  300. r = requests.post(url, data=postdata,headers=headers)
  301. data = r.content
  302. except urllib2.HTTPError, error:
  303. data = r.content
  304. return data
  305. def post0(url, data, headers={}):
  306. postdata = urllib.urlencode(data)
  307. req = urllib2.Request(url, postdata, headers)
  308. req.add_header('User-Agent', UA)
  309. try:
  310. response = urllib2.urlopen(req)
  311. data = response.read()
  312. response.close()
  313. except urllib2.HTTPError, error:
  314. data = error.read()
  315. return data
  316. def post_json(url, data, headers={}):
  317. postdata = json.dumps(data)
  318. headers['Content-Type'] = 'application/json'
  319. req = urllib2.Request(url, postdata, headers)
  320. req.add_header('User-Agent', UA)
  321. response = urllib2.urlopen(req)
  322. data = response.read()
  323. response.close()
  324. return data
  325. #def run_parallel_in_threads(target, args_list):
  326. #result = Queue.Queue()
  327. ## wrapper to collect return value in a Queue
  328. #def task_wrapper(*args):
  329. #result.put(target(*args))
  330. #threads = [threading.Thread(target=task_wrapper, args=args) for args in args_list]
  331. #for t in threads:
  332. #t.start()
  333. #for t in threads:
  334. #t.join()
  335. #return result
  336. def substr(data, start, end):
  337. i1 = data.find(start)
  338. i2 = data.find(end, i1)
  339. return data[i1:i2]
  340. def save_to_file(url, file):
  341. try:
  342. return save_data_to_file(request(url), file)
  343. except:
  344. traceback.print_exc()
  345. def save_data_to_file(data, file):
  346. try:
  347. f = open(file, 'wb')
  348. f.write(data)
  349. f.close()
  350. info('File %s saved' % file)
  351. return True
  352. except:
  353. traceback.print_exc()
  354. def read_file(file):
  355. if not os.path.exists(file):
  356. return ''
  357. f = open(file, 'r')
  358. data = f.read()
  359. f.close()
  360. return data
  361. def _substitute_entity(match):
  362. ent = match.group(3)
  363. if match.group(1) == '#':
  364. # decoding by number
  365. if match.group(2) == '':
  366. # number is in decimal
  367. return unichr(int(ent))
  368. elif match.group(2) == 'x':
  369. # number is in hex
  370. return unichr(int('0x' + ent, 16))
  371. else:
  372. # they were using a name
  373. cp = n2cp.get(ent)
  374. if cp:
  375. return unichr(cp)
  376. else:
  377. return match.group()
  378. def decode_html(data):
  379. if not type(data) == str:
  380. return data
  381. try:
  382. if not type(data) == unicode:
  383. data = unicode(data, 'utf-8', errors='ignore')
  384. entity_re = re.compile(r'&(#?)(x?)(\w+);')
  385. return entity_re.subn(_substitute_entity, data)[0]
  386. except:
  387. traceback.print_exc()
  388. print[data]
  389. return data
  390. def unescape(s0):
  391. #s2 = re.sub("&#\w+;",HTMLParser.HTMLParser().unescape("\1"),s)
  392. s0 = s0.replace("&amp;","&")
  393. for s in re.findall("&#\w+;",s0):
  394. s2 = HTMLParser.HTMLParser().unescape(s)
  395. if isinstance(s0,str):
  396. s2 = s2.encode("utf8")
  397. s0 = s0.replace(s,s2)
  398. pass
  399. return s0
  400. def debug(text):
  401. if LOG > 1:
  402. print('[DEBUG] ' + str([text]))
  403. def info(text):
  404. if LOG > 0:
  405. print('[INFO] ' + str([text]))
  406. def error(text):
  407. print('[ERROR] ' + str([text]))
# Lookup table used by replace_diacritic(): accented character -> ASCII replacement.
# NOTE(review): u'\u0213' is mapped to '-'; this looks like a transposed
# u'\u2013' (en dash) - confirm before changing.
# NOTE(review): the last three keys (u'\xed', u'\xe9', u'\xe1') are the same
# characters as u'\u00ed', u'\u00e9' and u'\u00e1' above, so they are duplicates.
_diacritic_replace = {u'\u00f3': 'o',
u'\u0213': '-',
u'\u00e1': 'a',
u'\u010d': 'c',
u'\u010c': 'C',
u'\u010f': 'd',
u'\u010e': 'D',
u'\u00e9': 'e',
u'\u011b': 'e',
u'\u00ed': 'i',
u'\u0148': 'n',
u'\u0159': 'r',
u'\u0161': 's',
u'\u0165': 't',
u'\u016f': 'u',
u'\u00fd': 'y',
u'\u017e': 'z',
u'\xed': 'i',
u'\xe9': 'e',
u'\xe1': 'a',
}
  429. def replace_diacritic(string):
  430. ret = []
  431. for char in string:
  432. if char in _diacritic_replace:
  433. ret.append(_diacritic_replace[char])
  434. else:
  435. ret.append(char)
  436. return ''.join(ret)
  437. def params(url=None):
  438. if not url:
  439. url = sys.argv[2]
  440. param = {}
  441. paramstring = url
  442. if len(paramstring) >= 2:
  443. params = url
  444. cleanedparams = params.replace('?', '')
  445. if (params[len(params) - 1] == '/'):
  446. params = params[0:len(params) - 2]
  447. pairsofparams = cleanedparams.split('&')
  448. param = {}
  449. for i in range(len(pairsofparams)):
  450. splitparams = {}
  451. splitparams = pairsofparams[i].split('=')
  452. if (len(splitparams)) == 2:
  453. param[splitparams[0]] = splitparams[1]
  454. for p in param.keys():
  455. param[p] = param[p].decode('hex')
  456. return param
  457. def int_to_base(number, base):
  458. digs = string.digits + string.letters
  459. if number < 0:
  460. sign = -1
  461. elif number == 0:
  462. return digs[0]
  463. else:
  464. sign = 1
  465. number *= sign
  466. digits = []
  467. while number:
  468. digits.append(digs[number % base])
  469. number /= base
  470. if sign < 0:
  471. digits.append('-')
  472. digits.reverse()
  473. return ''.join(digits)
  474. def extract_jwplayer_setup(data):
  475. """
  476. Extracts jwplayer setup configuration and returns it as a dictionary.
  477. :param data: A string to extract the setup from
  478. :return: A dictionary containing the setup configuration
  479. """
  480. data = re.search(r'<script.+?}\(\'(.+)\',\d+,\d+,\'([\w\|]+)\'.*</script>', data, re.I | re.S)
  481. if data:
  482. replacements = data.group(2).split('|')
  483. data = data.group(1)
  484. for i in reversed(range(len(replacements))):
  485. if len(replacements[i]) > 0:
  486. data = re.sub(r'\b%s\b' % int_to_base(i, 36), replacements[i], data)
  487. data = re.search(r'\.setup\(([^\)]+?)\);', data)
  488. if data:
  489. return json.loads(data.group(1).decode('string_escape'))
  490. #return demjson.decode(data.group(1).decode('string_escape')) ### III
  491. return None
  492. #def parse_html(url):
  493. # return BeautifulSoup(request(url), 'html5lib', from_encoding='utf-8')
  494. if __name__ == "__main__":
  495. s = 'B\xc4\x93thovena D\xc4\x81rgumu Taka (2014)/Beethoven&#x27;s Treasure [LV]'
  496. #s = s.decode("utf8")
  497. #s=unescape(s)
  498. #url = "http://localhost:88/https://walterebert.com/playground/video/hls/ts/480x270.m3u8?token=xxxx~User-Agent=Enigma2~Cookie=xxxxx"
  499. url = "http://hyt4d6.vkcache.com/secip/0/UMQ3q2gNjTlOPnEVm3iTiA/ODAuMjMyLjI0MC42/1479610800/hls-vod-s3/flv/api/files/videos/2015/09/11/144197748923a22.mp4.m3u8http://hyt4d6.vkcache.com/secip/0/Y-ZA1qRm8toplc0dN_L6_w/ODAuMjMyLjI0MC42/1479654000/hls-vod-s3/flv/api/files/videos/2015/09/11/144197748923a22.mp4.m3u8"
  500. headers = {"User-Agent":"Mozilla/5.0 (iPhone; CPU iPhone OS 9_2 like Mac OS X) AppleWebKit/601.1 (KHTML, like Gecko) CriOS/47.0.2526.70 Mobile/13C71 Safari/601.1.46"}
  501. urlp = streamproxy_encode(url,headers)
  502. print urlp
  503. player(urlp)
  504. pass