Enigma2 plugin to play various online streams (mostly Latvian).

util.py 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567
  1. # -*- coding: UTF-8 -*-
  2. # /*
  3. # * Copyright (C) 2011 Libor Zoubek,ivars777
  4. # *
  5. # *
  6. # * This Program is free software; you can redistribute it and/or modify
  7. # * it under the terms of the GNU General Public License as published by
  8. # * the Free Software Foundation; either version 2, or (at your option)
  9. # * any later version.
  10. # *
  11. # * This Program is distributed in the hope that it will be useful,
  12. # * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # * GNU General Public License for more details.
  15. # *
  16. # * You should have received a copy of the GNU General Public License
  17. # * along with this program; see the file COPYING. If not, write to
  18. # * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  19. # * http://www.gnu.org/copyleft/gpl.html
  20. # *
  21. # */
  22. import os
  23. import re
  24. import sys
  25. import urllib
  26. import urllib2
  27. import traceback
  28. import cookielib
  29. import requests
  30. from htmlentitydefs import name2codepoint as n2cp
  31. import HTMLParser
  32. import StringIO
  33. #import threading
  34. #import Queue
  35. import pickle
  36. import string
  37. import simplejson as json
  38. #from demjson import demjson
  39. #import demjson
  40. import json
  41. #from bs4 import BeautifulSoup
# Default User-Agent header sent with outgoing HTTP requests.
UA = 'Mozilla/6.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.5) Gecko/2008092417 Firefox/3.0.3'
# Log verbosity: 0 = errors only, 1 = +info, 2 = +debug (see debug()/info()/error()).
LOG = 2
# Module-level cookie jar, created by init_urllib().
_cookie_jar = None
# Cache key under which serialized cookies are stored (see cache_cookies()).
CACHE_COOKIES = 'cookies'
  46. def play_video(streams):
  47. if len(streams)>1:
  48. for i,s in enumerate(streams):
  49. print "%s: [%s,%s,%s] %s"%(i,s["quality"],s["lang"].encode("utf8"),s["type"],s["name"])
  50. a = raw_input("Select stram to play: ")
  51. try:
  52. n = int(a)
  53. except:
  54. n = 0
  55. if n>=len(streams):
  56. stream = streams[-1]
  57. else:
  58. stream = streams[n]
  59. else:
  60. stream = streams[0]
  61. stream = stream_change(stream)
  62. title = stream["name"]
  63. url = stream["url"]
  64. suburl = ""
  65. print url
  66. if "subs" in stream and stream["subs"]:
  67. suburl = stream["subs"][0]["url"]
  68. print "\n**Download subtitles %s - %s"%(title,suburl)
  69. subs = urllib2.urlopen(suburl).read()
  70. if subs:
  71. fname0 = re.sub("[/\n\r\t,]","_",title)
  72. subext = ".srt"
  73. subfile = os.path.join("",fname0+subext)
  74. if ".xml" in suburl:
  75. subs = ttaf2srt(subs)
  76. with open(subfile,"w") as f:
  77. f.write(subs)
  78. else:
  79. print "\n Error downloading subtitle %s"%suburl
  80. return player(url,stream["name"],suburl,stream["headers"])
def player(url,title="",suburl="",headers={}):
    """Play *url* with a desktop player: VLC for https URLs, gst-launch
    otherwise. Development helper with hard-coded Windows binary paths.

    NOTE(review): *suburl* and *headers* are accepted but never used here.
    """
    from subprocess import call
    print "\n**Play stream %s\n%s"%(title,url.encode("utf8"))
    # VLC command line (chosen for https URLs below).
    cmd1 = [r"c:\Program Files\VideoLAN\VLC\vlc.exe",url,
            "--meta-title",title.decode("utf8").encode(sys.getfilesystemencoding()),
            "--http-user-agent","Enigma2"
            ]
    # gst-launch-1.0 -v souphttpsrc ssl-strict=false proxy=127.0.0.1:8888 extra-headers="Origin:adadadasd" location="http://bitdash-a.akamaihd.net/content/sintel/sintel.mpd" ! decodebin! autovideosink
    # GStreamer command line (chosen for non-https URLs below).
    cmd2 = [
        r"C:\gstreamer\1.0\x86_64\bin\gst-launch-1.0","-v",
        "playbin", 'uri="%s"'%url,
        #"souphttpsrc", "ssl-strict=false",
        #"proxy=127.0.0.1:8888",
        #'location="%s"'%url,
        #'!decodebin!autovideosink'
    ]
    cmd = cmd1 if url.startswith("https") else cmd2
    ret = call(cmd)
    #if ret:
    #    a = raw_input("*** Error, continue")
    return
# Separator joining the URL and header fields inside a proxied URL,
# plus the percent-encoded forms of the characters that must be escaped
# in header values (see streamproxy_encode()/streamproxy_decode()).
SPLIT_CHAR = "~"
SPLIT_CODE = urllib.quote(SPLIT_CHAR)
EQ_CODE = urllib.quote("=")
COL_CODE = urllib.quote(":")
SPACE_CODE = urllib.quote(" ")
# Base address of the local stream proxy.
PROXY_URL = "http://localhost:88/"
  108. def stream_change(stream):
  109. #return stream # TODO
  110. if stream["surl"]:
  111. if not re.search("https*://(hqq|goo.\gl)",stream["surl"]):
  112. return stream
  113. stream["url"] = streamproxy_encode(stream["url"],stream["headers"])
  114. stream["headers"] = {}
  115. return stream
  116. else:
  117. return stream
  118. def streamproxy_encode(url,headers=[]):
  119. if not "?" in url:
  120. url = url+"?"
  121. url2 = url.replace(SPLIT_CHAR,SPLIT_CODE).replace(":",COL_CODE).replace(" ",SPACE_CODE)
  122. url2 = PROXY_URL + url2
  123. if headers:
  124. headers2 = []
  125. for h in headers:
  126. headers2.append("%s=%s"%(h,headers[h].replace("=",EQ_CODE).replace(SPLIT_CHAR,SPLIT_CODE).replace(" ",SPACE_CODE)))
  127. headers2 = SPLIT_CHAR.join(headers2)
  128. url2 = url2+SPLIT_CHAR+headers2
  129. return url2
  130. def streamproxy_decode(urlp):
  131. import urlparse
  132. path = urlp.replace(re.search("http://[^/]+",urlp).group(0),"")
  133. p = path.split(SPLIT_CHAR)
  134. url = urllib.unquote(p[0][1:])
  135. #headers = {"User-Agent":"Mozilla/5.0 (iPhone; CPU iPhone OS 9_2 like Mac OS X) AppleWebKit/601.1 (KHTML, like Gecko) CriOS/47.0.2526.70 Mobile/13C71 Safari/601.1.46"}
  136. headers={}
  137. if len(p)>1:
  138. for h in p[1:]:
  139. #h = urllib.unquote()
  140. headers[h.split("=")[0]]=urllib.unquote(h.split("=")[1])
  141. return url,headers
class Captions(object):
    """Parser for subtitle files in WebVTT and TTAF/TTML XML formats.

    Parsed cues accumulate in self.subs as dicts with keys "begin"/"end"
    (milliseconds via str2sec, -1 when missing), "style" and "text".
    """
    def __init__(self,uri):
        self.subs = []    # list of parsed cue dicts
        self.styles = {}  # TTAF style id -> attribute dict
        if uri.startswith("http"):
            r = requests.get(uri)
            if r.status_code == 200:
                self.loads(r.content)
    def loads(self,s):
        # Dispatch on the first line of the payload.
        if "WEBVTT" in s[:s.find("\n")]: # vtt captions
            self.load_vtt(s)
        elif "<?xml" in s[:s.find("\n")]:
            self.load_ttaf(s)
        else:
            self.load_vtt(s) # TODO
    def load_ttaf(self,s):
        # Collect <style .../> elements, keyed by their id attribute.
        for r2 in re.findall("<style .+?/>", s):
            st = {}
            for a in re.findall(r'(\w+)="([^ "]+)"', r2):
                st[a[0]] = a[1]
                if a[0] == "id":
                    sid = a[1]
            self.styles[sid] = st
        # Each <p> element is one cue; timing/style come from its attributes.
        for r2 in re.findall("<p .+?</p>", s):
            sub = {}
            sub["begin"] = str2sec(re.search('begin="([^"]+)"', r2).group(1)) if re.search('begin="([^"]+)"', r2) else -1
            sub["end"] = str2sec(re.search('end="([^"]+)"', r2).group(1)) if re.search('end="([^"]+)"', r2) else -1
            sub["style"] = re.search('style="([^"]+)"', r2).group(1) if re.search('style="([^"]+)"', r2) else None
            sub["text"] = re.search("<p[^>]+>(.+)</p>", r2).group(1).replace("\n","")
            # <br/> becomes a line break, any remaining markup is dropped.
            sub["text"] = re.sub("<br\s*?/>","\n",sub["text"])
            sub["text"] = re.sub("<.+?>"," ",sub["text"])
            self.subs.append(sub)
        pass
    def load_vtt(self,s):
        f = StringIO.StringIO(s)
        while True:
            line = f.readline()
            if not line:
                break
            # Cue timing line, e.g. "00:00:01.000 --> 00:00:02.000".
            m = re.search(r"([\d\.\,:]+)\s*-->\s*([\d\.\,\:]+)",line)
            if m:
                sub = {}
                sub["begin"] = str2sec(m.group(1))
                sub["end"] = str2sec(m.group(2))
                sub["style"] = None
                sub["text"] = []
                # Cue text runs until the first blank line.
                line = f.readline()
                while line.strip():
                    txt = line.strip()
                    if isinstance(txt,unicode):
                        txt = txt.encode("utf8")
                    sub["text"].append(txt)
                    line = f.readline()
                sub["text"] = "\n".join(sub["text"])
                self.subs.append(sub)
            else:
                continue
        pass
  200. def str2sec(r):
  201. # Convert str time to miliseconds
  202. r= r.replace(",",".")
  203. m = re.search(r"(\d+\:)*(\d+)\:(\d+\.\d+)", r)
  204. if m:
  205. sec = int(m.group(1)[:-1])*60*60*1000 if m.group(1) else 0
  206. sec += int(m.group(2))*60*1000 + int(float(m.group(3))*1000)
  207. return sec
  208. else:
  209. return -1
  210. #c = Captions("http://195.13.216.2/mobile-vod/mp4:lb_barbecue_fr_lq.mp4/lb_barbecue_lv.vtt")
  211. #c = Captions("http://www.bbc.co.uk/iplayer/subtitles/ng/modav/bUnknown-0edd6227-0f38-411c-8d46-fa033c4c61c1_b05ql1s3_1479853893356.xml")
  212. #url = "http://195.13.216.2/mobile-vod/mp4:ac_now_you_see_me_2_en_lq.mp4/ac_now_you_see_me_2_lv.vtt"
  213. #c = Captions(url)
  214. #pass
  215. def ttaf2srt(s):
  216. out = u""
  217. i = 0
  218. for p,txt in re.findall("<p ([^>]+)>(.+?)</p>", s, re.DOTALL):
  219. i +=1
  220. begin = re.search('begin="(.+?)"',p).group(1)
  221. begin = begin.replace(".",",")
  222. end = re.search('end="(.+?)"',p).group(1)
  223. end = end.replace(".",",")
  224. txt2 = re.sub("<br */>","\n",txt)
  225. out += "%s\n%s --> %s\n%s\n\n"%(i,begin,end,txt2)
  226. return out
  227. def item():
  228. stream0 = {'name': '', 'url': '', 'quality': '???', 'surl': '', 'subs': '', 'headers': {},"desc":"","img":"","lang":"","type":"","order":0}
  229. return stream0
class _StringCookieJar(cookielib.LWPCookieJar):
    """LWPCookieJar that can be (de)serialized to a pickle string, so the
    cookie state can be kept in the plugin cache (see init_urllib())."""
    def __init__(self, string=None, filename=None, delayload=False, policy=None):
        cookielib.LWPCookieJar.__init__(self, filename, delayload, policy)
        if string and len(string) > 0:
            # NOTE(review): pickle.loads on cached data -- only safe while
            # the cache is local and trusted.
            self._cookies = pickle.loads(str(string))
    def dump(self):
        # Serialize the internal cookie state (consumed by cache_cookies()).
        return pickle.dumps(self._cookies)
def init_urllib(cache=None):
    """
    Initializes urllib cookie handler

    Restores cookies from *cache* (if given) and installs a global urllib2
    opener so all subsequent urllib2 requests share the cookie jar.
    """
    global _cookie_jar
    data = None
    if cache is not None:
        data = cache.get(CACHE_COOKIES)
    _cookie_jar = _StringCookieJar(data)
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(_cookie_jar))
    urllib2.install_opener(opener)
def cache_cookies(cache):
    """
    Saves cookies to cache

    No-op when init_urllib() has not been called yet (jar is None).
    """
    global _cookie_jar
    if _cookie_jar:
        cache.set(CACHE_COOKIES, _cookie_jar.dump())
def request0(url, headers={}):
    """Fetch *url* with urllib2 and return the response body.

    On HTTPError the error body is returned instead of raising
    (legacy urllib2 variant of request(), kept for reference).
    """
    debug('request: %s' % url)
    req = urllib2.Request(url, headers=headers)
    req.add_header('User-Agent', UA)
    try:
        response = urllib2.urlopen(req)
        data = response.read()
        response.close()
    except urllib2.HTTPError, error:
        data = error.read()
    debug('len(data) %s' % len(data))
    return data
  267. def request(url, headers={}):
  268. debug('request: %s' % url)
  269. #req = urllib2.Request(url, headers=headers)
  270. #req.add_header('User-Agent', UA)
  271. if 'User-Agent' not in headers:
  272. headers['User-Agent']= UA
  273. try:
  274. r = requests.get(url, headers=headers)
  275. data = r.content
  276. except:
  277. data = r.content
  278. debug('len(data) %s' % len(data))
  279. return data
  280. def post(url, data, headers={}):
  281. postdata = urllib.urlencode(data)
  282. #req = urllib2.Request(url, postdata, headers)
  283. #req.add_header('User-Agent', UA)
  284. import requests
  285. if 'User-Agent' not in headers:
  286. headers['User-Agent']= UA
  287. try:
  288. r = requests.post(url, data=postdata,headers=headers)
  289. data = r.content
  290. except urllib2.HTTPError, error:
  291. data = r.content
  292. return data
def post0(url, data, headers={}):
    """POST form-encoded *data* with urllib2 and return the response body.

    On HTTPError the error body is returned instead of raising
    (legacy urllib2 variant of post(), kept for reference).
    """
    postdata = urllib.urlencode(data)
    req = urllib2.Request(url, postdata, headers)
    req.add_header('User-Agent', UA)
    try:
        response = urllib2.urlopen(req)
        data = response.read()
        response.close()
    except urllib2.HTTPError, error:
        data = error.read()
    return data
  304. def post_json(url, data, headers={}):
  305. postdata = json.dumps(data)
  306. headers['Content-Type'] = 'application/json'
  307. req = urllib2.Request(url, postdata, headers)
  308. req.add_header('User-Agent', UA)
  309. response = urllib2.urlopen(req)
  310. data = response.read()
  311. response.close()
  312. return data
  313. #def run_parallel_in_threads(target, args_list):
  314. #result = Queue.Queue()
  315. ## wrapper to collect return value in a Queue
  316. #def task_wrapper(*args):
  317. #result.put(target(*args))
  318. #threads = [threading.Thread(target=task_wrapper, args=args) for args in args_list]
  319. #for t in threads:
  320. #t.start()
  321. #for t in threads:
  322. #t.join()
  323. #return result
  324. def substr(data, start, end):
  325. i1 = data.find(start)
  326. i2 = data.find(end, i1)
  327. return data[i1:i2]
def save_to_file(url, file):
    """Download *url* (via request()) and write it to *file*.

    Returns True on success, None on failure (traceback is printed).
    """
    try:
        return save_data_to_file(request(url), file)
    except:
        traceback.print_exc()
  333. def save_data_to_file(data, file):
  334. try:
  335. f = open(file, 'wb')
  336. f.write(data)
  337. f.close()
  338. info('File %s saved' % file)
  339. return True
  340. except:
  341. traceback.print_exc()
  342. def read_file(file):
  343. if not os.path.exists(file):
  344. return ''
  345. f = open(file, 'r')
  346. data = f.read()
  347. f.close()
  348. return data
def _substitute_entity(match):
    """re.sub callback for decode_html(): convert one matched HTML entity
    (&#nnn; / &#xhh; / &name;) to its unicode character; an unknown named
    entity is returned unchanged."""
    ent = match.group(3)
    if match.group(1) == '#':
        # decoding by number
        if match.group(2) == '':
            # number is in decimal
            return unichr(int(ent))
        elif match.group(2) == 'x':
            # number is in hex
            return unichr(int('0x' + ent, 16))
    else:
        # they were using a name
        cp = n2cp.get(ent)
        if cp:
            return unichr(cp)
        else:
            return match.group()
def decode_html(data):
    """Decode HTML entities in a byte string *data* to a unicode string.

    Non-str input is returned unchanged; on error the (possibly converted)
    value is returned and the traceback printed.
    """
    if not type(data) == str:
        return data
    try:
        if not type(data) == unicode:
            data = unicode(data, 'utf-8', errors='ignore')
        # Matches decimal (&#65;), hex (&#x41;) and named (&amp;) entities.
        entity_re = re.compile(r'&(#?)(x?)(\w+);')
        return entity_re.subn(_substitute_entity, data)[0]
    except:
        traceback.print_exc()
        print[data]
        return data
def unescape(s0):
    """Replace &amp; and numeric character references (&#nnn;) in *s0*
    with literal characters, preserving the input's str/unicode type."""
    #s2 = re.sub("&#\w+;",HTMLParser.HTMLParser().unescape("\1"),s)
    s0 = s0.replace("&amp;","&")
    for s in re.findall("&#\w+;",s0):
        s2 = HTMLParser.HTMLParser().unescape(s)
        # Keep byte strings as UTF-8 bytes rather than promoting to unicode.
        if isinstance(s0,str):
            s2 = s2.encode("utf8")
        s0 = s0.replace(s,s2)
        pass
    return s0
def debug(text):
    # Print *text* when LOG verbosity is at least 2 (wrapped in a list so
    # non-ASCII strings are shown as escaped reprs).
    if LOG > 1:
        print('[DEBUG] ' + str([text]))
def info(text):
    # Print *text* when LOG verbosity is at least 1.
    if LOG > 0:
        print('[INFO] ' + str([text]))
def error(text):
    # Always print *text* regardless of the LOG level.
    print('[ERROR] ' + str([text]))
# Mapping of accented characters to plain-ASCII equivalents, used by
# replace_diacritic() (mostly Czech/Latvian diacritics).
_diacritic_replace = {u'\u00f3': 'o',
                      u'\u0213': '-',
                      u'\u00e1': 'a',
                      u'\u010d': 'c',
                      u'\u010c': 'C',
                      u'\u010f': 'd',
                      u'\u010e': 'D',
                      u'\u00e9': 'e',
                      u'\u011b': 'e',
                      u'\u00ed': 'i',
                      u'\u0148': 'n',
                      u'\u0159': 'r',
                      u'\u0161': 's',
                      u'\u0165': 't',
                      u'\u016f': 'u',
                      u'\u00fd': 'y',
                      u'\u017e': 'z',
                      u'\xed': 'i',
                      u'\xe9': 'e',
                      u'\xe1': 'a',
                      }
  417. def replace_diacritic(string):
  418. ret = []
  419. for char in string:
  420. if char in _diacritic_replace:
  421. ret.append(_diacritic_replace[char])
  422. else:
  423. ret.append(char)
  424. return ''.join(ret)
def params(url=None):
    """Parse "?a=1&b=2"-style parameters from *url* (default: sys.argv[2])
    into a dict; values are hex-decoded (Python 2 str.decode('hex')).

    NOTE(review): the trailing-'/' strip below edits *params* after
    *cleanedparams* was already derived, so it never affects the result;
    it also drops TWO characters (len-2), not just the '/'.
    """
    if not url:
        url = sys.argv[2]
    param = {}
    paramstring = url
    if len(paramstring) >= 2:
        params = url
        cleanedparams = params.replace('?', '')
        if (params[len(params) - 1] == '/'):
            params = params[0:len(params) - 2]
        pairsofparams = cleanedparams.split('&')
        param = {}
        for i in range(len(pairsofparams)):
            splitparams = {}
            splitparams = pairsofparams[i].split('=')
            if (len(splitparams)) == 2:
                param[splitparams[0]] = splitparams[1]
    for p in param.keys():
        param[p] = param[p].decode('hex')
    return param
  445. def int_to_base(number, base):
  446. digs = string.digits + string.letters
  447. if number < 0:
  448. sign = -1
  449. elif number == 0:
  450. return digs[0]
  451. else:
  452. sign = 1
  453. number *= sign
  454. digits = []
  455. while number:
  456. digits.append(digs[number % base])
  457. number /= base
  458. if sign < 0:
  459. digits.append('-')
  460. digits.reverse()
  461. return ''.join(digits)
def extract_jwplayer_setup(data):
    """
    Extracts jwplayer setup configuration and returns it as a dictionary.

    :param data: A string to extract the setup from
    :return: A dictionary containing the setup configuration
    """
    # The page embeds a packed ("p.a.c.k.e.r."-style) script: group(1) is the
    # packed source, group(2) the '|'-separated replacement word list.
    data = re.search(r'<script.+?}\(\'(.+)\',\d+,\d+,\'([\w\|]+)\'.*</script>', data, re.I | re.S)
    if data:
        replacements = data.group(2).split('|')
        data = data.group(1)
        # Substitute base-36 tokens with their replacement words, highest
        # index first so multi-character tokens are handled before prefixes.
        for i in reversed(range(len(replacements))):
            if len(replacements[i]) > 0:
                data = re.sub(r'\b%s\b' % int_to_base(i, 36), replacements[i], data)
        data = re.search(r'\.setup\(([^\)]+?)\);', data)
        if data:
            # string_escape is Python-2-only; unpacks \'...\' style escaping.
            return json.loads(data.group(1).decode('string_escape'))
        #return demjson.decode(data.group(1).decode('string_escape')) ### III
    return None
  480. #def parse_html(url):
  481. # return BeautifulSoup(request(url), 'html5lib', from_encoding='utf-8')
if __name__ == "__main__":
    # Ad-hoc manual test: build a proxied URL for a sample stream and play it.
    s = 'B\xc4\x93thovena D\xc4\x81rgumu Taka (2014)/Beethoven&#x27;s Treasure [LV]'
    #s = s.decode("utf8")
    #s=unescape(s)
    #url = "http://localhost:88/https://walterebert.com/playground/video/hls/ts/480x270.m3u8?token=xxxx~User-Agent=Enigma2~Cookie=xxxxx"
    url = "http://hyt4d6.vkcache.com/secip/0/UMQ3q2gNjTlOPnEVm3iTiA/ODAuMjMyLjI0MC42/1479610800/hls-vod-s3/flv/api/files/videos/2015/09/11/144197748923a22.mp4.m3u8http://hyt4d6.vkcache.com/secip/0/Y-ZA1qRm8toplc0dN_L6_w/ODAuMjMyLjI0MC42/1479654000/hls-vod-s3/flv/api/files/videos/2015/09/11/144197748923a22.mp4.m3u8"
    headers = {"User-Agent":"Mozilla/5.0 (iPhone; CPU iPhone OS 9_2 like Mac OS X) AppleWebKit/601.1 (KHTML, like Gecko) CriOS/47.0.2526.70 Mobile/13C71 Safari/601.1.46"}
    urlp = streamproxy_encode(url,headers)
    print urlp
    player(urlp)
    pass