Enigma2 plugin to play various online streams (mostly Latvian).

util.py 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552
  1. # -*- coding: UTF-8 -*-
  2. # /*
  3. # * Copyright (C) 2011 Libor Zoubek,ivars777
  4. # *
  5. # *
  6. # * This Program is free software; you can redistribute it and/or modify
  7. # * it under the terms of the GNU General Public License as published by
  8. # * the Free Software Foundation; either version 2, or (at your option)
  9. # * any later version.
  10. # *
  11. # * This Program is distributed in the hope that it will be useful,
  12. # * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # * GNU General Public License for more details.
  15. # *
  16. # * You should have received a copy of the GNU General Public License
  17. # * along with this program; see the file COPYING. If not, write to
  18. # * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  19. # * http://www.gnu.org/copyleft/gpl.html
  20. # *
  21. # */
  22. import os
  23. import re
  24. import sys
  25. import urllib
  26. import urllib2
  27. import traceback
  28. import cookielib
  29. import requests
  30. from htmlentitydefs import name2codepoint as n2cp
  31. import HTMLParser
  32. import StringIO
  33. #import threading
  34. #import Queue
  35. import pickle
  36. import string
  37. import simplejson as json
  38. #from demjson import demjson
  39. #import demjson
  40. import json
  41. #from bs4 import BeautifulSoup
# User-Agent header value sent with the HTTP requests made by this module.
UA = 'Mozilla/6.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.5) Gecko/2008092417 Firefox/3.0.3'
# Verbosity threshold: info() prints when LOG > 0, debug() when LOG > 1.
LOG = 2
# Module-wide cookie jar; created by init_urllib().
_cookie_jar = None
# Key under which the pickled cookies are stored in the cache object.
CACHE_COOKIES = 'cookies'
  46. def play_video(streams):
  47. if len(streams)>1:
  48. for i,s in enumerate(streams):
  49. print "%s: [%s,%s,%s] %s"%(i,s["quality"],s["lang"].encode("utf8"),s["type"],s["name"])
  50. a = raw_input("Select stram to play: ")
  51. try:
  52. n = int(a)
  53. except:
  54. n = 0
  55. if n>=len(streams):
  56. stream = streams[-1]
  57. else:
  58. stream = streams[n]
  59. else:
  60. stream = streams[0]
  61. title = stream["name"]
  62. url = stream["url"]
  63. suburl = ""
  64. print url
  65. if "subs" in stream and stream["subs"]:
  66. suburl = stream["subs"][0]["url"]
  67. print "\n**Download subtitles %s - %s"%(title,suburl)
  68. subs = urllib2.urlopen(suburl).read()
  69. if subs:
  70. fname0 = re.sub("[/\n\r\t,]","_",title)
  71. subext = ".srt"
  72. subfile = os.path.join("",fname0+subext)
  73. if ".xml" in suburl:
  74. subs = ttaf2srt(subs)
  75. with open(subfile,"w") as f:
  76. f.write(subs)
  77. else:
  78. print "\n Error downloading subtitle %s"%suburl
  79. return player(url,stream["name"],suburl)
  80. def player(url,title="",suburl=""):
  81. from subprocess import call
  82. print "\n**Play stream %s\n%s"%(title,url.encode("utf8"))
  83. cmd1 = [r"c:\Program Files\VideoLAN\VLC\vlc.exe",url,
  84. "--meta-title",title.decode("utf8").encode(sys.getfilesystemencoding()),
  85. "--http-user-agent","Enigma2"
  86. ]
  87. # gst-launch-1.0 -v souphttpsrc ssl-strict=false proxy=127.0.0.1:8888 extra-headers="Origin:adadadasd" location="http://bitdash-a.akamaihd.net/content/sintel/sintel.mpd" ! decodebin! autovideosink
  88. cmd2 = [
  89. r"C:\gstreamer\1.0\x86_64\bin\gst-launch-1.0","-v",
  90. "playbin", 'uri="%s"'%url,
  91. #"souphttpsrc", "ssl-strict=false",
  92. #"proxy=127.0.0.1:8888",
  93. #'location="%s"'%url,
  94. #'!decodebin!autovideosink'
  95. ]
  96. cmd = cmd1 if url.startswith("https") else cmd2
  97. ret = call(cmd)
  98. #if ret:
  99. #a = raw_input("*** Error, continue")
  100. return
  101. SPLIT_CHAR = "~"
  102. SPLIT_CODE = "%7E"
  103. EQ_CODE = "%3D"
  104. COL_CODE = "%3A"
  105. PROXY_URL = "http://localhost:88/"
  106. def streamproxy_encode(url,headers=[]):
  107. if not "?" in url:
  108. url = url+"?"
  109. url2 = url.replace(SPLIT_CHAR,SPLIT_CODE).replace(":",COL_CODE)
  110. url2 = PROXY_URL + url2
  111. if headers:
  112. headers2 = []
  113. for h in headers:
  114. headers2.append("%s=%s"%(h,headers[h].replace("=",EQ_CODE).replace(SPLIT_CHAR,SPLIT_CODE)))
  115. headers2 = SPLIT_CHAR.join(headers2)
  116. url2 = url2+SPLIT_CHAR+headers2
  117. return url2
  118. def streamproxy_decode(urlp):
  119. import urlparse
  120. path = urlp.replace(re.search("http://[^/]+",urlp).group(0),"")
  121. p = path.split(SPLIT_CHAR)
  122. url = urllib.unquote(p[0][1:])
  123. #headers = {"User-Agent":"Mozilla/5.0 (iPhone; CPU iPhone OS 9_2 like Mac OS X) AppleWebKit/601.1 (KHTML, like Gecko) CriOS/47.0.2526.70 Mobile/13C71 Safari/601.1.46"}
  124. headers={}
  125. if len(p)>1:
  126. for h in p[1:]:
  127. headers[h.split("=")[0]]=urllib.unquote(h.split("=")[1])
  128. return url,headers
  129. class Captions(object):
  130. def __init__(self,uri):
  131. self.subs = []
  132. self.styles = {}
  133. if uri.startswith("http"):
  134. r = requests.get(uri)
  135. if r.status_code == 200:
  136. self.loads(r.content)
  137. def loads(self,s):
  138. if "WEBVTT" in s[:s.find("\n")]: # vtt captions
  139. self.load_vtt(s)
  140. elif "<?xml" in s[:s.find("\n")]:
  141. self.load_ttaf(s)
  142. else:
  143. return
  144. def load_ttaf(self,s):
  145. for r2 in re.findall("<style .+?/>", s):
  146. st = {}
  147. for a in re.findall(r'(\w+)="([^ "]+)"', r2):
  148. st[a[0]] = a[1]
  149. if a[0] == "id":
  150. sid = a[1]
  151. self.styles[sid] = st
  152. for r2 in re.findall("<p .+?</p>", s):
  153. sub = {}
  154. sub["begin"] = str2sec(re.search('begin="([^"]+)"', r2).group(1)) if re.search('begin="([^"]+)"', r2) else -1
  155. sub["end"] = str2sec(re.search('end="([^"]+)"', r2).group(1)) if re.search('end="([^"]+)"', r2) else -1
  156. sub["style"] = re.search('style="([^"]+)"', r2).group(1) if re.search('style="([^"]+)"', r2) else None
  157. sub["text"] = re.search("<p[^>]+>(.+)</p>", r2).group(1).replace("\n","")
  158. sub["text"] = re.sub("<br\s*?/>","\n",sub["text"])
  159. sub["text"] = re.sub("<.+?>"," ",sub["text"])
  160. self.subs.append(sub)
  161. pass
  162. def load_vtt(self,s):
  163. f = StringIO.StringIO(s)
  164. while True:
  165. line = f.readline()
  166. if not line:
  167. break
  168. m = re.search(r"([\d\.\,:]+)\s*-->\s*([\d\.\,\:]+)",line)
  169. if m:
  170. sub = {}
  171. sub["begin"] = str2sec(m.group(1))
  172. sub["end"] = str2sec(m.group(2))
  173. sub["style"] = None
  174. sub["text"] = []
  175. line = f.readline()
  176. while line.strip():
  177. txt = line.strip()
  178. if isinstance(txt,unicode):
  179. txt = txt.encode("utf8")
  180. sub["text"].append(txt)
  181. line = f.readline()
  182. sub["text"] = "\n".join(sub["text"])
  183. self.subs.append(sub)
  184. else:
  185. continue
  186. pass
  187. def str2sec(r):
  188. # Convert str time to miliseconds
  189. r= r.replace(",",".")
  190. m = re.search(r"(\d+\:)*(\d+)\:(\d+\.\d+)", r)
  191. if m:
  192. sec = int(m.group(1)[:-1])*60*60*1000 if m.group(1) else 0
  193. sec += int(m.group(2))*60*1000 + int(float(m.group(3))*1000)
  194. return sec
  195. else:
  196. return -1
  197. #c = Captions("http://195.13.216.2/mobile-vod/mp4:lb_barbecue_fr_lq.mp4/lb_barbecue_lv.vtt")
  198. #c = Captions("http://www.bbc.co.uk/iplayer/subtitles/ng/modav/bUnknown-0edd6227-0f38-411c-8d46-fa033c4c61c1_b05ql1s3_1479853893356.xml")
  199. #url = "http://195.13.216.2/mobile-vod/mp4:ac_now_you_see_me_2_en_lq.mp4/ac_now_you_see_me_2_lv.vtt"
  200. #c = Captions(url)
  201. #pass
  202. def ttaf2srt(s):
  203. out = u""
  204. i = 0
  205. for p,txt in re.findall("<p ([^>]+)>(.+?)</p>", s, re.DOTALL):
  206. i +=1
  207. begin = re.search('begin="(.+?)"',p).group(1)
  208. begin = begin.replace(".",",")
  209. end = re.search('end="(.+?)"',p).group(1)
  210. end = end.replace(".",",")
  211. txt2 = re.sub("<br */>","\n",txt)
  212. out += "%s\n%s --> %s\n%s\n\n"%(i,begin,end,txt2)
  213. return out
  214. def item():
  215. stream0 = {'name': '', 'url': '', 'quality': '???', 'surl': '', 'subs': '', 'headers': {},"desc":"","img":"","lang":"","type":"","order":0}
  216. return stream0
  217. class _StringCookieJar(cookielib.LWPCookieJar):
  218. def __init__(self, string=None, filename=None, delayload=False, policy=None):
  219. cookielib.LWPCookieJar.__init__(self, filename, delayload, policy)
  220. if string and len(string) > 0:
  221. self._cookies = pickle.loads(str(string))
  222. def dump(self):
  223. return pickle.dumps(self._cookies)
  224. def init_urllib(cache=None):
  225. """
  226. Initializes urllib cookie handler
  227. """
  228. global _cookie_jar
  229. data = None
  230. if cache is not None:
  231. data = cache.get(CACHE_COOKIES)
  232. _cookie_jar = _StringCookieJar(data)
  233. opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(_cookie_jar))
  234. urllib2.install_opener(opener)
  235. def cache_cookies(cache):
  236. """
  237. Saves cookies to cache
  238. """
  239. global _cookie_jar
  240. if _cookie_jar:
  241. cache.set(CACHE_COOKIES, _cookie_jar.dump())
  242. def request0(url, headers={}):
  243. debug('request: %s' % url)
  244. req = urllib2.Request(url, headers=headers)
  245. req.add_header('User-Agent', UA)
  246. try:
  247. response = urllib2.urlopen(req)
  248. data = response.read()
  249. response.close()
  250. except urllib2.HTTPError, error:
  251. data = error.read()
  252. debug('len(data) %s' % len(data))
  253. return data
  254. def request(url, headers={}):
  255. debug('request: %s' % url)
  256. #req = urllib2.Request(url, headers=headers)
  257. #req.add_header('User-Agent', UA)
  258. if 'User-Agent' not in headers:
  259. headers['User-Agent']= UA
  260. try:
  261. r = requests.get(url, headers=headers)
  262. data = r.content
  263. except:
  264. data = r.content
  265. debug('len(data) %s' % len(data))
  266. return data
  267. def post(url, data, headers={}):
  268. postdata = urllib.urlencode(data)
  269. #req = urllib2.Request(url, postdata, headers)
  270. #req.add_header('User-Agent', UA)
  271. import requests
  272. if 'User-Agent' not in headers:
  273. headers['User-Agent']= UA
  274. try:
  275. r = requests.post(url, data=postdata,headers=headers)
  276. data = r.content
  277. except urllib2.HTTPError, error:
  278. data = r.content
  279. return data
  280. def post0(url, data, headers={}):
  281. postdata = urllib.urlencode(data)
  282. req = urllib2.Request(url, postdata, headers)
  283. req.add_header('User-Agent', UA)
  284. try:
  285. response = urllib2.urlopen(req)
  286. data = response.read()
  287. response.close()
  288. except urllib2.HTTPError, error:
  289. data = error.read()
  290. return data
  291. def post_json(url, data, headers={}):
  292. postdata = json.dumps(data)
  293. headers['Content-Type'] = 'application/json'
  294. req = urllib2.Request(url, postdata, headers)
  295. req.add_header('User-Agent', UA)
  296. response = urllib2.urlopen(req)
  297. data = response.read()
  298. response.close()
  299. return data
  300. #def run_parallel_in_threads(target, args_list):
  301. #result = Queue.Queue()
  302. ## wrapper to collect return value in a Queue
  303. #def task_wrapper(*args):
  304. #result.put(target(*args))
  305. #threads = [threading.Thread(target=task_wrapper, args=args) for args in args_list]
  306. #for t in threads:
  307. #t.start()
  308. #for t in threads:
  309. #t.join()
  310. #return result
  311. def substr(data, start, end):
  312. i1 = data.find(start)
  313. i2 = data.find(end, i1)
  314. return data[i1:i2]
  315. def save_to_file(url, file):
  316. try:
  317. return save_data_to_file(request(url), file)
  318. except:
  319. traceback.print_exc()
  320. def save_data_to_file(data, file):
  321. try:
  322. f = open(file, 'wb')
  323. f.write(data)
  324. f.close()
  325. info('File %s saved' % file)
  326. return True
  327. except:
  328. traceback.print_exc()
  329. def read_file(file):
  330. if not os.path.exists(file):
  331. return ''
  332. f = open(file, 'r')
  333. data = f.read()
  334. f.close()
  335. return data
  336. def _substitute_entity(match):
  337. ent = match.group(3)
  338. if match.group(1) == '#':
  339. # decoding by number
  340. if match.group(2) == '':
  341. # number is in decimal
  342. return unichr(int(ent))
  343. elif match.group(2) == 'x':
  344. # number is in hex
  345. return unichr(int('0x' + ent, 16))
  346. else:
  347. # they were using a name
  348. cp = n2cp.get(ent)
  349. if cp:
  350. return unichr(cp)
  351. else:
  352. return match.group()
  353. def decode_html(data):
  354. if not type(data) == str:
  355. return data
  356. try:
  357. if not type(data) == unicode:
  358. data = unicode(data, 'utf-8', errors='ignore')
  359. entity_re = re.compile(r'&(#?)(x?)(\w+);')
  360. return entity_re.subn(_substitute_entity, data)[0]
  361. except:
  362. traceback.print_exc()
  363. print[data]
  364. return data
  365. def unescape(s0):
  366. #s2 = re.sub("&#\w+;",HTMLParser.HTMLParser().unescape("\1"),s)
  367. s0 = s0.replace("&amp;","&")
  368. for s in re.findall("&#\w+;",s0):
  369. s2 = HTMLParser.HTMLParser().unescape(s)
  370. if isinstance(s0,str):
  371. s2 = s2.encode("utf8")
  372. s0 = s0.replace(s,s2)
  373. pass
  374. return s0
  375. def debug(text):
  376. if LOG > 1:
  377. print('[DEBUG] ' + str([text]))
  378. def info(text):
  379. if LOG > 0:
  380. print('[INFO] ' + str([text]))
  381. def error(text):
  382. print('[ERROR] ' + str([text]))
  383. _diacritic_replace = {u'\u00f3': 'o',
  384. u'\u0213': '-',
  385. u'\u00e1': 'a',
  386. u'\u010d': 'c',
  387. u'\u010c': 'C',
  388. u'\u010f': 'd',
  389. u'\u010e': 'D',
  390. u'\u00e9': 'e',
  391. u'\u011b': 'e',
  392. u'\u00ed': 'i',
  393. u'\u0148': 'n',
  394. u'\u0159': 'r',
  395. u'\u0161': 's',
  396. u'\u0165': 't',
  397. u'\u016f': 'u',
  398. u'\u00fd': 'y',
  399. u'\u017e': 'z',
  400. u'\xed': 'i',
  401. u'\xe9': 'e',
  402. u'\xe1': 'a',
  403. }
  404. def replace_diacritic(string):
  405. ret = []
  406. for char in string:
  407. if char in _diacritic_replace:
  408. ret.append(_diacritic_replace[char])
  409. else:
  410. ret.append(char)
  411. return ''.join(ret)
  412. def params(url=None):
  413. if not url:
  414. url = sys.argv[2]
  415. param = {}
  416. paramstring = url
  417. if len(paramstring) >= 2:
  418. params = url
  419. cleanedparams = params.replace('?', '')
  420. if (params[len(params) - 1] == '/'):
  421. params = params[0:len(params) - 2]
  422. pairsofparams = cleanedparams.split('&')
  423. param = {}
  424. for i in range(len(pairsofparams)):
  425. splitparams = {}
  426. splitparams = pairsofparams[i].split('=')
  427. if (len(splitparams)) == 2:
  428. param[splitparams[0]] = splitparams[1]
  429. for p in param.keys():
  430. param[p] = param[p].decode('hex')
  431. return param
  432. def int_to_base(number, base):
  433. digs = string.digits + string.letters
  434. if number < 0:
  435. sign = -1
  436. elif number == 0:
  437. return digs[0]
  438. else:
  439. sign = 1
  440. number *= sign
  441. digits = []
  442. while number:
  443. digits.append(digs[number % base])
  444. number /= base
  445. if sign < 0:
  446. digits.append('-')
  447. digits.reverse()
  448. return ''.join(digits)
  449. def extract_jwplayer_setup(data):
  450. """
  451. Extracts jwplayer setup configuration and returns it as a dictionary.
  452. :param data: A string to extract the setup from
  453. :return: A dictionary containing the setup configuration
  454. """
  455. data = re.search(r'<script.+?}\(\'(.+)\',\d+,\d+,\'([\w\|]+)\'.*</script>', data, re.I | re.S)
  456. if data:
  457. replacements = data.group(2).split('|')
  458. data = data.group(1)
  459. for i in reversed(range(len(replacements))):
  460. if len(replacements[i]) > 0:
  461. data = re.sub(r'\b%s\b' % int_to_base(i, 36), replacements[i], data)
  462. data = re.search(r'\.setup\(([^\)]+?)\);', data)
  463. if data:
  464. return json.loads(data.group(1).decode('string_escape'))
  465. #return demjson.decode(data.group(1).decode('string_escape')) ### III
  466. return None
  467. #def parse_html(url):
  468. # return BeautifulSoup(request(url), 'html5lib', from_encoding='utf-8')
# Manual smoke test: encode a sample stream URL plus a User-Agent header
# into a proxy URL, print it and hand it to the external player.
if __name__ == "__main__":
    # Sample title (UTF-8 bytes with an HTML entity); not used below.
    s = 'B\xc4\x93thovena D\xc4\x81rgumu Taka (2014)/Beethoven&#x27;s Treasure [LV]'
    #s = s.decode("utf8")
    #s=unescape(s)
    #url = "http://localhost:88/https://walterebert.com/playground/video/hls/ts/480x270.m3u8?token=xxxx~User-Agent=Enigma2~Cookie=xxxxx"
    # NOTE(review): this literal contains two URLs back to back, which looks
    # like a paste accident -- confirm which one is intended.
    url = "http://hyt4d6.vkcache.com/secip/0/UMQ3q2gNjTlOPnEVm3iTiA/ODAuMjMyLjI0MC42/1479610800/hls-vod-s3/flv/api/files/videos/2015/09/11/144197748923a22.mp4.m3u8http://hyt4d6.vkcache.com/secip/0/Y-ZA1qRm8toplc0dN_L6_w/ODAuMjMyLjI0MC42/1479654000/hls-vod-s3/flv/api/files/videos/2015/09/11/144197748923a22.mp4.m3u8"
    headers = {"User-Agent":"Mozilla/5.0 (iPhone; CPU iPhone OS 9_2 like Mac OS X) AppleWebKit/601.1 (KHTML, like Gecko) CriOS/47.0.2526.70 Mobile/13C71 Safari/601.1.46"}
    urlp = streamproxy_encode(url,headers)
    print urlp
    player(urlp)
    pass