Enigma2 plugin to play various online streams (mostly Latvian).

parser.py 7.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. # coding: utf-8
  2. # Copyright 2014 Globo.com Player authors. All rights reserved.
  3. # Use of this source code is governed by a MIT License
  4. # license that can be found in the LICENSE file.
  5. import arrow
  6. import datetime
  7. import itertools
  8. import re
  9. from m3u8 import protocol
'''
http://tools.ietf.org/html/draft-pantos-http-live-streaming-08#section-3.2
http://stackoverflow.com/questions/2785755/how-to-split-but-ignore-separators-in-quoted-strings-in-python
'''
# Matches one item of a comma-separated attribute list: a run of characters
# that are neither commas nor quotes, and/or complete single- or double-quoted
# strings (which may themselves contain commas). Because the pattern has a
# capturing group, ``ATTRIBUTELISTPATTERN.split(text)[1::2]`` yields the
# matched items, i.e. the individual ``NAME=VALUE`` chunks.
ATTRIBUTELISTPATTERN = re.compile(r'''((?:[^,"']|"[^"]*"|'[^']*')+)''')
  15. def cast_date_time(value):
  16. return arrow.get(value).datetime
  17. def parse(content):
  18. '''
  19. Given a M3U8 playlist content returns a dictionary with all data found
  20. '''
  21. data = {
  22. 'is_variant': False,
  23. 'is_endlist': False,
  24. 'is_i_frames_only': False,
  25. 'playlist_type': None,
  26. 'playlists': [],
  27. 'iframe_playlists': [],
  28. 'segments': [],
  29. 'media': [],
  30. }
  31. state = {
  32. 'expect_segment': False,
  33. 'expect_playlist': False,
  34. }
  35. for line in string_to_lines(content):
  36. line = line.strip()
  37. if line.startswith(protocol.ext_x_byterange):
  38. _parse_byterange(line, state)
  39. state['expect_segment'] = True
  40. elif state['expect_segment']:
  41. _parse_ts_chunk(line, data, state)
  42. state['expect_segment'] = False
  43. elif state['expect_playlist']:
  44. _parse_variant_playlist(line, data, state)
  45. state['expect_playlist'] = False
  46. elif line.startswith(protocol.ext_x_targetduration):
  47. _parse_simple_parameter(line, data, float)
  48. elif line.startswith(protocol.ext_x_media_sequence):
  49. _parse_simple_parameter(line, data, int)
  50. elif line.startswith(protocol.ext_x_program_date_time):
  51. _, program_date_time = _parse_simple_parameter_raw_value(line, cast_date_time)
  52. if not data.get('program_date_time'):
  53. data['program_date_time'] = program_date_time
  54. state['current_program_date_time'] = program_date_time
  55. elif line.startswith(protocol.ext_x_discontinuity):
  56. state['discontinuity'] = True
  57. elif line.startswith(protocol.ext_x_version):
  58. _parse_simple_parameter(line, data)
  59. elif line.startswith(protocol.ext_x_allow_cache):
  60. _parse_simple_parameter(line, data)
  61. elif line.startswith(protocol.ext_x_key):
  62. state['current_key'] = _parse_key(line)
  63. data['key'] = data.get('key', state['current_key'])
  64. elif line.startswith(protocol.extinf):
  65. _parse_extinf(line, data, state)
  66. state['expect_segment'] = True
  67. elif line.startswith(protocol.ext_x_stream_inf):
  68. state['expect_playlist'] = True
  69. _parse_stream_inf(line, data, state)
  70. elif line.startswith(protocol.ext_x_i_frame_stream_inf):
  71. _parse_i_frame_stream_inf(line, data)
  72. elif line.startswith(protocol.ext_x_media):
  73. _parse_media(line, data, state)
  74. elif line.startswith(protocol.ext_x_playlist_type):
  75. _parse_simple_parameter(line, data)
  76. elif line.startswith(protocol.ext_i_frames_only):
  77. data['is_i_frames_only'] = True
  78. elif line.startswith(protocol.ext_x_endlist):
  79. data['is_endlist'] = True
  80. return data
  81. def _parse_key(line):
  82. params = ATTRIBUTELISTPATTERN.split(line.replace(protocol.ext_x_key + ':', ''))[1::2]
  83. key = {}
  84. for param in params:
  85. name, value = param.split('=', 1)
  86. key[normalize_attribute(name)] = remove_quotes(value)
  87. return key
  88. def _parse_extinf(line, data, state):
  89. duration, title = line.replace(protocol.extinf + ':', '').split(',')
  90. state['segment'] = {'duration': float(duration), 'title': remove_quotes(title)}
  91. def _parse_ts_chunk(line, data, state):
  92. segment = state.pop('segment')
  93. if state.get('current_program_date_time'):
  94. segment['program_date_time'] = state['current_program_date_time']
  95. state['current_program_date_time'] += datetime.timedelta(seconds=segment['duration'])
  96. segment['uri'] = line
  97. segment['discontinuity'] = state.pop('discontinuity', False)
  98. if state.get('current_key'):
  99. segment['key'] = state['current_key']
  100. data['segments'].append(segment)
  101. def _parse_attribute_list(prefix, line, atribute_parser):
  102. params = ATTRIBUTELISTPATTERN.split(line.replace(prefix + ':', ''))[1::2]
  103. attributes = {}
  104. for param in params:
  105. name, value = param.split('=', 1)
  106. name = normalize_attribute(name)
  107. if name in atribute_parser:
  108. value = atribute_parser[name](value)
  109. attributes[name] = value
  110. return attributes
  111. def _parse_stream_inf(line, data, state):
  112. data['is_variant'] = True
  113. atribute_parser = remove_quotes_parser('codecs', 'audio', 'video', 'subtitles')
  114. atribute_parser["program_id"] = int
  115. atribute_parser["bandwidth"] = int
  116. state['stream_info'] = _parse_attribute_list(protocol.ext_x_stream_inf, line, atribute_parser)
  117. def _parse_i_frame_stream_inf(line, data):
  118. atribute_parser = remove_quotes_parser('codecs', 'uri')
  119. atribute_parser["program_id"] = int
  120. atribute_parser["bandwidth"] = int
  121. iframe_stream_info = _parse_attribute_list(protocol.ext_x_i_frame_stream_inf, line, atribute_parser)
  122. iframe_playlist = {'uri': iframe_stream_info.pop('uri'),
  123. 'iframe_stream_info': iframe_stream_info}
  124. data['iframe_playlists'].append(iframe_playlist)
  125. def _parse_media(line, data, state):
  126. quoted = remove_quotes_parser('uri', 'group_id', 'language', 'name', 'characteristics')
  127. media = _parse_attribute_list(protocol.ext_x_media, line, quoted)
  128. data['media'].append(media)
  129. def _parse_variant_playlist(line, data, state):
  130. playlist = {'uri': line,
  131. 'stream_info': state.pop('stream_info')}
  132. data['playlists'].append(playlist)
  133. def _parse_byterange(line, state):
  134. state['segment']['byterange'] = line.replace(protocol.ext_x_byterange + ':', '')
  135. def _parse_simple_parameter_raw_value(line, cast_to=str, normalize=False):
  136. param, value = line.split(':', 1)
  137. param = normalize_attribute(param.replace('#EXT-X-', ''))
  138. if normalize:
  139. value = normalize_attribute(value)
  140. return param, cast_to(value)
  141. def _parse_and_set_simple_parameter_raw_value(line, data, cast_to=str, normalize=False):
  142. param, value = _parse_simple_parameter_raw_value(line, cast_to, normalize)
  143. data[param] = value
  144. return data[param]
  145. def _parse_simple_parameter(line, data, cast_to=str):
  146. return _parse_and_set_simple_parameter_raw_value(line, data, cast_to, True)
  147. def string_to_lines(string):
  148. return string.strip().replace('\r\n', '\n').split('\n')
  149. def remove_quotes_parser(*attrs):
  150. return dict(zip(attrs, itertools.repeat(remove_quotes)))
  151. def remove_quotes(string):
  152. '''
  153. Remove quotes from string.
  154. Ex.:
  155. "foo" -> foo
  156. 'foo' -> foo
  157. 'foo -> 'foo
  158. '''
  159. quotes = ('"', "'")
  160. if string and string[0] in quotes and string[-1] in quotes:
  161. return string[1:-1]
  162. return string
  163. def normalize_attribute(attribute):
  164. return attribute.replace('-', '_').lower().strip()
  165. def is_url(uri):
  166. return re.match(r'https?://', uri) is not None