12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879 |
- #!/usr/bin/env python3
-
- """
- Usage:
- ttaf2srt subtitlefilettafinput.xml > output.srt
-
- From https://github.com/haraldF/ttaf2srt
- edited for 'SWR - Pälzisch im Abgang' subtitles
- www.swr.de/paelzisch-im-abgang/
- and 'Tatort' subtitles.
- """
- """
- From https://github.com/haraldF/ttaf2srt
-
- ttaf2srt
-
- Simple python script to convert ttaf subtitles to srt subtitles.
- Note - only tested on German 'Tatort' subtitles.
- Note2 - if using vlc or mplayer, make sure to specify 'utf8' as encoding, otherwise, special characters will not render correctly.
- """
- import sys
- from xml.dom import minidom
-
- def dumpText(item):
- for child in item.childNodes:
- if child.nodeType == child.TEXT_NODE:
- print(child.nodeValue, end="")
- elif child.nodeType == child.ELEMENT_NODE:
- if child.nodeName == "tt:br":
- print()
- elif child.nodeName == "tt:span":
- print("<font color=\"" + styles[child.getAttribute("style")] + "\">", end="")
- dumpText(child)
- print("</font>", end="")
- else:
- print("Unknown Node: " + child.nodeName, file=sys.stderr)
-
- def dumpHeader(item, subCount):
- print(subCount)
- begin = item.getAttribute("begin")
- end = item.getAttribute("end")
- # ### this is a silly hack - for some reason, my ttaf files all start at hour 10? Resetting
- # the hour makes it work again
- begin = '0' + begin[1:]
- end = '0' + end[1:]
- print(begin + " --> " + end)
-
- def parseStyles(styles):
- result = {}
- for style in styles:
- result[style.getAttribute('xml:id')] = style.getAttribute('tts:color')
- return result
-
- def ttaf2srt(fname):
- with open(fname) as f:
- xmldoc = f.read().replace('\n', ' ').replace('\r', '')
- xmldoc = minidom.parseString(xmldoc)
-
- header = xmldoc.getElementsByTagName('tt:head')
- if len(header):
- styling = header[0].getElementsByTagName('tt:styling')
- if len(styling):
- styles = parseStyles(styling[0].getElementsByTagName('tt:style'))
-
- body = xmldoc.getElementsByTagName('tt:body')
- itemlist = body[0].getElementsByTagName('tt:p')
- subCount = 0
-
- for item in itemlist:
- if item.hasAttribute('xml:id'):
- dumpHeader(item, subCount)
- subCount += 1
- color = styles[item.getAttribute("style")]
- if color:
- print("<font color=\"" + color + "\">", end="")
- dumpText(item)
- if color:
- print("</font>", end="")
- print("\n")
|