#!/usr/bin/env python3
"""
Usage:
ttaf2srt subtitlefilettafinput.xml > output.srt
From https://github.com/haraldF/ttaf2srt
edited for 'SWR - Pälzisch im Abgang' subtitles
www.swr.de/paelzisch-im-abgang/
and 'Tatort' subtitles.
"""
"""
From https://github.com/haraldF/ttaf2srt
ttaf2srt
Simple python script to convert ttaf subtitles to srt subtitles.
Note - only tested on German 'Tatort' subtitles.
Note2 - if using vlc or mplayer, make sure to specify 'utf8' as encoding, otherwise, special characters will not render correctly.
"""
import sys
from xml.dom import minidom
def dumpText(item):
    """Print the text content of *item* to stdout, descending into spans.

    Text nodes are written verbatim without a trailing newline, ``tt:br``
    elements become a line break and ``tt:span`` elements are processed
    recursively.  Any other element is reported on stderr and its content is
    not printed.  Nodes that are neither text nor elements are ignored.
    """
    for node in item.childNodes:
        kind = node.nodeType
        if kind == node.TEXT_NODE:
            print(node.nodeValue, end="")
            continue
        if kind != node.ELEMENT_NODE:
            # Neither text nor element: nothing to emit.
            continue

        tag = node.nodeName
        if tag == "tt:br":
            print()
        elif tag == "tt:span":
            # The span is wrapped in empty strings, so only its content shows up.
            print("", end="")
            dumpText(node)
            print("", end="")
        else:
            print("Unknown Node: " + tag, file=sys.stderr)
def dumpHeader(item, subCount):
    """Print the SRT cue header for one subtitle: counter line, then timing line.

    Args:
        item: DOM element whose ``begin`` and ``end`` attributes hold the cue's
            start and end timestamps.
        subCount: Value printed on the counter line.

    The timing line is written as ``<begin> --> <end>`` with the fraction
    separator converted from ``.`` to ``,``, which is what SRT timing lines use
    (``hh:mm:ss,mmm``).
    """
    print(subCount)
    stamps = []
    for attr in ("begin", "end"):
        stamp = item.getAttribute(attr)
        # HACK: the ttaf files this was written for all start at hour 10, so the
        # first character of the timestamp is overwritten with '0' to bring the
        # cues back to hour 0.
        stamp = "0" + stamp[1:]
        # TTAF clock times use '.' before the fraction; SRT expects ','.
        stamps.append(stamp.replace(".", ","))
    print(" --> ".join(stamps))
def parseStyles(styles):
    """Build a lookup from style id to color.

    Args:
        styles: Iterable of DOM elements carrying ``xml:id`` and ``tts:color``
            attributes.

    Returns:
        A dict mapping each element's ``xml:id`` value to its ``tts:color``
        value, as returned by ``getAttribute``.  When an id occurs more than
        once, the last element wins.
    """
    return {
        node.getAttribute('xml:id'): node.getAttribute('tts:color')
        for node in styles
    }
def ttaf2srt(fname):
    """Convert the TTAF subtitle file *fname* to SRT and print it to stdout.

    Every ``tt:p`` element inside the first ``tt:body`` that has an ``xml:id``
    attribute becomes one cue: a header written by ``dumpHeader``, the text
    written by ``dumpText`` and a blank separator line.  Paragraphs without an
    ``xml:id`` are skipped.

    Args:
        fname: Path of the TTAF XML file; it is read as UTF-8.

    Raises:
        IndexError: If the document contains no ``tt:body`` element.
    """
    # Line breaks in the file are replaced by spaces (and carriage returns
    # dropped) before parsing, so text nodes reach the output without the
    # file's own newlines.
    with open(fname, encoding="utf-8") as f:
        raw = f.read().replace('\n', ' ').replace('\r', '')
    xmldoc = minidom.parseString(raw)

    # Start with an empty table so documents without tt:head / tt:styling
    # still convert instead of failing on an unassigned name later on.
    styles = {}
    header = xmldoc.getElementsByTagName('tt:head')
    if header:
        styling = header[0].getElementsByTagName('tt:styling')
        if styling:
            styles = parseStyles(styling[0].getElementsByTagName('tt:style'))

    body = xmldoc.getElementsByTagName('tt:body')
    itemlist = body[0].getElementsByTagName('tt:p')
    subCount = 0
    for item in itemlist:
        if not item.hasAttribute('xml:id'):
            continue
        dumpHeader(item, subCount)
        subCount += 1
        # A paragraph may carry no style, or one that was never declared;
        # treat both as "no color" rather than raising KeyError.
        color = styles.get(item.getAttribute("style"), "")
        # NOTE(review): the color wrappers below emit empty strings, so the
        # color currently has no effect on the output -- presumably the font
        # markup was removed at some point; confirm before relying on it.
        if color:
            print("", end="")
        dumpText(item)
        if color:
            print("", end="")
        # Ends the last text line and leaves one blank line between cues.
        print("\n")