#!/usr/bin/env python3
"""
Usage: ttaf2srt subtitlefilettafinput.xml > output.srt

From https://github.com/haraldF/ttaf2srt
edited for 'SWR - Pälzisch im Abgang' subtitles
www.swr.de/paelzisch-im-abgang/ and 'Tatort' subtitles.

ttaf2srt

Simple python script to convert ttaf subtitles to srt subtitles.
Note - only tested on German 'Tatort' subtitles.
Note2 - if using vlc or mplayer, make sure to specify 'utf8' as encoding,
otherwise, special characters will not render correctly.
"""

import sys
from xml.dom import minidom


def dumpText(item):
    """Write the text content of *item* to stdout, without a final newline.

    Text nodes are printed verbatim, ``tt:br`` elements become a line break
    and ``tt:span`` elements are descended into recursively.  Any other
    element is reported on stderr and otherwise ignored.
    """
    for child in item.childNodes:
        if child.nodeType == child.TEXT_NODE:
            print(child.nodeValue, end="")
        elif child.nodeType == child.ELEMENT_NODE:
            if child.nodeName == "tt:br":
                print()
            elif child.nodeName == "tt:span":
                # NOTE(review): the wrappers around the span content are
                # empty strings, so a span contributes only its text.
                # Presumably opening/closing markup was intended here -
                # confirm against the upstream script.
                print("", end="")
                dumpText(child)
                print("", end="")
            else:
                print("Unknown Node: " + child.nodeName, file=sys.stderr)


def _srtTimestamp(stamp):
    """Return the ttaf timestamp *stamp* rewritten for an srt time line."""
    # This is a silly hack - for some reason, the ttaf files all start at
    # hour 10. Resetting the leading digit of the hour makes it work again.
    stamp = '0' + stamp[1:]
    # srt separates seconds and milliseconds with a comma, not a dot.
    return stamp.replace('.', ',')


def dumpHeader(item, subCount):
    """Print the srt cue header for *item*.

    The header consists of the cue number *subCount* on one line followed by
    the ``begin --> end`` time line built from the element's ``begin`` and
    ``end`` attributes.
    """
    print(subCount)
    begin = _srtTimestamp(item.getAttribute("begin"))
    end = _srtTimestamp(item.getAttribute("end"))
    print(begin + " --> " + end)


def parseStyles(styles):
    """Map the ``xml:id`` of each style element to its ``tts:color`` value.

    *styles* is an iterable of DOM elements.  ``getAttribute`` yields an
    empty string for a missing attribute, so a style without a colour maps
    to ``''``.
    """
    return {
        style.getAttribute('xml:id'): style.getAttribute('tts:color')
        for style in styles
    }


def ttaf2srt(fname):
    """Convert the ttaf subtitle file *fname* to srt and print it to stdout.

    Only ``tt:p`` elements of the first ``tt:body`` that carry an ``xml:id``
    attribute are converted; cues are numbered starting at 0.  A document
    without a ``tt:body`` produces a message on stderr and no output.
    """
    # Decode explicitly as UTF-8 rather than with the platform default, so
    # umlauts survive regardless of the locale the script runs under.
    with open(fname, encoding="utf-8") as f:
        xmldoc = f.read().replace('\n', ' ').replace('\r', '')
    xmldoc = minidom.parseString(xmldoc)

    # Start with no styles so documents lacking tt:head / tt:styling still
    # convert instead of failing on an unbound name.
    styles = {}
    header = xmldoc.getElementsByTagName('tt:head')
    if header:
        styling = header[0].getElementsByTagName('tt:styling')
        if styling:
            styles = parseStyles(styling[0].getElementsByTagName('tt:style'))

    body = xmldoc.getElementsByTagName('tt:body')
    if not body:
        print("No tt:body element found in " + fname, file=sys.stderr)
        return

    subCount = 0
    for item in body[0].getElementsByTagName('tt:p'):
        if not item.hasAttribute('xml:id'):
            continue
        dumpHeader(item, subCount)
        subCount += 1
        # A cue may reference no style, or one that was never declared.
        color = styles.get(item.getAttribute("style"))
        # NOTE(review): the wrappers printed for a coloured cue are empty
        # strings, so the colour currently has no effect on the output.
        # Presumably colour markup was intended here - confirm.
        if color:
            print("", end="")
        dumpText(item)
        if color:
            print("", end="")
        # Ends the text line and adds the blank line that separates cues.
        print("\n")