diff options
author | Jordan <me@jordan.im> | 2020-04-19 15:05:32 -0700 |
---|---|---|
committer | Jordan <me@jordan.im> | 2020-04-19 15:05:32 -0700 |
commit | c58cc26bab83c6c6a1192c637edfac6a7ede9276 (patch) | |
tree | f4c17f799b5ba9facf40d144d6abe8ffed8a40c6 | |
parent | a06816b9c33d0d7d0ef35211c16b5592e0801433 (diff) | |
download | roka-c58cc26bab83c6c6a1192c637edfac6a7ede9276.tar.gz roka-c58cc26bab83c6c6a1192c637edfac6a7ede9276.zip |
remove XML-invalid unicode chars, escape book title
-rwxr-xr-x | run.py | 32 |
1 file changed, 31 insertions, 1 deletion
@@ -4,6 +4,7 @@ import json import mimetypes import os import re +import sys import xml.etree.cElementTree as ET from collections import OrderedDict from operator import getitem @@ -49,6 +50,35 @@ def escape(s): s = s.replace('>', '&gt;') s = s.replace('\'', '&quot;') + # https://stackoverflow.com/a/22273639 + illegal_unichrs = [ + (0x00, 0x08), + (0x0B, 0x0C), + (0x0E, 0x1F), + (0x7F, 0x84), + (0x86, 0x9F), + (0xFDD0, 0xFDDF), + (0xFFFE, 0xFFFF) + ] + + if sys.maxunicode >= 0x10000: + illegal_unichrs.extend( + [(0x1FFFE, 0x1FFFF), (0x2FFFE, 0x2FFFF), + (0x3FFFE, 0x3FFFF), (0x4FFFE, 0x4FFFF), + (0x5FFFE, 0x5FFFF), (0x6FFFE, 0x6FFFF), + (0x7FFFE, 0x7FFFF), (0x8FFFE, 0x8FFFF), + (0x9FFFE, 0x9FFFF), (0xAFFFE, 0xAFFFF), + (0xBFFFE, 0xBFFFF), (0xCFFFE, 0xCFFFF), + (0xDFFFE, 0xDFFFF), (0xEFFFE, 0xEFFFF), + (0xFFFFE, 0xFFFFF), (0x10FFFE, 0x10FFFF)] + ) + + illegal_ranges = ["%s-%s" % (chr(low), chr(high)) + for (low, high) in illegal_unichrs] + illegal_xml_chars_RE = re.compile(u'[%s]' % u''.join(illegal_ranges)) + + s = illegal_xml_chars_RE.sub('', s) + return s @app.route('/') @@ -95,7 +125,7 @@ def list_books(): channel = ET.SubElement(rss, 'channel') book_title = ET.SubElement(channel, 'title') - book_title.text = books[a]['title'] + book_title.text = escape(books[a]['title']) # sort by track number, alphanumerically if track is absent track_list = [] # account for duplicates |