From c58cc26bab83c6c6a1192c637edfac6a7ede9276 Mon Sep 17 00:00:00 2001 From: Jordan Date: Sun, 19 Apr 2020 15:05:32 -0700 Subject: remove XML-invalid unicode chars, escape book title --- run.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/run.py b/run.py index 9978f83..e8b2c56 100755 --- a/run.py +++ b/run.py @@ -4,6 +4,7 @@ import json import mimetypes import os import re +import sys import xml.etree.cElementTree as ET from collections import OrderedDict from operator import getitem @@ -49,6 +50,35 @@ def escape(s): s = s.replace('>', '>') s = s.replace('\'', '"') + # https://stackoverflow.com/a/22273639 + illegal_unichrs = [ + (0x00, 0x08), + (0x0B, 0x0C), + (0x0E, 0x1F), + (0x7F, 0x84), + (0x86, 0x9F), + (0xFDD0, 0xFDDF), + (0xFFFE, 0xFFFF) + ] + + if sys.maxunicode >= 0x10000: + illegal_unichrs.extend( + [(0x1FFFE, 0x1FFFF), (0x2FFFE, 0x2FFFF), + (0x3FFFE, 0x3FFFF), (0x4FFFE, 0x4FFFF), + (0x5FFFE, 0x5FFFF), (0x6FFFE, 0x6FFFF), + (0x7FFFE, 0x7FFFF), (0x8FFFE, 0x8FFFF), + (0x9FFFE, 0x9FFFF), (0xAFFFE, 0xAFFFF), + (0xBFFFE, 0xBFFFF), (0xCFFFE, 0xCFFFF), + (0xDFFFE, 0xDFFFF), (0xEFFFE, 0xEFFFF), + (0xFFFFE, 0xFFFFF), (0x10FFFE, 0x10FFFF)] + ) + + illegal_ranges = ["%s-%s" % (chr(low), chr(high)) + for (low, high) in illegal_unichrs] + illegal_xml_chars_RE = re.compile(u'[%s]' % u''.join(illegal_ranges)) + + s = illegal_xml_chars_RE.sub('', s) + return s @app.route('/') @@ -95,7 +125,7 @@ def list_books(): channel = ET.SubElement(rss, 'channel') book_title = ET.SubElement(channel, 'title') - book_title.text = books[a]['title'] + book_title.text = escape(books[a]['title']) # sort by track number, alphanumerically if track is absent track_list = [] # account for duplicates -- cgit v1.2.3-54-g00ecf