aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJordan <me@jordan.im>2020-04-19 15:05:32 -0700
committerJordan <me@jordan.im>2020-04-19 15:05:32 -0700
commitc58cc26bab83c6c6a1192c637edfac6a7ede9276 (patch)
treef4c17f799b5ba9facf40d144d6abe8ffed8a40c6
parenta06816b9c33d0d7d0ef35211c16b5592e0801433 (diff)
downloadroka-c58cc26bab83c6c6a1192c637edfac6a7ede9276.tar.gz
roka-c58cc26bab83c6c6a1192c637edfac6a7ede9276.zip
remove XML-invalid unicode chars, escape book title
-rwxr-xr-xrun.py32
1 files changed, 31 insertions, 1 deletions
diff --git a/run.py b/run.py
index 9978f83..e8b2c56 100755
--- a/run.py
+++ b/run.py
@@ -4,6 +4,7 @@ import json
import mimetypes
import os
import re
+import sys
import xml.etree.cElementTree as ET
from collections import OrderedDict
from operator import getitem
@@ -49,6 +50,35 @@ def escape(s):
s = s.replace('>', '&gt;')
s = s.replace('\'', '&quot;')
+ # code point ranges treated as illegal in XML, stripped from s below (source: https://stackoverflow.com/a/22273639)
+ illegal_unichrs = [
+ (0x00, 0x08),
+ (0x0B, 0x0C),
+ (0x0E, 0x1F),
+ (0x7F, 0x84),
+ (0x86, 0x9F),
+ (0xFDD0, 0xFDDF),
+ (0xFFFE, 0xFFFF)
+ ]
+
+ if sys.maxunicode >= 0x10000:
+ illegal_unichrs.extend(
+ [(0x1FFFE, 0x1FFFF), (0x2FFFE, 0x2FFFF),
+ (0x3FFFE, 0x3FFFF), (0x4FFFE, 0x4FFFF),
+ (0x5FFFE, 0x5FFFF), (0x6FFFE, 0x6FFFF),
+ (0x7FFFE, 0x7FFFF), (0x8FFFE, 0x8FFFF),
+ (0x9FFFE, 0x9FFFF), (0xAFFFE, 0xAFFFF),
+ (0xBFFFE, 0xBFFFF), (0xCFFFE, 0xCFFFF),
+ (0xDFFFE, 0xDFFFF), (0xEFFFE, 0xEFFFF),
+ (0xFFFFE, 0xFFFFF), (0x10FFFE, 0x10FFFF)]
+ )
+
+ illegal_ranges = ["%s-%s" % (chr(low), chr(high))
+ for (low, high) in illegal_unichrs]
+ illegal_xml_chars_RE = re.compile(u'[%s]' % u''.join(illegal_ranges))
+
+ s = illegal_xml_chars_RE.sub('', s)
+
return s
@app.route('/')
@@ -95,7 +125,7 @@ def list_books():
channel = ET.SubElement(rss, 'channel')
book_title = ET.SubElement(channel, 'title')
- book_title.text = books[a]['title']
+ book_title.text = escape(books[a]['title'])
# sort by track number, alphanumerically if track is absent
track_list = [] # account for duplicates