aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJordan <me@jordan.im>2020-05-28 12:44:51 -0700
committerJordan <me@jordan.im>2020-05-28 12:44:51 -0700
commitecfa70f53576ec70ede543f5dfd395bd4951d1df (patch)
treede2e130b0d76ac1e8dbc3657063d43a0c59c4011
parentf7a2fdfb809cd5de237fd85610c0b6a4f102ae2e (diff)
downloadroka-ecfa70f53576ec70ede543f5dfd395bd4951d1df.tar.gz
roka-ecfa70f53576ec70ede543f5dfd395bd4951d1df.zip
cleanup, reload cache between requests (TODO: use getmtime())
-rwxr-xr-xrebuild.py112
-rwxr-xr-xrun.py53
2 files changed, 92 insertions, 73 deletions
diff --git a/rebuild.py b/rebuild.py
index 903d8ec..45a52e1 100755
--- a/rebuild.py
+++ b/rebuild.py
@@ -19,62 +19,60 @@ def get_books(root_path, cache=None):
raise ValueError('root path does not exist: %s' % root_path)
# '/home/user/audiobooks/book': d815c7a3cc11f08558b4d91ca93de023
- cached = {}
+ existing_books = {}
if cache:
for k, _ in cache.items():
- cached[cache[k]['path']] = k
+ existing_books[cache[k]['path']] = k
- books = dict()
book_dirs = list()
for root, dirs, _ in os.walk(root_path):
for d in dirs:
book_dirs.append(os.path.join(root, d))
+ books = dict()
for book_path in book_dirs:
- print('[+] processing: %s' % book_path)
-
- # if already cached, populate _books with existing k/v
- if book_path in cached:
- _hash = cached[book_path]
+ # if already cached, populate books with existing k/v
+ if book_path in existing_books:
+ _hash = existing_books[book_path]
books[_hash] = cache[_hash]
continue
-
book = is_book(book_path)
- if book: books[book[0]] = book[1]
+ if book:
+ books[book[0]] = book[1]
return books
def is_book(book_path):
- # initial set of attributes to be populated
+ # book attributes to be populated
book = {
- 'duration': 0,
- 'path': book_path,
- 'files': dict(),
- 'size_bytes': 0,
- 'size_str': None,
+ 'author': None,
+ 'duration': 0,
+ 'duration_str': None,
+ 'files': dict(),
+ 'path': book_path,
+ 'size_bytes': 0,
+ 'size_str': None,
+ 'title': None
}
# hash of each file in directory w/ MP3 extension
folder_hash = hashlib.md5()
- is_book = False
# a book_path is only a book if it contains at least one MP3
+ is_book = False
for f in os.listdir(book_path):
file_path = os.path.join(book_path, f)
-
- # must be MP3 file, ignore anything else
if not os.path.isfile(file_path) or not f.endswith('.mp3'):
continue
-
- # skip if no duration attribute (required)
tag = TinyTag.get(file_path)
if not tag.duration:
continue
# previous conditions met, we're a book! :D
is_book = True
+ print('[+] processing: %s' % book_path)
- # update folder hash with MD5 of current file
+ # update collective hash of folder with MD5 of current file
BLOCK = 1024
file_hash = hashlib.md5()
with open(file_path, 'rb') as f:
@@ -85,46 +83,42 @@ def is_book(book_path):
folder_hash.update(data)
file_hash.update(data)
- # populate per-file and book attribute
- mp3 = dict()
- mp3['path'] = file_path
+        # per-MP3 attributes, some values are populated conditionally
+ mp3 = {
+ 'album': None,
+ 'author': None,
+ 'duration': tag.duration,
+ 'duration_str': None,
+ 'filename': os.path.split(file_path)[1],
+ 'path': file_path,
+ 'size_bytes': None,
+ 'title': None,
+ 'track': None
+ }
+
+ mp3['album'] = validate(tag.album, os.path.split(book_path)[1])
+ mp3['author'] = validate(tag.artist, 'Unknown')
mp3['duration'] = tag.duration
- mp3['filename'] = os.path.split(file_path)[1]
-
- # attribute values must be populated and non-space
- if tag.title and not tag.title.isspace():
- mp3['title'] = tag.title
- else:
- mp3['title'] = os.path.split(file_path)[1]
-
- # we overwrite existing book title/author in assuming MP3 tags are
- # consistent between MP3s, perhaps we shouldn't
- if tag.album and not tag.album.isspace():
- mp3['album'] = tag.album
- book['title'] = tag.album
- else:
- mp3['album'] = os.path.split(book_path)[1]
- book['title'] = os.path.split(book_path)[1]
-
- if tag.artist and not tag.artist.isspace():
- mp3['author'] = tag.artist
- book['author'] = tag.artist
- else:
- mp3['author'] = 'Unknown'
- book['author'] = 'Unknown'
- mp3['duration'] = tag.duration
+ # 1 day, 10:59:58
+ duration_str = str(timedelta(seconds=mp3['duration']))
+ mp3['duration_str'] = duration_str.split('.')[0]
+
+ mp3['title'] = validate(tag.title, os.path.split(file_path)[1])
mp3['track'] = tag.track
mp3['size_bytes'] = tag.filesize
- duration_str = str(timedelta(seconds=mp3['duration']))
- mp3['duration_str'] = duration_str.split('.')[0]
+ # we assume author and album attributes are unchanged between MP3s
+ book['author'] = mp3['author']
+ book['title'] = mp3['album']
- # increment book total size/duration, store MP3
+ # increment book total size/duration
book['duration'] += tag.duration
- book['files'][file_hash.hexdigest()] = mp3
book['size_bytes'] += tag.filesize
+ # hexdigest: MP3 dict
+ book['files'][file_hash.hexdigest()] = mp3
+
# if we're a book, store formatted book size and duration
if is_book:
folder_hash = folder_hash.hexdigest()
@@ -164,6 +158,18 @@ def read_cache(json_path):
return books
+def validate(v, b):
+ '''
+    Returns :v: if v is populated and non-space, otherwise :b:
+
+ :v: preferred value
+ :b: backup value
+ '''
+ if v and not v.isspace():
+ return v
+ else:
+ return b
+
if __name__ == '__main__':
ABS_PATH = os.path.dirname(os.path.abspath(__file__))
CACHE_PATH = os.path.join(ABS_PATH, 'cache')
diff --git a/run.py b/run.py
index d73545b..90e61c0 100755
--- a/run.py
+++ b/run.py
@@ -10,6 +10,7 @@ from collections import OrderedDict
from operator import getitem
from datetime import date, timedelta
from flask import Flask, request, Response, render_template, send_file
+from xml.dom import minidom
abs_path = os.path.dirname(os.path.abspath(__file__))
app = Flask(__name__)
@@ -17,20 +18,24 @@ app.config.from_pyfile(os.path.join(abs_path, 'app.cfg'))
cache_path = os.path.join(abs_path, 'cache')
json_path = os.path.join(cache_path, 'audiobooks.json')
-# populate books object from JSON cache sorted by title
-if os.path.exists(json_path):
- try:
- with open(json_path, 'r') as cache:
- books = json.load(cache)
- books = OrderedDict(sorted(
- books.items(),
- key=lambda x: x[1]['title']
- ))
-
- except Exception:
- raise ValueError('error loading JSON cache')
-else:
- raise ValueError('cache not found, run rebuild.py')
+def read_cache(json_path):
+ '''
+ Populate books dict from cache at :json_path:
+ '''
+ if os.path.exists(json_path):
+ try:
+ with open(json_path, 'r') as cache:
+ books = json.load(cache)
+ books = OrderedDict(sorted(
+ books.items(),
+ key=lambda x: x[1]['title']
+ ))
+ except Exception:
+ raise ValueError('error loading JSON cache')
+ else:
+ raise ValueError('cache not found, run rebuild.py')
+
+ return books
def check_auth(username, password):
'''
@@ -58,7 +63,8 @@ def escape(s):
(0x7F, 0x84),
(0x86, 0x9F),
(0xFDD0, 0xFDDF),
- (0xFFFE, 0xFFFF)
+ (0xFFFE, 0xFFFF),
+ (0xA9, 0xA9)
]
if sys.maxunicode >= 0x10000:
@@ -81,6 +87,14 @@ def escape(s):
return s
+def prettify(elem):
+ '''
+ Make our RSS feed picturesque :)
+ '''
+ xml_str = ET.tostring(elem, encoding='utf8', method='xml')
+ xml_dom = minidom.parseString(xml_str)
+ return xml_dom.toprettyxml(indent=' ')
+
@app.route('/')
def list_books():
'''
@@ -91,6 +105,8 @@ def list_books():
Listing of audiobooks returned if no params provided
'''
+ books = read_cache(json_path)
+
a = request.args.get('a') # audiobook hash
f = request.args.get('f') # file hash
@@ -175,7 +191,7 @@ def list_books():
duration = ET.SubElement(item, 'itunes:duration')
duration.text = str(books[a]['files'][f]['duration_str'])
- guid = ET.SubElement(item, 'guid')
+ guid = ET.SubElement(item, 'guid', isPermaLink='false')
guid.text = f # file hash
# pubDate descending, day decremented w/ each iteration
@@ -188,10 +204,7 @@ def list_books():
}
ET.SubElement(item, 'enclosure', enc_attr)
- return Response(
- ET.tostring(rss, encoding='utf8', method='xml'),
- mimetype='text/xml'
- )
+ return Response(prettify(rss), mimetype='text/xml')
else:
auth = request.authorization
if not auth or not check_auth(auth.username, auth.password):