From f7055594aac17b2d5fd4e936a5924ece3e68cd63 Mon Sep 17 00:00:00 2001 From: Jordan Date: Wed, 23 Sep 2020 21:43:11 -0700 Subject: update name to roka, support --scan, cleanup --- README | 11 +-- lib/books.py | 212 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/util.py | 2 +- rebuild.py | 212 ------------------------------------------------------ roka.py | 67 +++++++++++++++++ run.py | 53 -------------- uwsgi.ini.example | 2 +- 7 files changed, 287 insertions(+), 272 deletions(-) create mode 100644 lib/books.py delete mode 100755 rebuild.py create mode 100755 roka.py delete mode 100755 run.py diff --git a/README b/README index cf9254b..fac2e80 100644 --- a/README +++ b/README @@ -1,7 +1,7 @@ -audiobook-rss: stream directory of audiobooks to podcasting apps via RSS +roka: stream directory of audiobooks to podcasting apps via RSS -demo (no audio): https://demo.jordan.im/audiobook-rss/ -iOS podcast app: https://demo.jordan.im/audiobook-rss/apple-podcasts.png +demo (no audio): https://demo.jordan.im/roka/ +iOS podcast app: https://demo.jordan.im/roka/apple-podcasts.png installation ------------ @@ -12,9 +12,10 @@ b) install python dependencies flask and uwsgi $ pip install --user flask uwsgi -c) execute rebuild.py to populate audiobook JSON cache +c) run roka.py with --scan to populate audiobook JSON cache (can be re-run to + update cache upon download of new books) - $ ./rebuild.py + $ ./roka.py --scan d) execute uwsgi.sh to start the server diff --git a/lib/books.py b/lib/books.py new file mode 100644 index 0000000..86ab4b6 --- /dev/null +++ b/lib/books.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python3 + +import datetime +import hashlib +import json +import math +import os +from datetime import timedelta +from flask import Flask +from lib.tinytag import TinyTag + +ABS_PATH = os.path.dirname(os.path.abspath(__file__)) +CACHE_PATH = os.path.join(ABS_PATH, '../', 'cache') +JSON_PATH = os.path.join(CACHE_PATH, 'audiobooks.json') + +# use Flask's config parser, configparser would be hacky +APP = Flask(__name__) +APP.config.from_pyfile(os.path.join(ABS_PATH, '../', 'app.cfg')) + +class Books: + def __init__(self): + ''' + Book-related handlers (r/w cache) and track discovery + ''' + if os.path.exists(JSON_PATH): + self._cache = self._read_cache() + else: + self._cache = {} + + def _get_dirs(self, path): + ''' + Return list of directories recursively discovered in :path: + ''' + ret = list() + for root, dirs, _ in os.walk(path): + for d in dirs: + ret.append(os.path.join(root, d)) + + return ret + + def _get_path_hash_dict(self): + ''' + Return dict of book paths and their hash from cache, used to check paths + against existing cache + + '/home/user/audiobooks/book': d815c7a3cc11f08558b4d91ca93de023 + ''' + ret = {} + for k, _ in self._cache.items(): + path = self._cache[k]['path'] + if os.path.exists(path): + ret[path] = k + + return ret + + def write_cache(self): + ''' + Dump contents of :books: to :json_path: + ''' + if not os.path.exists(CACHE_PATH): + os.mkdir(CACHE_PATH) + with open(JSON_PATH, 'w') as cache: + json.dump(self.books, cache, indent=4) + + def _read_cache(self): + ''' + Return dict of existing cache + ''' + with open(JSON_PATH, 'r') as cache: + data = json.load(cache) + + return data + + def _validate(self, v, b): + ''' + Returns :v: if :v: and v.isspace(), otherwise :b: + ''' + if v and not v.isspace(): + return v + + return b + + def _log(self, msg): + ''' + Prints :msg: with formatted ISO-8601 date + ''' + now = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S") + print('%s %s' % (now, msg)) + + def scan_books(self): + ''' + Discover audiobooks under :root_path: and populate books object + + :cache: existing JSON cache, used to determine which content is new + (existing content is not re-hashed) + ''' + ex = self._get_path_hash_dict() + dirs = self._get_dirs(APP.config['ROOT_PATH']) + + books = dict() + for path in dirs: + if path in ex: + _hash = ex[path] + books[_hash] = self._cache[_hash] + continue + book = self._check_dir(path) + if book: + books[book[0]] = book[1] + + self.books = books + + def _check_dir(self, path): + ''' + Determine if :path: contains (supported) audio files; return populated + book dict or None + ''' + ext = ['mp3'] # m4b seems to be unsupported by Apple + is_book = False + + # book attributes to be populated + book = { + 'author': None, + 'duration': 0, + 'duration_str': None, + 'files': dict(), + 'path': path, + 'size_bytes': 0, + 'size_str': None, + 'title': None + } + + # hash of each supported track in directory path + folder_hash = hashlib.md5() + + for f in sorted(os.listdir(path)): + # must be a file and have a supported extension + file_path = os.path.join(path, f) + if not os.path.isfile(file_path) or not f.split('.')[-1] in ext: + continue + + # tracks at minimum must have a duration tag (required by podcast + # apps) + tag = TinyTag.get(file_path) + if not tag.duration: + continue + + # previous conditions met, we've found at least one track + is_book = True + self._log(f) + + # hash track (used as a key) and update folder hash + file_hash = hashlib.md5() + with open(file_path, 'rb') as f: + while True: + data = f.read(1024) + if not data: + break + folder_hash.update(data) + file_hash.update(data) + + # 1 day, 10:59:58 + duration_str = str(timedelta(seconds=tag.duration)) + + # per-file atributes, some values are populated conditionally + track = { + 'album': self._validate(tag.album, os.path.split(path)[1]), + 'author': self._validate(tag.artist, 'Unknown'), + 'duration': tag.duration, + 'duration_str': duration_str.split('.')[0], + 'filename': os.path.split(file_path)[1], + 'path': file_path, + 'size_bytes': tag.filesize, + 'title': self._validate(tag.title, os.path.split(file_path)[1]), + 'track': tag.track + } + + # we assume author and album attributes are unchanged between tracks + book['author'] = track['author'] + book['title'] = track['album'] + + # increment book total size/duration + book['duration'] += tag.duration + book['size_bytes'] += tag.filesize + + # hexdigest: track dict + book['files'][file_hash.hexdigest()] = track + + # final book processing routine; update total size, duration + if is_book: + folder_hash = folder_hash.hexdigest() + total_size = book['size_bytes'] + + # bytes -> readable file size, used in audiobook index + try: + _i = int(math.floor(math.log(total_size, 1024))) + _p = math.pow(1024, _i) + _s = round(total_size / _p, 2) + except: + _i = 1 + _s = 0 + + # e.g. 1.48 GB + SIZES = ('B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB') + book['size_str'] = '%s %s' % (str(_s), SIZES[_i]) + + # e.g. 2 days, 5:47:47 + duration_str = str(timedelta(seconds=book['duration'])) + book['duration_str'] = duration_str.split('.')[0] + + return (folder_hash, book) + + return None diff --git a/lib/util.py b/lib/util.py index fbddb2e..a2f982d 100644 --- a/lib/util.py +++ b/lib/util.py @@ -23,7 +23,7 @@ def read_cache(json_path): except Exception: raise ValueError('error loading JSON cache') else: - raise ValueError('cache not found, run rebuild.py') + raise ValueError('cache not found, run ./roka.py --scan') return books diff --git a/rebuild.py b/rebuild.py deleted file mode 100755 index d9144ca..0000000 --- a/rebuild.py +++ /dev/null @@ -1,212 +0,0 @@ -#!/usr/bin/env python3 - -import datetime -import hashlib -import json -import math -import os -from datetime import timedelta -from flask import Flask -from lib.tinytag import TinyTag - -ABS_PATH = os.path.dirname(os.path.abspath(__file__)) -CACHE_PATH = os.path.join(ABS_PATH, 'cache') -JSON_PATH = os.path.join(CACHE_PATH, 'audiobooks.json') - -# use Flask's config parser, configparser would be hacky -APP = Flask(__name__) -APP.config.from_pyfile(os.path.join(ABS_PATH, 'app.cfg')) - -class Books: - def __init__(self): - ''' - Book-related handlers (r/w cache) and track discovery - ''' - if os.path.exists(JSON_PATH): - self._cache = self._read_cache() - else: - self._cache = {} - - self.books = self._get_books() - self._write_cache() - - def _get_dirs(self, path): - ''' - Return list of directories recursively discovered in :path: - ''' - ret = list() - for root, dirs, _ in os.walk(path): - for d in dirs: - ret.append(os.path.join(root, d)) - return ret - - def _get_path_hash_dict(self): - ''' - Return dict of book paths and their hash from cache, used to check paths - against existing cache - - '/home/user/audiobooks/book': d815c7a3cc11f08558b4d91ca93de023 - ''' - ret = {} - for k, _ in self._cache.items(): - path = self._cache[k]['path'] - if os.path.exists(path): - ret[path] = k - return ret - - def _write_cache(self): - ''' - Dump contents of :books: to :json_path: - ''' - if not os.path.exists(CACHE_PATH): - os.mkdir(CACHE_PATH) - with open(JSON_PATH, 'w') as cache: - json.dump(self.books, cache, indent=4) - - def _read_cache(self): - ''' - Return dict of existing cache - ''' - with open(JSON_PATH, 'r') as cache: - data = json.load(cache) - return data - - def _validate(self, v, b): - ''' - Returns :v: if :v: and v.isspace(), otherwise :b: - ''' - if v and not v.isspace(): - return v - return b - - def _log(self, msg): - ''' - Prints :msg: with formatted ISO-8601 date - ''' - now = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S") - print('%s %s' % (now, msg)) - - def _get_books(self): - ''' - Discover audiobooks under :root_path: and populate books object - - :cache: existing JSON cache, used to determine which content is new - (existing content is not re-hashed) - ''' - ex = self._get_path_hash_dict() - dirs = self._get_dirs(APP.config['ROOT_PATH']) - - books = dict() - for path in dirs: - if path in ex: - _hash = ex[path] - books[_hash] = self._cache[_hash] - continue - book = self._check_dir(path) - if book: - books[book[0]] = book[1] - return books - - def _check_dir(self, path): - ''' - Determine if :path: contains (supported) audio files; return populated - book dict or None - ''' - ext = ['mp3'] # m4b seems to be unsupported by Apple - is_book = False - - # book attributes to be populated - book = { - 'author': None, - 'duration': 0, - 'duration_str': None, - 'files': dict(), - 'path': path, - 'size_bytes': 0, - 'size_str': None, - 'title': None - } - - # hash of each supported track in directory path - folder_hash = hashlib.md5() - - for f in sorted(os.listdir(path)): - # must be a file and have a supported extension - file_path = os.path.join(path, f) - if not os.path.isfile(file_path) or not f.split('.')[-1] in ext: - continue - - # tracks at minimum must have a duration tag (required by podcast - # apps) - tag = TinyTag.get(file_path) - if not tag.duration: - continue - - # previous conditions met, we've found at least one track - is_book = True - self._log(f) - - # hash track (used as a key) and update folder hash - file_hash = hashlib.md5() - with open(file_path, 'rb') as f: - while True: - data = f.read(1024) - if not data: - break - folder_hash.update(data) - file_hash.update(data) - - # 1 day, 10:59:58 - duration_str = str(timedelta(seconds=tag.duration)) - - # per-file atributes, some values are populated conditionally - track = { - 'album': self._validate(tag.album, os.path.split(path)[1]), - 'author': self._validate(tag.artist, 'Unknown'), - 'duration': tag.duration, - 'duration_str': duration_str.split('.')[0], - 'filename': os.path.split(file_path)[1], - 'path': file_path, - 'size_bytes': tag.filesize, - 'title': self._validate(tag.title, os.path.split(file_path)[1]), - 'track': tag.track - } - - # we assume author and album attributes are unchanged between tracks - book['author'] = track['author'] - book['title'] = track['album'] - - # increment book total size/duration - book['duration'] += tag.duration - book['size_bytes'] += tag.filesize - - # hexdigest: track dict - book['files'][file_hash.hexdigest()] = track - - # final book processing routine; update total size, duration - if is_book: - folder_hash = folder_hash.hexdigest() - total_size = book['size_bytes'] - - # bytes -> readable file size, used in audiobook index - try: - _i = int(math.floor(math.log(total_size, 1024))) - _p = math.pow(1024, _i) - _s = round(total_size / _p, 2) - except: - _i = 1 - _s = 0 - - # e.g. 1.48 GB - SIZES = ('B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB') - book['size_str'] = '%s %s' % (str(_s), SIZES[_i]) - - # e.g. 2 days, 5:47:47 - duration_str = str(timedelta(seconds=book['duration'])) - book['duration_str'] = duration_str.split('.')[0] - return (folder_hash, book) - - return None - -if __name__ == '__main__': - books = Books() diff --git a/roka.py b/roka.py new file mode 100755 index 0000000..4c4da57 --- /dev/null +++ b/roka.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 + +import argparse +import os +from flask import Flask, request, Response, render_template, send_file +from lib.books import Books +from lib.util import check_auth, escape, generate_rss, read_cache + +abs_path = os.path.dirname(os.path.abspath(__file__)) +app = Flask(__name__) +app.config.from_pyfile(os.path.join(abs_path, 'app.cfg')) +cache_path = os.path.join(abs_path, 'cache') +json_path = os.path.join(cache_path, 'audiobooks.json') + +@app.route('/') +def list_books(): + ''' + Book listing and audiobook RSS/file download + + :a: audiobook hash; if provided without :f: (file) return RSS + :f: file hash; requires associated audiobook (:a:) to download + + Listing of audiobooks returned if no params provided + ''' + books = read_cache(json_path) + + a = request.args.get('a') # audiobook hash + f = request.args.get('f') # file hash + + # audiobook and file parameters provided: serve up file + if a and f: + if not books.get(a) or not books[a]['files'].get(f): + return 'book or file not found', 404 + + f_path = books[a]['files'][f]['path'] + return send_file(f_path, conditional=True) + + # serve up audiobook RSS feed; only audiobook hash provided + elif a: + if not books.get(a): + return 'book not found', 404 + + rss = generate_rss(request, books) + return Response(rss, mimetype='text/xml') + + else: + auth = request.authorization + if not auth or not check_auth(app, auth.username, auth.password): + form = {'WWW-Authenticate': 'Basic realm="o/"'} + return Response('unauthorized', 401, form) + + return render_template('index.html', books=books) + +if __name__ == '__main__': + desc = 'roka: listen to audiobooks with podcast apps via RSS' + parser = argparse.ArgumentParser(description=desc) + parser.add_argument('--scan', dest='scan', action='store_true', + help='scan audiobooks directory for new books', + required=False) + args = parser.parse_args() + + if args.scan: + books = Books() + books.scan_books() + books.write_cache() + else: + app.run(host='127.0.0.1', port='8085', threaded=True) diff --git a/run.py b/run.py deleted file mode 100755 index e48f19f..0000000 --- a/run.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python3 - -import os -from flask import Flask, request, Response, render_template, send_file -from lib.util import check_auth, escape, generate_rss, read_cache - -abs_path = os.path.dirname(os.path.abspath(__file__)) -app = Flask(__name__) -app.config.from_pyfile(os.path.join(abs_path, 'app.cfg')) -cache_path = os.path.join(abs_path, 'cache') -json_path = os.path.join(cache_path, 'audiobooks.json') - -@app.route('/') -def list_books(): - ''' - Book listing and audiobook RSS/file download - - :a: audiobook hash; if provided without :f: (file) return RSS - :f: file hash; requires associated audiobook (:a:) to download - - Listing of audiobooks returned if no params provided - ''' - books = read_cache(json_path) - - a = request.args.get('a') # audiobook hash - f = request.args.get('f') # file hash - - # audiobook and file parameters provided: serve up file - if a and f: - if not books.get(a) or not books[a]['files'].get(f): - return 'book or file not found', 404 - - f_path = books[a]['files'][f]['path'] - return send_file(f_path, conditional=True) - - # serve up audiobook RSS feed; only audiobook hash provided - elif a: - if not books.get(a): - return 'book not found', 404 - - rss = generate_rss(request, books) - return Response(rss, mimetype='text/xml') - - else: - auth = request.authorization - if not auth or not check_auth(app, auth.username, auth.password): - form = {'WWW-Authenticate': 'Basic realm="o/"'} - return Response('unauthorized', 401, form) - - return render_template('index.html', books=books) - -if __name__ == '__main__': - app.run(host='127.0.0.1', port='8085', threaded=True) diff --git a/uwsgi.ini.example b/uwsgi.ini.example index 49dee9c..2a2093f 100644 --- a/uwsgi.ini.example +++ b/uwsgi.ini.example @@ -2,6 +2,6 @@ http = 127.0.0.1:8085 processes = 2 threads = 4 -wsgi-file = run.py +wsgi-file = roka.py callable = app master = true -- cgit v1.2.3-54-g00ecf