From f7055594aac17b2d5fd4e936a5924ece3e68cd63 Mon Sep 17 00:00:00 2001
From: Jordan <me@jordan.im>
Date: Wed, 23 Sep 2020 21:43:11 -0700
Subject: update name to roka, support --scan, cleanup

---
 README            |  11 +--
 lib/books.py      | 212 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/util.py       |   2 +-
 rebuild.py        | 212 ------------------------------------------------------
 roka.py           |  67 +++++++++++++++++
 run.py            |  53 --------------
 uwsgi.ini.example |   2 +-
 7 files changed, 287 insertions(+), 272 deletions(-)
 create mode 100644 lib/books.py
 delete mode 100755 rebuild.py
 create mode 100755 roka.py
 delete mode 100755 run.py

diff --git a/README b/README
index cf9254b..fac2e80 100644
--- a/README
+++ b/README
@@ -1,7 +1,7 @@
-audiobook-rss: stream directory of audiobooks to podcasting apps via RSS
+roka: stream directory of audiobooks to podcasting apps via RSS
 
-demo (no audio): https://demo.jordan.im/audiobook-rss/
-iOS podcast app: https://demo.jordan.im/audiobook-rss/apple-podcasts.png
+demo (no audio): https://demo.jordan.im/roka/
+iOS podcast app: https://demo.jordan.im/roka/apple-podcasts.png
 
 installation
 ------------
@@ -12,9 +12,10 @@ b) install python dependencies flask and uwsgi
 
         $ pip install --user flask uwsgi
 
-c) execute rebuild.py to populate audiobook JSON cache
+c) run roka.py with --scan to populate audiobook JSON cache (can be re-run to
+   update cache upon download of new books)
 
-        $ ./rebuild.py
+        $ ./roka.py --scan
 
 d) execute uwsgi.sh to start the server
 
diff --git a/lib/books.py b/lib/books.py
new file mode 100644
index 0000000..86ab4b6
--- /dev/null
+++ b/lib/books.py
@@ -0,0 +1,212 @@
+#!/usr/bin/env python3
+
+import datetime
+import hashlib
+import json
+import math
+import os
+from datetime import timedelta
+from flask import Flask
+from lib.tinytag import TinyTag
+
+ABS_PATH = os.path.dirname(os.path.abspath(__file__))
+CACHE_PATH = os.path.join(ABS_PATH, '../', 'cache')
+JSON_PATH = os.path.join(CACHE_PATH, 'audiobooks.json')
+
+# use Flask's config parser, configparser would be hacky
+APP = Flask(__name__)
+APP.config.from_pyfile(os.path.join(ABS_PATH, '../', 'app.cfg'))
+
+class Books:
+    def __init__(self):
+        '''
+        Book-related handlers (r/w cache) and track discovery
+        '''
+        if os.path.exists(JSON_PATH):
+            self._cache = self._read_cache()
+        else:
+            self._cache = {}
+
+    def _get_dirs(self, path):
+        '''
+        Return list of directories recursively discovered in :path:
+        '''
+        ret = list()
+        for root, dirs, _ in os.walk(path):
+            for d in dirs:
+                ret.append(os.path.join(root, d))
+
+        return ret
+
+    def _get_path_hash_dict(self):
+        '''
+        Return dict of book paths and their hash from cache, used to check paths
+        against existing cache
+
+        '/home/user/audiobooks/book': d815c7a3cc11f08558b4d91ca93de023
+        '''
+        ret = {}
+        for k, _ in self._cache.items():
+            path = self._cache[k]['path']
+            if os.path.exists(path):
+                ret[path] = k
+
+        return ret
+
+    def write_cache(self):
+        '''
+        Dump self.books as JSON to JSON_PATH, creating CACHE_PATH if missing
+        '''
+        if not os.path.exists(CACHE_PATH):
+            os.mkdir(CACHE_PATH)
+        with open(JSON_PATH, 'w') as cache:
+            json.dump(self.books, cache, indent=4)
+
+    def _read_cache(self):
+        '''
+        Return dict of existing cache
+        '''
+        with open(JSON_PATH, 'r') as cache:
+            data = json.load(cache)
+
+        return data
+
+    def _validate(self, v, b):
+        '''
+        Returns :v: if :v: is non-empty and not only whitespace, otherwise :b:
+        '''
+        if v and not v.isspace():
+            return v
+
+        return b
+
+    def _log(self, msg):
+        '''
+        Prints :msg: with formatted ISO-8601 date
+        '''
+        now = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
+        print('%s %s' % (now, msg))
+
+    def scan_books(self):
+        '''
+        Discover audiobooks under the configured ROOT_PATH and set self.books
+
+        The existing JSON cache (self._cache) determines which content is new;
+        directories already present in the cache are not re-hashed
+        '''
+        ex = self._get_path_hash_dict()
+        dirs = self._get_dirs(APP.config['ROOT_PATH'])
+
+        books = dict()
+        for path in dirs:
+            if path in ex:
+                _hash = ex[path]
+                books[_hash] = self._cache[_hash]
+                continue
+            book = self._check_dir(path)
+            if book:
+                books[book[0]] = book[1]
+
+        self.books = books
+
+    def _check_dir(self, path):
+        '''
+        Determine if :path: contains (supported) audio files; return populated
+        book dict or None
+        '''
+        ext = ['mp3'] # m4b seems to be unsupported by Apple
+        is_book = False
+
+        # book attributes to be populated
+        book = {
+            'author':       None,
+            'duration':     0,
+            'duration_str': None,
+            'files':        dict(),
+            'path':         path,
+            'size_bytes':   0,
+            'size_str':     None,
+            'title':        None
+        }
+
+        # hash of each supported track in directory path
+        folder_hash = hashlib.md5()
+
+        for f in sorted(os.listdir(path)):
+            # must be a file and have a supported extension
+            file_path = os.path.join(path, f)
+            if not os.path.isfile(file_path) or not f.split('.')[-1] in ext:
+                continue
+
+            # tracks at minimum must have a duration tag (required by podcast
+            # apps)
+            tag = TinyTag.get(file_path)
+            if not tag.duration:
+                continue
+
+            # previous conditions met, we've found at least one track
+            is_book = True
+            self._log(f)
+
+            # hash track (used as a key) and update folder hash
+            file_hash = hashlib.md5()
+            with open(file_path, 'rb') as f:
+                while True:
+                    data = f.read(1024)
+                    if not data:
+                        break
+                    folder_hash.update(data)
+                    file_hash.update(data)
+
+            # 1 day, 10:59:58
+            duration_str = str(timedelta(seconds=tag.duration))
+
+            # per-file attributes, some values are populated conditionally
+            track = {
+                'album':        self._validate(tag.album, os.path.split(path)[1]),
+                'author':       self._validate(tag.artist, 'Unknown'),
+                'duration':     tag.duration,
+                'duration_str': duration_str.split('.')[0],
+                'filename':     os.path.split(file_path)[1],
+                'path':         file_path,
+                'size_bytes':   tag.filesize,
+                'title':        self._validate(tag.title, os.path.split(file_path)[1]),
+                'track':        tag.track
+            }
+
+            # we assume author and album attributes are unchanged between tracks
+            book['author'] = track['author']
+            book['title'] = track['album']
+
+            # increment book total size/duration
+            book['duration'] += tag.duration
+            book['size_bytes'] += tag.filesize
+
+            # hexdigest: track dict
+            book['files'][file_hash.hexdigest()] = track
+
+        # final book processing routine; update total size, duration
+        if is_book:
+            folder_hash = folder_hash.hexdigest()
+            total_size = book['size_bytes']
+
+            # bytes -> readable file size, used in audiobook index
+            try:
+                _i = int(math.floor(math.log(total_size, 1024)))
+                _p = math.pow(1024, _i)
+                _s = round(total_size / _p, 2)
+            except:
+                _i = 1
+                _s = 0
+
+            # e.g. 1.48 GB
+            SIZES = ('B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB')
+            book['size_str'] = '%s %s' % (str(_s), SIZES[_i])
+
+            # e.g. 2 days, 5:47:47
+            duration_str = str(timedelta(seconds=book['duration']))
+            book['duration_str'] = duration_str.split('.')[0]
+
+            return (folder_hash, book)
+
+        return None
diff --git a/lib/util.py b/lib/util.py
index fbddb2e..a2f982d 100644
--- a/lib/util.py
+++ b/lib/util.py
@@ -23,7 +23,7 @@ def read_cache(json_path):
         except Exception:
             raise ValueError('error loading JSON cache')
     else:
-        raise ValueError('cache not found, run rebuild.py')
+        raise ValueError('cache not found, run ./roka.py --scan')
 
     return books
 
diff --git a/rebuild.py b/rebuild.py
deleted file mode 100755
index d9144ca..0000000
--- a/rebuild.py
+++ /dev/null
@@ -1,212 +0,0 @@
-#!/usr/bin/env python3
-
-import datetime
-import hashlib
-import json
-import math
-import os
-from datetime import timedelta
-from flask import Flask
-from lib.tinytag import TinyTag
-
-ABS_PATH = os.path.dirname(os.path.abspath(__file__))
-CACHE_PATH = os.path.join(ABS_PATH, 'cache')
-JSON_PATH = os.path.join(CACHE_PATH, 'audiobooks.json')
-
-# use Flask's config parser, configparser would be hacky
-APP = Flask(__name__)
-APP.config.from_pyfile(os.path.join(ABS_PATH, 'app.cfg'))
-
-class Books:
-    def __init__(self):
-        '''
-        Book-related handlers (r/w cache) and track discovery
-        '''
-        if os.path.exists(JSON_PATH):
-            self._cache = self._read_cache()
-        else:
-            self._cache = {}
-
-        self.books = self._get_books()
-        self._write_cache()
-
-    def _get_dirs(self, path):
-        '''
-        Return list of directories recursively discovered in :path:
-        '''
-        ret = list()
-        for root, dirs, _ in os.walk(path):
-            for d in dirs:
-                ret.append(os.path.join(root, d))
-        return ret
-
-    def _get_path_hash_dict(self):
-        '''
-        Return dict of book paths and their hash from cache, used to check paths
-        against existing cache
-
-        '/home/user/audiobooks/book': d815c7a3cc11f08558b4d91ca93de023
-        '''
-        ret = {}
-        for k, _ in self._cache.items():
-            path = self._cache[k]['path']
-            if os.path.exists(path):
-                ret[path] = k
-        return ret
-
-    def _write_cache(self):
-        '''
-        Dump contents of :books: to :json_path:
-        '''
-        if not os.path.exists(CACHE_PATH):
-            os.mkdir(CACHE_PATH)
-        with open(JSON_PATH, 'w') as cache:
-            json.dump(self.books, cache, indent=4)
-
-    def _read_cache(self):
-        '''
-        Return dict of existing cache
-        '''
-        with open(JSON_PATH, 'r') as cache:
-            data = json.load(cache)
-        return data
-
-    def _validate(self, v, b):
-        '''
-        Returns :v: if :v: and v.isspace(), otherwise :b:
-        '''
-        if v and not v.isspace():
-            return v
-        return b
-
-    def _log(self, msg):
-        '''
-        Prints :msg: with formatted ISO-8601 date
-        '''
-        now = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
-        print('%s %s' % (now, msg))
-
-    def _get_books(self):
-        '''
-        Discover audiobooks under :root_path: and populate books object
-
-        :cache: existing JSON cache, used to determine which content is new
-                (existing content is not re-hashed)
-        '''
-        ex = self._get_path_hash_dict()
-        dirs = self._get_dirs(APP.config['ROOT_PATH'])
-
-        books = dict()
-        for path in dirs:
-            if path in ex:
-                _hash = ex[path]
-                books[_hash] = self._cache[_hash]
-                continue
-            book = self._check_dir(path)
-            if book:
-                books[book[0]] = book[1]
-        return books
-
-    def _check_dir(self, path):
-        '''
-        Determine if :path: contains (supported) audio files; return populated
-        book dict or None
-        '''
-        ext = ['mp3'] # m4b seems to be unsupported by Apple
-        is_book = False
-
-        # book attributes to be populated
-        book = {
-            'author':       None,
-            'duration':     0,
-            'duration_str': None,
-            'files':        dict(),
-            'path':         path,
-            'size_bytes':   0,
-            'size_str':     None,
-            'title':        None
-        }
-
-        # hash of each supported track in directory path
-        folder_hash = hashlib.md5()
-
-        for f in sorted(os.listdir(path)):
-            # must be a file and have a supported extension
-            file_path = os.path.join(path, f)
-            if not os.path.isfile(file_path) or not f.split('.')[-1] in ext:
-                continue
-
-            # tracks at minimum must have a duration tag (required by podcast
-            # apps)
-            tag = TinyTag.get(file_path)
-            if not tag.duration:
-                continue
-
-            # previous conditions met, we've found at least one track
-            is_book = True
-            self._log(f)
-
-            # hash track (used as a key) and update folder hash
-            file_hash = hashlib.md5()
-            with open(file_path, 'rb') as f:
-                while True:
-                    data = f.read(1024)
-                    if not data:
-                        break
-                    folder_hash.update(data)
-                    file_hash.update(data)
-
-            # 1 day, 10:59:58
-            duration_str = str(timedelta(seconds=tag.duration))
-
-            # per-file atributes, some values are populated conditionally
-            track = {
-                'album':        self._validate(tag.album, os.path.split(path)[1]),
-                'author':       self._validate(tag.artist, 'Unknown'),
-                'duration':     tag.duration,
-                'duration_str': duration_str.split('.')[0],
-                'filename':     os.path.split(file_path)[1],
-                'path':         file_path,
-                'size_bytes':   tag.filesize,
-                'title':        self._validate(tag.title, os.path.split(file_path)[1]),
-                'track':        tag.track
-            }
-
-            # we assume author and album attributes are unchanged between tracks
-            book['author'] = track['author']
-            book['title'] = track['album']
-
-            # increment book total size/duration
-            book['duration'] += tag.duration
-            book['size_bytes'] += tag.filesize
-
-            # hexdigest: track dict
-            book['files'][file_hash.hexdigest()] = track
-
-        # final book processing routine; update total size, duration
-        if is_book:
-            folder_hash = folder_hash.hexdigest()
-            total_size = book['size_bytes']
-
-            # bytes -> readable file size, used in audiobook index
-            try:
-                _i = int(math.floor(math.log(total_size, 1024)))
-                _p = math.pow(1024, _i)
-                _s = round(total_size / _p, 2)
-            except:
-                _i = 1
-                _s = 0
-
-            # e.g. 1.48 GB
-            SIZES = ('B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB')
-            book['size_str'] = '%s %s' % (str(_s), SIZES[_i])
-
-            # e.g. 2 days, 5:47:47
-            duration_str = str(timedelta(seconds=book['duration']))
-            book['duration_str'] = duration_str.split('.')[0]
-            return (folder_hash, book)
-
-        return None
-
-if __name__ == '__main__':
-    books = Books()
diff --git a/roka.py b/roka.py
new file mode 100755
index 0000000..4c4da57
--- /dev/null
+++ b/roka.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+
+import argparse
+import os
+from flask import Flask, request, Response, render_template, send_file
+from lib.books import Books
+from lib.util import check_auth, escape, generate_rss, read_cache
+
+abs_path = os.path.dirname(os.path.abspath(__file__))
+app = Flask(__name__)
+app.config.from_pyfile(os.path.join(abs_path, 'app.cfg'))
+cache_path = os.path.join(abs_path, 'cache')
+json_path = os.path.join(cache_path, 'audiobooks.json')
+
+@app.route('/')
+def list_books():
+    '''
+    Book listing and audiobook RSS/file download
+
+    :a: audiobook hash; if provided without :f: (file) return RSS
+    :f: file hash; requires associated audiobook (:a:) to download
+
+    Listing of audiobooks returned if no params provided
+    '''
+    books = read_cache(json_path)
+
+    a = request.args.get('a') # audiobook hash
+    f = request.args.get('f') # file hash
+
+    # audiobook and file parameters provided: serve up file
+    if a and f:
+        if not books.get(a) or not books[a]['files'].get(f):
+            return 'book or file not found', 404
+
+        f_path = books[a]['files'][f]['path']
+        return send_file(f_path, conditional=True)
+
+    # serve up audiobook RSS feed; only audiobook hash provided
+    elif a:
+        if not books.get(a):
+            return 'book not found', 404
+
+        rss = generate_rss(request, books)
+        return Response(rss, mimetype='text/xml')
+
+    else:
+        auth = request.authorization
+        if not auth or not check_auth(app, auth.username, auth.password):
+            form = {'WWW-Authenticate': 'Basic realm="o/"'}
+            return Response('unauthorized', 401, form)
+
+        return render_template('index.html', books=books)
+
+if __name__ == '__main__':
+    desc = 'roka: listen to audiobooks with podcast apps via RSS'
+    parser = argparse.ArgumentParser(description=desc)
+    parser.add_argument('--scan', dest='scan', action='store_true',
+                        help='scan audiobooks directory for new books',
+                        required=False)
+    args = parser.parse_args()
+
+    if args.scan:
+        books = Books()
+        books.scan_books()
+        books.write_cache()
+    else:
+        app.run(host='127.0.0.1', port='8085', threaded=True)
diff --git a/run.py b/run.py
deleted file mode 100755
index e48f19f..0000000
--- a/run.py
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/usr/bin/env python3
-
-import os
-from flask import Flask, request, Response, render_template, send_file
-from lib.util import check_auth, escape, generate_rss, read_cache
-
-abs_path = os.path.dirname(os.path.abspath(__file__))
-app = Flask(__name__)
-app.config.from_pyfile(os.path.join(abs_path, 'app.cfg'))
-cache_path = os.path.join(abs_path, 'cache')
-json_path = os.path.join(cache_path, 'audiobooks.json')
-
-@app.route('/')
-def list_books():
-    '''
-    Book listing and audiobook RSS/file download
-
-    :a: audiobook hash; if provided without :f: (file) return RSS
-    :f: file hash; requires associated audiobook (:a:) to download
-
-    Listing of audiobooks returned if no params provided
-    '''
-    books = read_cache(json_path)
-
-    a = request.args.get('a') # audiobook hash
-    f = request.args.get('f') # file hash
-
-    # audiobook and file parameters provided: serve up file
-    if a and f:
-        if not books.get(a) or not books[a]['files'].get(f):
-            return 'book or file not found', 404
-
-        f_path = books[a]['files'][f]['path']
-        return send_file(f_path, conditional=True)
-
-    # serve up audiobook RSS feed; only audiobook hash provided
-    elif a:
-        if not books.get(a):
-            return 'book not found', 404
-
-        rss = generate_rss(request, books)
-        return Response(rss, mimetype='text/xml')
-
-    else:
-        auth = request.authorization
-        if not auth or not check_auth(app, auth.username, auth.password):
-            form = {'WWW-Authenticate': 'Basic realm="o/"'}
-            return Response('unauthorized', 401, form)
-
-        return render_template('index.html', books=books)
-
-if __name__ == '__main__':
-    app.run(host='127.0.0.1', port='8085', threaded=True)
diff --git a/uwsgi.ini.example b/uwsgi.ini.example
index 49dee9c..2a2093f 100644
--- a/uwsgi.ini.example
+++ b/uwsgi.ini.example
@@ -2,6 +2,6 @@
 http      = 127.0.0.1:8085
 processes = 2
 threads   = 4
-wsgi-file = run.py
+wsgi-file = roka.py
 callable  = app
 master    = true
-- 
cgit v1.2.3-54-g00ecf