From ecfa70f53576ec70ede543f5dfd395bd4951d1df Mon Sep 17 00:00:00 2001 From: Jordan Date: Thu, 28 May 2020 12:44:51 -0700 Subject: cleanup, reload cache between requests (TODO: use getmtime()) --- rebuild.py | 112 ++++++++++++++++++++++++++++++++----------------------------- run.py | 53 ++++++++++++++++++----------- 2 files changed, 92 insertions(+), 73 deletions(-) diff --git a/rebuild.py b/rebuild.py index 903d8ec..45a52e1 100755 --- a/rebuild.py +++ b/rebuild.py @@ -19,62 +19,60 @@ def get_books(root_path, cache=None): raise ValueError('root path does not exist: %s' % root_path) # '/home/user/audiobooks/book': d815c7a3cc11f08558b4d91ca93de023 - cached = {} + existing_books = {} if cache: for k, _ in cache.items(): - cached[cache[k]['path']] = k + existing_books[cache[k]['path']] = k - books = dict() book_dirs = list() for root, dirs, _ in os.walk(root_path): for d in dirs: book_dirs.append(os.path.join(root, d)) + books = dict() for book_path in book_dirs: - print('[+] processing: %s' % book_path) - - # if already cached, populate _books with existing k/v - if book_path in cached: - _hash = cached[book_path] + # if already cached, populate books with existing k/v + if book_path in existing_books: + _hash = existing_books[book_path] books[_hash] = cache[_hash] continue - book = is_book(book_path) - if book: books[book[0]] = book[1] + if book: + books[book[0]] = book[1] return books def is_book(book_path): - # initial set of attributes to be populated + # book attributes to be populated book = { - 'duration': 0, - 'path': book_path, - 'files': dict(), - 'size_bytes': 0, - 'size_str': None, + 'author': None, + 'duration': 0, + 'duration_str': None, + 'files': dict(), + 'path': book_path, + 'size_bytes': 0, + 'size_str': None, + 'title': None } # hash of each file in directory w/ MP3 extension folder_hash = hashlib.md5() - is_book = False # a book_path is only a book if it contains at least one MP3 + is_book = False for f in os.listdir(book_path): file_path = 
os.path.join(book_path, f) - - # must be MP3 file, ignore anything else if not os.path.isfile(file_path) or not f.endswith('.mp3'): continue - - # skip if no duration attribute (required) tag = TinyTag.get(file_path) if not tag.duration: continue # previous conditions met, we're a book! :D is_book = True + print('[+] processing: %s' % book_path) - # update folder hash with MD5 of current file + # update collective hash of folder with MD5 of current file BLOCK = 1024 file_hash = hashlib.md5() with open(file_path, 'rb') as f: @@ -85,46 +83,42 @@ def is_book(book_path): folder_hash.update(data) file_hash.update(data) - # populate per-file and book attribute - mp3 = dict() - mp3['path'] = file_path + # per-MP3 attributes, some values are populated conditionally + mp3 = { + 'album': None, + 'author': None, + 'duration': tag.duration, + 'duration_str': None, + 'filename': os.path.split(file_path)[1], + 'path': file_path, + 'size_bytes': None, + 'title': None, + 'track': None + } + + mp3['album'] = validate(tag.album, os.path.split(book_path)[1]) + mp3['author'] = validate(tag.artist, 'Unknown') mp3['duration'] = tag.duration - mp3['filename'] = os.path.split(file_path)[1] - - # attribute values must be populated and non-space - if tag.title and not tag.title.isspace(): - mp3['title'] = tag.title - else: - mp3['title'] = os.path.split(file_path)[1] - - # we overwrite existing book title/author in assuming MP3 tags are - # consistent between MP3s, perhaps we shouldn't - if tag.album and not tag.album.isspace(): - mp3['album'] = tag.album - book['title'] = tag.album - else: - mp3['album'] = os.path.split(book_path)[1] - book['title'] = os.path.split(book_path)[1] - - if tag.artist and not tag.artist.isspace(): - mp3['author'] = tag.artist - book['author'] = tag.artist - else: - mp3['author'] = 'Unknown' - book['author'] = 'Unknown' - mp3['duration'] = tag.duration + # 1 day, 10:59:58 + duration_str = str(timedelta(seconds=mp3['duration'])) + mp3['duration_str'] = 
duration_str.split('.')[0] + + mp3['title'] = validate(tag.title, os.path.split(file_path)[1]) mp3['track'] = tag.track mp3['size_bytes'] = tag.filesize - duration_str = str(timedelta(seconds=mp3['duration'])) - mp3['duration_str'] = duration_str.split('.')[0] + # we assume author and album attributes are unchanged between MP3s + book['author'] = mp3['author'] + book['title'] = mp3['album'] - # increment book total size/duration, store MP3 + # increment book total size/duration book['duration'] += tag.duration - book['files'][file_hash.hexdigest()] = mp3 book['size_bytes'] += tag.filesize + # hexdigest: MP3 dict + book['files'][file_hash.hexdigest()] = mp3 + # if we're a book, store formatted book size and duration if is_book: folder_hash = folder_hash.hexdigest() @@ -164,6 +158,18 @@ def read_cache(json_path): return books +def validate(v, b): + ''' + Returns :v: if v and not v.isspace(), otherwise b + + :v: preferred value + :b: backup value + ''' + if v and not v.isspace(): + return v + else: + return b + if __name__ == '__main__': ABS_PATH = os.path.dirname(os.path.abspath(__file__)) CACHE_PATH = os.path.join(ABS_PATH, 'cache') diff --git a/run.py b/run.py index d73545b..90e61c0 100755 --- a/run.py +++ b/run.py @@ -10,6 +10,7 @@ from collections import OrderedDict from operator import getitem from datetime import date, timedelta from flask import Flask, request, Response, render_template, send_file +from xml.dom import minidom abs_path = os.path.dirname(os.path.abspath(__file__)) app = Flask(__name__) @@ -17,20 +18,24 @@ app.config.from_pyfile(os.path.join(abs_path, 'app.cfg')) cache_path = os.path.join(abs_path, 'cache') json_path = os.path.join(cache_path, 'audiobooks.json') -# populate books object from JSON cache sorted by title -if os.path.exists(json_path): - try: - with open(json_path, 'r') as cache: - books = json.load(cache) - books = OrderedDict(sorted( - books.items(), - key=lambda x: x[1]['title'] - )) - - except Exception: - raise ValueError('error 
loading JSON cache') -else: - raise ValueError('cache not found, run rebuild.py') +def read_cache(json_path): + ''' + Populate books dict from cache at :json_path: + ''' + if os.path.exists(json_path): + try: + with open(json_path, 'r') as cache: + books = json.load(cache) + books = OrderedDict(sorted( + books.items(), + key=lambda x: x[1]['title'] + )) + except Exception: + raise ValueError('error loading JSON cache') + else: + raise ValueError('cache not found, run rebuild.py') + + return books def check_auth(username, password): ''' @@ -58,7 +63,8 @@ def escape(s): (0x7F, 0x84), (0x86, 0x9F), (0xFDD0, 0xFDDF), - (0xFFFE, 0xFFFF) + (0xFFFE, 0xFFFF), + (0xA9, 0xA9) ] if sys.maxunicode >= 0x10000: @@ -81,6 +87,14 @@ def escape(s): return s +def prettify(elem): + ''' + Make our RSS feed picturesque :) + ''' + xml_str = ET.tostring(elem, encoding='utf8', method='xml') + xml_dom = minidom.parseString(xml_str) + return xml_dom.toprettyxml(indent=' ') + @app.route('/') def list_books(): ''' @@ -91,6 +105,8 @@ def list_books(): Listing of audiobooks returned if no params provided ''' + books = read_cache(json_path) + a = request.args.get('a') # audiobook hash f = request.args.get('f') # file hash @@ -175,7 +191,7 @@ def list_books(): duration = ET.SubElement(item, 'itunes:duration') duration.text = str(books[a]['files'][f]['duration_str']) - guid = ET.SubElement(item, 'guid') + guid = ET.SubElement(item, 'guid', isPermaLink='false') guid.text = f # file hash # pubDate descending, day decremented w/ each iteration @@ -188,10 +204,7 @@ def list_books(): } ET.SubElement(item, 'enclosure', enc_attr) - return Response( - ET.tostring(rss, encoding='utf8', method='xml'), - mimetype='text/xml' - ) + return Response(prettify(rss), mimetype='text/xml') else: auth = request.authorization if not auth or not check_auth(auth.username, auth.password): -- cgit v1.2.3-54-g00ecf