#!/usr/bin/env python3
# rebuild.py -- source recovered from the "initial commit" patch
# (2df5addacd82ac7463ff6d3ec6754b21dab71737, Jordan, 2020-04-05).

import hashlib
import json
import math
import os
from datetime import timedelta

from flask import Flask

from lib.tinytag import TinyTag

# unit suffixes, indexed by the power of 1024 they represent
_SIZES = ('B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB')

# bytes read per chunk while hashing; the resulting digests do not depend
# on the chunk size, a larger block only means fewer read() calls
_BLOCK = 64 * 1024


def _hash_file(file_path, folder_hash):
    '''
    Return the MD5 hex digest of :file_path:, feeding the same bytes into
    :folder_hash: (a hashlib object shared by every file of one folder)
    '''
    file_hash = hashlib.md5()
    with open(file_path, 'rb') as handle:
        while True:
            data = handle.read(_BLOCK)
            if not data:
                break
            folder_hash.update(data)
            file_hash.update(data)
    return file_hash.hexdigest()


def _format_duration(seconds):
    '''
    Render :seconds: as text, e.g. 2 days, 5:47:47 (fractions dropped)
    '''
    return str(timedelta(seconds=seconds)).split('.')[0]


def _format_size(total_bytes):
    '''
    Render :total_bytes: with a 1024-based unit suffix, e.g. 1.48 GB
    '''
    try:
        exponent = int(math.floor(math.log(total_bytes, 1024)))
    except ValueError:
        # math.log() rejects sizes <= 0; keep the fallback string this
        # script has always produced for that case
        return '0 %s' % _SIZES[1]

    # never index past the largest known unit
    exponent = min(exponent, len(_SIZES) - 1)
    value = round(total_bytes / math.pow(1024, exponent), 2)
    return '%s %s' % (str(value), _SIZES[exponent])


def _scan_book(book_path):
    '''
    Inspect the MP3 files directly inside :book_path:

    Returns a (folder_hash, book) tuple, or None when the directory holds
    no MP3 file with a duration tag (i.e. it is not a book).
    '''
    # initial set of attributes to be populated
    book = {
        'duration': 0,
        'path': book_path,
        'files': dict(),
        'size_bytes': 0,
        'size_str': None,
    }

    # hash of each file in directory w/ MP3 extension
    folder_hash = hashlib.md5()
    is_book = False

    # os.listdir() order is arbitrary; sorting keeps the folder hash (and
    # the order of 'files') identical between runs over unchanged content
    for name in sorted(os.listdir(book_path)):
        file_path = os.path.join(book_path, name)
        if not os.path.isfile(file_path) or not name.endswith('.mp3'):
            continue

        # every MP3 is hashed before its tags are read, so files that are
        # skipped below still contribute to the folder hash
        file_digest = _hash_file(file_path, folder_hash)

        # skip if no duration attribute (required)
        tag = TinyTag.get(file_path)
        if not tag.duration:
            continue
        is_book = True

        # fall back to file / directory names when tags are missing
        album = tag.album or os.path.basename(book_path)
        author = tag.artist or 'Unknown'

        # populate file-specific attributes
        attr = dict()
        attr['path'] = file_path
        attr['duration'] = tag.duration
        attr['title'] = tag.title or os.path.basename(file_path)
        attr['album'] = album
        attr['author'] = author
        attr['track'] = tag.track
        attr['size_bytes'] = tag.filesize
        attr['duration_str'] = _format_duration(tag.duration)

        # book-level title/author follow the most recently processed file
        book['title'] = album
        book['author'] = author
        book['duration'] += tag.duration
        book['files'][file_digest] = attr
        book['size_bytes'] += tag.filesize

    # a book_path is only a book if it contains at least one usable MP3
    if not is_book:
        return None

    # e.g. 1.48 GB
    book['size_str'] = _format_size(book['size_bytes'])

    # e.g. 2 days, 5:47:47
    book['duration_str'] = _format_duration(book['duration'])

    return folder_hash.hexdigest(), book


def get_books(root_path):
    '''
    Discover audiobooks under :root_path: and populate books object

    Every directory below :root_path: (at any depth) is examined; the
    result maps the MD5 of a directory's MP3 contents to its book
    attributes. Raises ValueError if :root_path: does not exist.
    '''
    if not os.path.exists(root_path):
        raise ValueError('root path does not exist: %s' % root_path)

    book_dirs = list()
    for root, dirs, _ in os.walk(root_path):
        # sorting in place makes the (top-down) traversal order stable
        dirs.sort()
        for d in dirs:
            book_dirs.append(os.path.join(root, d))

    _books = dict()
    for book_path in book_dirs:
        print('[+] processing: %s' % book_path)

        found = _scan_book(book_path)
        if found is None:
            continue
        folder_hash, book = found
        _books[folder_hash] = book

    return _books


def write_cache(books, json_path):
    '''
    Dump contents of :books: to :json_path:

    Missing parent directories of :json_path: are created first.
    '''
    cache_path = os.path.dirname(json_path)
    # dirname is '' for a bare file name, which must not be passed on;
    # makedirs also copes with more than one missing directory level
    if cache_path:
        os.makedirs(cache_path, exist_ok=True)
    with open(json_path, 'w') as f:
        json.dump(books, f, indent=4)


if __name__ == '__main__':
    ABS_PATH = os.path.dirname(os.path.abspath(__file__))
    CACHE_PATH = os.path.join(ABS_PATH, 'cache')
    JSON_PATH = os.path.join(CACHE_PATH, 'audiobooks.json')

    # use Flask's config parser, configparser would be hacky
    APP = Flask(__name__)
    APP.config.from_pyfile(os.path.join(ABS_PATH, 'app.cfg'))

    BOOKS = get_books(APP.config['ROOT_PATH'])
    write_cache(BOOKS, JSON_PATH)