#!/usr/bin/env python3
# rebuild.py

import hashlib
import json
import math
import os
from datetime import timedelta
from flask import Flask
from lib.tinytag import TinyTag

def _hash_mp3(file_path, folder_hash, block_size=65536):
    '''
    Return the MD5 hex digest of :file_path:, feeding the same bytes into
    :folder_hash: while reading so the file is only read once
    '''
    file_hash = hashlib.md5()
    with open(file_path, 'rb') as handle:
        # iter() with a sentinel stops at the first empty read (EOF)
        for data in iter(lambda: handle.read(block_size), b''):
            folder_hash.update(data)
            file_hash.update(data)
    return file_hash.hexdigest()


def _format_size(size_bytes):
    '''
    Render :size_bytes: as a human-readable string, e.g. 1.48 GB
    '''
    sizes = ('B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB')
    try:
        idx = int(math.floor(math.log(size_bytes, 1024)))
    except (TypeError, ValueError):
        # log() is undefined for zero, negative or non-numeric sizes; keep
        # the fallback string this script has always produced in that case
        return '%s %s' % (str(0), sizes[1])

    # never index outside the unit table
    idx = max(0, min(idx, len(sizes) - 1))
    value = round(size_bytes / math.pow(1024, idx), 2)
    return '%s %s' % (str(value), sizes[idx])


def get_books(root_path):
    '''
    Discover audiobooks under :root_path: and populate books object

    Every directory found below :root_path: (the root itself is not
    scanned) is treated as a book if it directly contains at least one
    ``.mp3`` file for which TinyTag reports a duration.

    Returns a dict keyed by the MD5 hex digest of the bytes of every
    ``.mp3`` file in the directory (files later skipped for lacking a
    duration still contribute to this digest). Each value is a dict with
    the keys ``duration``, ``path``, ``files``, ``size_bytes``,
    ``size_str``, ``title``, ``author`` and ``duration_str``; ``files``
    maps each file's own MD5 hex digest to its per-file attributes.

    Raises ValueError if :root_path: does not exist.
    '''
    if not os.path.exists(root_path):
        raise ValueError('root path does not exist: %s' % root_path)

    _books = dict()
    book_dirs = [
        os.path.join(root, d)
        for root, dirs, _ in os.walk(root_path)
        for d in dirs
    ]

    for book_path in book_dirs:
        print('[+] processing: %s' % book_path)

        # initial set of attributes to be populated
        book = {
            'duration': 0,
            'path': book_path,
            'files': dict(),
            'size_bytes': 0,
            'size_str': None,
        }

        # hash of each file in directory w/ MP3 extension
        folder_hash = hashlib.md5()
        is_book = False
        dir_name = os.path.basename(book_path)

        # a book_path is only a book if it contains at least one MP3;
        # os.listdir() returns entries in arbitrary order, so sort them to
        # make the folder hash reproducible between runs and filesystems
        for name in sorted(os.listdir(book_path)):
            file_path = os.path.join(book_path, name)
            if not os.path.isfile(file_path) or not name.endswith('.mp3'):
                continue

            # update folder hash with MD5 of current file
            file_digest = _hash_mp3(file_path, folder_hash)

            # skip if no duration attribute (required)
            tag = TinyTag.get(file_path)
            if not tag.duration:
                continue
            is_book = True

            # fall back to file/directory names when tags are missing
            album = tag.album or dir_name
            author = tag.artist or 'Unknown'

            # populate file-specific attributes
            attr = {
                'path': file_path,
                'duration': tag.duration,
                'title': tag.title or os.path.basename(file_path),
                'album': album,
                'author': author,
                'track': tag.track,
                'size_bytes': tag.filesize,
                # drop fractional seconds, e.g. 0:03:25
                'duration_str':
                    str(timedelta(seconds=tag.duration)).split('.')[0],
            }

            # book-level title/author follow the most recently read file
            book['title'] = album
            book['author'] = author

            book['duration'] += tag.duration
            book['files'][file_digest] = attr
            book['size_bytes'] += tag.filesize

        if not is_book:
            continue

        # e.g. 1.48 GB
        book['size_str'] = _format_size(book['size_bytes'])

        # e.g. 2 days, 5:47:47
        duration_str = str(timedelta(seconds=book['duration']))
        book['duration_str'] = duration_str.split('.')[0]

        _books[folder_hash.hexdigest()] = book

    return _books

def write_cache(books, json_path):
    '''
    Dump contents of :books: to :json_path:

    The directory part of :json_path: is created first (including any
    missing parents) when it does not exist yet. :books: is written as
    JSON with a 4-space indent, replacing any existing file.
    '''
    cache_path = os.path.dirname(json_path)
    # a bare file name has no directory part; there is nothing to create
    if cache_path:
        # exist_ok avoids the check-then-create race and handles re-runs
        os.makedirs(cache_path, exist_ok=True)
    with open(json_path, 'w') as f:
        json.dump(books, f, indent=4)

if __name__ == '__main__':
    # anchor every path at the directory that holds this script
    ABS_PATH = os.path.split(os.path.abspath(__file__))[0]
    CACHE_PATH = os.path.join(ABS_PATH, 'cache')
    JSON_PATH = os.path.join(ABS_PATH, 'cache', 'audiobooks.json')

    # Flask serves only as a loader for app.cfg here (its config parser is
    # preferred over configparser)
    APP = Flask(__name__)
    APP.config.from_pyfile(os.path.join(ABS_PATH, 'app.cfg'))

    # scan the configured root, then store the result as the JSON cache
    BOOKS = get_books(APP.config['ROOT_PATH'])
    write_cache(BOOKS, JSON_PATH)