aboutsummaryrefslogtreecommitdiff
path: root/rebuild.py
blob: 48022721f8199e5ef7c9346e6706258c594e319d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/usr/bin/env python3

import hashlib
import json
import math
import os
from datetime import timedelta
from flask import Flask
from lib.tinytag import TinyTag

# unit suffixes for human-readable sizes, indexed by power of 1024
_SIZE_UNITS = ('B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB')

# number of bytes read per chunk while hashing an MP3
_HASH_BLOCK = 65536

def _format_size(size_bytes):
    '''
    Return :size_bytes: as a human-readable string, e.g. "1.48 GB"
    '''
    # non-positive totals keep the placeholder this script always produced
    if size_bytes <= 0:
        return '0 KB'

    # dividing by 1024 is exact for floats, so unlike floor(log(n, 1024))
    # this cannot pick the wrong unit at an exact boundary; the unit index is
    # clamped so absurdly large totals cannot run off the end of the table
    scaled = float(size_bytes)
    unit = 0
    while scaled >= 1024 and unit < len(_SIZE_UNITS) - 1:
        scaled /= 1024
        unit += 1
    return '%s %s' % (round(scaled, 2), _SIZE_UNITS[unit])

def _format_duration(seconds):
    '''
    Return :seconds: as e.g. "2 days, 5:47:47", dropping fractional seconds
    '''
    return str(timedelta(seconds=seconds)).split('.')[0]

def _tag_text(value, fallback):
    '''
    Return tag :value: if it is populated and non-space, else :fallback:
    '''
    if value and not value.isspace():
        return value
    return fallback

def _read_book(book_path):
    '''
    Build the book record for directory :book_path:

    Returns a (folder_hash, book) tuple, or None when the directory holds no
    MP3 file with a duration (i.e. it is not a book).
    '''
    # initial set of attributes to be populated
    book = {
        'duration': 0,
        'path': book_path,
        'files': dict(),
        'size_bytes': 0,
        'size_str': None,
    }

    # running MD5 over the contents of every accepted MP3 in the directory
    folder_hash = hashlib.md5()
    folder_name = os.path.split(book_path)[1]

    # os.listdir() returns entries in arbitrary order; sort them so the
    # folder hash (the book's key) and the "last file wins" title/author
    # below are the same on every run
    for name in sorted(os.listdir(book_path)):
        file_path = os.path.join(book_path, name)

        # must be MP3 file, ignore anything else
        if not name.endswith('.mp3') or not os.path.isfile(file_path):
            continue

        # skip if no duration attribute (required)
        tag = TinyTag.get(file_path)
        if not tag.duration:
            continue

        # feed the file into both its own hash and the folder hash
        file_hash = hashlib.md5()
        with open(file_path, 'rb') as mp3_file:
            for chunk in iter(lambda: mp3_file.read(_HASH_BLOCK), b''):
                folder_hash.update(chunk)
                file_hash.update(chunk)

        # attribute values must be populated and non-space, otherwise fall
        # back to the file name / directory name / 'Unknown'
        album = _tag_text(tag.album, folder_name)
        author = _tag_text(tag.artist, 'Unknown')

        mp3 = {
            'path': file_path,
            'duration': tag.duration,
            'title': _tag_text(tag.title, name),
            'album': album,
            'author': author,
            'track': tag.track,
            'size_bytes': tag.filesize,
            'duration_str': _format_duration(tag.duration),
        }

        # we overwrite existing book title/author in assuming MP3 tags are
        # consistent between MP3s, perhaps we shouldn't
        book['title'] = album
        book['author'] = author

        # increment book total size/duration, store MP3 under its own hash
        book['duration'] += tag.duration
        book['files'][file_hash.hexdigest()] = mp3
        book['size_bytes'] += tag.filesize

    # a book_path is only a book if it contained at least one usable MP3
    if not book['files']:
        return None

    book['size_str'] = _format_size(book['size_bytes'])
    book['duration_str'] = _format_duration(book['duration'])
    return folder_hash.hexdigest(), book

def get_books(root_path):
    '''
    Discover audiobooks under :root_path: and populate books object

    Every directory below :root_path: (at any depth, :root_path: itself
    excluded) that directly contains at least one ".mp3" file with a duration
    tag is treated as a book.

    Returns a dict mapping the MD5 hex digest of the book's MP3 contents to a
    book dict with keys 'duration', 'path', 'files', 'size_bytes', 'size_str',
    'title', 'author' and 'duration_str'; 'files' maps each MP3's own MD5 hex
    digest to its per-file attributes.

    Raises ValueError if :root_path: does not exist.
    '''
    if not os.path.exists(root_path):
        raise ValueError('root path does not exist: %s' % root_path)

    book_dirs = list()
    for root, dirs, _ in os.walk(root_path):
        # sorting in place also makes os.walk descend in a stable order
        dirs.sort()
        book_dirs.extend(os.path.join(root, d) for d in dirs)

    _books = dict()
    for book_path in book_dirs:
        print('[+] processing: %s' % book_path)

        found = _read_book(book_path)
        if found is None:
            continue

        folder_hash, book = found
        _books[folder_hash] = book

    return _books

def write_cache(books, json_path):
    '''
    Dump contents of :books: to :json_path:

    :books: is serialised as JSON with a 4-space indent. Any missing parent
    directories of :json_path: are created first; an existing file at
    :json_path: is overwritten.
    '''
    cache_path = os.path.dirname(json_path)

    # a bare filename has no directory part, so there is nothing to create;
    # makedirs handles nested paths and tolerates an existing directory
    if cache_path:
        os.makedirs(cache_path, exist_ok=True)

    with open(json_path, 'w') as f:
        json.dump(books, f, indent=4)

if __name__ == '__main__':
    # app.cfg and the cache directory both sit next to this script
    HERE = os.path.dirname(os.path.abspath(__file__))
    CONFIG_FILE = os.path.join(HERE, 'app.cfg')
    CACHE_FILE = os.path.join(HERE, 'cache', 'audiobooks.json')

    # use Flask's config parser, configparser would be hacky
    APP = Flask(__name__)
    APP.config.from_pyfile(CONFIG_FILE)

    # scan the configured root for books, then dump the result as JSON
    write_cache(get_books(APP.config['ROOT_PATH']), CACHE_FILE)