summaryrefslogtreecommitdiff
path: root/qutebrowser/misc/pakjoy.py
blob: c2647c47807c4d01d57a458594a59b02c20ea905 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# SPDX-FileCopyrightText: Florian Bruhin (The-Compiler) <mail@qutebrowser.org>
#
# SPDX-License-Identifier: GPL-3.0-or-later

"""Chromium .pak repacking.

This entire file is a great WORKAROUND for https://bugreports.qt.io/browse/QTBUG-118157
and the fact we can't just simply disable the hangouts extension:
https://bugreports.qt.io/browse/QTBUG-118452

It's yet another big hack. If you think this is bad, look at elf.py instead.

The name of this file might or might not be inspired by a certain vegetable,
as well as the "joy" this bug has caused me.

Useful references:

- https://sweetscape.com/010editor/repository/files/PAK.bt (010 editor <3)
- https://textslashplain.com/2022/05/03/chromium-internals-pak-files/
- https://github.com/myfreeer/chrome-pak-customizer
- https://source.chromium.org/chromium/chromium/src/+/main:tools/grit/pak_util.py
- https://source.chromium.org/chromium/chromium/src/+/main:tools/grit/grit/format/data_pack.py

This is a "best effort" parser. If it errors out, we skip the workaround rather
than crashing.
"""

import dataclasses
import logging
import os
import pathlib
import shutil
from typing import ClassVar, IO, Optional, Dict, Tuple

from qutebrowser.misc import binparsing
from qutebrowser.utils import qtutils, standarddir, version, utils

# Byte string the parser looks for to tell the hangouts extension manifest
# apart from other JSON-looking resources.
HANGOUTS_MARKER = b"// Extension ID: nkeimhogjdpnpccoofpliimaahmaaome"
# Resource ID that is tried first when looking for the manifest, before falling
# back to scanning all entries.
HANGOUTS_ID = 36197  # as found by toofar

# URL pattern inside the manifest which gets overwritten in place by the
# replacement below.
TARGET_URL = b"https://*.google.com/*"
REPLACEMENT_URL = b"https://*.qb.invalid/*"
# The replacement is written over the target byte for byte, so both need to
# have the same length to leave the rest of the file untouched.
assert len(TARGET_URL) == len(REPLACEMENT_URL)


@dataclasses.dataclass
class Pak5Header:

    """Header fields of a version 5 Chromium .pak file.

    The fields are listed in the order in which they are unpacked via _FORMAT.
    """

    encoding: int  # uint32
    resource_count: int  # uint16
    alias_count: int  # uint16

    # struct format: little-endian uint32 followed by two uint16
    _FORMAT: ClassVar[str] = '<IHH'

    @classmethod
    def parse(cls, fobj: IO[bytes]) -> 'Pak5Header':
        """Read a PAK version 5 header from fobj and return it as instance."""
        unpacked = binparsing.unpack(cls._FORMAT, fobj)
        return cls(*unpacked)


@dataclasses.dataclass
class PakEntry:

    """A single entry of the resource index in a .pak file.

    Only resource_id and file_offset are unpacked from the file; size is not
    stored there and defaults to 0 until it gets filled in from outside.
    """

    resource_id: int  # uint16
    file_offset: int  # uint32
    size: int = 0  # not in file

    # struct format: little-endian uint16 followed by a uint32
    _FORMAT: ClassVar[str] = '<HI'

    @classmethod
    def parse(cls, fobj: IO[bytes]) -> 'PakEntry':
        """Read one PAK index entry from fobj and return it as instance."""
        unpacked = binparsing.unpack(cls._FORMAT, fobj)
        return cls(*unpacked)


class PakParser:

    """Parser locating the hangouts extension manifest in a .pak file.

    Only version 5 of the .pak format is supported. After construction,
    `manifest_entry` holds the index entry of the manifest and `manifest` the
    raw bytes read for that entry.
    """

    def __init__(self, fobj: IO[bytes]) -> None:
        """Parse the .pak file from the given file object.

        Args:
            fobj: Binary file object to read from. The version field is read
                  from its current position. The object is kept as
                  `self.fobj` and gets seeked while looking for the manifest.

        Raises:
            binparsing.ParseError: If the version is not 5, the entry index is
                malformed or no hangouts manifest was found.
        """
        # Deliberately not named "version", which would shadow the imported
        # qutebrowser.utils.version module inside this method.
        pak_version = binparsing.unpack("<I", fobj)[0]
        if pak_version != 5:
            raise binparsing.ParseError(f"Unsupported .pak version {pak_version}")

        self.fobj = fobj
        entries = self._read_header()
        self.manifest_entry, self.manifest = self._find_manifest(entries)

    def find_patch_offset(self) -> int:
        """Get the absolute file offset of TARGET_URL inside the manifest.

        Return:
            The offset of the manifest entry plus the index of the first
            occurrence of TARGET_URL in the manifest data.

        Raises:
            binparsing.ParseError: If TARGET_URL is not part of the manifest.
        """
        try:
            url_index = self.manifest.index(TARGET_URL)
        except ValueError as e:
            raise binparsing.ParseError("Couldn't find URL in manifest") from e
        return self.manifest_entry.file_offset + url_index

    def _maybe_get_hangouts_manifest(self, entry: PakEntry) -> Optional[bytes]:
        """Read the data of an entry if it looks like the hangouts manifest.

        Return:
            The raw entry data if it starts with "{", ends with "}" (ignoring
            trailing newlines) and contains HANGOUTS_MARKER; None otherwise.
        """
        self.fobj.seek(entry.file_offset)
        data = self.fobj.read(entry.size)

        if not data.startswith(b"{") or not data.rstrip(b"\n").endswith(b"}"):
            # not JSON
            return None

        if HANGOUTS_MARKER not in data:
            return None

        return data

    def _read_header(self) -> Dict[int, PakEntry]:
        """Read the header and entry index from the .pak file.

        Return:
            A dict mapping resource IDs to their entries, with the size of
            each entry filled in and the trailing sentinel removed.

        Raises:
            binparsing.ParseError: If a sentinel (resource ID 0) shows up
                before the end of the index, or is missing at its end.
        """
        header = Pak5Header.parse(self.fobj)
        entries = [
            PakEntry.parse(self.fobj)
            for _ in range(header.resource_count + 1)  # + 1 due to sentinel at end
        ]

        # The index only stores offsets, so the size of an entry is the distance
        # to the offset of the entry following it.
        for entry, next_entry in zip(entries, entries[1:]):
            if entry.resource_id == 0:
                raise binparsing.ParseError("Unexpected sentinel entry")
            entry.size = next_entry.file_offset - entry.file_offset

        if entries[-1].resource_id != 0:
            raise binparsing.ParseError("Missing sentinel entry")
        del entries[-1]

        return {entry.resource_id: entry for entry in entries}

    def _find_manifest(
        self, entries: Dict[int, PakEntry]
    ) -> Tuple[PakEntry, bytes]:
        """Find the hangouts manifest among the given entries.

        The entry with the previously known HANGOUTS_ID is tried first, all
        entries are searched only if that didn't yield the manifest.

        Return:
            A (entry, manifest data) tuple.

        Raises:
            binparsing.ParseError: If no entry looks like the manifest.
        """
        suspected_entry = entries.get(HANGOUTS_ID)
        if suspected_entry is not None:
            manifest = self._maybe_get_hangouts_manifest(suspected_entry)
            if manifest is not None:
                return suspected_entry, manifest

        # didn't find it via the previously known ID, let's search them all...
        for entry in entries.values():
            manifest = self._maybe_get_hangouts_manifest(entry)
            if manifest is not None:
                return entry, manifest

        raise binparsing.ParseError("Couldn't find hangouts manifest")


def patch() -> None:
    """Apply the hangouts .pak workaround.

    Only does something if the QtWebEngine version compares equal to
    VersionNumber(6, 6). In that case, the QtWebEngine resources directory is
    copied into the cache directory, TARGET_URL is overwritten with
    REPLACEMENT_URL in the copied qtwebengine_resources.pak, and
    QTWEBENGINE_RESOURCES_PATH is pointed at the copy.

    This is best effort: if the .pak file can't be parsed, the copy is removed
    again and the environment is left untouched instead of raising.
    """
    versions = version.qtwebengine_versions(avoid_init=True)
    if versions.webengine != utils.VersionNumber(6, 6):
        return

    logger = logging.getLogger(__name__)

    resources_path = qtutils.library_path(qtutils.LibraryPath.data) / "resources"
    work_dir = pathlib.Path(standarddir.cache()) / "webengine_resources_pak_quirk"
    patched_file = work_dir / "qtwebengine_resources.pak"

    logger.debug(
        "Copying resources from %s to %s (already exists: %s)",
        resources_path, work_dir, work_dir.exists())

    # Always start from a fresh copy, copytree() refuses to overwrite an
    # existing directory.
    if work_dir.exists():
        shutil.rmtree(work_dir)

    shutil.copytree(resources_path, work_dir)

    try:
        with open(patched_file, "r+b") as f:
            parser = PakParser(f)
            offset = parser.find_patch_offset()
            f.seek(offset)
            f.write(REPLACEMENT_URL)
    except binparsing.ParseError:
        # Nothing was written yet at this point, as parsing happens before the
        # write. Don't leave the unpatched copy around and keep using the
        # original resources.
        logger.exception(
            "Failed to patch %s, not applying workaround", patched_file)
        shutil.rmtree(work_dir, ignore_errors=True)
        return

    os.environ["QTWEBENGINE_RESOURCES_PATH"] = str(work_dir)


if __name__ == "__main__":
    # Manual check: patch a scratch copy of the system-wide resources file,
    # then parse the patched copy again and print what was found both times.
    # shutil is already imported at the top of the file.
    shutil.copy("/usr/share/qt6/resources/qtwebengine_resources.pak", "/tmp/test.pak")

    with open("/tmp/test.pak", "r+b") as f:
        parser = PakParser(f)
        print(parser.manifest_entry)
        print(parser.manifest)
        offset = parser.find_patch_offset()
        f.seek(offset)
        f.write(REPLACEMENT_URL)

    # Re-read the patched file; the manifest data is loaded by the constructor,
    # so it can still be printed after the file got closed.
    with open("/tmp/test.pak", "rb") as f:
        parser = PakParser(f)

    print(parser.manifest_entry)
    print(parser.manifest)