summaryrefslogtreecommitdiff
path: root/qutebrowser/misc/pakjoy.py
blob: 6765c868796effdb09b2ab8c04644507cc8117e2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
# SPDX-FileCopyrightText: Florian Bruhin (The-Compiler) <mail@qutebrowser.org>
#
# SPDX-License-Identifier: GPL-3.0-or-later

"""Chromium .pak repacking.

This entire file is a great WORKAROUND for https://bugreports.qt.io/browse/QTBUG-118157
and the fact we can't just simply disable the hangouts extension:
https://bugreports.qt.io/browse/QTBUG-118452

It's yet another big hack. If you think this is bad, look at elf.py instead.

The name of this file might or might not be inspired by a certain vegetable,
as well as the "joy" this bug has caused me.

Useful references:

- https://sweetscape.com/010editor/repository/files/PAK.bt (010 editor <3)
- https://textslashplain.com/2022/05/03/chromium-internals-pak-files/
- https://github.com/myfreeer/chrome-pak-customizer
- https://source.chromium.org/chromium/chromium/src/+/main:tools/grit/pak_util.py
- https://source.chromium.org/chromium/chromium/src/+/main:tools/grit/grit/format/data_pack.py

This is a "best effort" parser. If it errors out, we don't apply the workaround
instead of crashing.
"""

import os
import shutil
import pathlib
import dataclasses
from typing import ClassVar, IO, Optional, Dict, Tuple

from qutebrowser.misc import binparsing, objects
from qutebrowser.utils import qtutils, standarddir, version, utils, log

# Byte string that must appear in a resource for it to be accepted as the
# hangouts extension manifest.
HANGOUTS_MARKER = b"// Extension ID: nkeimhogjdpnpccoofpliimaahmaaome"
# Resource ID that is checked first when looking for the manifest; all other
# entries are still scanned if this one is absent or doesn't match.
HANGOUTS_ID = 36197  # as found by toofar
# The only .pak format version the parser accepts.
PAK_VERSION = 5
# Environment variable consulted to locate the resources dir, and set to the
# patched copy once it has been created.
RESOURCES_ENV_VAR = "QTWEBENGINE_RESOURCES_PATH"
# Name of the sub-directory (inside the cache dir) holding the patched copy.
CACHE_DIR_NAME = "webengine_resources_pak_quirk"
# File name of the pak that gets searched for and patched.
PAK_FILENAME = "qtwebengine_resources.pak"

# TARGET_URL is overwritten in place with REPLACEMENT_URL inside the pak, so
# both must have exactly the same length to keep all offsets intact.
TARGET_URL = b"https://*.google.com/*"
REPLACEMENT_URL = b"https://qute.invalid/*"
assert len(TARGET_URL) == len(REPLACEMENT_URL)


@dataclasses.dataclass
class PakHeader:

    """Header fields that follow the version number in a version 5 .pak file.

    The binary layout of the three fields is given by ``_FORMAT``; the
    per-field comments name the corresponding integer widths.
    """

    encoding: int  # uint32
    resource_count: int  # uint16
    _alias_count: int  # uint16

    _FORMAT: ClassVar[str] = "<IHH"

    @classmethod
    def parse(cls, fobj: IO[bytes]) -> "PakHeader":
        """Read one header from the current position of the given file.

        The values unpacked according to ``_FORMAT`` are passed to the
        constructor in field order.
        """
        fields = binparsing.unpack(cls._FORMAT, fobj)
        return cls(*fields)


@dataclasses.dataclass
class PakEntry:

    """One record of the resource index in a .pak file.

    Only ``resource_id`` and ``file_offset`` are read from the file (see
    ``_FORMAT``); ``size`` defaults to 0 and is meant to be filled in by
    whoever reads the index.
    """

    resource_id: int  # uint16
    file_offset: int  # uint32
    size: int = 0  # not in file

    _FORMAT: ClassVar[str] = "<HI"

    @classmethod
    def parse(cls, fobj: IO[bytes]) -> "PakEntry":
        """Read one index record from the current position of the given file.

        The returned entry still has the default ``size`` of 0.
        """
        fields = binparsing.unpack(cls._FORMAT, fobj)
        return cls(*fields)


class PakParser:
    """Index a webengine .pak file and locate the hangouts extension manifest.

    After construction, ``manifest_entry`` is the index entry of the manifest
    and ``manifest`` its raw bytes; ``find_patch_offset()`` then tells where in
    the file TARGET_URL is located.
    """

    def __init__(self, fobj: IO[bytes]) -> None:
        """Check the .pak version, read the index and find the manifest.

        Raises binparsing.ParseError if the version is not PAK_VERSION, if the
        index is malformed or if no hangouts manifest can be found.
        """
        version_fields = binparsing.unpack("<I", fobj)
        pak_version = version_fields[0]
        if pak_version != PAK_VERSION:
            raise binparsing.ParseError(f"Unsupported .pak version {pak_version}")

        self.fobj = fobj
        index = self._read_header()
        self.manifest_entry, self.manifest = self._find_manifest(index)

    def find_patch_offset(self) -> int:
        """Get the absolute position of TARGET_URL within the pak file.

        Raises binparsing.ParseError if the manifest doesn't contain the URL.
        """
        try:
            url_pos = self.manifest.index(TARGET_URL)
        except ValueError:
            raise binparsing.ParseError("Couldn't find URL in manifest")
        # The index is relative to the manifest, which starts at file_offset.
        return self.manifest_entry.file_offset + url_pos

    def _maybe_get_hangouts_manifest(self, entry: PakEntry) -> Optional[bytes]:
        """Return the entry's data if it is the hangouts manifest, else None.

        The data qualifies if it is wrapped in curly braces (ignoring trailing
        newlines) and contains HANGOUTS_MARKER.
        """
        self.fobj.seek(entry.file_offset)
        blob = self.fobj.read(entry.size)

        looks_like_json = blob.startswith(b"{") and blob.rstrip(b"\n").endswith(b"}")
        if looks_like_json and HANGOUTS_MARKER in blob:
            return blob
        return None

    def _read_header(self) -> Dict[int, PakEntry]:
        """Read the header and the entry index, keyed by resource ID.

        Each entry's size is derived from the offset of the entry following
        it. The trailing sentinel (resource ID 0) is validated and dropped.

        Raises binparsing.ParseError if a sentinel shows up before the end of
        the index, or if the last entry isn't a sentinel.
        """
        header = PakHeader.parse(self.fobj)
        # One more than resource_count, as a sentinel entry ends the index.
        records = [
            PakEntry.parse(self.fobj) for _ in range(header.resource_count + 1)
        ]

        current = records[0]
        for following in records[1:]:
            if current.resource_id == 0:
                raise binparsing.ParseError("Unexpected sentinel entry")
            current.size = following.file_offset - current.file_offset
            current = following

        sentinel = records.pop()
        if sentinel.resource_id != 0:
            raise binparsing.ParseError("Missing sentinel entry")

        return {record.resource_id: record for record in records}

    def _find_manifest(self, entries: Dict[int, PakEntry]) -> Tuple[PakEntry, bytes]:
        """Find the hangouts manifest among the given entries.

        Returns the matching entry together with its data. Raises
        binparsing.ParseError if none of the entries matches.
        """
        candidates = list(entries.values())
        likely = entries.get(HANGOUTS_ID)
        if likely is not None:
            # Try the previously known ID before scanning everything else.
            candidates.insert(0, likely)

        for candidate in candidates:
            data = self._maybe_get_hangouts_manifest(candidate)
            if data is not None:
                return candidate, data

        raise binparsing.ParseError("Couldn't find hangouts manifest")


def _find_webengine_resources() -> pathlib.Path:
    """Locate the directory containing the QtWebEngine resources pak.

    If the QTWEBENGINE_RESOURCES_PATH environment variable is set, its value
    is returned without further checks. Otherwise, a list of candidate
    directories is searched in order and the first one containing
    PAK_FILENAME wins.

    Mirrors logic from QtWebEngine:
    https://github.com/qt/qtwebengine/blob/v6.6.0/src/core/web_engine_library_info.cpp#L293-L341

    Raises binparsing.ParseError if no candidate contains the pak file.
    """
    override = os.environ.get(RESOURCES_ENV_VAR)
    if override is not None:
        return pathlib.Path(override)

    data_dir = qtutils.library_path(qtutils.LibraryPath.data)

    search_dirs = []
    if utils.is_mac:  # pragma: no cover
        # I'm not sure how to arrive at this path without hardcoding it
        # ourselves. importlib_resources("PyQt6.Qt6") can serve as a
        # replacement for the qtutils bit but it doesn't seem to help find the
        # actual Resources folder.
        search_dirs.append(
            data_dir / "lib" / "QtWebEngineCore.framework" / "Resources"
        )
    search_dirs.append(data_dir / "resources")
    search_dirs.append(data_dir)
    search_dirs.append(pathlib.Path(objects.qapp.applicationDirPath()))
    search_dirs.append(pathlib.Path.home() / f".{objects.qapp.applicationName()}")

    found = next(
        (path for path in search_dirs if (path / PAK_FILENAME).exists()),
        None,
    )
    if found is None:
        raise binparsing.ParseError("Couldn't find webengine resources dir")
    return found


def copy_webengine_resources() -> Optional[pathlib.Path]:
    """Create a private, patchable copy of the QtWebEngine resources.

    Any copy left over in the cache dir is always removed first. Only if
    QtWebEngine is at version 6.6 is a fresh copy made, in which case
    QTWEBENGINE_RESOURCES_PATH is pointed at it.

    Returns the directory of the copy, or None if no patching is needed.
    """
    source_dir = _find_webengine_resources()
    target_dir = pathlib.Path(standarddir.cache()) / CACHE_DIR_NAME

    # Done regardless of the version, so a stale copy never lingers around.
    if target_dir.exists():
        log.misc.debug(f"Removing existing {target_dir}")
        shutil.rmtree(target_dir)

    qtwe_versions = version.qtwebengine_versions(avoid_init=True)
    if qtwe_versions.webengine != utils.VersionNumber(6, 6):
        # Nothing to patch on this version
        return None

    message = (
        "Copying webengine resources for quirk patching: "
        f"{source_dir} -> {target_dir}"
    )
    log.misc.debug(message)

    shutil.copytree(source_dir, target_dir)
    # Point the resource lookup at the copy instead of the original.
    os.environ[RESOURCES_ENV_VAR] = str(target_dir)
    return target_dir


def _patch(file_to_patch: pathlib.Path) -> None:
    """Apply any patches to the given pak file, in place.

    Locates TARGET_URL inside the hangouts manifest of the pak file and
    overwrites it with REPLACEMENT_URL.

    This is best-effort: if the file is missing, can't be opened, read or
    written, or can't be parsed, the problem is logged and the file is left
    as it is instead of raising.
    """
    if not file_to_patch.exists():
        log.misc.error(
            "Resource pak doesn't exist at expected location! "
            f"Not applying quirks. Expected location: {file_to_patch}"
        )
        return

    try:
        # open() is inside the try block too: the file might not be writable,
        # or might have vanished since the check above.
        with open(file_to_patch, "r+b") as f:
            parser = PakParser(f)
            log.misc.debug(f"Patching pak entry: {parser.manifest_entry}")
            offset = parser.find_patch_offset()
            binparsing.safe_seek(f, offset)
            f.write(REPLACEMENT_URL)
    except (binparsing.ParseError, OSError):
        # OSError covers I/O failures while opening, reading, writing or
        # closing the file, which should not crash either.
        log.misc.exception("Failed to apply quirk to resources pak.")


def patch_webengine() -> None:
    """Apply any patches to webengine resource pak files.

    Copies the QtWebEngine resources to a private directory (if needed for
    the QtWebEngine version in use) and patches the pak file in that copy.

    This is best-effort: if the resources can't be found or copied, the
    problem is logged and no patching is done, instead of raising.
    """
    try:
        # Still calling this on Qt != 6.6 so that the directory is cleaned up
        # when not needed anymore.
        webengine_resources_path = copy_webengine_resources()
    except OSError:
        log.misc.exception("Failed to copy webengine resources, not applying quirk")
        return
    except binparsing.ParseError:
        # Raised by the resources lookup when no candidate directory
        # contains the pak file.
        log.misc.exception("Failed to find webengine resources, not applying quirk")
        return

    if webengine_resources_path is None:
        return

    _patch(webengine_resources_path / PAK_FILENAME)