1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
|
# SPDX-FileCopyrightText: Florian Bruhin (The-Compiler) <mail@qutebrowser.org>
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""Chromium .pak repacking.
This entire file is a great WORKAROUND for https://bugreports.qt.io/browse/QTBUG-118157
and the fact we can't just simply disable the hangouts extension:
https://bugreports.qt.io/browse/QTBUG-118452
It's yet another big hack. If you think this is bad, look at elf.py instead.
The name of this file might or might not be inspired by a certain vegetable,
as well as the "joy" this bug has caused me.
Useful references:
- https://sweetscape.com/010editor/repository/files/PAK.bt (010 editor <3)
- https://textslashplain.com/2022/05/03/chromium-internals-pak-files/
- https://github.com/myfreeer/chrome-pak-customizer
- https://source.chromium.org/chromium/chromium/src/+/main:tools/grit/pak_util.py
- https://source.chromium.org/chromium/chromium/src/+/main:tools/grit/grit/format/data_pack.py
This is a "best effort" parser. If it errors out, we don't apply the workaround
instead of crashing.
"""
import os
import shutil
import pathlib
import dataclasses
from typing import ClassVar, IO, Optional, Dict, Tuple
from qutebrowser.misc import binparsing
from qutebrowser.utils import qtutils, standarddir, version, utils
# Byte string that must be contained in a resource for it to be treated as the
# hangouts extension manifest.
HANGOUTS_MARKER = b"// Extension ID: nkeimhogjdpnpccoofpliimaahmaaome"
# Resource ID which is checked first when looking for the manifest; if it
# doesn't match, all entries are searched instead.
HANGOUTS_ID = 36197 # as found by toofar
# URL pattern searched for inside the manifest, and the bytes written over it.
TARGET_URL = b"https://*.google.com/*"
REPLACEMENT_URL = b"https://*.qb.invalid/*"
# The replacement is written in place at the offset of the target, so both
# need to have the same length for the rest of the file to stay intact.
assert len(TARGET_URL) == len(REPLACEMENT_URL)
@dataclasses.dataclass
class Pak5Header:

    """Header fields of a version 5 Chromium .pak file.

    The fields are stored little-endian in the order they are declared here.
    """

    encoding: int  # uint32
    resource_count: int  # uint16
    alias_count: int  # uint16

    # struct format matching the three fields above.
    _FORMAT: ClassVar[str] = '<IHH'

    @classmethod
    def parse(cls, fobj: IO[bytes]) -> 'Pak5Header':
        """Read a version 5 header from the current position of fobj.

        Args:
            fobj: Binary file object positioned at the start of the header.

        Return:
            A Pak5Header filled with the values unpacked from the file.
        """
        encoding, resource_count, alias_count = binparsing.unpack(cls._FORMAT, fobj)
        return cls(
            encoding=encoding,
            resource_count=resource_count,
            alias_count=alias_count,
        )
@dataclasses.dataclass
class PakEntry:

    """A single entry of the resource index in a .pak file.

    Only the resource ID and the file offset are stored in the file. The size
    defaults to 0 and is meant to be filled in by whoever reads the index.
    """

    resource_id: int  # uint16
    file_offset: int  # uint32
    size: int = 0  # not in file

    # struct format for the two fields which are stored in the file.
    _FORMAT: ClassVar[str] = '<HI'

    @classmethod
    def parse(cls, fobj: IO[bytes]) -> 'PakEntry':
        """Read one index entry from the current position of fobj.

        Args:
            fobj: Binary file object positioned at the start of an entry.

        Return:
            A PakEntry with resource_id and file_offset set, and size left at 0.
        """
        resource_id, file_offset = binparsing.unpack(cls._FORMAT, fobj)
        return cls(resource_id=resource_id, file_offset=file_offset)
class PakParser:

    """Best-effort parser for version 5 Chromium .pak files.

    On construction, the version, header and resource index are read from the
    given file object and the hangouts extension manifest is located.

    Attributes:
        fobj: The file object the .pak data is read from.
        manifest_entry: The index entry of the hangouts manifest.
        manifest: The raw bytes of the hangouts manifest.
    """

    def __init__(self, fobj: IO[bytes]) -> None:
        """Parse the .pak file from the given file object.

        Args:
            fobj: Binary file object positioned at the start of the .pak data.

        Raises:
            binparsing.ParseError: If the version is not 5, the index is
                malformed or no hangouts manifest could be found.
        """
        # Deliberately not named "version", which would shadow the imported
        # qutebrowser.utils.version module.
        pak_version = binparsing.unpack("<I", fobj)[0]
        if pak_version != 5:
            raise binparsing.ParseError(f"Unsupported .pak version {pak_version}")

        self.fobj = fobj
        entries = self._read_header()
        self.manifest_entry, self.manifest = self._find_manifest(entries)

    def find_patch_offset(self) -> int:
        """Return the offset of TARGET_URL, relative to the start of the file.

        Raises:
            binparsing.ParseError: If TARGET_URL is not part of the manifest.
        """
        try:
            return self.manifest_entry.file_offset + self.manifest.index(TARGET_URL)
        except ValueError as e:
            raise binparsing.ParseError("Couldn't find URL in manifest") from e

    def _maybe_get_hangouts_manifest(self, entry: PakEntry) -> Optional[bytes]:
        """Return the data of the given entry if it is the hangouts manifest.

        Args:
            entry: The index entry whose data should be inspected.

        Return:
            The raw bytes of the entry if they look like JSON and contain
            HANGOUTS_MARKER, None otherwise.
        """
        if entry.size < 0:
            # The offsets in the index went backwards. read() treats a negative
            # size as "read until EOF", so skip the entry instead of pulling in
            # the rest of the file for bogus data.
            return None

        self.fobj.seek(entry.file_offset)
        data = self.fobj.read(entry.size)

        looks_like_json = (
            data.startswith(b"{") and data.rstrip(b"\n").endswith(b"}")
        )
        if not looks_like_json:
            return None

        if HANGOUTS_MARKER not in data:
            return None

        return data

    def _read_header(self) -> Dict[int, PakEntry]:
        """Read the header and entry index from the .pak file.

        Return:
            A mapping of resource IDs to their index entries, with the size of
            every entry filled in. The sentinel entry is not included.

        Raises:
            binparsing.ParseError: If a sentinel entry (resource ID 0) shows up
                before the end of the index, or is missing at its end.
        """
        header = Pak5Header.parse(self.fobj)
        entries = [
            PakEntry.parse(self.fobj)
            for _ in range(header.resource_count + 1)  # + 1 due to sentinel at end
        ]

        # The size isn't stored in the file, it's the distance to the offset of
        # the following entry (which is why there is a sentinel at the end).
        for entry, next_entry in zip(entries, entries[1:]):
            if entry.resource_id == 0:
                raise binparsing.ParseError("Unexpected sentinel entry")
            entry.size = next_entry.file_offset - entry.file_offset

        if entries[-1].resource_id != 0:
            raise binparsing.ParseError("Missing sentinel entry")
        del entries[-1]

        return {entry.resource_id: entry for entry in entries}

    def _find_manifest(self, entries: Dict[int, PakEntry]) -> Tuple[PakEntry, bytes]:
        """Find the hangouts manifest in the given index entries.

        The entry with the previously known HANGOUTS_ID is tried first, all
        other entries are searched afterwards.

        Args:
            entries: Mapping of resource IDs to index entries.

        Return:
            A (entry, manifest) tuple with the matching index entry and the raw
            manifest bytes.

        Raises:
            binparsing.ParseError: If no entry contains the hangouts manifest.
        """
        suspected_entry = entries.get(HANGOUTS_ID)
        candidates = [] if suspected_entry is None else [suspected_entry]
        # didn't find it via the previously known ID, let's search them all...
        # (without reading the suspected entry a second time)
        candidates += [
            entry for entry in entries.values() if entry is not suspected_entry
        ]

        for entry in candidates:
            manifest = self._maybe_get_hangouts_manifest(entry)
            if manifest is not None:
                return entry, manifest

        raise binparsing.ParseError("Couldn't find hangouts manifest")
def patch() -> None:
    """Apply the hangouts workaround to a copy of the QtWebEngine resources.

    Nothing is done unless the QtWebEngine version is 6.6. Otherwise, the
    resources directory is copied into the cache directory, TARGET_URL inside
    the copied qtwebengine_resources.pak is overwritten with REPLACEMENT_URL,
    and QTWEBENGINE_RESOURCES_PATH is set to the copy.

    If the .pak file can't be parsed, the workaround is not applied: a warning
    is logged and QTWEBENGINE_RESOURCES_PATH is left untouched.
    """
    # Local import: logging is only needed here and isn't imported by the file.
    import logging
    logger = logging.getLogger(__name__)

    versions = version.qtwebengine_versions(avoid_init=True)
    if versions.webengine != utils.VersionNumber(6, 6):
        return

    resources_path = qtutils.library_path(qtutils.LibraryPath.data) / "resources"
    work_dir = pathlib.Path(standarddir.cache()) / "webengine_resources_pak_quirk"
    patched_file = work_dir / "qtwebengine_resources.pak"

    logger.debug(
        "Copying QtWebEngine resources from %s to %s (exists: %s)",
        resources_path, work_dir, work_dir.exists())

    # Always start from a fresh copy, so an already patched or outdated file
    # from an earlier run is never reused.
    if work_dir.exists():
        shutil.rmtree(work_dir)
    shutil.copytree(resources_path, work_dir)

    try:
        with open(patched_file, "r+b") as f:
            parser = PakParser(f)
            offset = parser.find_patch_offset()
            f.seek(offset)
            f.write(REPLACEMENT_URL)
    except binparsing.ParseError:
        # Best effort: rather run without the workaround than crash. Nothing
        # was written to the file, as parsing happens before the write.
        logger.warning(
            "Failed to parse %s, not applying workaround",
            patched_file, exc_info=True)
        return

    os.environ["QTWEBENGINE_RESOURCES_PATH"] = str(work_dir)
if __name__ == "__main__":
    # Manual smoke test: patch a copy of the system-wide resources file, then
    # parse the patched copy again and show what was found both times.
    import shutil

    shutil.copy("/usr/share/qt6/resources/qtwebengine_resources.pak", "/tmp/test.pak")

    with open("/tmp/test.pak", "r+b") as pak_file:
        pak = PakParser(pak_file)
        print(pak.manifest_entry)
        print(pak.manifest)
        pak_file.seek(pak.find_patch_offset())
        pak_file.write(REPLACEMENT_URL)

    # Re-read from disk, so the output reflects the patched manifest.
    with open("/tmp/test.pak", "rb") as pak_file:
        pak = PakParser(pak_file)
        print(pak.manifest_entry)
        print(pak.manifest)
|