summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFlorian Bruhin <me@the-compiler.org>2023-10-21 00:32:13 +0200
committerFlorian Bruhin <me@the-compiler.org>2023-10-21 00:32:13 +0200
commit238a0fa2d15102342fd4a31c6c077f322bca35b2 (patch)
treed9fd15b53677e47d6ee407b981234bde6eeec73b
parent193b5a50a7c4448b6e700756f3854ac9332cbbc7 (diff)
downloadqutebrowser-238a0fa2d15102342fd4a31c6c077f322bca35b2.tar.gz
qutebrowser-238a0fa2d15102342fd4a31c6c077f322bca35b2.zip
wip: Add initial pakjoy.py
-rw-r--r--qutebrowser/misc/binparsing.py44
-rw-r--r--qutebrowser/misc/pakjoy.py158
2 files changed, 202 insertions, 0 deletions
diff --git a/qutebrowser/misc/binparsing.py b/qutebrowser/misc/binparsing.py
new file mode 100644
index 000000000..7627ef6cf
--- /dev/null
+++ b/qutebrowser/misc/binparsing.py
@@ -0,0 +1,44 @@
+# SPDX-FileCopyrightText: Florian Bruhin (The-Compiler) <mail@qutebrowser.org>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Utilities for parsing binary files.
+
+Used by elf.py as well as pakjoy.py.
+"""
+
+import struct
+from typing import Any, IO, Tuple
+
+
+class ParseError(Exception):
+
+ """Raised when the file can't be parsed."""
+
+
+def unpack(fmt: str, fobj: IO[bytes]) -> Tuple[Any, ...]:
+ """Unpack the given struct format from the given file."""
+ size = struct.calcsize(fmt)
+ data = safe_read(fobj, size)
+
+ try:
+ return struct.unpack(fmt, data)
+ except struct.error as e:
+ raise ParseError(e)
+
+
+def safe_read(fobj: IO[bytes], size: int) -> bytes:
+ """Read from a file, handling possible exceptions."""
+ try:
+ return fobj.read(size)
+ except (OSError, OverflowError) as e:
+ raise ParseError(e)
+
+
+def safe_seek(fobj: IO[bytes], pos: int) -> None:
+ """Seek in a file, handling possible exceptions."""
+ try:
+ fobj.seek(pos)
+ except (OSError, OverflowError) as e:
+ raise ParseError(e)
+
diff --git a/qutebrowser/misc/pakjoy.py b/qutebrowser/misc/pakjoy.py
new file mode 100644
index 000000000..5fc1d2816
--- /dev/null
+++ b/qutebrowser/misc/pakjoy.py
@@ -0,0 +1,158 @@
+
+# SPDX-FileCopyrightText: Florian Bruhin (The-Compiler) <mail@qutebrowser.org>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Chromium .pak repacking.
+
+This entire file is a great WORKAROUND for https://bugreports.qt.io/browse/QTBUG-118157
+and the fact we can't just simply disable the hangouts extension:
+https://bugreports.qt.io/browse/QTBUG-118452
+
+It's yet another big hack. If you think this is bad, look at elf.py instead.
+
+The name of this file might or might not be inspired by a certain vegetable,
+as well as the "joy" this bug has caused me.
+
+Useful references:
+
+- https://sweetscape.com/010editor/repository/files/PAK.bt (010 editor <3)
+- https://textslashplain.com/2022/05/03/chromium-internals-pak-files/
+- https://github.com/myfreeer/chrome-pak-customizer
+- https://source.chromium.org/chromium/chromium/src/+/main:tools/grit/pak_util.py
+- https://source.chromium.org/chromium/chromium/src/+/main:tools/grit/grit/format/data_pack.py
+
+This is a "best effort" parser. If it errors out, we don't apply the workaround
+instead of crashing.
+"""
+
+import dataclasses
+from typing import ClassVar, IO, Optional, Dict, Tuple
+
+from qutebrowser.misc import binparsing
+
+HANGOUTS_MARKER = b"// Extension ID: nkeimhogjdpnpccoofpliimaahmaaome"
+HANGOUTS_ID = 36197 # as found by toofar
+
+TARGET_URL = b"https://*.google.com/*"
+REPLACEMENT_URL = b"https://*.qb.invalid/*"
+assert len(TARGET_URL) == len(REPLACEMENT_URL)
+
+
+@dataclasses.dataclass
+class Pak5Header:
+
+ """Chromium .pak header."""
+
+ encoding: int # uint32
+ resource_count: int # uint16
+ alias_count: int # uint16
+
+ _FORMAT: ClassVar[str] = '<IHH'
+
+ @classmethod
+ def parse(cls, fobj: IO[bytes]) -> 'Pak5Header':
+ """Parse a PAK version 5 header from a file."""
+ return cls(*binparsing.unpack(cls._FORMAT, fobj))
+
+
+@dataclasses.dataclass
+class PakEntry:
+
+ """Entry description in a .pak file"""
+
+ resource_id: int # uint16
+ file_offset: int # uint32
+ size: int = 0 # not in file
+
+ _FORMAT: ClassVar[str] = '<HI'
+
+ @classmethod
+ def parse(cls, fobj: IO[bytes]) -> 'PakEntry':
+ """Parse a PAK entry from a file."""
+ return cls(*binparsing.unpack(cls._FORMAT, fobj))
+
+
+class PakParser:
+
+ def __init__(self, fobj: IO[bytes]) -> None:
+ """Parse the .pak file from the given file object."""
+ version = binparsing.unpack("<I", fobj)[0]
+ if version != 5:
+ raise binparsing.ParseError(f"Unsupported .pak version {version}")
+
+ self.fobj = fobj
+ entries = self._read_header()
+ self.manifest_entry, self.manifest = self._find_manifest(entries)
+
+ def find_patch_offset(self) -> int:
+ try:
+ return self.manifest_entry.file_offset + self.manifest.index(TARGET_URL)
+ except ValueError:
+ raise binparsing.ParseError("Couldn't find URL in manifest")
+
+ def _maybe_get_hangouts_manifest(self, entry: PakEntry) -> Optional[bytes]:
+ self.fobj.seek(entry.file_offset)
+ data = self.fobj.read(entry.size)
+
+ if not data.startswith(b"{") or not data.rstrip(b"\n").endswith(b"}"):
+ # not JSON
+ return None
+
+ if HANGOUTS_MARKER not in data:
+ return None
+
+ return data
+
+ def _read_header(self) -> Dict[int, PakEntry]:
+ """Read the header and entry index from the .pak file."""
+ entries = []
+
+ header = Pak5Header.parse(self.fobj)
+ for _ in range(header.resource_count + 1): # + 1 due to sentinel at end
+ entries.append(PakEntry.parse(self.fobj))
+
+ for entry, next_entry in zip(entries, entries[1:]):
+ if entry.resource_id == 0:
+ raise binparsing.ParseError("Unexpected sentinel entry")
+ entry.size = next_entry.file_offset - entry.file_offset
+
+ if entries[-1].resource_id != 0:
+ raise binparsing.ParseError("Missing sentinel entry")
+ del entries[-1]
+
+ return {entry.resource_id: entry for entry in entries}
+
+ def _find_manifest(self, entries: Dict[int, PakEntry]) -> Tuple[PakEntry, str]:
+ if HANGOUTS_ID in entries:
+ suspected_entry = entries[HANGOUTS_ID]
+ manifest = self._maybe_get_hangouts_manifest(suspected_entry)
+ if manifest is not None:
+ return suspected_entry, manifest
+
+ # didn't find it via the prevously known ID, let's search them all...
+ for entry in entries:
+ manifest = self._maybe_get_hangouts_manifest(entry)
+ if manifest is not None:
+ return entry, manifest
+
+ raise binparsing.ParseError("Couldn't find hangouts manifest")
+
+
+if __name__ == "__main__":
+ import shutil
+ shutil.copy("/usr/share/qt6/resources/qtwebengine_resources.pak", "/tmp/test.pak")
+
+ with open("/tmp/test.pak", "r+b") as f:
+ parser = PakParser(f)
+ print(parser.manifest_entry)
+ print(parser.manifest)
+ offset = parser.find_patch_offset()
+ f.seek(offset)
+ f.write(REPLACEMENT_URL)
+
+ with open("/tmp/test.pak", "rb") as f:
+ parser = PakParser(f)
+
+ print(parser.manifest_entry)
+ print(parser.manifest)