diff options
author | Florian Bruhin <me@the-compiler.org> | 2023-10-21 00:32:13 +0200 |
---|---|---|
committer | Florian Bruhin <me@the-compiler.org> | 2023-10-21 00:32:13 +0200 |
commit | 238a0fa2d15102342fd4a31c6c077f322bca35b2 (patch) | |
tree | d9fd15b53677e47d6ee407b981234bde6eeec73b | |
parent | 193b5a50a7c4448b6e700756f3854ac9332cbbc7 (diff) | |
download | qutebrowser-238a0fa2d15102342fd4a31c6c077f322bca35b2.tar.gz qutebrowser-238a0fa2d15102342fd4a31c6c077f322bca35b2.zip |
wip: Add initial pakjoy.py
-rw-r--r-- | qutebrowser/misc/binparsing.py | 44 | ||||
-rw-r--r-- | qutebrowser/misc/pakjoy.py | 158 |
2 files changed, 202 insertions, 0 deletions
# ---------------------------------------------------------------------------
# New file: qutebrowser/misc/binparsing.py
# ---------------------------------------------------------------------------

# SPDX-FileCopyrightText: Florian Bruhin (The-Compiler) <mail@qutebrowser.org>
#
# SPDX-License-Identifier: GPL-3.0-or-later

"""Utilities for parsing binary files.

Used by elf.py as well as pakjoy.py.
"""

import struct
from typing import Any, IO, Tuple


class ParseError(Exception):

    """Raised when the file can't be parsed."""


def unpack(fmt: str, fobj: IO[bytes]) -> Tuple[Any, ...]:
    """Unpack the given struct format from the given file.

    Reads exactly as many bytes as the format needs, starting at the current
    file position.

    Args:
        fmt: A struct format string.
        fobj: The binary file object to read from.

    Return:
        The unpacked values, as returned by struct.unpack.

    Raises:
        ParseError: If reading fails, or if the data read doesn't match the
                    format (e.g. a short read at the end of the file).
    """
    size = struct.calcsize(fmt)
    data = safe_read(fobj, size)

    try:
        return struct.unpack(fmt, data)
    except struct.error as e:
        raise ParseError(e) from e


def safe_read(fobj: IO[bytes], size: int) -> bytes:
    """Read from a file, handling possible exceptions.

    Raises:
        ParseError: If the underlying read raises OSError or OverflowError.
    """
    try:
        return fobj.read(size)
    except (OSError, OverflowError) as e:
        raise ParseError(e) from e


def safe_seek(fobj: IO[bytes], pos: int) -> None:
    """Seek in a file, handling possible exceptions.

    Raises:
        ParseError: If the underlying seek raises OSError or OverflowError.
    """
    try:
        fobj.seek(pos)
    except (OSError, OverflowError) as e:
        raise ParseError(e) from e


# ---------------------------------------------------------------------------
# New file: qutebrowser/misc/pakjoy.py
# ---------------------------------------------------------------------------

# SPDX-FileCopyrightText: Florian Bruhin (The-Compiler) <mail@qutebrowser.org>
#
# SPDX-License-Identifier: GPL-3.0-or-later

"""Chromium .pak repacking.

This entire file is a great WORKAROUND for https://bugreports.qt.io/browse/QTBUG-118157
and the fact we can't just simply disable the hangouts extension:
https://bugreports.qt.io/browse/QTBUG-118452

It's yet another big hack. If you think this is bad, look at elf.py instead.

The name of this file might or might not be inspired by a certain vegetable,
as well as the "joy" this bug has caused me.

Useful references:

- https://sweetscape.com/010editor/repository/files/PAK.bt (010 editor <3)
- https://textslashplain.com/2022/05/03/chromium-internals-pak-files/
- https://github.com/myfreeer/chrome-pak-customizer
- https://source.chromium.org/chromium/chromium/src/+/main:tools/grit/pak_util.py
- https://source.chromium.org/chromium/chromium/src/+/main:tools/grit/grit/format/data_pack.py

This is a "best effort" parser. If it errors out, we don't apply the workaround
instead of crashing.
"""

import dataclasses
from typing import ClassVar, IO, Optional, Dict, Tuple

from qutebrowser.misc import binparsing

HANGOUTS_MARKER = b"// Extension ID: nkeimhogjdpnpccoofpliimaahmaaome"
HANGOUTS_ID = 36197  # as found by toofar

TARGET_URL = b"https://*.google.com/*"
REPLACEMENT_URL = b"https://*.qb.invalid/*"
# The replacement is written over the target in place, so both must have the
# same length to not shift any of the following data.
assert len(TARGET_URL) == len(REPLACEMENT_URL)


@dataclasses.dataclass
class Pak5Header:

    """Chromium .pak header.

    This is the part of the header following the uint32 version field (which
    is read separately by PakParser).
    """

    encoding: int  # uint32
    resource_count: int  # uint16
    alias_count: int  # uint16

    # Little-endian: uint32, uint16, uint16
    _FORMAT: ClassVar[str] = '<IHH'

    @classmethod
    def parse(cls, fobj: IO[bytes]) -> 'Pak5Header':
        """Parse a PAK version 5 header from a file.

        Raises:
            binparsing.ParseError: If the header can't be read.
        """
        return cls(*binparsing.unpack(cls._FORMAT, fobj))


@dataclasses.dataclass
class PakEntry:

    """Entry description in a .pak file."""

    resource_id: int  # uint16
    file_offset: int  # uint32
    # Not in file, filled in by PakParser from the offset of the next entry.
    size: int = 0

    # Little-endian: uint16, uint32
    _FORMAT: ClassVar[str] = '<HI'

    @classmethod
    def parse(cls, fobj: IO[bytes]) -> 'PakEntry':
        """Parse a PAK entry from a file.

        Raises:
            binparsing.ParseError: If the entry can't be read.
        """
        return cls(*binparsing.unpack(cls._FORMAT, fobj))


class PakParser:

    """Parser for Chromium .pak files (version 5 only).

    Attributes:
        fobj: The file object being parsed.
        manifest_entry: The PakEntry of the hangouts extension manifest.
        manifest: The raw bytes of the hangouts extension manifest.
    """

    def __init__(self, fobj: IO[bytes]) -> None:
        """Parse the .pak file from the given file object.

        Raises:
            binparsing.ParseError: If the file has an unsupported version,
                                   can't be parsed, or doesn't contain the
                                   hangouts manifest.
        """
        version = binparsing.unpack("<I", fobj)[0]
        if version != 5:
            raise binparsing.ParseError(f"Unsupported .pak version {version}")

        self.fobj: IO[bytes] = fobj
        entries = self._read_header()
        self.manifest_entry: PakEntry
        self.manifest: bytes
        self.manifest_entry, self.manifest = self._find_manifest(entries)

    def find_patch_offset(self) -> int:
        """Get the absolute file offset of TARGET_URL inside the manifest.

        Raises:
            binparsing.ParseError: If the manifest doesn't contain TARGET_URL.
        """
        try:
            return self.manifest_entry.file_offset + self.manifest.index(TARGET_URL)
        except ValueError as e:
            raise binparsing.ParseError("Couldn't find URL in manifest") from e

    def _maybe_get_hangouts_manifest(self, entry: PakEntry) -> Optional[bytes]:
        """Read the data of the given entry if it's the hangouts manifest.

        Return:
            The raw entry data if it looks like JSON and contains
            HANGOUTS_MARKER, None otherwise.

        Raises:
            binparsing.ParseError: If seeking or reading fails.
        """
        # Use the safe_* helpers so that I/O problems surface as ParseError,
        # like everywhere else in this parser.
        binparsing.safe_seek(self.fobj, entry.file_offset)
        data = binparsing.safe_read(self.fobj, entry.size)

        if not data.startswith(b"{") or not data.rstrip(b"\n").endswith(b"}"):
            # not JSON
            return None

        if HANGOUTS_MARKER not in data:
            return None

        return data

    def _read_header(self) -> Dict[int, PakEntry]:
        """Read the header and entry index from the .pak file.

        The alias table announced by the header's alias_count is not read.

        Return:
            A dict mapping resource IDs to their (size-annotated) entries,
            without the sentinel entry.

        Raises:
            binparsing.ParseError: If the index can't be read or is malformed.
        """
        header = Pak5Header.parse(self.fobj)
        entries = [
            PakEntry.parse(self.fobj)
            for _ in range(header.resource_count + 1)  # + 1 due to sentinel at end
        ]

        # The size isn't stored in the file: An entry ends where the next one
        # (or the sentinel) starts.
        for entry, next_entry in zip(entries, entries[1:]):
            if entry.resource_id == 0:
                raise binparsing.ParseError("Unexpected sentinel entry")
            entry.size = next_entry.file_offset - entry.file_offset
            if entry.size < 0:
                # read() with a negative size would slurp the rest of the file
                raise binparsing.ParseError(
                    f"Negative size for resource {entry.resource_id}")

        if entries[-1].resource_id != 0:
            raise binparsing.ParseError("Missing sentinel entry")
        del entries[-1]

        return {entry.resource_id: entry for entry in entries}

    def _find_manifest(self, entries: Dict[int, PakEntry]) -> Tuple[PakEntry, bytes]:
        """Find the hangouts extension manifest in the given entries.

        Return:
            A (entry, raw manifest data) tuple.

        Raises:
            binparsing.ParseError: If no entry contains the manifest.
        """
        if HANGOUTS_ID in entries:
            suspected_entry = entries[HANGOUTS_ID]
            manifest = self._maybe_get_hangouts_manifest(suspected_entry)
            if manifest is not None:
                return suspected_entry, manifest

        # didn't find it via the previously known ID, let's search them all...
        # (iterate over the values: the dict keys are only the resource IDs)
        for entry in entries.values():
            manifest = self._maybe_get_hangouts_manifest(entry)
            if manifest is not None:
                return entry, manifest

        raise binparsing.ParseError("Couldn't find hangouts manifest")


if __name__ == "__main__":
    # Manual smoke test: patch a copy of the system-wide resources file, then
    # parse the patched copy again and print what we find.
    import shutil
    shutil.copy("/usr/share/qt6/resources/qtwebengine_resources.pak", "/tmp/test.pak")

    with open("/tmp/test.pak", "r+b") as f:
        parser = PakParser(f)
        print(parser.manifest_entry)
        print(parser.manifest)
        offset = parser.find_patch_offset()
        f.seek(offset)
        f.write(REPLACEMENT_URL)

    with open("/tmp/test.pak", "rb") as f:
        parser = PakParser(f)

    print(parser.manifest_entry)
    print(parser.manifest)