# SPDX-FileCopyrightText: Florian Bruhin (The-Compiler) # # SPDX-License-Identifier: GPL-3.0-or-later """Chromium .pak repacking. This entire file is a great WORKAROUND for https://bugreports.qt.io/browse/QTBUG-118157 and the fact we can't just simply disable the hangouts extension: https://bugreports.qt.io/browse/QTBUG-118452 It's yet another big hack. If you think this is bad, look at elf.py instead. The name of this file might or might not be inspired by a certain vegetable, as well as the "joy" this bug has caused me. Useful references: - https://sweetscape.com/010editor/repository/files/PAK.bt (010 editor <3) - https://textslashplain.com/2022/05/03/chromium-internals-pak-files/ - https://github.com/myfreeer/chrome-pak-customizer - https://source.chromium.org/chromium/chromium/src/+/main:tools/grit/pak_util.py - https://source.chromium.org/chromium/chromium/src/+/main:tools/grit/grit/format/data_pack.py This is a "best effort" parser. If it errors out, we don't apply the workaround instead of crashing. """ import os import shutil import pathlib import dataclasses from typing import ClassVar, IO, Optional, Dict, Tuple from qutebrowser.misc import binparsing, objects from qutebrowser.utils import qtutils, standarddir, version, utils, log HANGOUTS_MARKER = b"// Extension ID: nkeimhogjdpnpccoofpliimaahmaaome" HANGOUTS_ID = 36197 # as found by toofar PAK_VERSION = 5 RESOURCES_ENV_VAR = "QTWEBENGINE_RESOURCES_PATH" CACHE_DIR_NAME = "webengine_resources_pak_quirk" PAK_FILENAME = "qtwebengine_resources.pak" TARGET_URL = b"https://*.google.com/*" REPLACEMENT_URL = b"https://qute.invalid/*" assert len(TARGET_URL) == len(REPLACEMENT_URL) @dataclasses.dataclass class PakHeader: """Chromium .pak header (version 5).""" encoding: int # uint32 resource_count: int # uint16 _alias_count: int # uint16 _FORMAT: ClassVar[str] = " "PakHeader": """Parse a PAK version 5 header from a file.""" return cls(*binparsing.unpack(cls._FORMAT, fobj)) @dataclasses.dataclass class PakEntry: """Entry description in a .pak file.""" resource_id: int # uint16 file_offset: int # uint32 size: int = 0 # not in file _FORMAT: ClassVar[str] = " "PakEntry": """Parse a PAK entry from a file.""" return cls(*binparsing.unpack(cls._FORMAT, fobj)) class PakParser: """Parse webengine pak and find patch location to disable Google Meet extension.""" def __init__(self, fobj: IO[bytes]) -> None: """Parse the .pak file from the given file object.""" pak_version = binparsing.unpack(" int: """Return byte offset of TARGET_URL into the pak file.""" try: return self.manifest_entry.file_offset + self.manifest.index(TARGET_URL) except ValueError: raise binparsing.ParseError("Couldn't find URL in manifest") def _maybe_get_hangouts_manifest(self, entry: PakEntry) -> Optional[bytes]: self.fobj.seek(entry.file_offset) data = self.fobj.read(entry.size) if not data.startswith(b"{") or not data.rstrip(b"\n").endswith(b"}"): # not JSON return None if HANGOUTS_MARKER not in data: return None return data def _read_header(self) -> Dict[int, PakEntry]: """Read the header and entry index from the .pak file.""" entries = [] header = PakHeader.parse(self.fobj) for _ in range(header.resource_count + 1): # + 1 due to sentinel at end entries.append(PakEntry.parse(self.fobj)) for entry, next_entry in zip(entries, entries[1:]): if entry.resource_id == 0: raise binparsing.ParseError("Unexpected sentinel entry") entry.size = next_entry.file_offset - entry.file_offset if entries[-1].resource_id != 0: raise binparsing.ParseError("Missing sentinel entry") del entries[-1] return {entry.resource_id: entry for entry in entries} def _find_manifest(self, entries: Dict[int, PakEntry]) -> Tuple[PakEntry, bytes]: to_check = list(entries.values()) if HANGOUTS_ID in entries: # Most likely candidate, based on previous known ID to_check.insert(0, entries[HANGOUTS_ID]) for entry in to_check: manifest = self._maybe_get_hangouts_manifest(entry) if manifest is not None: return entry, manifest raise binparsing.ParseError("Couldn't find hangouts manifest") def _find_webengine_resources() -> pathlib.Path: """Find the QtWebEngine resources dir. Mirrors logic from QtWebEngine: https://github.com/qt/qtwebengine/blob/v6.6.0/src/core/web_engine_library_info.cpp#L293-L341 """ if RESOURCES_ENV_VAR in os.environ: return pathlib.Path(os.environ[RESOURCES_ENV_VAR]) candidates = [] qt_data_path = qtutils.library_path(qtutils.LibraryPath.data) if utils.is_mac: # pragma: no cover # I'm not sure how to arrive at this path without hardcoding it # ourselves. importlib_resources("PyQt6.Qt6") can serve as a # replacement for the qtutils bit but it doesn't seem to help find the # actuall Resources folder. candidates.append( qt_data_path / "lib" / "QtWebEngineCore.framework" / "Resources" ) candidates += [ qt_data_path / "resources", qt_data_path, pathlib.Path(objects.qapp.applicationDirPath()), pathlib.Path.home() / f".{objects.qapp.applicationName()}", ] for candidate in candidates: if (candidate / PAK_FILENAME).exists(): return candidate raise binparsing.ParseError("Couldn't find webengine resources dir") def copy_webengine_resources() -> Optional[pathlib.Path]: """Copy qtwebengine resources to local dir for patching.""" resources_dir = _find_webengine_resources() work_dir = pathlib.Path(standarddir.cache()) / CACHE_DIR_NAME if work_dir.exists(): log.misc.debug(f"Removing existing {work_dir}") shutil.rmtree(work_dir) versions = version.qtwebengine_versions(avoid_init=True) if versions.webengine != utils.VersionNumber(6, 6): # No patching needed return None log.misc.debug( "Copying webengine resources for quirk patching: " f"{resources_dir} -> {work_dir}" ) shutil.copytree(resources_dir, work_dir) os.environ[RESOURCES_ENV_VAR] = str(work_dir) return work_dir def _patch(file_to_patch: pathlib.Path) -> None: """Apply any patches to the given pak file.""" if not file_to_patch.exists(): log.misc.error( "Resource pak doesn't exist at expected location! " f"Not applying quirks. Expected location: {file_to_patch}" ) return with open(file_to_patch, "r+b") as f: try: parser = PakParser(f) log.misc.debug(f"Patching pak entry: {parser.manifest_entry}") offset = parser.find_patch_offset() binparsing.safe_seek(f, offset) f.write(REPLACEMENT_URL) except binparsing.ParseError: log.misc.exception("Failed to apply quirk to resources pak.") def patch_webengine() -> None: """Apply any patches to webengine resource pak files.""" try: # Still calling this on Qt != 6.6 so that the directory is cleaned up # when not needed anymore. webengine_resources_path = copy_webengine_resources() except OSError: log.misc.exception("Failed to copy webengine resources, not applying quirk") return if webengine_resources_path is None: return _patch(webengine_resources_path / PAK_FILENAME)