From 6c937f6150ec49dbebacf379393b8e6b62ae9eca Mon Sep 17 00:00:00 2001 From: Eric Ericson Date: Wed, 8 Nov 2023 23:30:27 +0100 Subject: qutebrowser/browser/pdfjs.py: sophisticate check for pdf.js somewhat upstream shifted to mjs modules as of https://github.com/mozilla/pdf.js/commit/927e50f5d48a67e76f2a51c112ea5a98867822fe --- qutebrowser/browser/pdfjs.py | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/qutebrowser/browser/pdfjs.py b/qutebrowser/browser/pdfjs.py index 467f3c605..e4ea3519b 100644 --- a/qutebrowser/browser/pdfjs.py +++ b/qutebrowser/browser/pdfjs.py @@ -5,6 +5,7 @@ """pdf.js integration for qutebrowser.""" +import html import os from qutebrowser.qt.core import QUrl, QUrlQuery @@ -49,24 +50,25 @@ def generate_pdfjs_page(filename, url): filename: The filename of the PDF to open. url: The URL being opened. """ - if not is_available(): + pdfjs_name = _get_pdfjs_basename() + if pdfjs_name is None or not is_available(): pdfjs_dir = os.path.join(standarddir.data(), 'pdfjs') return jinja.render('no_pdfjs.html', url=url.toDisplayString(), title="PDF.js not found", pdfjs_dir=pdfjs_dir) - html = get_pdfjs_res('web/viewer.html').decode('utf-8') + viewhtml = get_pdfjs_res('web/viewer.html').decode('utf-8') script = _generate_pdfjs_script(filename) - html = html.replace('', + viewhtml = viewhtml.replace('', ''.format(script)) # WORKAROUND for the fact that PDF.js tries to use the Fetch API even with # qute:// URLs. - pdfjs_script = '' - html = html.replace(pdfjs_script, + pdfjs_script = f'' + viewhtml = viewhtml.replace(pdfjs_script, '\n' + pdfjs_script) - return html + return viewhtml def _generate_pdfjs_script(filename): @@ -202,10 +204,23 @@ def _read_from_system(system_path, names): return (None, None) +def _get_pdfjs_basename(): + """Checks for pdf.js main module availability and returns the basename if available.""" + exts = ['pdf.js', 'pdf.mjs'] + for ext in exts: + try: + get_pdfjs_res('build/' + ext) + except PDFJSNotFound: + pass + else: + return ext + return None + + def is_available(): - """Return true if a pdfjs installation is available.""" + """Return true if certain parts of a pdfjs installation are available.""" try: - get_pdfjs_res('build/pdf.js') + # get_pdfjs_res('build/pdf.mjs') get_pdfjs_res('web/viewer.html') except PDFJSNotFound: return False -- cgit v1.2.3-54-g00ecf From 0144ae3576aaecde295d40e22d636f04240d6761 Mon Sep 17 00:00:00 2001 From: toofar Date: Mon, 27 Nov 2023 08:07:40 +1300 Subject: fix pdf.js detection in :version Now that pdf.js could be shipped with either js or mjs file extensions we shouldn't hardcode the filename. Call the function for detecting the filename instead. And make it public. --- qutebrowser/browser/pdfjs.py | 4 ++-- qutebrowser/utils/version.py | 4 +++- tests/unit/utils/test_version.py | 11 ++++++++--- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/qutebrowser/browser/pdfjs.py b/qutebrowser/browser/pdfjs.py index e4ea3519b..427ef4537 100644 --- a/qutebrowser/browser/pdfjs.py +++ b/qutebrowser/browser/pdfjs.py @@ -50,7 +50,7 @@ def generate_pdfjs_page(filename, url): filename: The filename of the PDF to open. url: The URL being opened. """ - pdfjs_name = _get_pdfjs_basename() + pdfjs_name = get_pdfjs_basename() if pdfjs_name is None or not is_available(): pdfjs_dir = os.path.join(standarddir.data(), 'pdfjs') return jinja.render('no_pdfjs.html', @@ -204,7 +204,7 @@ def _read_from_system(system_path, names): return (None, None) -def _get_pdfjs_basename(): +def get_pdfjs_basename(): """Checks for pdf.js main module availability and returns the basename if available.""" exts = ['pdf.js', 'pdf.mjs'] for ext in exts: diff --git a/qutebrowser/utils/version.py b/qutebrowser/utils/version.py index 75df73ffa..ac0c8220f 100644 --- a/qutebrowser/utils/version.py +++ b/qutebrowser/utils/version.py @@ -480,7 +480,9 @@ def _pdfjs_version() -> str: A string with the version number. """ try: - pdfjs_file, file_path = pdfjs.get_pdfjs_res_and_path('build/pdf.js') + pdfjs_file, file_path = pdfjs.get_pdfjs_res_and_path( + str(pathlib.Path("build") / pdfjs.get_pdfjs_basename()) + ) except pdfjs.PDFJSNotFound: return 'no' else: diff --git a/tests/unit/utils/test_version.py b/tests/unit/utils/test_version.py index d902f8c53..dcb625b1e 100644 --- a/tests/unit/utils/test_version.py +++ b/tests/unit/utils/test_version.py @@ -849,11 +849,13 @@ class TestPDFJSVersion: """Tests for _pdfjs_version.""" def test_not_found(self, mocker): + mocker.patch('qutebrowser.utils.version.pdfjs.get_pdfjs_basename') mocker.patch('qutebrowser.utils.version.pdfjs.get_pdfjs_res_and_path', side_effect=pdfjs.PDFJSNotFound('/build/pdf.js')) assert version._pdfjs_version() == 'no' - def test_unknown(self, monkeypatch): + def test_unknown(self, monkeypatch, mocker): + mocker.patch('qutebrowser.utils.version.pdfjs.get_pdfjs_basename') monkeypatch.setattr( 'qutebrowser.utils.version.pdfjs.get_pdfjs_res_and_path', lambda path: (b'foobar', None)) @@ -864,7 +866,7 @@ class TestPDFJSVersion: 'var pdfjsVersion', # v2.0.943 'const pdfjsVersion', # v2.5.207 ]) - def test_known(self, monkeypatch, varname): + def test_known(self, monkeypatch, mocker, varname): pdfjs_code = textwrap.dedent(""" // Initializing PDFJS global object (if still undefined) if (typeof PDFJS === 'undefined') { @@ -878,6 +880,7 @@ class TestPDFJSVersion: // Use strict in our context only - users might not want it 'use strict'; """.replace('VARNAME', varname)).strip().encode('utf-8') + mocker.patch('qutebrowser.utils.version.pdfjs.get_pdfjs_basename') monkeypatch.setattr( 'qutebrowser.utils.version.pdfjs.get_pdfjs_res_and_path', lambda path: (pdfjs_code, '/foo/bar/pdf.js')) @@ -886,7 +889,9 @@ class TestPDFJSVersion: def test_real_file(self, data_tmpdir): """Test against the real file if pdfjs was found.""" try: - pdfjs.get_pdfjs_res_and_path('build/pdf.js') + pdfjs_file, file_path = pdfjs.get_pdfjs_res_and_path( + str(pathlib.Path("build") / pdfjs.get_pdfjs_basename()) + ) except pdfjs.PDFJSNotFound: pytest.skip("No pdfjs found") ver = version._pdfjs_version() -- cgit v1.2.3-54-g00ecf From a39dd64c5ebece50b0ae3574a5bb44885b99cefd Mon Sep 17 00:00:00 2001 From: toofar Date: Mon, 27 Nov 2023 08:16:34 +1300 Subject: Move pdf.js fetch() disabler earlier in the file. We are setting window.Response to undefined to cause certain versions of pdf.js to detect that fetch() isn't available and to fall back to XHRs. In more recent versions of pdf.js the HTML string we were matching against has changed, it has a 'type="module"' attribute now, so we need to change the pattern we match against. I don't think it matters where we put this mocking though, so I've just put it after '', which should hopefully be less fragile than matching against a '' --- qutebrowser/browser/pdfjs.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/qutebrowser/browser/pdfjs.py b/qutebrowser/browser/pdfjs.py index 427ef4537..1a13f5bda 100644 --- a/qutebrowser/browser/pdfjs.py +++ b/qutebrowser/browser/pdfjs.py @@ -63,11 +63,11 @@ def generate_pdfjs_page(filename, url): viewhtml = viewhtml.replace('', ''.format(script)) # WORKAROUND for the fact that PDF.js tries to use the Fetch API even with - # qute:// URLs. - pdfjs_script = f'' - viewhtml = viewhtml.replace(pdfjs_script, - '\n' + - pdfjs_script) + # qute:// URLs, this is probably no longer needed in PDFjs 4+. See #4235 + viewhtml = viewhtml.replace( + '', + '\n\n' + ) return viewhtml -- cgit v1.2.3-54-g00ecf From c02a15b6c7525226a8cea274b0e111d4a6518d54 Mon Sep 17 00:00:00 2001 From: toofar Date: Mon, 27 Nov 2023 08:20:28 +1300 Subject: Cleanup pdfjs_name Now we aren't using it for the patching we don't need it in this function. Move the `get_pdfjs_basename()` call into `is_available()` too since having to call two methods to check everything is there sounds like a pain. --- qutebrowser/browser/pdfjs.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/qutebrowser/browser/pdfjs.py b/qutebrowser/browser/pdfjs.py index 1a13f5bda..4c344d727 100644 --- a/qutebrowser/browser/pdfjs.py +++ b/qutebrowser/browser/pdfjs.py @@ -50,8 +50,7 @@ def generate_pdfjs_page(filename, url): filename: The filename of the PDF to open. url: The URL being opened. """ - pdfjs_name = get_pdfjs_basename() - if pdfjs_name is None or not is_available(): + if not is_available(): pdfjs_dir = os.path.join(standarddir.data(), 'pdfjs') return jinja.render('no_pdfjs.html', url=url.toDisplayString(), @@ -220,7 +219,7 @@ def get_pdfjs_basename(): def is_available(): """Return true if certain parts of a pdfjs installation are available.""" try: - # get_pdfjs_res('build/pdf.mjs') + get_pdfjs_basename() get_pdfjs_res('web/viewer.html') except PDFJSNotFound: return False -- cgit v1.2.3-54-g00ecf From 0c3c1310748d85f26f8b0ba9ed8b0c0c464c02bd Mon Sep 17 00:00:00 2001 From: Florian Bruhin Date: Fri, 1 Dec 2023 22:57:31 +0100 Subject: Undo html change html.escape is not needed after latest toofar changes --- qutebrowser/browser/pdfjs.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/qutebrowser/browser/pdfjs.py b/qutebrowser/browser/pdfjs.py index 4c344d727..53baea93d 100644 --- a/qutebrowser/browser/pdfjs.py +++ b/qutebrowser/browser/pdfjs.py @@ -5,7 +5,6 @@ """pdf.js integration for qutebrowser.""" -import html import os from qutebrowser.qt.core import QUrl, QUrlQuery @@ -56,18 +55,18 @@ def generate_pdfjs_page(filename, url): url=url.toDisplayString(), title="PDF.js not found", pdfjs_dir=pdfjs_dir) - viewhtml = get_pdfjs_res('web/viewer.html').decode('utf-8') + html = get_pdfjs_res('web/viewer.html').decode('utf-8') script = _generate_pdfjs_script(filename) - viewhtml = viewhtml.replace('', + html = html.replace('', ''.format(script)) # WORKAROUND for the fact that PDF.js tries to use the Fetch API even with # qute:// URLs, this is probably no longer needed in PDFjs 4+. See #4235 - viewhtml = viewhtml.replace( + html = html.replace( '', '\n\n' ) - return viewhtml + return html def _generate_pdfjs_script(filename): -- cgit v1.2.3-54-g00ecf From eabbdb8ea31167e53407c21830ed7f13ea7ce805 Mon Sep 17 00:00:00 2001 From: Florian Bruhin Date: Fri, 1 Dec 2023 23:14:49 +0100 Subject: pdfjs: Fix :version crash with no pdf.js pdfjs.get_pdf_basename() returned None, causing in a TypeError. Instead of throwing mocker.patch at it, fix the underlying issue. Given we made the same mistake in three places: - :version - test_real_file for PDF.js - is_available() in pdfjs.py (calls the function but doesn't use the result, so is a nop now, even if PDF.js wasn't found) ...evidently we need to change the API so it still raises an exception if no PDF.js is available. Amends 0144ae3576aaecde295d40e22d636f04240d6761. --- qutebrowser/browser/pdfjs.py | 3 ++- tests/unit/utils/test_version.py | 5 +---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/qutebrowser/browser/pdfjs.py b/qutebrowser/browser/pdfjs.py index 53baea93d..ad8f54b0d 100644 --- a/qutebrowser/browser/pdfjs.py +++ b/qutebrowser/browser/pdfjs.py @@ -212,7 +212,8 @@ def get_pdfjs_basename(): pass else: return ext - return None + + raise PDFJSNotFound(" or ".join(f"'build/{ext}'" for ext in exts)) def is_available(): diff --git a/tests/unit/utils/test_version.py b/tests/unit/utils/test_version.py index dcb625b1e..1a337d7c5 100644 --- a/tests/unit/utils/test_version.py +++ b/tests/unit/utils/test_version.py @@ -849,13 +849,11 @@ class TestPDFJSVersion: """Tests for _pdfjs_version.""" def test_not_found(self, mocker): - mocker.patch('qutebrowser.utils.version.pdfjs.get_pdfjs_basename') mocker.patch('qutebrowser.utils.version.pdfjs.get_pdfjs_res_and_path', side_effect=pdfjs.PDFJSNotFound('/build/pdf.js')) assert version._pdfjs_version() == 'no' - def test_unknown(self, monkeypatch, mocker): - mocker.patch('qutebrowser.utils.version.pdfjs.get_pdfjs_basename') + def test_unknown(self, monkeypatch): monkeypatch.setattr( 'qutebrowser.utils.version.pdfjs.get_pdfjs_res_and_path', lambda path: (b'foobar', None)) @@ -880,7 +878,6 @@ class TestPDFJSVersion: // Use strict in our context only - users might not want it 'use strict'; """.replace('VARNAME', varname)).strip().encode('utf-8') - mocker.patch('qutebrowser.utils.version.pdfjs.get_pdfjs_basename') monkeypatch.setattr( 'qutebrowser.utils.version.pdfjs.get_pdfjs_res_and_path', lambda path: (pdfjs_code, '/foo/bar/pdf.js')) -- cgit v1.2.3-54-g00ecf From a2c74ec1bf2a0379d37716adc899ff71a65c1bf1 Mon Sep 17 00:00:00 2001 From: Florian Bruhin Date: Fri, 1 Dec 2023 23:32:07 +0100 Subject: Remove unused vars --- tests/unit/utils/test_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/utils/test_version.py b/tests/unit/utils/test_version.py index 1a337d7c5..a7b957f59 100644 --- a/tests/unit/utils/test_version.py +++ b/tests/unit/utils/test_version.py @@ -886,7 +886,7 @@ class TestPDFJSVersion: def test_real_file(self, data_tmpdir): """Test against the real file if pdfjs was found.""" try: - pdfjs_file, file_path = pdfjs.get_pdfjs_res_and_path( + pdfjs.get_pdfjs_res_and_path( str(pathlib.Path("build") / pdfjs.get_pdfjs_basename()) ) except pdfjs.PDFJSNotFound: -- cgit v1.2.3-54-g00ecf From b5f11558a6d244bbbc8dd6fb5d819a51648d758e Mon Sep 17 00:00:00 2001 From: Florian Bruhin Date: Fri, 1 Dec 2023 23:35:46 +0100 Subject: pdfjs: Simplify logic Makes things easier if we get build/ right with the return value --- qutebrowser/browser/pdfjs.py | 16 ++++++++-------- qutebrowser/utils/version.py | 4 +--- tests/unit/utils/test_version.py | 6 +----- 3 files changed, 10 insertions(+), 16 deletions(-) diff --git a/qutebrowser/browser/pdfjs.py b/qutebrowser/browser/pdfjs.py index ad8f54b0d..841285deb 100644 --- a/qutebrowser/browser/pdfjs.py +++ b/qutebrowser/browser/pdfjs.py @@ -202,24 +202,24 @@ def _read_from_system(system_path, names): return (None, None) -def get_pdfjs_basename(): - """Checks for pdf.js main module availability and returns the basename if available.""" - exts = ['pdf.js', 'pdf.mjs'] - for ext in exts: +def get_pdfjs_js_path(): + """Checks for pdf.js main module availability and returns the path if available.""" + paths = ['build/pdf.js', 'build/pdf.mjs'] + for path in paths: try: - get_pdfjs_res('build/' + ext) + get_pdfjs_res(path) except PDFJSNotFound: pass else: - return ext + return path - raise PDFJSNotFound(" or ".join(f"'build/{ext}'" for ext in exts)) + raise PDFJSNotFound(" or ".join(paths)) def is_available(): """Return true if certain parts of a pdfjs installation are available.""" try: - get_pdfjs_basename() + get_pdfjs_js_path() get_pdfjs_res('web/viewer.html') except PDFJSNotFound: return False diff --git a/qutebrowser/utils/version.py b/qutebrowser/utils/version.py index ac0c8220f..a20774617 100644 --- a/qutebrowser/utils/version.py +++ b/qutebrowser/utils/version.py @@ -480,9 +480,7 @@ def _pdfjs_version() -> str: A string with the version number. """ try: - pdfjs_file, file_path = pdfjs.get_pdfjs_res_and_path( - str(pathlib.Path("build") / pdfjs.get_pdfjs_basename()) - ) + pdfjs_file, file_path = pdfjs.get_pdfjs_res_and_path(pdfjs.get_pdfjs_js_path()) except pdfjs.PDFJSNotFound: return 'no' else: diff --git a/tests/unit/utils/test_version.py b/tests/unit/utils/test_version.py index a7b957f59..f8a1925e3 100644 --- a/tests/unit/utils/test_version.py +++ b/tests/unit/utils/test_version.py @@ -885,11 +885,7 @@ class TestPDFJSVersion: def test_real_file(self, data_tmpdir): """Test against the real file if pdfjs was found.""" - try: - pdfjs.get_pdfjs_res_and_path( - str(pathlib.Path("build") / pdfjs.get_pdfjs_basename()) - ) - except pdfjs.PDFJSNotFound: + if not pdfjs.is_available(): pytest.skip("No pdfjs found") ver = version._pdfjs_version() assert ver.split()[0] not in ['no', 'unknown'], ver -- cgit v1.2.3-54-g00ecf From d77053b86c3527a58af0a37059d34894dbba4cad Mon Sep 17 00:00:00 2001 From: Florian Bruhin Date: Fri, 1 Dec 2023 23:47:42 +0100 Subject: Remove unused fixture --- tests/unit/utils/test_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/utils/test_version.py b/tests/unit/utils/test_version.py index f8a1925e3..38134b40e 100644 --- a/tests/unit/utils/test_version.py +++ b/tests/unit/utils/test_version.py @@ -864,7 +864,7 @@ class TestPDFJSVersion: 'var pdfjsVersion', # v2.0.943 'const pdfjsVersion', # v2.5.207 ]) - def test_known(self, monkeypatch, mocker, varname): + def test_known(self, monkeypatch, varname): pdfjs_code = textwrap.dedent(""" // Initializing PDFJS global object (if still undefined) if (typeof PDFJS === 'undefined') { -- cgit v1.2.3-54-g00ecf From 7d5acb697086c903d485679f3dc754777c8e6ef5 Mon Sep 17 00:00:00 2001 From: toofar Date: Sat, 2 Dec 2023 12:04:06 +1300 Subject: Add test for get_pdfjs_js_path() --- tests/unit/browser/test_pdfjs.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/unit/browser/test_pdfjs.py b/tests/unit/browser/test_pdfjs.py index fe2fea9a0..7bd9588b4 100644 --- a/tests/unit/browser/test_pdfjs.py +++ b/tests/unit/browser/test_pdfjs.py @@ -193,6 +193,32 @@ def test_is_available(available, mocker): assert pdfjs.is_available() == available +@pytest.mark.parametrize('found_file', [ + "build/pdf.js", + "build/pdf.mjs", + None, +]) +def test_get_pdfjs_js_path(found_file, mocker): + + def side_effect(requested): + if found_file and requested.endswith(found_file): + return + + raise pdfjs.PDFJSNotFound(requested) + + mock = mocker.patch.object(pdfjs, 'get_pdfjs_res', autospec=True) + mock.side_effect = side_effect + + if found_file is None: + with pytest.raises( + pdfjs.PDFJSNotFound, + match="Path 'build/pdf.js or build/pdf.mjs' not found" + ): + pdfjs.get_pdfjs_js_path() + else: + assert pdfjs.get_pdfjs_js_path() == found_file + + @pytest.mark.parametrize('mimetype, url, enabled, expected', [ # PDF files ('application/pdf', 'http://www.example.com', True, True), -- cgit v1.2.3-54-g00ecf From 1d29cf641ba25556ca148dc0a20abe0b90469874 Mon Sep 17 00:00:00 2001 From: Florian Bruhin Date: Mon, 4 Dec 2023 10:46:30 +0100 Subject: Simplify get_pdfjs test Given that the two branches share rather little, it seems simpler to separate the two tests. Also use monkeypatch, since we don't use any of unittest.mock's complexity --- tests/unit/browser/test_pdfjs.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/tests/unit/browser/test_pdfjs.py b/tests/unit/browser/test_pdfjs.py index 7bd9588b4..cb5c26229 100644 --- a/tests/unit/browser/test_pdfjs.py +++ b/tests/unit/browser/test_pdfjs.py @@ -196,27 +196,28 @@ def test_is_available(available, mocker): @pytest.mark.parametrize('found_file', [ "build/pdf.js", "build/pdf.mjs", - None, ]) -def test_get_pdfjs_js_path(found_file, mocker): - - def side_effect(requested): - if found_file and requested.endswith(found_file): +def test_get_pdfjs_js_path(found_file: str, monkeypatch: pytest.MonkeyPatch): + def fake_pdfjs_res(requested): + if requested.endswith(found_file): return + raise pdfjs.PDFJSNotFound(requested) + + monkeypatch.setattr(pdfjs, 'get_pdfjs_res', fake_pdfjs_res) + assert pdfjs.get_pdfjs_js_path() == found_file + +def test_get_pdfjs_js_path_none(monkeypatch: pytest.MonkeyPatch): + def fake_pdfjs_res(requested): raise pdfjs.PDFJSNotFound(requested) - mock = mocker.patch.object(pdfjs, 'get_pdfjs_res', autospec=True) - mock.side_effect = side_effect - - if found_file is None: - with pytest.raises( - pdfjs.PDFJSNotFound, - match="Path 'build/pdf.js or build/pdf.mjs' not found" - ): - pdfjs.get_pdfjs_js_path() - else: - assert pdfjs.get_pdfjs_js_path() == found_file + monkeypatch.setattr(pdfjs, 'get_pdfjs_res', fake_pdfjs_res) + + with pytest.raises( + pdfjs.PDFJSNotFound, + match="Path 'build/pdf.js or build/pdf.mjs' not found" + ): + pdfjs.get_pdfjs_js_path() @pytest.mark.parametrize('mimetype, url, enabled, expected', [ -- cgit v1.2.3-54-g00ecf