summaryrefslogtreecommitdiff
path: root/qutebrowser/browser/navigate.py
blob: 82bf57136209a5ce6a84a84c360f853f6c52ffcb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:

# Copyright 2016-2021 Florian Bruhin (The Compiler) <mail@qutebrowser.org>
#
# This file is part of qutebrowser.
#
# qutebrowser is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# qutebrowser is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with qutebrowser.  If not, see <https://www.gnu.org/licenses/>.

"""Implementation of :navigate."""

import re
import posixpath
from typing import Optional, Set

from PyQt5.QtCore import QUrl

from qutebrowser.browser import webelem
from qutebrowser.config import config
from qutebrowser.utils import objreg, urlutils, log, message, qtutils
from qutebrowser.mainwindow import mainwindow


class Error(Exception):

    """Exception signalling that a requested navigation is not possible."""


# The segments of a URL, listed in the order they appear within the URL.
# Every entry is a (segment name, getter, setter) tuple: the getter takes a
# QUrl and returns the segment as a string, the setter takes a QUrl plus the
# new string value and writes it back.
# The getters must not use the FullyDecoded mode, as decoding would lose
# information (host and path use FullyDecoded by default).
_URL_SEGMENTS = [
    (
        'host',
        lambda qurl: qurl.host(QUrl.FullyEncoded),
        lambda qurl, value: qurl.setHost(value, QUrl.StrictMode),
    ),
    (
        'port',
        # An unset port is represented as an empty string.
        lambda qurl: '' if not qurl.port() > 0 else str(qurl.port()),
        lambda qurl, value: qurl.setPort(int(value)),
    ),
    (
        'path',
        lambda qurl: qurl.path(QUrl.FullyEncoded),
        lambda qurl, value: qurl.setPath(value, QUrl.StrictMode),
    ),
    (
        'query',
        lambda qurl: qurl.query(QUrl.FullyEncoded),
        lambda qurl, value: qurl.setQuery(value, QUrl.StrictMode),
    ),
    (
        'anchor',
        lambda qurl: qurl.fragment(QUrl.FullyEncoded),
        lambda qurl, value: qurl.setFragment(value, QUrl.StrictMode),
    ),
]


def _get_incdec_value(match, inc_or_dec, count):
    """Get an incremented/decremented URL segment based on a regex match.

    If the matched number was zero-padded, the result is padded with zeroes
    to the same total width (the width only grows when the new number itself
    needs more digits). Numbers without leading zeroes are never padded.

    Args:
        match: A regex match with four groups: the text before the number,
               the leading zeroes, the remaining digits and the text after
               the number.
        inc_or_dec: Either 'increment' or 'decrement'.
        count: How much to increment or decrement by.

    Returns:
        The segment string with the number replaced.

    Raises:
        Error: If decrementing would result in a negative number.
        ValueError: If inc_or_dec is neither 'increment' nor 'decrement'.
    """
    pre, zeroes, number, post = match.groups()
    # This should always succeed because we match \d+
    val = int(number)
    if inc_or_dec == 'decrement':
        if val < count:
            raise Error("Can't decrement {} by {}!".format(val, count))
        val -= count
    elif inc_or_dec == 'increment':
        val += count
    else:
        raise ValueError("Invalid value {} for inc_or_dec!".format(inc_or_dec))

    new_number = str(val)
    if zeroes:
        # Pad to the original field width rather than adding/removing a
        # single zero, so the width stays correct even when the number of
        # digits changes by more than one (e.g. 0009 + 1000 -> 1009).
        new_number = new_number.zfill(len(zeroes) + len(number))

    return ''.join([pre, new_number, post])


def incdec(url, count, inc_or_dec):
    """Helper method for :navigate when `where' is increment/decrement.

    The segments enabled via the url.incdec_segments setting are searched
    starting from the end of the URL. The last number in the first segment
    containing one is incremented/decremented.

    Args:
        url: The current url.
        count: How much to increment or decrement by.
        inc_or_dec: Either 'increment' or 'decrement'.

    Returns:
        A new QUrl with the number changed; the given url is not modified.

    Raises:
        Error: If none of the enabled segments contains a number, or if the
               number can't be decremented by count.
    """
    urlutils.ensure_valid(url)

    configured = config.val.url.incdec_segments
    # Check for an unset option before building the set: set(None) would
    # raise a TypeError and the fallback below could never be reached.
    segments: Optional[Set[str]] = (
        None if configured is None else set(configured)
    )

    if segments is None:
        segments = {'path', 'query'}

    # Make a copy of the QUrl so we don't modify the original
    url = QUrl(url)
    # We're searching the last number so we walk the url segments backwards
    for segment, getter, setter in reversed(_URL_SEGMENTS):
        if segment not in segments:
            continue

        # Get the last number in a string not preceded by regex '%' or '%.'
        match = re.fullmatch(r'(.*\D|^)(?<!%)(?<!%.)(0*)(\d+)(.*)',
                             getter(url))
        if not match:
            continue

        setter(url, _get_incdec_value(match, inc_or_dec, count))
        qtutils.ensure_valid(url)

        return url

    raise Error("No number found in URL!")


def path_up(url, count):
    """Helper method for :navigate when `where' is up.

    Query and fragment are dropped from the URL before going up.

    Args:
        url: The current url.
        count: The number of levels to go up in the url.

    Returns:
        The adjusted copy of the URL with the shortened path.

    Raises:
        Error: If the path is empty or already '/'.
    """
    urlutils.ensure_valid(url)
    parent_url = url.adjusted(QUrl.RemoveFragment | QUrl.RemoveQuery)
    current = parent_url.path(QUrl.FullyEncoded)
    if not current or current == '/':
        raise Error("Can't go up!")

    # Never go up more levels than there are slashes in the path.
    levels = min(count, current.count('/'))
    climbed = posixpath.join(current, *([posixpath.pardir] * levels))
    parent_url.setPath(posixpath.normpath(climbed), QUrl.StrictMode)
    return parent_url


def strip(url, count):
    """Return a copy of the URL with fragment and query removed.

    Args:
        url: The current url.
        count: Must be 1, as a count makes no sense when stripping.

    Raises:
        Error: If count is anything other than 1.
    """
    if count == 1:
        urlutils.ensure_valid(url)
        removals = QUrl.RemoveFragment | QUrl.RemoveQuery
        return url.adjusted(removals)
    raise Error("Count is not supported when stripping URL components")


def _find_prevnext(prev, elems):
    """Pick the element to use for prev/next navigation.

    Args:
        prev: True to look for a "previous" element, False for a "next" one.
        elems: The candidate elements to search through.

    Returns:
        The first matching element, or None if nothing matched.
    """
    if prev:
        wanted_rels = {'prev', 'previous'}
        wanted_classes = {'nav-prev'}
        option = 'prev_regexes'
    else:
        wanted_rels = {'next'}
        wanted_classes = {'nav-next'}
        option = 'next_regexes'

    # Explicit markup wins: <link rel="prev(ious)|next"> as well as
    # e.g. <a class="nav-(prev|next)"> (Hugo)
    for elem in elems:
        if elem.tag_name() not in ('link', 'a'):
            continue
        if 'rel' in elem and wanted_rels.intersection(elem['rel'].split(' ')):
            log.hints.debug("Found {!r} with rel={}".format(elem, elem['rel']))
            return elem
        if elem.classes() & wanted_classes:
            log.hints.debug(
                "Found {!r} with class={}".format(elem, elem.classes()))
            return elem

    # Otherwise fall back to matching the configured regexes against
    # regular links/buttons (<link> elements are not considered here).
    candidates = [elem for elem in elems if elem.tag_name() != 'link']
    if not candidates:
        return None

    for regex in getattr(config.val.hints, option):
        log.hints.vdebug(  # type: ignore[attr-defined]
            "== Checking regex '{}'.".format(regex.pattern))
        for elem in candidates:
            text = str(elem)
            if not text:
                continue
            if regex.search(text):
                log.hints.debug("Regex '{}' matched on '{}'.".format(
                    regex.pattern, text))
                return elem
            log.hints.vdebug(  # type: ignore[attr-defined]
                "No match on '{}'!".format(text))
    return None


def prevnext(*, browsertab, win_id, baseurl, prev=False,
             tab=False, background=False, window=False):
    """Find a "previous"/"next" link on the page and open it.

    The links are searched asynchronously; problems while searching or a
    missing link are reported as error messages.

    Args:
        browsertab: The WebKitTab/WebEngineTab of the page.
        win_id: The ID of the window to get the tabbed browser for.
        baseurl: The base URL of the current tab.
        prev: True to open a "previous" link, False to open a "next" link.
        tab: True to open in a new tab, False for the current tab.
        background: True to open in a background tab.
        window: True to open in a new window, False for the current one.

    Raises:
        Error: If the CSS selector for links could not be built.
    """
    def _open_found_link(elems):
        """Open the prev/next link among the found elements, if any."""
        word = 'prev' if prev else 'forward'

        elem = _find_prevnext(prev, elems)
        # Neither a missing element nor an unresolvable URL can be opened.
        url = None if elem is None else elem.resolve_url(baseurl)
        if url is None:
            message.error("No {} links found!".format(word))
            return
        qtutils.ensure_valid(url)

        current_browser = objreg.get('tabbed-browser', scope='window',
                                     window=win_id)

        if window:
            # The new window inherits the private flag of the current one.
            opened = mainwindow.MainWindow(
                private=current_browser.is_private)
            opened.show()
            new_browser = objreg.get('tabbed-browser', scope='window',
                                     window=opened.win_id)
            new_browser.tabopen(url, background=False)
            return

        if tab:
            current_browser.tabopen(url, background=background)
            return

        browsertab.load_url(url)

    def _report_error(err):
        """Show an error which happened while searching for elements."""
        return message.error(str(err))

    try:
        link_selector = webelem.css_selector('links', baseurl)
    except webelem.Error as e:
        raise Error(str(e))

    browsertab.elements.find_css(link_selector, callback=_open_found_link,
                                 error_cb=_report_error)