diff options
author | Adam Tauber <asciimoo@gmail.com> | 2017-01-18 23:50:11 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-01-18 23:50:11 +0100 |
commit | 57149661e4f90e5ad3a9a4c36bb5f31a1c7ec6aa (patch) | |
tree | cf41a6559ef7327315101641591411b70b2731ee /searx | |
parent | b1d49bacb0d6135a7c0a5a32a82681b12b1762cd (diff) | |
parent | 7814d4b7969c089ea033147ed725c90ac3ef1974 (diff) | |
download | searxng-57149661e4f90e5ad3a9a4c36bb5f31a1c7ec6aa.tar.gz searxng-57149661e4f90e5ad3a9a4c36bb5f31a1c7ec6aa.zip |
Merge pull request #830 from davidar/se
Semantic Scholar
Diffstat (limited to 'searx')
-rw-r--r-- | searx/engines/xpath.py | 8 | ||||
-rw-r--r-- | searx/settings.yml | 11 |
2 files changed, 15 insertions, 4 deletions
diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py
index 18943bba4..0d39b28a8 100644
--- a/searx/engines/xpath.py
+++ b/searx/engines/xpath.py
@@ -31,8 +31,6 @@ if xpath_results is a string element, then it's already done
 def extract_text(xpath_results):
     if type(xpath_results) == list:
         # it's list of result : concat everything using recursive call
-        if not xpath_results:
-            raise Exception('Empty url resultset')
         result = ''
         for e in xpath_results:
             result = result + extract_text(e)
@@ -48,6 +46,8 @@ def extract_text(xpath_results):
 
 
 def extract_url(xpath_results, search_url):
+    if xpath_results == []:
+        raise Exception('Empty url resultset')
     url = extract_text(xpath_results)
 
     if url.startswith('//'):
@@ -103,8 +103,8 @@ def response(resp):
     if results_xpath:
         for result in dom.xpath(results_xpath):
             url = extract_url(result.xpath(url_xpath), search_url)
-            title = extract_text(result.xpath(title_xpath)[0])
-            content = extract_text(result.xpath(content_xpath)[0])
+            title = extract_text(result.xpath(title_xpath))
+            content = extract_text(result.xpath(content_xpath))
             results.append({'url': url, 'title': title, 'content': content})
     else:
         for url, title, content in zip(
diff --git a/searx/settings.yml b/searx/settings.yml
index 549b2b31d..8515326ec 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -462,6 +462,17 @@ engines:
 # - ...
 # disabled : True
 
+  - name : semantic scholar
+    engine : xpath
+    paging : True
+    search_url : https://www.semanticscholar.org/search?q={query}&sort=relevance&page={pageno}&ae=false
+    results_xpath : //article
+    url_xpath : .//div[@class="search-result-title"]/a/@href
+    title_xpath : .//div[@class="search-result-title"]/a
+    content_xpath : .//div[@class="search-result-abstract"]
+    shortcut : se
+    categories : science
+
   - name : spotify
     engine : spotify
     shortcut : stf |