summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Dockerfile3
-rw-r--r--requirements-dev.txt2
-rw-r--r--requirements.txt2
-rw-r--r--searx/engines/semantic_scholar.py42
-rw-r--r--searx/engines/seznam.py37
-rw-r--r--searx/settings.yml11
6 files changed, 67 insertions, 30 deletions
diff --git a/Dockerfile b/Dockerfile
index f251d06ea..3894aa968 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -41,8 +41,6 @@ RUN apk upgrade --no-cache \
openssl-dev \
tar \
git \
- protoc \
- protobuf-dev \
&& apk add --no-cache \
ca-certificates \
su-exec \
@@ -55,7 +53,6 @@ RUN apk upgrade --no-cache \
uwsgi \
uwsgi-python3 \
brotli \
- protobuf \
&& pip3 install --upgrade pip \
&& pip3 install --no-cache -r requirements.txt \
&& apk del build-dependencies \
diff --git a/requirements-dev.txt b/requirements-dev.txt
index ef948c587..2ed51f067 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -14,4 +14,4 @@ sphinx-jinja==1.1.1
sphinx-tabs==2.1.0
sphinxcontrib-programoutput==0.16
sphinx-autobuild==2020.9.1
-linuxdoc==20210110
+linuxdoc==20210324
diff --git a/requirements.txt b/requirements.txt
index 39eff78ad..bfbcecc51 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,7 +4,7 @@ flask-babel==2.0.0
flask==1.1.2
idna==2.10
jinja2==2.11.3
-lxml==4.6.2
+lxml==4.6.3
pygments==2.8.0
python-dateutil==2.8.1
pyyaml==5.4.1
diff --git a/searx/engines/semantic_scholar.py b/searx/engines/semantic_scholar.py
new file mode 100644
index 000000000..297d0cf71
--- /dev/null
+++ b/searx/engines/semantic_scholar.py
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Semantic Scholar (Science)
+"""
+
+from json import dumps, loads
+
+
+search_url = 'https://www.semanticscholar.org/api/1/search'
+
+
+def request(query, params):
+ params['url'] = search_url
+ params['method'] = 'POST'
+ params['headers']['content-type'] = 'application/json'
+ params['data'] = dumps({
+ "queryString": query,
+ "page": params['pageno'],
+ "pageSize": 10,
+ "sort": "relevance",
+ "useFallbackRankerService": False,
+ "useFallbackSearchCluster": False,
+ "getQuerySuggestions": False,
+ "authors": [],
+ "coAuthors": [],
+ "venues": [],
+ "performTitleMatch": True,
+ })
+ return params
+
+
+def response(resp):
+ res = loads(resp.text)
+ results = []
+ for result in res['results']:
+ results.append({
+ 'url': result['primaryPaperLink']['url'],
+ 'title': result['title']['text'],
+ 'content': result['paperAbstractTruncated']
+ })
+
+ return results
diff --git a/searx/engines/seznam.py b/searx/engines/seznam.py
index 1df92a845..faceb0550 100644
--- a/searx/engines/seznam.py
+++ b/searx/engines/seznam.py
@@ -7,7 +7,12 @@ from urllib.parse import urlencode, urlparse
from lxml import html
from searx.poolrequests import get
from searx.exceptions import SearxEngineAccessDeniedException
-from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
+from searx.utils import (
+ extract_text,
+ eval_xpath_list,
+ eval_xpath_getindex,
+ eval_xpath,
+)
# about
about = {
@@ -26,7 +31,10 @@ def request(query, params):
response_index = get(base_url, headers=params['headers'], raise_for_httperror=True)
dom = html.fromstring(response_index.text)
- url_params = {'q': query}
+ url_params = {
+ 'q': query,
+ 'oq': query,
+ }
for e in eval_xpath_list(dom, '//input[@type="hidden"]'):
name = e.get('name')
value = e.get('value')
@@ -45,20 +53,15 @@ def response(resp):
results = []
dom = html.fromstring(resp.content.decode())
- for result_element in eval_xpath_list(dom, '//div[@id="searchpage-root"]//div[@data-dot="results"]/div'):
- dot_data = eval_xpath_getindex(result_element, './div/div[@data-dot-data]/@data-dot-data', 0, default=None)
- if dot_data is None:
- title_element = eval_xpath_getindex(result_element, './/h3/a', 0)
- results.append({
- 'url': title_element.get('href'),
- 'title': extract_text(title_element),
- 'content': extract_text(eval_xpath_getindex(title_element, '../../div[2]', 0)),
- })
- elif dot_data == '{"reporter_name":"hint/related/relates"}':
- suggestions_element = eval_xpath_getindex(result_element,
- './div/div[@data-dot="main-box"]', 0, default=None)
- if suggestions_element is not None:
- for suggestion in eval_xpath_list(suggestions_element, './/ul/li'):
- results.append({'suggestion': extract_text(suggestion)})
+ for result_element in eval_xpath_list(dom, '//div[@data-dot="results"]/div'):
+ result_data = eval_xpath_getindex(result_element, './/div[contains(@class, "Result")]', 0, default=None)
+ if result_data is None:
+ continue
+ title_element = eval_xpath_getindex(result_element, './/h3/a', 0)
+ results.append({
+ 'url': title_element.get('href'),
+ 'title': extract_text(title_element),
+ 'content': extract_text(eval_xpath(result_data, './/p[@class="Result-description"]')),
+ })
return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 1958210d7..84aca86f1 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -918,13 +918,8 @@ engines:
# disabled : True
- name : semantic scholar
- engine : xpath
- paging : True
- search_url : https://www.semanticscholar.org/search?q={query}&sort=relevance&page={pageno}&ae=false
- results_xpath : //article
- url_xpath : .//div[@class="search-result-title"]/a/@href
- title_xpath : .//div[@class="search-result-title"]/a
- content_xpath : .//div[@class="search-result-abstract"]
+ engine : semantic_scholar
+ disabled : True
shortcut : se
categories : science
about:
@@ -933,7 +928,7 @@ engines:
official_api_documentation: https://api.semanticscholar.org/
use_official_api: false
require_api_key: false
- results: HTML
+ results: JSON
# Spotify needs API credentials
# - name : spotify