diff options
author | Alexandre Flament <alex@al-f.net> | 2020-11-26 15:49:33 +0100 |
---|---|---|
committer | Alexandre Flament <alex@al-f.net> | 2020-12-03 10:22:48 +0100 |
commit | ad72803ed98760b2a95690dea3b95eacc670427c (patch) | |
tree | 7e8515b6c4752baedb5e80588c9041bc72cdeb04 /searx/engines/arxiv.py | |
parent | de887c6347db4c810f7d1e11c77633340d4e2fbd (diff) | |
download | searxng-ad72803ed98760b2a95690dea3b95eacc670427c.tar.gz searxng-ad72803ed98760b2a95690dea3b95eacc670427c.zip |
[mod] xpath, 1337x, acgsou, apkmirror, archlinux, arxiv: use eval_xpath_* functions
Diffstat (limited to 'searx/engines/arxiv.py')
-rw-r--r-- | searx/engines/arxiv.py | 20 |
1 files changed, 9 insertions, 11 deletions
```diff
diff --git a/searx/engines/arxiv.py b/searx/engines/arxiv.py
index 6e231c382..c702c5987 100644
--- a/searx/engines/arxiv.py
+++ b/searx/engines/arxiv.py
@@ -13,6 +13,7 @@
 
 from lxml import html
 from datetime import datetime
+from searx.utils import eval_xpath_list, eval_xpath_getindex
 
 
 categories = ['science']
@@ -42,29 +43,26 @@ def response(resp):
     results = []
 
     dom = html.fromstring(resp.content)
-    search_results = dom.xpath('//entry')
 
-    for entry in search_results:
-        title = entry.xpath('.//title')[0].text
+    for entry in eval_xpath_list(dom, '//entry'):
+        title = eval_xpath_getindex(entry, './/title', 0).text
 
-        url = entry.xpath('.//id')[0].text
+        url = eval_xpath_getindex(entry, './/id', 0).text
 
         content_string = '{doi_content}{abstract_content}'
 
-        abstract = entry.xpath('.//summary')[0].text
+        abstract = eval_xpath_getindex(entry, './/summary', 0).text
 
         # If a doi is available, add it to the snipppet
-        try:
-            doi_content = entry.xpath('.//link[@title="doi"]')[0].text
-            content = content_string.format(doi_content=doi_content, abstract_content=abstract)
-        except:
-            content = content_string.format(doi_content="", abstract_content=abstract)
+        doi_element = eval_xpath_getindex(entry, './/link[@title="doi"]', 0, default=None)
+        doi_content = doi_element.text if doi_element is not None else ''
+        content = content_string.format(doi_content=doi_content, abstract_content=abstract)
 
         if len(content) > 300:
             content = content[0:300] + "..."
         # TODO: center snippet on query term
 
-        publishedDate = datetime.strptime(entry.xpath('.//published')[0].text, '%Y-%m-%dT%H:%M:%SZ')
+        publishedDate = datetime.strptime(eval_xpath_getindex(entry, './/published', 0).text, '%Y-%m-%dT%H:%M:%SZ')
 
         res_dict = {'url': url,
                     'title': title,
```