diff options
author | Alexandre Flament <alex@al-f.net> | 2020-12-13 17:11:05 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-12-13 17:11:05 +0100 |
commit | 36600118fb3e40ebd172245941306943581ef6f8 (patch) | |
tree | 074a4cc5f32d0d4e28ba833142b18eb246166d3b | |
parent | 3c4a9c118844b8d436b1bdb2ece3cd6671371838 (diff) | |
parent | 1c9e7cef50a2fe74760112764181da7d08e13adb (diff) | |
download | searxng-36600118fb3e40ebd172245941306943581ef6f8.tar.gz searxng-36600118fb3e40ebd172245941306943581ef6f8.zip |
Merge pull request #2372 from dalf/remove-broken-engines
[remove] remove searchcode_doc and twitter
-rw-r--r-- | searx/engines/searchcode_doc.py | 49 | ||||
-rw-r--r-- | searx/engines/twitter.py | 87 | ||||
-rw-r--r-- | searx/settings.yml | 8 |
3 files changed, 0 insertions, 144 deletions
diff --git a/searx/engines/searchcode_doc.py b/searx/engines/searchcode_doc.py deleted file mode 100644 index 878d2e792..000000000 --- a/searx/engines/searchcode_doc.py +++ /dev/null @@ -1,49 +0,0 @@ -""" - Searchcode (It) - - @website https://searchcode.com/ - @provide-api yes (https://searchcode.com/api/) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content -""" - -from json import loads -from urllib.parse import urlencode - -# engine dependent config -categories = ['it'] -paging = True - -# search-url -url = 'https://searchcode.com/' -search_url = url + 'api/search_IV/?{query}&p={pageno}' - - -# do search-request -def request(query, params): - params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'] - 1) - - return params - - -# get response from search-request -def response(resp): - results = [] - - search_results = loads(resp.text) - - # parse results - for result in search_results.get('results', []): - href = result['url'] - title = "[{}] {} {}".format(result['type'], result['namespace'], result['name']) - - # append result - results.append({'url': href, - 'title': title, - 'content': result['description']}) - - # return results - return results diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py deleted file mode 100644 index 6d9bdbb5c..000000000 --- a/searx/engines/twitter.py +++ /dev/null @@ -1,87 +0,0 @@ -""" - Twitter (Social media) - - @website https://twitter.com/ - @provide-api yes (https://dev.twitter.com/docs/using-search) - - @using-api no - @results HTML (using search portal) - @stable no (HTML can change) - @parse url, title, content - - @todo publishedDate -""" - -from urllib.parse import urlencode, urljoin -from lxml import html -from datetime import datetime -from searx.utils import extract_text - -# engine dependent config -categories = ['social media'] -language_support = True - -# search-url -base_url = 'https://twitter.com/' -search_url = base_url + 'search?' - -# specific xpath variables -results_xpath = '//li[@data-item-type="tweet"]' -avatar_xpath = './/img[contains(@class, "avatar")]/@src' -link_xpath = './/small[@class="time"]//a' -title_xpath = './/span[contains(@class, "username")]' -content_xpath = './/p[contains(@class, "tweet-text")]' -timestamp_xpath = './/span[contains(@class,"_timestamp")]' - - -# do search-request -def request(query, params): - params['url'] = search_url + urlencode({'q': query}) - - # set language if specified - if params['language'] != 'all': - params['cookies']['lang'] = params['language'].split('-')[0] - else: - params['cookies']['lang'] = 'en' - - return params - - -# get response from search-request -def response(resp): - results = [] - - dom = html.fromstring(resp.text) - - # parse results - for tweet in dom.xpath(results_xpath): - try: - link = tweet.xpath(link_xpath)[0] - content = extract_text(tweet.xpath(content_xpath)[0]) - img_src = tweet.xpath(avatar_xpath)[0] - img_src = img_src.replace('_bigger', '_normal') - except Exception: - continue - - url = urljoin(base_url, link.attrib.get('href')) - title = extract_text(tweet.xpath(title_xpath)) - - pubdate = tweet.xpath(timestamp_xpath) - if len(pubdate) > 0: - timestamp = float(pubdate[0].attrib.get('data-time')) - publishedDate = datetime.fromtimestamp(timestamp, None) - # append result - results.append({'url': url, - 'title': title, - 'content': content, - 'img_src': img_src, - 'publishedDate': publishedDate}) - else: - # append result - results.append({'url': url, - 'title': title, - 'content': content, - 'img_src': img_src}) - - # return results - return results diff --git a/searx/settings.yml b/searx/settings.yml index 3ba9b745f..e263e3ad4 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -694,10 +694,6 @@ engines: engine : stackoverflow shortcut : st - - name : searchcode doc - engine : searchcode_doc - shortcut : scd - - name : searchcode code engine : searchcode_code shortcut : scc @@ -764,10 +760,6 @@ engines: categories : onions shortcut : tch - - name : twitter - engine : twitter - shortcut : tw - # maybe in a fun category # - name : uncyclopedia # engine : mediawiki |