diff options
author | Alexandre Flament <alex@al-f.net> | 2020-12-06 10:18:49 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-12-06 10:18:49 +0100 |
commit | 925bb561a2797b6d3cc5be748c2361721b8e1f08 (patch) | |
tree | c3837258d08b7fe89ac62dcd08f96c510c3f3977 /searx/engines | |
parent | cdceec1cbb2ca894572396e0a68c2d09b0769231 (diff) | |
parent | 38d32337e7600446c04ecd601b56d6915a4046fd (diff) | |
download | searxng-925bb561a2797b6d3cc5be748c2361721b8e1f08.tar.gz searxng-925bb561a2797b6d3cc5be748c2361721b8e1f08.zip |
Merge pull request #2352 from dalf/no_http
Remove HTTP connections as much as possible
Diffstat (limited to 'searx/engines')
-rw-r--r-- | searx/engines/filecrop.py | 85 |
1 files changed, 0 insertions, 85 deletions
diff --git a/searx/engines/filecrop.py b/searx/engines/filecrop.py deleted file mode 100644 index 0331e7b19..000000000 --- a/searx/engines/filecrop.py +++ /dev/null @@ -1,85 +0,0 @@ -from html.parser import HTMLParser -from urllib.parse import urlencode - - -url = 'http://www.filecrop.com/' -search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}' # noqa - -paging = True - - -class FilecropResultParser(HTMLParser): # pylint: disable=W0223 # (see https://bugs.python.org/issue31844) - - def __init__(self): - HTMLParser.__init__(self) - self.__start_processing = False - - self.results = [] - self.result = {} - - self.tr_counter = 0 - self.data_counter = 0 - - def handle_starttag(self, tag, attrs): - - if tag == 'tr': - if ('bgcolor', '#edeff5') in attrs or\ - ('bgcolor', '#ffffff') in attrs: - self.__start_processing = True - - if not self.__start_processing: - return - - if tag == 'label': - self.result['title'] = [attr[1] for attr in attrs - if attr[0] == 'title'][0] - elif tag == 'a' and ('rel', 'nofollow') in attrs\ - and ('class', 'sourcelink') in attrs: - if 'content' in self.result: - self.result['content'] += [attr[1] for attr in attrs - if attr[0] == 'title'][0] - else: - self.result['content'] = [attr[1] for attr in attrs - if attr[0] == 'title'][0] - self.result['content'] += ' ' - elif tag == 'a': - self.result['url'] = url + [attr[1] for attr in attrs - if attr[0] == 'href'][0] - - def handle_endtag(self, tag): - if self.__start_processing is False: - return - - if tag == 'tr': - self.tr_counter += 1 - - if self.tr_counter == 2: - self.__start_processing = False - self.tr_counter = 0 - self.data_counter = 0 - self.results.append(self.result) - self.result = {} - - def handle_data(self, data): - if not self.__start_processing: - return - - if 'content' in self.result: - self.result['content'] += data + ' ' - else: - self.result['content'] = data + ' ' - - self.data_counter += 1 - - -def request(query, params): - index = 1 + (params['pageno'] - 1) * 30 - params['url'] = search_url.format(query=urlencode({'w': query}), index=index) - return params - - -def response(resp): - parser = FilecropResultParser() - parser.feed(resp.text) - - return parser.results |