# searx/engines/wolframalpha_noapi.py

# WolframAlpha (Maths)
#
# @website     http://www.wolframalpha.com/
#
# @using-api   no
# @results     HTML, JS
# @stable      no
# @parse       answer

import re
import json
from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text

# base address of the WolframAlpha site
url = 'http://www.wolframalpha.com/'
# search-url template; {query} is filled with the url-encoded query string
search_url = url + 'input/?{query}'


# do search-request
def request(query, params):
    """Store the WolframAlpha search url for ``query`` in ``params``.

    The query is url-encoded as the ``i`` parameter and substituted into
    ``search_url``; the resulting address is written to ``params['url']``.
    The same ``params`` object is returned.
    """
    encoded_query = urlencode({'i': query})
    params['url'] = search_url.format(query=encoded_query)
    return params


# tries to find answer under the pattern given
def extract_answer(script_list, pattern):
    """Return the answer text embedded in one of the scripts, or None.

    Each item of ``script_list`` must provide a ``text_content()`` method
    returning the script source (``response`` passes the ``<script>``
    elements of the page).  ``pattern`` is a regular expression whose first
    group captures the line holding the JSON payload; the text between the
    first ``{`` and the last ``}`` of that group is parsed as JSON and its
    ``'stringified'`` entry is the answer.

    Scripts that do not match, or whose payload cannot be parsed, are
    skipped.  When several scripts yield an answer the last one wins.
    """
    answer = None

    for script in script_list:
        match = re.search(pattern, script.text_content())
        if match is None:
            continue

        # cut the json object out of the matched line
        line = match.group(1)
        payload = line[line.find('{'):line.rfind('}') + 1]

        try:
            # 'unicode-escape' doubles every backslash, so escapes that are
            # not valid JSON survive json.loads as literal text
            data = json.loads(payload.encode('unicode-escape').decode('ascii'))
            raw = data['stringified']
            # ... and are resolved here; 'backslashreplace' keeps characters
            # outside ASCII intact instead of failing on them
            answer = raw.encode('ascii', 'backslashreplace').decode('unicode-escape')
        except (ValueError, KeyError, TypeError, AttributeError):
            # malformed payload or no usable 'stringified' entry:
            # keep whatever was found so far and try the next script
            continue

    return answer


# get response from search-request
def response(resp):
    """Extract the WolframAlpha answer from the result page.

    ``resp.text`` is parsed as HTML and every ``<script>`` element is
    searched for the answer.  Returns a list holding a single
    ``{'answer': ...}`` dict, or an empty list when no answer is found.
    """
    dom = html.fromstring(resp.text)

    # the answer is inside a js script
    scripts = dom.xpath('//script')

    # answer can be located in different 'pods', although by default it
    # should be in pod_0200; pod_0100 is only tried as a fallback
    for pod_id in ('pod_0200', 'pod_0100'):
        # raw string: '\.' and '\n' are handed to the regex engine untouched
        answer = extract_answer(scripts, pod_id + r'\.push(.*)\n')
        if answer:
            return [{'answer': answer}]

    return []