summary refs log tree commit diff
diff options
context:
space:
mode:
author Popolon <github@popolon.org> 2018-08-07 17:54:43 +0200
committer GitHub <noreply@github.com> 2018-08-07 17:54:43 +0200
commit ef4820105fe80b9f2aecb346a3e2d1aa7a422910 (patch)
tree 4b7765f2520f156e7238674ac01948d02f52a363
parent 34af9a01418aa6c1efcaa6d30b0cceb504805ba3 (diff)
parent eea2e8e5f369fa20144c799d6a82b293d0d04a84 (diff)
download searxng-ef4820105fe80b9f2aecb346a3e2d1aa7a422910.tar.gz
searxng-ef4820105fe80b9f2aecb346a3e2d1aa7a422910.zip
Merge branch 'master' into master
-rw-r--r-- searx/data/useragents.json 14
-rw-r--r-- searx/utils.py 28
-rwxr-xr-x utils/fetch_firefox_version.py 73
3 files changed, 95 insertions, 20 deletions
diff --git a/searx/data/useragents.json b/searx/data/useragents.json
new file mode 100644
index 000000000..850bc418a
--- /dev/null
+++ b/searx/data/useragents.json
@@ -0,0 +1,14 @@
+{
+ "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}",
+ "versions": [
+ "61.0.1",
+ "61.0",
+ "60.0.2",
+ "60.0.1",
+ "60.0"
+ ],
+ "os": [
+ "Windows NT 10; WOW64",
+ "X11; Linux x86_64"
+ ]
+}
\ No newline at end of file
diff --git a/searx/utils.py b/searx/utils.py
index bd6c3fe2f..dfa22c5fc 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -10,8 +10,10 @@ from codecs import getincrementalencoder
from imp import load_source
from numbers import Number
from os.path import splitext, join
+from io import open
from random import choice
import sys
+import json
from searx import settings
from searx.version import VERSION_STRING
@@ -39,29 +41,11 @@ else:
logger = logger.getChild('utils')
-ua_versions = ('52.8.1',
- '53.0',
- '54.0',
- '55.0',
- '56.0',
- '57.0',
- '58.0',
- '59.0',
- '60.0.2')
-
-ua_os = ('Windows NT 6.3; WOW64',
- 'X11; Linux x86_64',
- 'X11; Linux x86')
-
-ua = "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}"
-
blocked_tags = ('script',
'style')
-
-def gen_useragent(os=None):
- # TODO
- return ua.format(os=os or choice(ua_os), version=choice(ua_versions))
+# User-agent template, Firefox versions and OS strings, read once at import
+# time from data/useragents.json located next to this module.
+# The file is opened in a `with` block so the handle is closed right after
+# parsing instead of being left to the garbage collector.
+with open(join(os.path.dirname(os.path.realpath(__file__)), 'data', 'useragents.json'),
+          'r', encoding='utf-8') as useragents_file:
+    useragents = json.load(useragents_file)
def searx_useragent():
@@ -70,6 +54,10 @@ def searx_useragent():
suffix=settings['outgoing'].get('useragent_suffix', ''))
+def gen_useragent(os=None):
+    """Return a user-agent string built from the `useragents` data.
+
+    `os` is the platform part of the string; when it is falsy, one of
+    useragents['os'] is picked at random. The version is always picked at
+    random from useragents['versions'].
+    """
+    platform = os or choice(useragents['os'])
+    firefox_version = choice(useragents['versions'])
+    user_agent = useragents['ua'].format(os=platform, version=firefox_version)
+    return str(user_agent)
+
+
def highlight_content(content, query):
if not content:
diff --git a/utils/fetch_firefox_version.py b/utils/fetch_firefox_version.py
new file mode 100755
index 000000000..ed179585b
--- /dev/null
+++ b/utils/fetch_firefox_version.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+
+# set path
+from sys import path
+from os.path import realpath, dirname, join
+path.append(realpath(dirname(realpath(__file__)) + '/../'))
+
+#
+import json
+import requests
+import re
+from distutils.version import LooseVersion, StrictVersion
+from lxml import html
+from searx.url_utils import urlparse, urljoin
+from searx import searx_dir
+
+# Directory index that is downloaded and scanned for release links.
+URL = 'https://ftp.mozilla.org/pub/firefox/releases/'
+# URL path prefix a link must have to be considered a release directory.
+RELEASE_PATH = '/pub/firefox/releases/'
+
+# Accepts "<major>.<minor>" and "<major>.<minor>.<patch>" where minor and patch
+# are a single digit each. Raw string: "\." is not a valid escape sequence in
+# a normal string literal.
+NORMAL_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?$')
+# BETA_REGEX = re.compile(r'.*[0-9]b([0-9\-a-z]+)$')
+# ESR_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?esr$')
+
+# Data written to the JSON file; "versions" is filled in at the end of this
+# script, "os" and "ua" are written as they are.
+useragents = {
+    "versions": (),
+    "os": ('Windows NT 10; WOW64',
+           'X11; Linux x86_64'),
+    "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}"
+}
+
+
+def fetch_firefox_versions():
+    """Return the Firefox release versions listed at URL, newest first.
+
+    Downloads the page at URL, keeps every link whose resolved path starts
+    with RELEASE_PATH and whose remaining name is accepted by NORMAL_REGEX,
+    and returns those names as a list of LooseVersion objects sorted in
+    descending order.
+
+    Raises Exception when the response status is not HTTP 200.
+    """
+    resp = requests.get(URL, timeout=2.0)
+    if resp.status_code != 200:
+        # status_code is an integer: convert it explicitly, otherwise the
+        # concatenation raises TypeError and hides the real error.
+        raise Exception("Error fetching firefox versions, HTTP code " + str(resp.status_code))
+
+    dom = html.fromstring(resp.text)
+    versions = []
+
+    for link in dom.xpath('//a/@href'):
+        # named link_path so the `path` imported from sys is not shadowed
+        link_path = urlparse(urljoin(URL, link)).path
+        if not link_path.startswith(RELEASE_PATH):
+            continue
+        # strip the prefix and the last character (presumably the trailing
+        # '/' of a directory link)
+        version = link_path[len(RELEASE_PATH):-1]
+        if NORMAL_REGEX.match(version):
+            versions.append(LooseVersion(version))
+
+    versions.sort(reverse=True)
+    return versions
+
+
+def fetch_firefox_last_versions():
+    """Return the version strings of the newest major release and the one before.
+
+    The newest major number is taken from the first entry returned by
+    fetch_firefox_versions(); every version whose major number equals it, or
+    equals it minus one, is kept, in the order fetch_firefox_versions()
+    returned them.
+
+    Raises IndexError when fetch_firefox_versions() returns an empty list.
+    """
+    all_versions = fetch_firefox_versions()
+
+    newest_major = all_versions[0].version[0]
+    wanted_majors = (newest_major, newest_major - 1)
+
+    return [candidate.vstring
+            for candidate in all_versions
+            if candidate.version[0] in wanted_majors]
+
+
+def get_useragents_filename():
+    """Return the path of data/useragents.json below searx_dir."""
+    return join(searx_dir, "data", "useragents.json")
+
+
+# Fill in the freshly fetched version list, then overwrite the JSON data file
+# with the complete `useragents` structure.
+useragents["versions"] = fetch_firefox_last_versions()
+with open(get_useragents_filename(), "w") as useragents_file:
+    json.dump(useragents, useragents_file, indent=4, ensure_ascii=False)