-rw-r--r--   .github/workflows/data-update.yml |  2
-rw-r--r--   AUTHORS.rst                       |  2
-rw-r--r--   searx/engines/apkmirror.py        |  2
-rw-r--r--   searx/engines/loc.py              | 68
-rw-r--r--   searx/engines/wikipedia.py        |  9
-rw-r--r--   searx/settings.yml                | 32
-rw-r--r--   searx/webutils.py                 |  5
-rw-r--r--   tests/unit/test_webutils.py       | 22
8 files changed, 112 insertions, 30 deletions
diff --git a/.github/workflows/data-update.yml b/.github/workflows/data-update.yml
index a97169767..70e491153 100644
--- a/.github/workflows/data-update.yml
+++ b/.github/workflows/data-update.yml
@@ -7,7 +7,7 @@ jobs:
   updateData:
     name: Update data
     runs-on: ubuntu-20.04
-    if: env.DATA_PR_TOKEN != null
+    if: secrets.DATA_PR_TOKEN != null
     steps:
       - name: Checkout
         uses: actions/checkout@v2
diff --git a/AUTHORS.rst b/AUTHORS.rst
index 036ae0fe1..b44a10b55 100644
--- a/AUTHORS.rst
+++ b/AUTHORS.rst
@@ -154,6 +154,6 @@ generally made searx better:
 - @mrwormo
 - Xiaoyu WEI @xywei
 - @joshu9h
-
+- Daniel Hones
diff --git a/searx/engines/apkmirror.py b/searx/engines/apkmirror.py
index a4c66e891..a9ddd711a 100644
--- a/searx/engines/apkmirror.py
+++ b/searx/engines/apkmirror.py
@@ -45,7 +45,7 @@ def response(resp):
     dom = html.fromstring(resp.text)
 
     # parse results
-    for result in eval_xpath_list(dom, './/div[@id="content"]/div[@class="listWidget"]/div[@class="appRow"]'):
+    for result in eval_xpath_list(dom, './/div[@id="content"]/div[@class="listWidget"]//div[@class="appRow"]'):
 
         link = eval_xpath_getindex(result, './/h5/a', 0)
         url = base_url + link.attrib.get('href') + '#downloads'
diff --git a/searx/engines/loc.py b/searx/engines/loc.py
new file mode 100644
index 000000000..5c09ceff2
--- /dev/null
+++ b/searx/engines/loc.py
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+
+ Library of Congress : images from Prints and Photographs Online Catalog
+
+"""
+
+from json import loads
+from urllib.parse import urlencode
+
+
+about = {
+    "website": 'https://www.loc.gov/pictures/',
+    "wikidata_id": 'Q131454',
+    "official_api_documentation": 'https://www.loc.gov/pictures/api',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
+categories = ['images']
+
+paging = True
+
+base_url = 'https://loc.gov/pictures/search/?'
+search_string = "&sp={page}&{query}&fo=json"
+
+IMG_SRC_FIXES = {
+    'https://tile.loc.gov/storage-services/': 'https://tile.loc.gov/storage-services/',
+    'https://loc.gov/pictures/static/images/': 'https://tile.loc.gov/storage-services/',
+    'https://www.loc.gov/pictures/cdn/': 'https://tile.loc.gov/storage-services/',
+}
+
+
+def request(query, params):
+
+    search_path = search_string.format(
+        query=urlencode({'q': query}),
+        page=params['pageno'])
+
+    params['url'] = base_url + search_path
+
+    return params
+
+
+def response(resp):
+    results = []
+
+    json_data = loads(resp.text)
+
+    for result in json_data['results']:
+        img_src = result['image']['full']
+        for url_prefix, url_replace in IMG_SRC_FIXES.items():
+            if img_src.startswith(url_prefix):
+                img_src = img_src.replace(url_prefix, url_replace)
+                break
+        else:
+            img_src = result['image']['thumb']
+        results.append({
+            'url': result['links']['item'],
+            'title': result['title'],
+            'img_src': img_src,
+            'thumbnail_src': result['image']['thumb'],
+            'author': result['creator'],
+            'template': 'images.html'
+        })
+
+    return results
diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py
index eff301145..c8e589e64 100644
--- a/searx/engines/wikipedia.py
+++ b/searx/engines/wikipedia.py
@@ -22,6 +22,7 @@ about = {
 # search-url
 search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
 supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
+language_variants = {"zh": ("zh-cn", "zh-hk", "zh-mo", "zh-my", "zh-sg", "zh-tw")}
 
 
 # set language in base_url
@@ -37,8 +38,12 @@ def request(query, params):
     if query.islower():
         query = query.title()
 
+    language = url_lang(params['language'])
     params['url'] = search_url.format(title=quote(query),
-                                      language=url_lang(params['language']))
+                                      language=language)
+
+    if params['language'].lower() in language_variants.get(language, []):
+        params['headers']['Accept-Language'] = params['language'].lower()
 
     params['headers']['User-Agent'] = searx_useragent()
     params['raise_for_httperror'] = False
@@ -60,7 +65,7 @@ def response(resp):
     if api_result.get('type') != 'standard':
         return []
 
-    title = api_result['title']
+    title = api_result['displaytitle']
     wikipedia_link = api_result['content_urls']['desktop']['page']
     results.append({'url': wikipedia_link, 'title': title})
diff --git a/searx/settings.yml b/searx/settings.yml
index f03d8aff3..87008eb20 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -583,25 +583,6 @@ engines:
-  - name : google play music
-    engine : xpath
-    search_url : https://play.google.com/store/search?q={query}&c=music
-    results_xpath : '//div[@class="WHE7ib mpg5gc"]'
-    title_xpath : './/div[@class="RZEgze"]//div[@title and not(@title="")]/a'
-    url_xpath : './/div[@class="RZEgze"]//div[@title and not(@title="")]/a/@href'
-    content_xpath : './/div[@class="RZEgze"]//a[@class="mnKHRc"]'
-    thumbnail_xpath : './/div[@class="uzcko"]/div/span[1]//img/@data-src'
-    categories : music
-    shortcut : gps
-    disabled : True
-    about:
-      website: https://play.google.com/
-      wikidata_id: Q79576
-      official_api_documentation:
-      use_official_api: false
-      require_api_key: false
-      results: HTML
-
   - name : geektimes
     engine : xpath
     paging : True
@@ -698,6 +679,11 @@ engines:
       require_api_key: false
      results: HTML
 
+  - name : library of congress
+    engine : loc
+    shortcut : loc
+    categories : images
+
   - name : lobste.rs
     engine : xpath
     search_url : https://lobste.rs/search?utf8=%E2%9C%93&q={query}&what=stories&order=relevance
@@ -1229,11 +1215,9 @@ engines:
     engine: xpath
     paging : True
     search_url : https://search.naver.com/search.naver?where=webkr&sm=osp_hty&ie=UTF-8&query={query}&start={pageno}
-    results_xpath: /html/body//ul[@id="elThumbnailResultArea"]/li
-    url_xpath : ./dl/dt/a[@class="title_link"]/@href
-    title_xpath : ./dl/dt/a[@class="title_link"]
-    content_xpath : ./dl/dd[@class="sh_web_passage"]
-    suggestion_xpath : /html/body//div[@class="sp_keyword section"]//a
+    url_xpath : //a[@class="link_tit"]/@href
+    title_xpath : //a[@class="link_tit"]
+    content_xpath : //a[@class="total_dsc"]/div
     first_page_num : 1
     page_size : 10
     disabled : True
diff --git a/searx/webutils.py b/searx/webutils.py
index 8be8fcecd..2464a097f 100644
--- a/searx/webutils.py
+++ b/searx/webutils.py
@@ -119,7 +119,10 @@ def highlight_content(content, query):
     else:
         regex_parts = []
         for chunk in query.split():
-            if len(chunk) == 1:
+            chunk = chunk.replace('"', '')
+            if len(chunk) == 0:
+                continue
+            elif len(chunk) == 1:
                 regex_parts.append('\\W+{0}\\W+'.format(re.escape(chunk)))
             else:
                 regex_parts.append('{0}'.format(re.escape(chunk)))
diff --git a/tests/unit/test_webutils.py b/tests/unit/test_webutils.py
index aa464688b..023374b04 100644
--- a/tests/unit/test_webutils.py
+++ b/tests/unit/test_webutils.py
@@ -34,6 +34,28 @@ class TestWebUtils(SearxTestCase):
         query = 'a test'
         self.assertEqual(webutils.highlight_content(content, query), content)
 
+        data = (
+            ('" test "',
+             'a test string',
+             'a <span class="highlight">test</span> string'),
+            ('"a"',
+             'this is a test string',
+             'this is<span class="highlight"> a </span>test string'),
+            ('a test',
+             'this is a test string that matches entire query',
+             'this is <span class="highlight">a test</span> string that matches entire query'),
+            ('this a test',
+             'this is a string to test.',
+             ('<span class="highlight">this</span> is<span class="highlight"> a </span>'
+              'string to <span class="highlight">test</span>.')),
+            ('match this "exact phrase"',
+             'this string contains the exact phrase we want to match',
+             ('<span class="highlight">this</span> string contains the <span class="highlight">exact</span>'
+              ' <span class="highlight">phrase</span> we want to <span class="highlight">match</span>'))
+        )
+        for query, content, expected in data:
+            self.assertEqual(webutils.highlight_content(content, query), expected)
+
 
 class TestUnicodeWriter(SearxTestCase):
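
A quick way to poke at the new loc.py engine is to load the module directly from a checkout of this tree and drive its request()/response() pair offline; the sketch below is only an illustration, not part of the commit. The JSON payload and query values are invented; only the field names (results, image.full, image.thumb, links.item, title, creator) and the request/response contract come from the code added above.

# Minimal sketch: load searx/engines/loc.py by path (it only needs the
# standard library) so the rest of the searx package is not imported.
# Run from the repository root; the payload below is made up.
import importlib.util
import json

spec = importlib.util.spec_from_file_location('loc', 'searx/engines/loc.py')
loc = importlib.util.module_from_spec(spec)
spec.loader.exec_module(loc)


class FakeResponse:
    """Stand-in for the HTTP response object the engine normally receives."""
    def __init__(self, payload):
        self.text = json.dumps(payload)


# request() only has to fill in params['url'] from the query and page number.
params = loc.request('fire truck', {'pageno': 1})
print(params['url'])
# -> https://loc.gov/pictures/search/?&sp=1&q=fire+truck&fo=json

# response() turns the Prints and Photographs JSON into image results; an
# img_src matching a prefix in IMG_SRC_FIXES gets that prefix rewritten,
# anything else falls back to the thumbnail (the for/else in loc.py).
payload = {
    'results': [{
        'image': {
            'full': 'https://tile.loc.gov/storage-services/service/pnp/example/0001f.jpg',
            'thumb': 'https://tile.loc.gov/storage-services/service/pnp/example/0001t.gif',
        },
        'links': {'item': 'https://www.loc.gov/pictures/item/00000001/'},
        'title': 'Example item',
        'creator': 'Example creator',
    }]
}
for result in loc.response(FakeResponse(payload)):
    print(result['title'], '->', result['img_src'])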