add get_embeded_stream_url to searx.utils

author: Austin-Olacsi <138650713+Austin-Olacsi@users.noreply.github.com> 2024-09-14 16:28:35 -0600
committer: Markus Heiser <markus.heiser@darmarIT.de> 2024-10-03 07:10:53 +0200
commit: cbf1e9097929cf851d31bfd17e87bec7d1e51422 (patch)
tree: 70532240b01da30e7acd54c86e53825ac9a10135 /searx/utils.py
parent: f07ab6deb0f43a2d08f4f12335481825c6aa77ac (diff)
download: searxng-cbf1e9097929cf851d31bfd17e87bec7d1e51422.tar.gz searxng-cbf1e9097929cf851d31bfd17e87bec7d1e51422.zip
1 file changed, 47 insertions, 1 deletion
diff --git a/searx/utils.py b/searx/utils.py
index 407d44cd0..c0c6261f9 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -17,7 +17,7 @@ from os.path import splitext, join
 from random import choice
 from html.parser import HTMLParser
 from html import escape
-from urllib.parse import urljoin, urlparse
+from urllib.parse import urljoin, urlparse, parse_qs, urlencode
 from markdown_it import MarkdownIt
 
 from lxml import html
@@ -615,6 +615,52 @@ def _get_fasttext_model() -> "fasttext.FastText._FastText":  # type: ignore
     return _FASTTEXT_MODEL
 
 
+def get_embeded_stream_url(url):
+    """
+    Converts a standard video URL into its embed format. Supported services include YouTube,
+    Facebook, Instagram, TikTok, and Dailymotion.
+    """
+    parsed_url = urlparse(url)
+    iframe_src = None
+
+    # YouTube
+    if parsed_url.netloc in ['www.youtube.com', 'youtube.com'] and parsed_url.path == '/watch' and parsed_url.query:
+        video_id = parse_qs(parsed_url.query).get('v', [])
+        if video_id:
+            iframe_src = 'https://www.youtube-nocookie.com/embed/' + video_id[0]
+
+    # Facebook
+    elif parsed_url.netloc in ['www.facebook.com', 'facebook.com']:
+        encoded_href = urlencode({'href': url})
+        iframe_src = 'https://www.facebook.com/plugins/video.php?allowfullscreen=true&' + encoded_href
+
+    # Instagram
+    elif parsed_url.netloc in ['www.instagram.com', 'instagram.com'] and parsed_url.path.startswith('/p/'):
+        if parsed_url.path.endswith('/'):
+            iframe_src = url + 'embed'
+        else:
+            iframe_src = url + '/embed'
+
+    # TikTok
+    elif (
+        parsed_url.netloc in ['www.tiktok.com', 'tiktok.com']
+        and parsed_url.path.startswith('/@')
+        and '/video/' in parsed_url.path
+    ):
+        path_parts = parsed_url.path.split('/video/')
+        video_id = path_parts[1]
+        iframe_src = 'https://www.tiktok.com/embed/' + video_id
+
+    # Dailymotion
+    elif parsed_url.netloc in ['www.dailymotion.com', 'dailymotion.com'] and parsed_url.path.startswith('/video/'):
+        path_parts = parsed_url.path.split('/')
+        if len(path_parts) == 3:
+            video_id = path_parts[2]
+            iframe_src = 'https://www.dailymotion.com/embed/video/' + video_id
+
+    return iframe_src
+
+
 def detect_language(text: str, threshold: float = 0.3, only_search_languages: bool = False) -> Optional[str]:
     """Detect the language of the ``text`` parameter.
author	Austin-Olacsi <138650713+Austin-Olacsi@users.noreply.github.com>	2024-09-14 16:28:35 -0600
committer	Markus Heiser <markus.heiser@darmarIT.de>	2024-10-03 07:10:53 +0200
commit	cbf1e9097929cf851d31bfd17e87bec7d1e51422 (patch)
tree	70532240b01da30e7acd54c86e53825ac9a10135 /searx/utils.py
parent	f07ab6deb0f43a2d08f4f12335481825c6aa77ac (diff)
download	searxng-cbf1e9097929cf851d31bfd17e87bec7d1e51422.tar.gz searxng-cbf1e9097929cf851d31bfd17e87bec7d1e51422.zip