From 4a7c062dbdc52961adbe0ebb338350410e308250 Mon Sep 17 00:00:00 2001 From: Saptak S Date: Tue, 30 Mar 2021 20:26:16 +0530 Subject: Reviving the old range request PR This commit ports the code from the PR https://github.com/micahflee/onionshare/pull/804/commits onto the newly restructured and refactored codebase. It also fixes import dependencies and adapts any code affected by the new structure of onionshare. --- cli/onionshare_cli/web/share_mode.py | 352 +++++++++++++++++++++++++---------- cli/tests/test_cli_web.py | 234 +++++++++++++++++++++++ cli/tests/test_range_request.py | 41 ++++ 3 files changed, 533 insertions(+), 94 deletions(-) create mode 100644 cli/tests/test_range_request.py (limited to 'cli') diff --git a/cli/onionshare_cli/web/share_mode.py b/cli/onionshare_cli/web/share_mode.py index 72ba8c64..87ccf1d3 100644 --- a/cli/onionshare_cli/web/share_mode.py +++ b/cli/onionshare_cli/web/share_mode.py @@ -18,18 +18,100 @@ You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. 
""" +import binascii +import hashlib import os import sys import tempfile import zipfile import mimetypes -from flask import Response, request, render_template, make_response +from datetime import datetime +from flask import Response, request, render_template, make_response, abort from unidecode import unidecode +from werkzeug.http import parse_date, http_date from werkzeug.urls import url_quote from .send_base_mode import SendBaseModeWeb +def make_etag(data): + hasher = hashlib.sha256() + + while True: + read_bytes = data.read(4096) + if read_bytes: + hasher.update(read_bytes) + else: + break + + hash_value = binascii.hexlify(hasher.digest()).decode('utf-8') + return '"sha256:{}"'.format(hash_value) + + +def parse_range_header(range_header: str, target_size: int) -> list: + end_index = target_size - 1 + if range_header is None: + return [(0, end_index)] + + bytes_ = 'bytes=' + if not range_header.startswith(bytes_): + abort(416) + + ranges = [] + for range_ in range_header[len(bytes_):].split(','): + split = range_.split('-') + if len(split) == 1: + try: + start = int(split[0]) + end = end_index + except ValueError: + abort(416) + elif len(split) == 2: + start, end = split[0], split[1] + if not start: + # parse ranges of the form "bytes=-100" (i.e., last 100 bytes) + end = end_index + try: + start = end - int(split[1]) + 1 + except ValueError: + abort(416) + else: + # parse ranges of the form "bytes=100-200" + try: + start = int(start) + if not end: + end = target_size + else: + end = int(end) + except ValueError: + abort(416) + + if end < start: + abort(416) + + end = min(end, end_index) + else: + abort(416) + + ranges.append((start, end)) + + # merge the ranges + merged = [] + ranges = sorted(ranges, key=lambda x: x[0]) + for range_ in ranges: + # initial case + if not merged: + merged.append(range_) + else: + # merge ranges that are adjacent or overlapping + if range_[0] <= merged[-1][1] + 1: + merged[-1] = (merged[-1][0], max(range_[1], merged[-1][1])) + else: 
+ merged.append(range_) + + return merged + + class ShareModeWeb(SendBaseModeWeb): """ All of the web logic for share mode @@ -43,6 +125,10 @@ class ShareModeWeb(SendBaseModeWeb): "share", "autostop_sharing" ) + self.download_etag = None + self.gzip_etag = None + self.last_modified = datetime.utcnow() + def define_routes(self): """ The web app routes for sharing files @@ -92,7 +178,7 @@ class ShareModeWeb(SendBaseModeWeb): # Prepare some variables to use inside generate() function below # which is outside of the request context shutdown_func = request.environ.get("werkzeug.server.shutdown") - path = request.path + request_path = request.path # If this is a zipped file, then serve as-is. If it's not zipped, then, # if the http client supports gzip compression, gzip the file first @@ -100,117 +186,189 @@ class ShareModeWeb(SendBaseModeWeb): use_gzip = self.should_use_gzip() if use_gzip: file_to_download = self.gzip_filename - self.filesize = self.gzip_filesize + filesize = self.gzip_filesize + etag = self.gzip_etag else: file_to_download = self.download_filename - self.filesize = self.download_filesize + filesize = self.download_filesize + etag = self.download_etag + + # for range requests + range_, status_code = self.get_range_and_status_code(filesize, etag, self.last_modified) # Tell GUI the download started history_id = self.cur_history_id self.cur_history_id += 1 self.web.add_request( - self.web.REQUEST_STARTED, path, {"id": history_id, "use_gzip": use_gzip} + self.web.REQUEST_STARTED, request_path, {"id": history_id, "use_gzip": use_gzip} ) basename = os.path.basename(self.download_filename) - def generate(): - # Starting a new download - if self.web.settings.get("share", "autostop_sharing"): - self.download_in_progress = True - - chunk_size = 102400 # 100kb - - fp = open(file_to_download, "rb") - self.web.done = False - canceled = False - while not self.web.done: - # The user has canceled the download, so stop serving the file - if not self.web.stop_q.empty(): 
- self.web.add_request( - self.web.REQUEST_CANCELED, path, {"id": history_id} - ) - break - - chunk = fp.read(chunk_size) - if chunk == b"": - self.web.done = True - else: - try: - yield chunk - - # tell GUI the progress - downloaded_bytes = fp.tell() - percent = (1.0 * downloaded_bytes / self.filesize) * 100 - - # only output to stdout if running onionshare in CLI mode, or if using Linux (#203, #304) - if ( - not self.web.is_gui - or self.common.platform == "Linux" - or self.common.platform == "BSD" - ): - sys.stdout.write( - "\r{0:s}, {1:.2f}% ".format( - self.common.human_readable_filesize( - downloaded_bytes - ), - percent, - ) - ) - sys.stdout.flush() - - self.web.add_request( - self.web.REQUEST_PROGRESS, - path, - {"id": history_id, "bytes": downloaded_bytes}, - ) - self.web.done = False - except: - # looks like the download was canceled - self.web.done = True - canceled = True - - # tell the GUI the download has canceled - self.web.add_request( - self.web.REQUEST_CANCELED, path, {"id": history_id} - ) - - fp.close() - - if self.common.platform != "Darwin": - sys.stdout.write("\n") - - # Download is finished - if self.web.settings.get("share", "autostop_sharing"): - self.download_in_progress = False - - # Close the server, if necessary - if self.web.settings.get("share", "autostop_sharing") and not canceled: - print("Stopped because transfer is complete") - self.web.running = False - try: - if shutdown_func is None: - raise RuntimeError("Not running with the Werkzeug Server") - shutdown_func() - except: - pass + if status_code == 304: + r = Response() + else: + r = Response( + self.generate(shutdown_func, range_, file_to_download, request_path, + history_id, filesize)) - r = Response(generate()) if use_gzip: - r.headers.set("Content-Encoding", "gzip") - r.headers.set("Content-Length", self.filesize) - filename_dict = { - "filename": unidecode(basename), - "filename*": "UTF-8''%s" % url_quote(basename), - } - r.headers.set("Content-Disposition", "attachment", 
**filename_dict) + r.headers.set('Content-Encoding', 'gzip') + + r.headers.set('Content-Length', range_[1] - range_[0]) + r.headers.set('Content-Disposition', 'attachment', filename=basename) r = self.web.add_security_headers(r) # guess content type (content_type, _) = mimetypes.guess_type(basename, strict=False) if content_type is not None: - r.headers.set("Content-Type", content_type) + r.headers.set('Content-Type', content_type) + + r.headers.set('Content-Length', range_[1] - range_[0]) + r.headers.set('Accept-Ranges', 'bytes') + r.headers.set('ETag', etag) + r.headers.set('Last-Modified', http_date(self.last_modified)) + # we need to set this for range requests + r.headers.set('Vary', 'Accept-Encoding') + + if status_code == 206: + r.headers.set('Content-Range', + 'bytes {}-{}/{}'.format(range_[0], range_[1], filesize)) + + r.status_code = status_code + return r + @classmethod + def get_range_and_status_code(cls, dl_size, etag, last_modified): + use_default_range = True + status_code = 200 + range_header = request.headers.get('Range') + + # range requests are only allowed for get + if request.method == 'GET': + ranges = parse_range_header(range_header, dl_size) + if not (len(ranges) == 1 and ranges[0][0] == 0 and ranges[0][1] == dl_size - 1): + use_default_range = False + status_code = 206 + + if range_header: + if_range = request.headers.get('If-Range') + if if_range and if_range != etag: + use_default_range = True + status_code = 200 + + if use_default_range: + ranges = [(0, dl_size - 1)] + + if len(ranges) > 1: + abort(416) # We don't support multipart range requests yet + range_ = ranges[0] + + etag_header = request.headers.get('ETag') + if etag_header is not None and etag_header != etag: + abort(412) + + if_unmod = request.headers.get('If-Unmodified-Since') + if if_unmod: + if_date = parse_date(if_unmod) + if if_date and if_date > last_modified: + abort(412) + elif range_header is None: + status_code = 304 + + return range_, status_code + + def 
generate(self, shutdown_func, range_, file_to_download, path, history_id, filesize): + # The user hasn't canceled the download + self.client_cancel = False + + # Starting a new download + if self.web.settings.get("share", "autostop_sharing"): + self.download_in_progress = True + + start, end = range_ + + chunk_size = 102400 # 100kb + + fp = open(file_to_download, "rb") + fp.seek(start) + self.web.done = False + canceled = False + bytes_left = end - start + 1 + while not self.web.done: + # The user has canceled the download, so stop serving the file + if not self.web.stop_q.empty(): + self.web.add_request( + self.web.REQUEST_CANCELED, path, {"id": history_id} + ) + break + + read_size = min(chunk_size, bytes_left) + chunk = fp.read(read_size) + if chunk == b"": + self.web.done = True + else: + try: + yield chunk + + # tell GUI the progress + downloaded_bytes = fp.tell() + percent = (1.0 * downloaded_bytes / self.filesize) * 100 + bytes_left -= read_size + + # only output to stdout if running onionshare in CLI mode, or if using Linux (#203, #304) + if ( + not self.web.is_gui + or self.common.platform == "Linux" + or self.common.platform == "BSD" + ): + sys.stdout.write( + "\r{0:s}, {1:.2f}% ".format( + self.common.human_readable_filesize( + downloaded_bytes + ), + percent, + ) + ) + sys.stdout.flush() + + self.web.add_request( + self.web.REQUEST_PROGRESS, + path, + {"id": history_id, "bytes": downloaded_bytes, 'total_bytes': filesize,}, + ) + self.web.done = False + except: + # looks like the download was canceled + self.web.done = True + canceled = True + + # tell the GUI the download has canceled + self.web.add_request( + self.web.REQUEST_CANCELED, path, {"id": history_id} + ) + + fp.close() + + if self.common.platform != "Darwin": + sys.stdout.write("\n") + + # Download is finished + if self.web.settings.get("share", "autostop_sharing"): + self.download_in_progress = False + + # Close the server, if necessary + if self.web.settings.get("share", "autostop_sharing") 
and not canceled: + print("Stopped because transfer is complete") + self.web.running = False + try: + if shutdown_func is None: + raise RuntimeError("Not running with the Werkzeug Server") + shutdown_func() + except: + pass + + def directory_listing_template( self, path, files, dirs, breadcrumbs, breadcrumbs_leaf ): @@ -305,6 +463,8 @@ class ShareModeWeb(SendBaseModeWeb): if len(self.file_info["files"]) == 1 and len(self.file_info["dirs"]) == 0: self.download_filename = self.file_info["files"][0]["filename"] self.download_filesize = self.file_info["files"][0]["size"] + with open(self.download_filename, 'rb') as f: + self.download_etag = make_etag(f) # Compress the file with gzip now, so we don't have to do it on each request self.gzip_filename = tempfile.mkstemp("wb+")[1] @@ -312,6 +472,8 @@ class ShareModeWeb(SendBaseModeWeb): self.download_filename, self.gzip_filename, 6, processed_size_callback ) self.gzip_filesize = os.path.getsize(self.gzip_filename) + with open(self.gzip_filename, 'rb') as f: + self.gzip_etag = make_etag(f) # Make sure the gzip file gets cleaned up when onionshare stops self.cleanup_filenames.append(self.gzip_filename) @@ -337,6 +499,8 @@ class ShareModeWeb(SendBaseModeWeb): self.zip_writer.close() self.download_filesize = os.path.getsize(self.download_filename) + with open(self.download_filename, 'rb') as f: + self.download_etag = make_etag(f) # Make sure the zip file gets cleaned up when onionshare stops self.cleanup_filenames.append(self.zip_writer.zip_filename) diff --git a/cli/tests/test_cli_web.py b/cli/tests/test_cli_web.py index 421ee4f3..e547e538 100644 --- a/cli/tests/test_cli_web.py +++ b/cli/tests/test_cli_web.py @@ -1,15 +1,23 @@ import os import random import re +import socket +import subprocess +import time import zipfile import tempfile import base64 import pytest +from contextlib import contextmanager +from multiprocessing import Process +from urllib.request import urlopen from werkzeug.datastructures import Headers +from 
werkzeug.exceptions import RequestedRangeNotSatisfiable from onionshare_cli.common import Common from onionshare_cli.web import Web +from onionshare_cli.web.share_mode import parse_range_header from onionshare_cli.settings import Settings from onionshare_cli.mode_settings import ModeSettings @@ -224,3 +232,229 @@ class TestZipWriterCustom: def test_custom_callback(self, custom_zw): assert custom_zw.processed_size_callback(None) == "custom_callback" + + +def check_unsupported(cmd: str, args: list): + cmd_args = [cmd] + cmd_args.extend(args) + skip = False + + try: + subprocess.check_call(cmd_args) + except Exception: + skip = True + + return pytest.mark.skipif(skip, reason='Command {!r} not supported'.format(cmd)) + + +@contextmanager +def live_server(web): + s = socket.socket() + s.bind(("localhost", 0)) + port = s.getsockname()[1] + s.close() + + def run(): + web.app.run(host='127.0.0.1', port=port, debug=False) + + proc = Process(target=run) + proc.start() + + url = 'http://127.0.0.1:{}'.format(port) + + attempts = 20 + while True: + try: + urlopen(url) + break + except Exception: + attempts -= 1 + if attempts > 0: + time.sleep(0.5) + else: + raise + + yield url + '/download' + + proc.terminate() + + +class TestRangeRequests: + + VALID_RANGES = [ + (None, 500, [(0, 499)]), + ('bytes=0', 500, [(0, 499)]), + ('bytes=100', 500, [(100, 499)]), + ('bytes=100-', 500, [(100, 499)]), # not in the RFC, but how curl sends + ('bytes=0-99', 500, [(0, 99)]), + ('bytes=0-599', 500, [(0, 499)]), + ('bytes=0-0', 500, [(0, 0)]), + ('bytes=-100', 500, [(400, 499)]), + ('bytes=0-99,100-199', 500, [(0, 199)]), + ('bytes=0-100,100-199', 500, [(0, 199)]), + ('bytes=0-99,101-199', 500, [(0, 99), (101, 199)]), + ('bytes=0-199,100-299', 500, [(0, 299)]), + ('bytes=0-99,200-299', 500, [(0, 99), (200, 299)]), + ] + + INVALID_RANGES = [ + 'bytes=200-100', + 'bytes=0-100,300-200', + ] + + def test_parse_ranges(self): + for case in self.VALID_RANGES: + (header, target_size, expected) = case + 
parsed = parse_range_header(header, target_size) + assert parsed == expected, case + + for invalid in self.INVALID_RANGES: + with pytest.raises(RequestedRangeNotSatisfiable): + parse_range_header(invalid, 500) + + def test_headers(self, common_obj): + web = web_obj(common_obj, 'share', 3) + web.stay_open = True + url = '/download' + + with web.app.test_client() as client: + resp = client.get(url, headers=self._make_auth_headers(web.password)) + assert resp.headers['ETag'].startswith('"sha256:') + assert resp.headers['Accept-Ranges'] == 'bytes' + assert resp.headers.get('Last-Modified') is not None + assert resp.headers.get('Content-Length') is not None + assert 'Accept-Encoding' in resp.headers['Vary'] + + def test_basic(self, common_obj): + web = web_obj(common_obj, 'share', 3) + web.stay_open = True + url = '/download' + with open(web.share_mode.download_filename, 'rb') as f: + contents = f.read() + + with web.app.test_client() as client: + resp = client.get(url, headers=self._make_auth_headers(web.password)) + assert resp.status_code == 200 + assert resp.data == contents + + def test_reassemble(self, common_obj): + web = web_obj(common_obj, 'share', 3) + web.stay_open = True + url = '/download' + with open(web.share_mode.download_filename, 'rb') as f: + contents = f.read() + + with web.app.test_client() as client: + headers = self._make_auth_headers(web.password) + headers.extend({'Range': 'bytes=0-10'}) + resp = client.get(url, headers=headers) + assert resp.status_code == 206 + content_range = resp.headers['Content-Range'] + assert content_range == 'bytes {}-{}/{}'.format(0, 10, web.share_mode.download_filesize) + bytes_out = resp.data + + headers.update({'Range': 'bytes=11-100000'}) + resp = client.get(url, headers=headers) + assert resp.status_code == 206 + content_range = resp.headers['Content-Range'] + assert content_range == 'bytes {}-{}/{}'.format( + 11, web.share_mode.download_filesize - 1, web.share_mode.download_filesize) + bytes_out += resp.data + + 
assert bytes_out == contents + + def test_mismatched_etags(self, common_obj): + '''RFC 7233 Section 3.2 + The "If-Range" header field allows a client to "short-circuit" the second request. + Informally, its meaning is as follows: if the representation is unchanged, send me the + part(s) that I am requesting in Range; otherwise, send me the entire representation. + ''' + web = web_obj(common_obj, 'share', 3) + web.stay_open = True + url = '/download' + with open(web.share_mode.download_filename, 'rb') as f: + contents = f.read() + + with web.app.test_client() as client: + headers = self._make_auth_headers(web.password) + resp = client.get(url, headers=headers) + assert resp.status_code == 200 + + headers.extend({'If-Range': 'mismatched etag', + 'Range': 'bytes=10-100'}) + resp = client.get(url, headers=headers) + assert resp.status_code == 200 + assert resp.data == contents + + def test_if_unmodified_since(self, common_obj): + web = web_obj(common_obj, 'share', 3) + web.stay_open = True + url = '/download' + + with web.app.test_client() as client: + headers = self._make_auth_headers(web.password) + resp = client.get(url, headers=headers) + assert resp.status_code == 200 + last_mod = resp.headers['Last-Modified'] + + headers.extend({'If-Unmodified-Since': last_mod}) + resp = client.get(url, headers=headers) + assert resp.status_code == 304 + + def test_firefox_like_behavior(self, common_obj): + web = web_obj(common_obj, 'share', 3) + web.stay_open = True + url = '/download' + + with web.app.test_client() as client: + headers = self._make_auth_headers(web.password) + resp = client.get(url, headers=headers) + assert resp.status_code == 200 + + # Firefox sends these with all range requests + etag = resp.headers['ETag'] + last_mod = resp.headers['Last-Modified'] + + # make a request that uses the full header set + headers.extend({'Range': 'bytes=0-10', + 'If-Unmodified-Since': last_mod, + 'If-Range': etag}) + resp = client.get(url, headers=headers) + assert 
resp.status_code == 206 + + def _make_auth_headers(self, password): + auth = base64.b64encode(b"onionshare:" + password.encode()).decode() + h = Headers() + h.add("Authorization", "Basic " + auth) + return h + + @check_unsupported('curl', ['--version']) + def test_curl(self, common_obj): + web = web_obj(common_obj, 'share', 3) + web.stay_open = True + + with live_server(web) as url: + # Debugging help from `man curl`, on error 33 + # 33 HTTP range error. The range "command" didn't work. + subprocess.check_call(['curl', '--continue-at', '10', url]) + + @check_unsupported('wget', ['--version']) + def test_wget(self, tmpdir, common_obj): + web = web_obj(common_obj, 'share', 3) + web.stay_open = True + + # wget needs a file to exist to continue + download = tmpdir.join('download') + download.write('x' * 10) + + with live_server(web) as url: + subprocess.check_call(['wget', '--continue', '-O', str(download), url]) + + + @check_unsupported('http', ['--version']) + def test_httpie(self, common_obj): + web = web_obj(common_obj, 'share', 3) + web.stay_open = True + + with live_server(web) as url: + subprocess.check_call(['http', url, 'Range: bytes=10']) diff --git a/cli/tests/test_range_request.py b/cli/tests/test_range_request.py new file mode 100644 index 00000000..796bd6c3 --- /dev/null +++ b/cli/tests/test_range_request.py @@ -0,0 +1,41 @@ +import pytest +import subprocess + +from tempfile import NamedTemporaryFile +from werkzeug.exceptions import RequestedRangeNotSatisfiable + +from onionshare_cli.web.share_mode import parse_range_header + + +VALID_RANGES = [ + (None, 500, [(0, 499)]), + ('bytes=0', 500, [(0, 499)]), + ('bytes=100', 500, [(100, 499)]), + ('bytes=100-', 500, [(100, 499)]), # not in the RFC, but how curl sends + ('bytes=0-99', 500, [(0, 99)]), + ('bytes=0-599', 500, [(0, 499)]), + ('bytes=0-0', 500, [(0, 0)]), + ('bytes=-100', 500, [(400, 499)]), + ('bytes=0-99,100-199', 500, [(0, 199)]), + ('bytes=0-100,100-199', 500, [(0, 199)]), + 
('bytes=0-99,101-199', 500, [(0, 99), (101, 199)]), + ('bytes=0-199,100-299', 500, [(0, 299)]), + ('bytes=0-99,200-299', 500, [(0, 99), (200, 299)]), +] + + +INVALID_RANGES = [ + 'bytes=200-100', + 'bytes=0-100,300-200', +] + + +def test_parse_ranges(): + for case in VALID_RANGES: + (header, target_size, expected) = case + parsed = parse_range_header(header, target_size) + assert parsed == expected, case + + for invalid in INVALID_RANGES: + with pytest.raises(RequestedRangeNotSatisfiable): + parse_range_header(invalid, 500) \ No newline at end of file -- cgit v1.2.3-54-g00ecf