curl/tests/http/test_07_upload.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#***************************************************************************
#                                  _   _ ____  _
#  Project                     ___| | | |  _ \| |
#                             / __| | | | |_) | |
#                            | (__| |_| |  _ <| |___
#                             \___|\___/|_| \_\_____|
#
# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at https://curl.se/docs/copyright.html.
#
# You may opt to use, copy, modify, merge, publish, distribute and/or sell
# copies of the Software, and permit persons to whom the Software is
# furnished to do so, under the terms of the COPYING file.
#
# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
# KIND, either express or implied.
#
# SPDX-License-Identifier: curl
#
###########################################################################
#
import difflib
import filecmp
import logging
import os
import pytest

from testenv import Env, CurlClient


log = logging.getLogger(__name__)


class TestUpload:
    """Upload tests (POST and PUT) against the test server's
    /curltest/echo and /curltest/put handlers.

    Most tests are parametrized over the ALPN protocol to use. Tests
    for 'h3' are skipped when the environment has no h3 support or when
    curl is built with msh3.
    """

    @pytest.fixture(autouse=True, scope='class')
    def _class_scope(self, env, httpd, nghttpx):
        # Class-wide setup: start nghttpx when h3 is available, generate
        # the upload payloads used below and reload httpd without any
        # extra configuration.
        if env.have_h3():
            nghttpx.start_if_needed()
        env.make_data_file(indir=env.gen_dir, fname="data-100k", fsize=100*1024)
        env.make_data_file(indir=env.gen_dir, fname="data-10m", fsize=10*1024*1024)
        httpd.clear_extra_configs()
        httpd.reload()

    @staticmethod
    def _skip_if_h3_unusable(env, proto, msh3_reason):
        """Skip the calling test when `proto` is 'h3' but h3 cannot be tested.

        :param env: the test environment, queried for h3 support and for
                    the libraries curl uses
        :param proto: the ALPN protocol the test is parametrized with
        :param msh3_reason: skip message to report when curl uses msh3
        """
        if proto != 'h3':
            return
        if not env.have_h3():
            pytest.skip("h3 not supported")
        if env.curl_uses_lib('msh3'):
            pytest.skip(msh3_reason)

    @staticmethod
    def _read_lines(path: str) -> list:
        """Return all lines of the text file `path`.

        The file is closed before returning, so that tests do not leak
        open file handles.
        """
        with open(path) as fd:
            return fd.readlines()

    # upload small data, check that this is what was echoed
    @pytest.mark.parametrize("proto", ['http/1.1', 'h2', 'h3'])
    def test_07_01_upload_1_small(self, env: Env, httpd, nghttpx, repeat, proto):
        self._skip_if_h3_unusable(env, proto, "msh3 fails here")
        data = '0123456789'
        curl = CurlClient(env=env)
        url = f'https://{env.authority_for(env.domain1, proto)}/curltest/echo?id=[0-0]'
        r = curl.http_upload(urls=[url], data=data, alpn_proto=proto)
        r.check_response(count=1, http_status=200)
        respdata = self._read_lines(curl.response_file(0))
        assert respdata == [data]

    # upload large data, check that this is what was echoed
    @pytest.mark.parametrize("proto", ['http/1.1', 'h2', 'h3'])
    def test_07_02_upload_1_large(self, env: Env, httpd, nghttpx, repeat, proto):
        self._skip_if_h3_unusable(env, proto, "msh3 fails here")
        fdata = os.path.join(env.gen_dir, 'data-100k')
        curl = CurlClient(env=env)
        url = f'https://{env.authority_for(env.domain1, proto)}/curltest/echo?id=[0-0]'
        r = curl.http_upload(urls=[url], data=f'@{fdata}', alpn_proto=proto)
        r.check_response(count=1, http_status=200)
        indata = self._read_lines(fdata)
        respdata = self._read_lines(curl.response_file(0))
        assert respdata == indata

    # upload data sequentially, check that they were echoed
    @pytest.mark.parametrize("proto", ['http/1.1', 'h2', 'h3'])
    def test_07_10_upload_sequential(self, env: Env, httpd, nghttpx, repeat, proto):
        self._skip_if_h3_unusable(env, proto, "msh3 stalls here")
        count = 50
        data = '0123456789'
        curl = CurlClient(env=env)
        url = f'https://{env.authority_for(env.domain1, proto)}/curltest/echo?id=[0-{count-1}]'
        r = curl.http_upload(urls=[url], data=data, alpn_proto=proto)
        r.check_response(count=count, http_status=200)
        for i in range(count):
            respdata = self._read_lines(curl.response_file(i))
            assert respdata == [data]

    # upload data parallel, check that they were echoed
    @pytest.mark.parametrize("proto", ['h2', 'h3'])
    def test_07_11_upload_parallel(self, env: Env, httpd, nghttpx, repeat, proto):
        self._skip_if_h3_unusable(env, proto, "msh3 stalls here")
        # limit since we use a separate connection in h1
        count = 50
        data = '0123456789'
        curl = CurlClient(env=env)
        url = f'https://{env.authority_for(env.domain1, proto)}/curltest/echo?id=[0-{count-1}]'
        r = curl.http_upload(urls=[url], data=data, alpn_proto=proto,
                             extra_args=['--parallel'])
        r.check_response(count=count, http_status=200)
        for i in range(count):
            respdata = self._read_lines(curl.response_file(i))
            assert respdata == [data]

    # upload large data sequentially, check that this is what was echoed
    @pytest.mark.parametrize("proto", ['http/1.1', 'h2', 'h3'])
    def test_07_20_upload_seq_large(self, env: Env, httpd, nghttpx, repeat, proto):
        self._skip_if_h3_unusable(env, proto, "msh3 stalls here")
        fdata = os.path.join(env.gen_dir, 'data-100k')
        count = 50
        curl = CurlClient(env=env)
        url = f'https://{env.authority_for(env.domain1, proto)}/curltest/echo?id=[0-{count-1}]'
        r = curl.http_upload(urls=[url], data=f'@{fdata}', alpn_proto=proto)
        r.check_response(count=count, http_status=200)
        indata = self._read_lines(fdata)
        for i in range(count):
            respdata = self._read_lines(curl.response_file(i))
            assert respdata == indata

    # upload very large data sequentially, check that this is what was echoed
    @pytest.mark.parametrize("proto", ['http/1.1', 'h2', 'h3'])
    def test_07_12_upload_seq_large(self, env: Env, httpd, nghttpx, repeat, proto):
        self._skip_if_h3_unusable(env, proto, "msh3 stalls here")
        fdata = os.path.join(env.gen_dir, 'data-10m')
        count = 2
        curl = CurlClient(env=env)
        url = f'https://{env.authority_for(env.domain1, proto)}/curltest/echo?id=[0-{count-1}]'
        r = curl.http_upload(urls=[url], data=f'@{fdata}', alpn_proto=proto)
        r.check_response(count=count, http_status=200)
        indata = self._read_lines(fdata)
        for i in range(count):
            respdata = self._read_lines(curl.response_file(i))
            assert respdata == indata

    # upload data parallel, check that they were echoed
    @pytest.mark.parametrize("proto", ['h2', 'h3'])
    def test_07_20_upload_parallel(self, env: Env, httpd, nghttpx, repeat, proto):
        self._skip_if_h3_unusable(env, proto, "msh3 stalls here")
        # limit since we use a separate connection in h1
        count = 50
        data = '0123456789'
        curl = CurlClient(env=env)
        url = f'https://{env.authority_for(env.domain1, proto)}/curltest/echo?id=[0-{count-1}]'
        r = curl.http_upload(urls=[url], data=data, alpn_proto=proto,
                             extra_args=['--parallel'])
        r.check_response(count=count, http_status=200)
        for i in range(count):
            respdata = self._read_lines(curl.response_file(i))
            assert respdata == [data]

    # upload large data parallel, check that this is what was echoed
    @pytest.mark.parametrize("proto", ['h2', 'h3'])
    def test_07_21_upload_parallel_large(self, env: Env, httpd, nghttpx, repeat, proto):
        self._skip_if_h3_unusable(env, proto, "msh3 stalls here")
        fdata = os.path.join(env.gen_dir, 'data-100k')
        # limit since we use a separate connection in h1
        count = 50
        curl = CurlClient(env=env)
        url = f'https://{env.authority_for(env.domain1, proto)}/curltest/echo?id=[0-{count-1}]'
        r = curl.http_upload(urls=[url], data=f'@{fdata}', alpn_proto=proto,
                             extra_args=['--parallel'])
        r.check_response(count=count, http_status=200)
        self.check_download(count, fdata, curl)

    # PUT 100k, the response is expected to carry the size of the upload
    @pytest.mark.parametrize("proto", ['http/1.1', 'h2', 'h3'])
    def test_07_30_put_100k(self, env: Env, httpd, nghttpx, repeat, proto):
        self._skip_if_h3_unusable(env, proto, "msh3 fails here")
        fdata = os.path.join(env.gen_dir, 'data-100k')
        count = 1
        curl = CurlClient(env=env)
        url = f'https://{env.authority_for(env.domain1, proto)}/curltest/put?id=[0-{count-1}]'
        r = curl.http_put(urls=[url], fdata=fdata, alpn_proto=proto,
                          extra_args=['--parallel'])
        r.check_response(count=count, http_status=200)
        exp_data = [f'{os.path.getsize(fdata)}']
        for i in range(count):
            respdata = self._read_lines(curl.response_file(i))
            assert respdata == exp_data

    # PUT 10m with a chunk_delay of 10ms requested from the handler, the
    # response is expected to carry the size of the upload
    @pytest.mark.parametrize("proto", ['http/1.1', 'h2', 'h3'])
    def test_07_31_put_10m(self, env: Env, httpd, nghttpx, repeat, proto):
        self._skip_if_h3_unusable(env, proto, "msh3 fails here")
        fdata = os.path.join(env.gen_dir, 'data-10m')
        count = 1
        curl = CurlClient(env=env)
        url = f'https://{env.authority_for(env.domain1, proto)}/curltest/put?id=[0-{count-1}]&chunk_delay=10ms'
        r = curl.http_put(urls=[url], fdata=fdata, alpn_proto=proto,
                          extra_args=['--parallel'])
        r.check_response(count=count, http_status=200)
        exp_data = [f'{os.path.getsize(fdata)}']
        for i in range(count):
            respdata = self._read_lines(curl.response_file(i))
            assert respdata == exp_data

    # issue #10591, PUT 10m and only check that it completes with a 200
    @pytest.mark.parametrize("proto", ['http/1.1', 'h2', 'h3'])
    def test_07_32_issue_10591(self, env: Env, httpd, nghttpx, repeat, proto):
        self._skip_if_h3_unusable(env, proto, "msh3 fails here")
        fdata = os.path.join(env.gen_dir, 'data-10m')
        count = 1
        curl = CurlClient(env=env)
        url = f'https://{env.authority_for(env.domain1, proto)}/curltest/put?id=[0-{count-1}]'
        r = curl.http_put(urls=[url], fdata=fdata, alpn_proto=proto)
        r.check_response(count=count, http_status=200)

    # issue #11157, upload that is 404'ed by server, needs to terminate
    # correctly and not time out on sending
    def test_07_33_issue_11157a(self, env: Env, httpd, nghttpx, repeat):
        proto = 'h2'
        fdata = os.path.join(env.gen_dir, 'data-10m')
        # send a POST to our PUT handler which will send immediately a 404 back
        url = f'https://{env.authority_for(env.domain1, proto)}/curltest/put'
        curl = CurlClient(env=env)
        r = curl.run_direct(with_stats=True, args=[
            '--resolve', f'{env.authority_for(env.domain1, proto)}:127.0.0.1',
            '--cacert', env.ca.cert_file,
            '--request', 'POST',
            '--max-time', '5', '-v',
            '--url', url,
            '--form', 'idList=12345678',
            '--form', 'pos=top',
            '--form', 'name=mr_test',
            '--form', f'fileSource=@{fdata};type=application/pdf',
        ])
        assert r.exit_code == 0, f'{r}'
        r.check_stats(1, 404)

    # issue #11157, send upload that is slowly read in
    def test_07_33_issue_11157b(self, env: Env, httpd, nghttpx, repeat):
        proto = 'h2'
        fdata = os.path.join(env.gen_dir, 'data-10m')
        # tell our test PUT handler to read the upload more slowly, so
        # that the send buffering and transfer loop needs to wait
        url = f'https://{env.authority_for(env.domain1, proto)}/curltest/put?chunk_delay=2ms'
        curl = CurlClient(env=env)
        r = curl.run_direct(with_stats=True, args=[
            '--resolve', f'{env.authority_for(env.domain1, proto)}:127.0.0.1',
            '--cacert', env.ca.cert_file,
            '--request', 'PUT',
            '--max-time', '10', '-v',
            '--url', url,
            '--form', 'idList=12345678',
            '--form', 'pos=top',
            '--form', 'name=mr_test',
            '--form', f'fileSource=@{fdata};type=application/pdf',
        ])
        assert r.exit_code == 0, r.dump_logs()
        r.check_stats(1, 200)

    # issue #11194, PUT of a 100k file over h2 with digest credentials
    def test_07_34_issue_11194(self, env: Env, httpd, nghttpx, repeat):
        proto = 'h2'
        fdata = os.path.join(env.gen_dir, 'data-100k')
        url = f'https://{env.authority_for(env.domain1, proto)}/curltest/put'
        curl = CurlClient(env=env)
        r = curl.run_direct(with_stats=True, args=[
            '--verbose',
            '--resolve', f'{env.authority_for(env.domain1, proto)}:127.0.0.1',
            '--cacert', env.ca.cert_file,
            '--request', 'PUT',
            '--digest', '--user', 'test:test',
            # the comma after the file argument is essential: without it
            # Python joins the adjacent literals into '@<path>--url' and
            # curl is pointed at a file that does not exist
            '--data-binary', f'@{fdata}',
            '--url', url,
        ])
        assert r.exit_code == 0, r.dump_logs()
        r.check_stats(1, 200)

    def check_download(self, count, srcfile, curl):
        """Verify that the first `count` download files match `srcfile`.

        :param count: number of download files to check, indices 0..count-1
        :param srcfile: path of the file holding the expected content
        :param curl: the client whose `download_file(i)` gives the path
                     of the i-th download
        :raises AssertionError: when a download file is missing or its
                                content differs; the message for differing
                                content carries a unified diff
        """
        for i in range(count):
            dfile = curl.download_file(i)
            assert os.path.exists(dfile)
            if filecmp.cmp(srcfile, dfile, shallow=False):
                continue
            # only build the (potentially large) diff on a mismatch
            diff = "".join(difflib.unified_diff(a=self._read_lines(srcfile),
                                                b=self._read_lines(dfile),
                                                fromfile=srcfile,
                                                tofile=dfile,
                                                n=1))
            raise AssertionError(f'download {dfile} differs:\n{diff}')