test: add test for HTTP/2 corruption as reported in #10525

- adding test_02_20 for reproducing the situation - using recently released mod_h2 Apache module - skipping test if an older version is installed - adding installation of current mod_h2 to github pytest workflow This reproduces the error reliable (for me) on the lib/http2.c version of curl 7.88.0. And passes with the recent curl master. Closes #10534
2025-01-18 14:04:30 +08:00 · 2023-02-16 14:09:16 +01:00 · 2023-02-16 14:09:16 +01:00 · 4437e3e344
commit 4437e3e344
parent 5479d9916e
4 changed files with 78 additions and 24 deletions
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@ -80,6 +80,15 @@ jobs:
        make install
      name: 'install nghttp2'

+    - run: |
+        git clone --depth=1 -b master https://github.com/icing/mod_h2
+        cd mod_h2
+        autoreconf -fi
+        ./configure PKG_CONFIG_PATH="$HOME/all/lib/pkgconfig"
+        make
+        sudo make install
+      name: 'install mod_h2'
+
    - uses: actions/checkout@v3

    - run: autoreconf -fi
--- a/tests/tests-httpd/test_02_download.py
+++ b/tests/tests-httpd/test_02_download.py
@ -24,6 +24,8 @@
 #
 ###########################################################################
 #
+import difflib
+import filecmp
 import logging
 import os
 import pytest
@ -43,21 +45,11 @@ class TestDownload:
        if env.have_h3():
            nghttpx.start_if_needed()

-    def _make_docs_file(self, docs_dir: str, fname: str, fsize: int):
-        fpath = os.path.join(docs_dir, fname)
-        data1k = 1024*'x'
-        flen = 0
-        with open(fpath, 'w') as fd:
-            while flen < fsize:
-                fd.write(data1k)
-                flen += len(data1k)
-        return flen
-
    @pytest.fixture(autouse=True, scope='class')
    def _class_scope(self, env, httpd):
-        self._make_docs_file(docs_dir=httpd.docs_dir, fname='data1.data', fsize=1024*1024)
-        self._make_docs_file(docs_dir=httpd.docs_dir, fname='data10.data', fsize=10*1024*1024)
-        self._make_docs_file(docs_dir=httpd.docs_dir, fname='data100.data', fsize=100*1024*1024)
+        env.make_data_file(indir=httpd.docs_dir, fname="data-100k", fsize=100*1024)
+        env.make_data_file(indir=httpd.docs_dir, fname="data-1m", fsize=1024*1024)
+        env.make_data_file(indir=httpd.docs_dir, fname="data-10m", fsize=10*1024*1024)

    # download 1 file
    @pytest.mark.parametrize("proto", ['http/1.1', 'h2', 'h3'])
@ -173,7 +165,7 @@ class TestDownload:
    def test_02_08_1MB_serial(self, env: Env,
                              httpd, nghttpx, repeat, proto):
        count = 20
-        urln = f'https://{env.authority_for(env.domain1, proto)}/data1.data?[0-{count-1}]'
+        urln = f'https://{env.authority_for(env.domain1, proto)}/data-1m?[0-{count-1}]'
        curl = CurlClient(env=env)
        r = curl.http_download(urls=[urln], alpn_proto=proto)
        assert r.exit_code == 0
@ -183,7 +175,7 @@ class TestDownload:
    def test_02_09_1MB_parallel(self, env: Env,
                              httpd, nghttpx, repeat, proto):
        count = 20
-        urln = f'https://{env.authority_for(env.domain1, proto)}/data1.data?[0-{count-1}]'
+        urln = f'https://{env.authority_for(env.domain1, proto)}/data-1m?[0-{count-1}]'
        curl = CurlClient(env=env)
        r = curl.http_download(urls=[urln], alpn_proto=proto, extra_args=[
            '--parallel'
@ -195,7 +187,7 @@ class TestDownload:
    def test_02_10_10MB_serial(self, env: Env,
                              httpd, nghttpx, repeat, proto):
        count = 20
-        urln = f'https://{env.authority_for(env.domain1, proto)}/data10.data?[0-{count-1}]'
+        urln = f'https://{env.authority_for(env.domain1, proto)}/data-10m?[0-{count-1}]'
        curl = CurlClient(env=env)
        r = curl.http_download(urls=[urln], alpn_proto=proto)
        assert r.exit_code == 0
@ -205,10 +197,53 @@ class TestDownload:
    def test_02_11_10MB_parallel(self, env: Env,
                              httpd, nghttpx, repeat, proto):
        count = 20
-        urln = f'https://{env.authority_for(env.domain1, proto)}/data10.data?[0-{count-1}]'
+        urln = f'https://{env.authority_for(env.domain1, proto)}/data-10m?[0-{count-1}]'
        curl = CurlClient(env=env)
        r = curl.http_download(urls=[urln], alpn_proto=proto, extra_args=[
            '--parallel'
        ])
        assert r.exit_code == 0
        r.check_stats(count=count, exp_status=200)
+
+    def test_02_20_h2_small_frames(self, env: Env, httpd, repeat):
+        # Test case to reproduce content corruption as observed in
+        # https://github.com/curl/curl/issues/10525
+        # To reliably reproduce, we need an Apache httpd that supports
+        # setting smaller frame sizes. This is not released yet, we
+        # test if it works and back out if not.
+        httpd.set_extra_config(env.domain1, lines=[
+            f'H2MaxDataFrameLen 1024',
+        ])
+        assert httpd.stop()
+        if not httpd.start():
+            # no, not supported, bail out
+            httpd.set_extra_config(env.domain1, lines=None)
+            assert httpd.start()
+            pytest.skip(f'H2MaxDataFrameLen not supported')
+        # ok, make 100 downloads with 2 parallel running and they
+        # are expected to stumble into the issue when using `lib/http2.c`
+        # from curl 7.88.0
+        count = 100
+        urln = f'https://{env.authority_for(env.domain1, "h2")}/data-1m?[0-{count-1}]'
+        curl = CurlClient(env=env)
+        r = curl.http_download(urls=[urln], alpn_proto="h2", extra_args=[
+            '--parallel', '--parallel-max', '2'
+        ])
+        assert r.exit_code == 0
+        r.check_stats(count=count, exp_status=200)
+        srcfile = os.path.join(httpd.docs_dir, 'data-1m')
+        for i in range(count):
+            dfile = curl.download_file(i)
+            assert os.path.exists(dfile)
+            if not filecmp.cmp(srcfile, dfile, shallow=False):
+                diff = "".join(difflib.unified_diff(a=open(srcfile).readlines(),
+                                                    b=open(dfile).readlines(),
+                                                    fromfile=srcfile,
+                                                    tofile=dfile,
+                                                    n=1))
+                assert False, f'download {dfile} differs:\n{diff}'
+        # restore httpd defaults
+        httpd.set_extra_config(env.domain1, lines=None)
+        assert httpd.stop()
+        assert httpd.start()
+
--- a/tests/tests-httpd/testenv/curl.py
+++ b/tests/tests-httpd/testenv/curl.py
@ -209,6 +209,13 @@ class CurlClient:
        self._rmrf(self._run_dir)
        self._mkpath(self._run_dir)

+    @property
+    def run_dir(self) -> str:
+        return self._run_dir
+
+    def download_file(self, i: int) -> str:
+        return os.path.join(self.run_dir, f'download_{i}.data')
+
    def _rmf(self, path):
        if os.path.exists(path):
            return os.remove(path)
@ -232,8 +239,11 @@ class CurlClient:
        if extra_args is None:
            extra_args = []
        extra_args.extend([
-            '-o', 'download.data',
+            '-o', 'download_#1.data',
        ])
+        # remove any existing ones
+        for i in range(100):
+            self._rmf(self.download_file(i))
        if with_stats:
            extra_args.extend([
                '-w', '%{json}\\n'
--- a/tests/tests-httpd/testenv/httpd.py
+++ b/tests/tests-httpd/testenv/httpd.py
@ -307,12 +307,12 @@ class Httpd:
            ]))

    def _get_log_level(self):
-        if self.env.verbose > 3:
-            return 'trace2'
-        if self.env.verbose > 2:
-            return 'trace1'
-        if self.env.verbose > 1:
-            return 'debug'
+        #if self.env.verbose > 3:
+        #    return 'trace2'
+        #if self.env.verbose > 2:
+        #    return 'trace1'
+        #if self.env.verbose > 1:
+        #    return 'debug'
        return 'info'

    def _curltest_conf(self) -> List[str]: