curl/tests/http/test_30_vsftpd.py
Stefan Eissing c9b95c0bb3
lib: graceful connection shutdown
When libcurl discards a connection there are two phases this may go
through: "shutdown" and "closing". If a connection is aborted, the
shutdown phase is skipped and it is closed right away.

The connection filters attached to the connection implement the phases
in their `do_shutdown()` and `do_close()` callbacks. Filters carry now a
`shutdown` flag next to `connected` to keep track of the shutdown
operation.

Filters are shut down from top to bottom. If a filter is not connected,
its shutdown is skipped. Notable filters that *do* something during
shutdown are HTTP/2 and TLS. HTTP/2 sends the GOAWAY frame. TLS sends
its close notify and expects to receive a close notify from the server.

As sends and receives may EAGAIN on the network, a shutdown is often not
successful right away and needs to poll the connection's socket(s). To
facilitate this, such connections are placed on a new shutdown list
inside the connection cache.

Since managing this list requires the cooperation of a multi handle,
only the connection cache belonging to a multi handle is used. If a
connection was in another cache when being discarded, it is removed
there and added to the multi's cache. If no multi handle is available at
that time, the connection is shut down and closed in a one-time,
best-effort attempt.

When a multi handle is destroyed, all connections still on the shutdown
list are discarded with a final shutdown attempt and close. In curl
debug builds, the environment variable `CURL_GRACEFUL_SHUTDOWN` can be
set to make this graceful with a timeout in milliseconds given by the
variable.

The shutdown list is limited to the max number of connections configured
for a multi cache. Set via CURLMOPT_MAX_TOTAL_CONNECTIONS. When the
limit is reached, the oldest connection on the shutdown list is
discarded.

- In multi_wait() and multi_waitfds(), collect all connection caches
  involved (each transfer might carry its own) into a temporary list.
  Let each connection cache on the list contribute sockets and
  POLLIN/OUT events its connections are waiting for.

- In multi_perform() collect the connection caches the same way and let
  them perform their maintenance. This will make another non-blocking
  attempt to shutdown all connections on its shutdown list.

- for event based multis (multi->socket_cb set), add the sockets and
  their poll events via the callback. When `multi_socket()` is invoked
  for a socket not known by an active transfer, forward this to the
  multi's cache for processing. On closing a connection, remove its
  socket(s) via the callback.

TLS connection filters MUST NOT send close notify messages in their
`do_close()` implementation. The reason is that a TLS close notify
signals a success. When a connection is aborted and skips its shutdown
phase, the server needs to see a missing close notify to detect
something has gone wrong.

A graceful shutdown of FTP's data connection is performed implicitly
before regarding the upload/download as complete and continuing on the
control connection. For FTP without TLS, there is just the socket close
happening. But with TLS, the sent/received close notify signals that the
transfer is complete and healthy. Servers like `vsftpd` verify that and
reject uploads without a TLS close notify.

- added test_19_* for shutdown related tests
- test_19_01 and test_19_02 test for TCP RST packets
  which happen without a graceful shutdown and should
  no longer appear otherwise.
- add test_19_03 for handling shutdowns by the server
- add test_19_04 for handling shutdowns by curl
- add test_19_05 for event based shutdown by server
- add test_30_06/07 and test_31_06/07 for shutdown checks
  on FTP up- and downloads.

Closes #13976
2024-06-26 08:33:17 +02:00

191 lines
8.1 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#***************************************************************************
# _ _ ____ _
# Project ___| | | | _ \| |
# / __| | | | |_) | |
# | (__| |_| | _ <| |___
# \___|\___/|_| \_\_____|
#
# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at https://curl.se/docs/copyright.html.
#
# You may opt to use, copy, modify, merge, publish, distribute and/or sell
# copies of the Software, and permit persons to whom the Software is
# furnished to do so, under the terms of the COPYING file.
#
# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
# KIND, either express or implied.
#
# SPDX-License-Identifier: curl
#
###########################################################################
#
import difflib
import filecmp
import logging
import os
import shutil
import pytest
from testenv import Env, CurlClient, VsFTPD
log = logging.getLogger(__name__)
@pytest.mark.skipif(condition=not Env.has_vsftpd(), reason="missing vsftpd")
class TestVsFTPD:
    """FTP tests (no TLS) against a locally started `vsftpd` server.

    Covers directory listing, serial/parallel downloads, uploads, and
    `tcpdump`-based checks that curl shuts connections down gracefully
    (i.e. produces no TCP RST packets).
    """

    @pytest.fixture(autouse=True, scope='class')
    def vsftpd(self, env):
        # one vsftpd instance for the whole test class, stopped on teardown
        vsftpd = VsFTPD(env=env)
        assert vsftpd.start()
        yield vsftpd
        vsftpd.stop()

    def _make_docs_file(self, docs_dir: str, fname: str, fsize: int) -> int:
        """Create `fname` in `docs_dir`, filled with 'x' to at least `fsize` bytes.

        Returns the number of bytes written (rounded up to a 1k multiple).
        """
        fpath = os.path.join(docs_dir, fname)
        data1k = 1024 * 'x'
        flen = 0
        # `with` guarantees the handle is closed even if a write fails
        with open(fpath, 'w') as fd:
            while flen < fsize:
                fd.write(data1k)
                flen += len(data1k)
        return flen

    @pytest.fixture(autouse=True, scope='class')
    def _class_scope(self, env, vsftpd):
        # start from a clean server document root
        if os.path.exists(vsftpd.docs_dir):
            shutil.rmtree(vsftpd.docs_dir)
        if not os.path.exists(vsftpd.docs_dir):
            os.makedirs(vsftpd.docs_dir)
        # files served for the download tests
        self._make_docs_file(docs_dir=vsftpd.docs_dir, fname='data-1k', fsize=1024)
        self._make_docs_file(docs_dir=vsftpd.docs_dir, fname='data-10k', fsize=10*1024)
        self._make_docs_file(docs_dir=vsftpd.docs_dir, fname='data-1m', fsize=1024*1024)
        self._make_docs_file(docs_dir=vsftpd.docs_dir, fname='data-10m', fsize=10*1024*1024)
        # local files used by the upload tests
        env.make_data_file(indir=env.gen_dir, fname="upload-1k", fsize=1024)
        env.make_data_file(indir=env.gen_dir, fname="upload-100k", fsize=100*1024)
        env.make_data_file(indir=env.gen_dir, fname="upload-1m", fsize=1024*1024)

    def test_30_01_list_dir(self, env: Env, vsftpd: VsFTPD, repeat):
        curl = CurlClient(env=env)
        url = f'ftp://{env.ftp_domain}:{vsftpd.port}/'
        r = curl.ftp_get(urls=[url], with_stats=True)
        r.check_stats(count=1, http_status=226)
        # read the listing via `with` so the handle is not leaked
        with open(os.path.join(curl.run_dir, 'download_#1.data')) as fd:
            lines = fd.readlines()
        assert len(lines) == 4, f'list: {lines}'

    # download 1 file, no SSL
    @pytest.mark.parametrize("docname", [
        'data-1k', 'data-1m', 'data-10m'
    ])
    def test_30_02_download_1(self, env: Env, vsftpd: VsFTPD, docname, repeat):
        curl = CurlClient(env=env)
        srcfile = os.path.join(vsftpd.docs_dir, docname)
        count = 1
        url = f'ftp://{env.ftp_domain}:{vsftpd.port}/{docname}?[0-{count-1}]'
        r = curl.ftp_get(urls=[url], with_stats=True)
        r.check_stats(count=count, http_status=226)
        self.check_downloads(curl, srcfile, count)

    @pytest.mark.parametrize("docname", [
        'data-1k', 'data-1m', 'data-10m'
    ])
    def test_30_03_download_10_serial(self, env: Env, vsftpd: VsFTPD, docname, repeat):
        curl = CurlClient(env=env)
        srcfile = os.path.join(vsftpd.docs_dir, docname)
        count = 10
        url = f'ftp://{env.ftp_domain}:{vsftpd.port}/{docname}?[0-{count-1}]'
        r = curl.ftp_get(urls=[url], with_stats=True)
        r.check_stats(count=count, http_status=226)
        self.check_downloads(curl, srcfile, count)

    @pytest.mark.parametrize("docname", [
        'data-1k', 'data-1m', 'data-10m'
    ])
    def test_30_04_download_10_parallel(self, env: Env, vsftpd: VsFTPD, docname, repeat):
        curl = CurlClient(env=env)
        srcfile = os.path.join(vsftpd.docs_dir, docname)
        count = 10
        url = f'ftp://{env.ftp_domain}:{vsftpd.port}/{docname}?[0-{count-1}]'
        r = curl.ftp_get(urls=[url], with_stats=True, extra_args=[
            '--parallel'
        ])
        r.check_stats(count=count, http_status=226)
        self.check_downloads(curl, srcfile, count)

    @pytest.mark.parametrize("docname", [
        'upload-1k', 'upload-100k', 'upload-1m'
    ])
    def test_30_05_upload_1(self, env: Env, vsftpd: VsFTPD, docname, repeat):
        curl = CurlClient(env=env)
        srcfile = os.path.join(env.gen_dir, docname)
        dstfile = os.path.join(vsftpd.docs_dir, docname)
        self._rmf(dstfile)  # ensure the upload target does not pre-exist
        count = 1
        url = f'ftp://{env.ftp_domain}:{vsftpd.port}/'
        r = curl.ftp_upload(urls=[url], fupload=srcfile, with_stats=True)
        r.check_stats(count=count, http_status=226)
        self.check_upload(env, vsftpd, docname=docname)

    def _rmf(self, path):
        # remove `path` if present; a missing file is not an error
        if os.path.exists(path):
            return os.remove(path)

    # check with `tcpdump` if curl causes any TCP RST packets
    @pytest.mark.skipif(condition=not Env.tcpdump(), reason="tcpdump not available")
    def test_30_06_shutdownh_download(self, env: Env, vsftpd: VsFTPD, repeat):
        docname = 'data-1k'
        curl = CurlClient(env=env)
        count = 1
        url = f'ftp://{env.ftp_domain}:{vsftpd.port}/{docname}?[0-{count-1}]'
        r = curl.ftp_get(urls=[url], with_stats=True, with_tcpdump=True)
        r.check_stats(count=count, http_status=226)
        assert r.tcpdump
        # a graceful shutdown must not generate TCP RST packets
        assert len(r.tcpdump.stats) == 0, 'Unexpected TCP RSTs packets'

    # check with `tcpdump` if curl causes any TCP RST packets
    @pytest.mark.skipif(condition=not Env.tcpdump(), reason="tcpdump not available")
    def test_30_07_shutdownh_upload(self, env: Env, vsftpd: VsFTPD, repeat):
        docname = 'upload-1k'
        curl = CurlClient(env=env)
        srcfile = os.path.join(env.gen_dir, docname)
        dstfile = os.path.join(vsftpd.docs_dir, docname)
        self._rmf(dstfile)
        count = 1
        url = f'ftp://{env.ftp_domain}:{vsftpd.port}/'
        r = curl.ftp_upload(urls=[url], fupload=srcfile, with_stats=True, with_tcpdump=True)
        r.check_stats(count=count, http_status=226)
        assert r.tcpdump
        assert len(r.tcpdump.stats) == 0, 'Unexpected TCP RSTs packets'

    def check_downloads(self, client, srcfile: str, count: int,
                        complete: bool = True):
        """Assert all `count` downloaded files exist and (if `complete`) match `srcfile`."""
        for i in range(count):
            dfile = client.download_file(i)
            assert os.path.exists(dfile)
            if complete and not filecmp.cmp(srcfile, dfile, shallow=False):
                assert False, f'download {dfile} differs:\n{self._unified_diff(srcfile, dfile)}'

    def check_upload(self, env, vsftpd: VsFTPD, docname):
        """Assert that `docname` arrived unaltered in the server's docs dir."""
        srcfile = os.path.join(env.gen_dir, docname)
        dstfile = os.path.join(vsftpd.docs_dir, docname)
        assert os.path.exists(srcfile)
        assert os.path.exists(dstfile)
        if not filecmp.cmp(srcfile, dstfile, shallow=False):
            assert False, f'upload {dstfile} differs:\n{self._unified_diff(srcfile, dstfile)}'

    def _unified_diff(self, fromfile: str, tofile: str) -> str:
        # small-context unified diff of two text files; `with` closes both handles
        with open(fromfile) as fa, open(tofile) as fb:
            return ''.join(difflib.unified_diff(a=fa.readlines(),
                                                b=fb.readlines(),
                                                fromfile=fromfile,
                                                tofile=tofile,
                                                n=1))