mirror of
https://github.com/HDFGroup/hdf5.git
synced 2025-01-06 14:56:51 +08:00
f859cb732b
* fixed missed closing of a dataset * fixed missed closing of a dataset * fixed typo in error return * Committing clang-format changes * minor edits * code format * Committing clang-format changes * code format * minor edit * switched from using MPI_count, to actual bytes written for H5FD_mpio_debug rw debugging * Committing clang-format changes * changed size_i in printf to reflect the I/O. * Committing clang-format changes * Fixed seg fault with xlf on BE with -qintsize=8 * fixed error function string * spelling corrections via codespell, added new spell check github actions * Committing clang-format changes * misc * misc * misc * misc * misc * misc * misc * misc * misc * misc * misc * misc * misc * misc * Committing clang-format changes * misc * misc * misc * misc * misc * misc * Committing clang-format changes * misc * work around for https://github.com/codespell-project/codespell/issues/2137 * misc * added missing file * misc * misc. * misc * switch to using Codespell with GitHub Actions * misc. * misc. * fixed more sp errors * Fix new typos found by codespell. * fixed proceed with precede * fixed variable in fortran test * fixed minnum * updated spelling list Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Larry Knox <lrknox@hdfgroup.org>
563 lines
18 KiB
C
563 lines
18 KiB
C
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
|
* Copyright by The HDF Group. *
|
|
* All rights reserved. *
|
|
* *
|
|
* This file is part of HDF5. The full HDF5 copyright notice, including *
|
|
* terms governing use, modification, and redistribution, is contained in *
|
|
* the COPYING file, which can be found at the root of the source code *
|
|
* distribution tree, or in https://www.hdfgroup.org/licenses. *
|
|
* If you do not have access to either file, you may request a copy from *
|
|
* help@hdfgroup.org. *
|
|
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
|
|
|
/*****************************************************************************
|
|
* Read-Only S3 Virtual File Driver (VFD)
|
|
*
|
|
* This is the header for the S3 Communications module
|
|
*
|
|
* ***NOT A FILE DRIVER***
|
|
*
|
|
* Purpose:
|
|
*
|
|
* - Provide structures and functions related to communicating with
|
|
* Amazon S3 (Simple Storage Service).
|
|
* - Abstract away the REST API (HTTP,
|
|
* networked communications) behind a series of uniform function calls.
|
|
* - Handle AWS4 authentication, if appropriate.
|
|
* - Fail predictably in event of errors.
|
|
* - Eventually, support more S3 operations, such as creating, writing to,
|
|
* and removing Objects remotely.
|
|
*
|
|
* translates:
|
|
* `read(some_file, bytes_offset, bytes_length, &dest_buffer);`
|
|
* to:
|
|
* ```
|
|
* GET myfile HTTP/1.1
|
|
* Host: somewhere.me
|
|
* Range: bytes=4096-5115
|
|
* ```
|
|
* and places received bytes from HTTP response...
|
|
* ```
|
|
* HTTP/1.1 206 Partial-Content
|
|
* Content-Range: 4096-5115/63239
|
|
*
|
|
* <bytes>
|
|
* ```
|
|
* ...in destination buffer.
|
|
*
|
|
* TODO: put documentation in a consistent place and point to it from here.
|
|
*
|
|
* Programmer: Jacob Smith
|
|
* 2017-11-30
|
|
*
|
|
*****************************************************************************/
|
|
|
|
#include "H5private.h" /* Generic Functions */
|
|
|
|
#ifdef H5_HAVE_ROS3_VFD
|
|
|
|
/* Necessary S3 headers */
|
|
#include <curl/curl.h>
|
|
#include <openssl/evp.h>
|
|
#include <openssl/hmac.h>
|
|
#include <openssl/sha.h>
|
|
|
|
/*****************
|
|
* PUBLIC MACROS *
|
|
*****************/
|
|
|
|
/* hexadecimal string of pre-computed sha256 checksum of the empty string
|
|
* hex(sha256sum(""))
|
|
*/
|
|
#define EMPTY_SHA256 "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
|
|
|
|
/* string length (plus null terminator)
|
|
* example ISO8601-format string: "20170713T145903Z" (YYYYmmdd'T'HHMMSS'_')
|
|
*/
|
|
#define ISO8601_SIZE 17
|
|
|
|
/* string length (plus null terminator)
|
|
* example RFC7231-format string: "Fri, 30 Jun 2017 20:41:55 GMT"
|
|
*/
|
|
#define RFC7231_SIZE 30
|
|
|
|
/*---------------------------------------------------------------------------
|
|
*
|
|
* Macro: ISO8601NOW()
|
|
*
|
|
* Purpose:
|
|
*
|
|
* write "YYYYmmdd'T'HHMMSS'Z'" (less single-quotes) to dest
|
|
* e.g., "20170630T204155Z"
|
|
*
|
|
* wrapper for strftime()
|
|
*
|
|
* It is left to the programmer to check return value of
|
|
* ISO8601NOW (should equal ISO8601_SIZE - 1).
|
|
*
|
|
*---------------------------------------------------------------------------
|
|
*/
|
|
#define ISO8601NOW(dest, now_gm) strftime((dest), ISO8601_SIZE, "%Y%m%dT%H%M%SZ", (now_gm))
|
|
|
|
/*---------------------------------------------------------------------------
|
|
*
|
|
* Macro: RFC7231NOW()
|
|
*
|
|
* Purpose:
|
|
*
|
|
* write "Day, dd Mmm YYYY HH:MM:SS GMT" to dest
|
|
* e.g., "Fri, 30 Jun 2017 20:41:55 GMT"
|
|
*
|
|
* wrapper for strftime()
|
|
*
|
|
* It is left to the programmer to check return value of
|
|
* RFC7231NOW (should equal RFC7231_SIZE - 1).
|
|
*
|
|
*---------------------------------------------------------------------------
|
|
*/
|
|
#define RFC7231NOW(dest, now_gm) strftime((dest), RFC7231_SIZE, "%a, %d %b %Y %H:%M:%S GMT", (now_gm))
|
|
|
|
/* Reasonable maximum length of a credential string.
|
|
* Provided for error-checking S3COMMS_FORMAT_CREDENTIAL (below).
|
|
* 17 <- "////aws4_request\0"
|
|
* 2 < "s3" (service)
|
|
* 8 <- "YYYYmmdd" (date)
|
|
* 128 <- (access_id)
|
|
* 155 :: sum
|
|
*/
|
|
#define S3COMMS_MAX_CREDENTIAL_SIZE 155
|
|
|
|
/*---------------------------------------------------------------------------
|
|
*
|
|
* Macro: H5FD_S3COMMS_FORMAT_CREDENTIAL()
|
|
*
|
|
* Purpose:
|
|
*
|
|
* Format "S3 Credential" string from inputs, for AWS4.
|
|
*
|
|
* Wrapper for HDsnprintf().
|
|
*
|
|
* _HAS NO ERROR-CHECKING FACILITIES_
|
|
* It is left to programmer to ensure that return value confers success.
|
|
* e.g.,
|
|
* ```
|
|
* assert( S3COMMS_MAX_CREDENTIAL_SIZE >=
|
|
* S3COMMS_FORMAT_CREDENTIAL(...) );
|
|
* ```
|
|
*
|
|
* "<access-id>/<date>/<aws-region>/<aws-service>/aws4_request"
|
|
* assuming that `dest` has adequate space.
|
|
*
|
|
* ALL inputs must be null-terminated strings.
|
|
*
|
|
* `access` should be the user's access key ID.
|
|
* `date` must be of format "YYYYmmdd".
|
|
* `region` should be relevant AWS region, i.e. "us-east-1".
|
|
* `service` should be "s3".
|
|
*
|
|
*---------------------------------------------------------------------------
|
|
*/
|
|
#define S3COMMS_FORMAT_CREDENTIAL(dest, access, iso8601_date, region, service) \
|
|
HDsnprintf((dest), S3COMMS_MAX_CREDENTIAL_SIZE, "%s/%s/%s/%s/aws4_request", (access), (iso8601_date), \
|
|
(region), (service))
|
|
|
|
/*********************
|
|
* PUBLIC STRUCTURES *
|
|
*********************/
|
|
|
|
/*----------------------------------------------------------------------------
|
|
*
|
|
* Structure: hrb_node_t
|
|
*
|
|
* HTTP Header Field Node
|
|
*
|
|
*
|
|
*
|
|
* Maintain a ordered (linked) list of HTTP Header fields.
|
|
*
|
|
* Provides efficient access and manipulation of a logical sequence of
|
|
* HTTP header fields, of particular use when composing an
|
|
* "S3 Canonical Request" for authentication.
|
|
*
|
|
* - The creation of a Canonical Request involves:
|
|
* - convert field names to lower case
|
|
* - sort by this lower-case name
|
|
* - convert ": " name-value separator in HTTP string to ":"
|
|
* - get sorted lowercase names without field or separator
|
|
*
|
|
* As HTTP headers allow headers in any order (excepting the case of multiple
|
|
* headers with the same name), the list ordering can be optimized for Canonical
|
|
* Request creation, suggesting alphabtical order. For more expedient insertion
|
|
* and removal of elements in the list, linked list seems preferable to a
|
|
* dynamically-expanding array. The usually-smaller number of entries (5 or
|
|
* fewer) makes performance overhead of traversing the list trivial.
|
|
*
|
|
* The above requirements of creating at Canonical Request suggests a reasonable
|
|
* trade-off of speed for space with the option to compute elements as needed
|
|
* or to have the various elements prepared and stored in the structure
|
|
* (e.g. name, value, lowername, concatenated name:value)
|
|
* The structure currently is implemented to pre-compute.
|
|
*
|
|
* At all times, the "first" node of the list should be the least,
|
|
* alphabetically. For all nodes, the `next` node should be either NULL or
|
|
* of greater alphabetical value.
|
|
*
|
|
* Each node contains its own header field information, plus a pointer to the
|
|
* next node.
|
|
*
|
|
* It is not allowed to have multiple nodes with the same _lowercase_ `name`s
|
|
* in the same list
|
|
* (i.e., name is case-insensitive for access and modification.)
|
|
*
|
|
* All data (`name`, `value`, `lowername`, and `cat`) are null-terminated
|
|
* strings allocated specifically for their node.
|
|
*
|
|
*
|
|
*
|
|
* `magic` (unsigned long)
|
|
*
|
|
* "unique" idenfier number for the structure type
|
|
*
|
|
* `name` (char *)
|
|
*
|
|
* Case-meaningful name of the HTTP field.
|
|
* Given case is how it is supplied to networking code.
|
|
* e.g., "Range"
|
|
*
|
|
* `lowername` (char *)
|
|
*
|
|
* Lowercase copy of name.
|
|
* e.g., "range"
|
|
*
|
|
* `value` (char *)
|
|
*
|
|
* Case-meaningful value of HTTP field.
|
|
* e.g., "bytes=0-9"
|
|
*
|
|
* `cat` (char *)
|
|
*
|
|
* Concatenated, null-terminated string of HTTP header line,
|
|
* as the field would appear in an HTTP request.
|
|
* e.g., "Range: bytes=0-9"
|
|
*
|
|
* `next` (hrb_node_t *)
|
|
*
|
|
* Pointers to next node in the list, or NULL sentinel as end of list.
|
|
* Next node must have a greater `lowername` as determined by strcmp().
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
typedef struct hrb_node_t {
|
|
unsigned long magic;
|
|
char * name;
|
|
char * value;
|
|
char * cat;
|
|
char * lowername;
|
|
struct hrb_node_t *next;
|
|
} hrb_node_t;
|
|
#define S3COMMS_HRB_NODE_MAGIC 0x7F5757UL
|
|
|
|
/*----------------------------------------------------------------------------
|
|
*
|
|
* Structure: hrb_t
|
|
*
|
|
* HTTP Request Buffer structure
|
|
*
|
|
*
|
|
*
|
|
* Logically represent an HTTP request
|
|
*
|
|
* GET /myplace/myfile.h5 HTTP/1.1
|
|
* Host: over.rainbow.oz
|
|
* Date: Fri, 01 Dec 2017 12:35:04 CST
|
|
*
|
|
* <body>
|
|
*
|
|
* ...with fast, efficient access to and modification of primary and field
|
|
* elements.
|
|
*
|
|
* Structure for building HTTP requests while hiding much of the string
|
|
* processing required "under the hood."
|
|
*
|
|
* Information about the request target -- the first line -- and the body text,
|
|
* if any, are managed directly with this structure. All header fields, e.g.,
|
|
* "Host" and "Date" above, are created with a linked list of `hrb_node_t` and
|
|
* included in the request by a pointer to the head of the list.
|
|
*
|
|
*
|
|
*
|
|
* `magic` (unsigned long)
|
|
*
|
|
* "Magic" number confirming that this is an hrb_t structure and
|
|
* what operations are valid for it.
|
|
*
|
|
* Must be S3COMMS_HRB_MAGIC to be valid.
|
|
*
|
|
* `body` (char *) :
|
|
*
|
|
* Pointer to start of HTTP body.
|
|
*
|
|
* Can be NULL, in which case it is treated as the empty string, "".
|
|
*
|
|
* `body_len` (size_t) :
|
|
*
|
|
* Number of bytes (characters) in `body`. 0 if empty or NULL `body`.
|
|
*
|
|
* `first_header` (hrb_node_t *) :
|
|
*
|
|
* Pointer to first SORTED header node, if any.
|
|
* It is left to the programmer to ensure that this node and associated
|
|
* list is destroyed when done.
|
|
*
|
|
* `resource` (char *) :
|
|
*
|
|
* Pointer to resource URL string, e.g., "/folder/page.xhtml".
|
|
*
|
|
* `verb` (char *) :
|
|
*
|
|
* Pointer to HTTP verb string, e.g., "GET".
|
|
*
|
|
* `version` (char *) :
|
|
*
|
|
* Pointer to HTTP version string, e.g., "HTTP/1.1".
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
typedef struct {
|
|
unsigned long magic;
|
|
char * body;
|
|
size_t body_len;
|
|
hrb_node_t * first_header;
|
|
char * resource;
|
|
char * verb;
|
|
char * version;
|
|
} hrb_t;
|
|
#define S3COMMS_HRB_MAGIC 0x6DCC84UL
|
|
|
|
/*----------------------------------------------------------------------------
|
|
*
|
|
* Structure: parsed_url_t
|
|
*
|
|
*
|
|
* Represent a URL with easily-accessed pointers to logical elements within.
|
|
* These elements (components) are stored as null-terminated strings (or just
|
|
* NULLs). These components should be allocated for the structure, making the
|
|
* data as safe as possible from modification. If a component is NULL, it is
|
|
* either implicit in or absent from the URL.
|
|
*
|
|
* "http://mybucket.s3.amazonaws.com:8080/somefile.h5?param=value&arg=value"
|
|
* ^--^ ^-----------------------^ ^--^ ^---------^ ^-------------------^
|
|
* Scheme Host Port Resource Query/-ies
|
|
*
|
|
*
|
|
*
|
|
* `magic` (unsigned long)
|
|
*
|
|
* Structure identification and validation identifier.
|
|
* Identifies as `parsed_url_t` type.
|
|
*
|
|
* `scheme` (char *)
|
|
*
|
|
* String representing which protocol is to be expected.
|
|
* _Must_ be present.
|
|
* "http", "https", "ftp", e.g.
|
|
*
|
|
* `host` (char *)
|
|
*
|
|
* String of host, either domain name, IPv4, or IPv6 format.
|
|
* _Must_ be present.
|
|
* "over.rainbow.oz", "192.168.0.1", "[0000:0000:0000:0001]"
|
|
*
|
|
* `port` (char *)
|
|
*
|
|
* String representation of specified port. Must resolve to a valid unsigned
|
|
* integer.
|
|
* "9000", "80"
|
|
*
|
|
* `path` (char *)
|
|
*
|
|
* Path to resource on host. If not specified, assumes root "/".
|
|
* "lollipop_guild.wav", "characters/witches/white.dat"
|
|
*
|
|
* `query` (char *)
|
|
*
|
|
* Single string of all query parameters in url (if any).
|
|
* "arg1=value1&arg2=value2"
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
typedef struct {
|
|
unsigned long magic;
|
|
char * scheme; /* required */
|
|
char * host; /* required */
|
|
char * port;
|
|
char * path;
|
|
char * query;
|
|
} parsed_url_t;
|
|
#define S3COMMS_PARSED_URL_MAGIC 0x21D0DFUL
|
|
|
|
/*----------------------------------------------------------------------------
|
|
*
|
|
* Structure: s3r_t
|
|
*
|
|
*
|
|
*
|
|
* S3 request structure "handle".
|
|
*
|
|
* Holds persistent information for Amazon S3 requests.
|
|
*
|
|
* Instantiated through `H5FD_s3comms_s3r_open()`, copies data into self.
|
|
*
|
|
* Intended to be re-used for operations on a remote object.
|
|
*
|
|
* Cleaned up through `H5FD_s3comms_s3r_close()`.
|
|
*
|
|
* _DO NOT_ share handle between threads: curl easy handle `curlhandle` has
|
|
* undefined behavior if called to perform in multiple threads.
|
|
*
|
|
*
|
|
*
|
|
* `magic` (unsigned long)
|
|
*
|
|
* "magic" number identifying this structure as unique type.
|
|
* MUST equal `S3R_MAGIC` to be valid.
|
|
*
|
|
* `curlhandle` (CURL)
|
|
*
|
|
* Pointer to the curl_easy handle generated for the request.
|
|
*
|
|
* `httpverb` (char *)
|
|
*
|
|
* Pointer to NULL-terminated string. HTTP verb,
|
|
* e.g. "GET", "HEAD", "PUT", etc.
|
|
*
|
|
* Default is NULL, resulting in a "GET" request.
|
|
*
|
|
* `purl` (parsed_url_t *)
|
|
*
|
|
* Pointer to structure holding the elements of URL for file open.
|
|
*
|
|
* e.g., "http://bucket.aws.com:8080/myfile.dat?q1=v1&q2=v2"
|
|
* parsed into...
|
|
* { scheme: "http"
|
|
* host: "bucket.aws.com"
|
|
* port: "8080"
|
|
* path: "myfile.dat"
|
|
* query: "q1=v1&q2=v2"
|
|
* }
|
|
*
|
|
* Cannot be NULL.
|
|
*
|
|
* `region` (char *)
|
|
*
|
|
* Pointer to NULL-terminated string, specifying S3 "region",
|
|
* e.g., "us-east-1".
|
|
*
|
|
* Required to authenticate.
|
|
*
|
|
* `secret_id` (char *)
|
|
*
|
|
* Pointer to NULL-terminated string for "secret" access id to S3 resource.
|
|
*
|
|
* Required to authenticate.
|
|
*
|
|
* `signing_key` (unsigned char *)
|
|
*
|
|
* Pointer to `SHA256_DIGEST_LENGTH`-long string for "re-usable" signing
|
|
* key, generated via
|
|
* `HMAC-SHA256(HMAC-SHA256(HMAC-SHA256(HMAC-SHA256("AWS4<secret_key>",
|
|
* "<yyyyMMDD"), "<aws-region>"), "<aws-service>"), "aws4_request")`
|
|
* which may be re-used for several (up to seven (7)) days from creation?
|
|
* Computed once upon file open.
|
|
*
|
|
* Required to authenticate.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
typedef struct {
|
|
unsigned long magic;
|
|
CURL * curlhandle;
|
|
size_t filesize;
|
|
char * httpverb;
|
|
parsed_url_t * purl;
|
|
char * region;
|
|
char * secret_id;
|
|
unsigned char *signing_key;
|
|
} s3r_t;
|
|
|
|
#define S3COMMS_S3R_MAGIC 0x44d8d79
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
/*******************************************
|
|
* DECLARATION OF HTTP FIELD LIST ROUTINES *
|
|
*******************************************/
|
|
|
|
H5_DLL herr_t H5FD_s3comms_hrb_node_set(hrb_node_t **L, const char *name, const char *value);
|
|
|
|
/***********************************************
|
|
* DECLARATION OF HTTP REQUEST BUFFER ROUTINES *
|
|
***********************************************/
|
|
|
|
H5_DLL herr_t H5FD_s3comms_hrb_destroy(hrb_t **buf);
|
|
|
|
H5_DLL hrb_t *H5FD_s3comms_hrb_init_request(const char *verb, const char *resource, const char *host);
|
|
|
|
/*************************************
|
|
* DECLARATION OF S3REQUEST ROUTINES *
|
|
*************************************/
|
|
|
|
H5_DLL herr_t H5FD_s3comms_s3r_close(s3r_t *handle);
|
|
|
|
H5_DLL size_t H5FD_s3comms_s3r_get_filesize(s3r_t *handle);
|
|
|
|
H5_DLL s3r_t *H5FD_s3comms_s3r_open(const char url[], const char region[], const char id[],
|
|
const unsigned char signing_key[]);
|
|
|
|
H5_DLL herr_t H5FD_s3comms_s3r_read(s3r_t *handle, haddr_t offset, size_t len, void *dest);
|
|
|
|
/*********************************
|
|
* DECLARATION OF OTHER ROUTINES *
|
|
*********************************/
|
|
|
|
H5_DLL struct tm *gmnow(void);
|
|
|
|
H5_DLL herr_t H5FD_s3comms_aws_canonical_request(char *canonical_request_dest, int cr_size,
|
|
char *signed_headers_dest, int sh_size, hrb_t *http_request);
|
|
|
|
H5_DLL herr_t H5FD_s3comms_bytes_to_hex(char *dest, const unsigned char *msg, size_t msg_len,
|
|
hbool_t lowercase);
|
|
|
|
H5_DLL herr_t H5FD_s3comms_free_purl(parsed_url_t *purl);
|
|
|
|
H5_DLL herr_t H5FD_s3comms_HMAC_SHA256(const unsigned char *key, size_t key_len, const char *msg,
|
|
size_t msg_len, char *dest);
|
|
|
|
H5_DLL herr_t H5FD_s3comms_load_aws_profile(const char *name, char *key_id_out, char *secret_access_key_out,
|
|
char *aws_region_out);
|
|
|
|
H5_DLL herr_t H5FD_s3comms_nlowercase(char *dest, const char *s, size_t len);
|
|
|
|
H5_DLL herr_t H5FD_s3comms_parse_url(const char *str, parsed_url_t **purl);
|
|
|
|
H5_DLL herr_t H5FD_s3comms_percent_encode_char(char *repr, const unsigned char c, size_t *repr_len);
|
|
|
|
H5_DLL herr_t H5FD_s3comms_signing_key(unsigned char *md, const char *secret, const char *region,
|
|
const char *iso8601now);
|
|
|
|
H5_DLL herr_t H5FD_s3comms_tostringtosign(char *dest, const char *req_str, const char *now,
|
|
const char *region);
|
|
|
|
H5_DLL herr_t H5FD_s3comms_trim(char *dest, char *s, size_t s_len, size_t *n_written);
|
|
|
|
H5_DLL herr_t H5FD_s3comms_uriencode(char *dest, const char *s, size_t s_len, hbool_t encode_slash,
|
|
size_t *n_written);
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif /* H5_HAVE_ROS3_VFD */
|