/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * Copyright by The HDF Group.                                               *
 * All rights reserved.                                                      *
 *                                                                           *
 * This file is part of HDF5.  The full HDF5 copyright notice, including     *
 * terms governing use, modification, and redistribution, is contained in    *
 * the COPYING file, which can be found at the root of the source code       *
 * distribution tree, or in https://www.hdfgroup.org/licenses.               *
 * If you do not have access to either file, you may request a copy from     *
 * help@hdfgroup.org.                                                        *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

/*
 * Collective file open optimization tests
 *
 */

#include "testpar.h"
#include "H5Dprivate.h"

/* The collection of files is included below to aid
 * an external "cleanup" process if required.
 *
 * Note that the code below relies on the ordering of this array
 * since each set of three is used by the tests either to construct
 * or to read and validate.
 */
/* Number of real file names in FILENAMES; the array carries one extra
 * slot for the terminating NULL.
 */
#define NFILENAME 3
const char *FILENAMES[NFILENAME + 1] = {
    "reloc_t_pread_data_file",    /* [0] used when the communicator is MPI_COMM_WORLD */
    "reloc_t_pread_group_0_file", /* [1] used by sub-group 0 */
    "reloc_t_pread_group_1_file", /* [2] used by sub-group 1 */
    NULL                          /* end-of-list marker */
};

/* Size, in bytes, of the local buffers that receive the fixed-up file names */
#define FILENAME_BUF_SIZE 1024

/* Number of float elements each process contributes to a dataset */
#define COUNT 1000

/* The rank-0-read-and-broadcast tests are skipped when a group holds
 * more processes than this.
 */
#define LIMIT_NPROC 6

/* Shared pass/fail flag; each test routine resets it to true on entry
 * and clears it at the first detected error.
 */
bool pass = true;

/* Text written at the start of the file's user block when group_id is 0 */
static const char *random_hdf5_text =
    "Now is the time for all first-time-users of HDF5 to read their "
    "manual or go through the tutorials!\n"
    "While you're at it, now is also the time to read up on MPI-IO.";

/* Text written at the start of the file's user block for any other group_id */
static const char *hitchhiker_quote =
    "A common mistake that people make when trying to design something\n"
    "completely foolproof is to underestimate the ingenuity of complete\n"
    "fools.\n";

/* Forward declarations of the two test routines defined later in this file.
 * Parameter names mirror the ones used by the definitions.
 */
static int generate_test_file(MPI_Comm comm, int mpi_rank, int group_id);
static int test_parallel_read(MPI_Comm comm, int mpi_rank, int mpi_size, int group_id);

/* NOTE(review): not referenced in the part of the file visible here;
 * presumably holds the program's argv[0] -- confirm against main().
 */
static char *test_argv0 = NULL;

/*-------------------------------------------------------------------------
 * Function:    generate_test_file
 *
 * Purpose:     This function is called to produce an HDF5 data file
 *              whose superblock is relocated to a power-of-2 boundary
 *              (a 512 byte user block is requested at file creation).
 *
 *              Since data will be read back and validated, we generate
 *              data in a predictable manner rather than randomly.
 *              For now, we simply use the global mpi_rank of the writing
 *              process as a starting component for the data generation.
 *              Subsequent writes are increments from the initial start
 *              value.
 *
 *              In the overall scheme of running the test, we'll call
 *              this function twice: first as a collection of all MPI
 *              processes and then a second time with the processes split
 *              more or less in half. Each sub group will operate
 *              collectively on their assigned file.  This split into
 *              subgroups validates that parallel groups can successfully
 *              open and read data independently from the other parallel
 *              operations taking place.
 *
 *              Two datasets are written to the file, "dataset0"
 *              (default layout) and "dataset0_chunked" (chunked layout);
 *              each process writes COUNT floats into both.  Afterwards
 *              the process with rank 0 in 'comm' writes a short text
 *              string at the start of the file's user block.
 *
 * Parameters:  comm     - communicator used to create and write the file.
 *                         MPI_COMM_WORLD selects FILENAMES[0]; any other
 *                         communicator selects FILENAMES[1] or
 *                         FILENAMES[2] depending on group_id.
 *              mpi_rank - global rank of the calling process.  Seeds the
 *                         data values (mpi_rank * COUNT) and decides
 *                         which process prints progress messages.
 *              group_id - sub-group identifier.  Selects the file name
 *                         (see above) and which text is written into
 *                         the user block.
 *
 * Note:        The final pass/fail reduction is performed over
 *              MPI_COMM_WORLD, not over 'comm'.
 *
 * Return:      Success: 0
 *
 *              Failure: 1
 *-------------------------------------------------------------------------
 */
static int
generate_test_file(MPI_Comm comm, int mpi_rank, int group_id)
{
    int         header         = -1;
    const char *fcn_name       = "generate_test_file()";
    const char *failure_mssg   = NULL;
    const char *group_filename = NULL;
    char        data_filename[FILENAME_BUF_SIZE];
    int         file_index = 0;
    int         group_size = 0;  /* stays 0 if MPI_Comm_size fails */
    int         group_rank = -1; /* stays -1 if MPI_Comm_rank fails, so no
                                  * process mistakes itself for group rank 0 */
    int         local_failure   = 0;
    int         global_failures = 0;
    hsize_t     count           = COUNT;
    hsize_t     i;
    hsize_t     offset;
    hsize_t     dims[1]    = {0};
    hid_t       file_id    = H5I_INVALID_HID;
    hid_t       memspace   = H5I_INVALID_HID;
    hid_t       filespace  = H5I_INVALID_HID;
    hid_t       fctmpl     = H5I_INVALID_HID;
    hid_t       fapl_id    = H5I_INVALID_HID;
    hid_t       dxpl_id    = H5I_INVALID_HID;
    hid_t       dset_id    = H5I_INVALID_HID;
    hid_t       dset_id_ch = H5I_INVALID_HID;
    hid_t       dcpl_id    = H5I_INVALID_HID;
    hsize_t     chunk[1];
    float       nextValue;
    float      *data_slice = NULL;

    pass = true;

    assert(comm != MPI_COMM_NULL);

    if ((MPI_Comm_rank(comm, &group_rank)) != MPI_SUCCESS) {
        pass         = false;
        failure_mssg = "generate_test_file: MPI_Comm_rank failed.\n";
    }

    if ((MPI_Comm_size(comm, &group_size)) != MPI_SUCCESS) {
        pass         = false;
        failure_mssg = "generate_test_file: MPI_Comm_size failed.\n";
    }

    if (mpi_rank == 0) {

        fprintf(stdout, "Constructing test files...");
    }

    /* Setup the file names
     * The test specific filenames are stored as consecutive
     * array entries in the global 'FILENAMES' array above.
     * Here, we simply decide on the starting index for
     * file construction.  The reading portion of the test
     * will have a similar setup process...
     */
    if (pass) {
        if (comm == MPI_COMM_WORLD) { /* Test 1 */
            file_index = 0;
        }
        else if (group_id == 0) { /* Test 2 group 0 */
            file_index = 1;
        }
        else { /* Test 2 group 1 */
            file_index = 2;
        }

        /* The 'group_filename' is just a temp variable and
         * is used to call into the h5_fixname function. No
         * need to worry that we reassign it for each file!
         */
        group_filename = FILENAMES[file_index];
        assert(group_filename);

        /* Assign the 'data_filename' */
        if (h5_fixname(group_filename, H5P_DEFAULT, data_filename, sizeof(data_filename)) == NULL) {
            pass         = false;
            failure_mssg = "h5_fixname(0) failed.\n";
        }
    }

    /* setup data to write */
    if (pass) {
        if ((data_slice = (float *)malloc(COUNT * sizeof(float))) == NULL) {
            pass         = false;
            failure_mssg = "malloc of data_slice failed.\n";
        }
    }

    if (pass) {
        /* Values are consecutive, starting at (global rank * COUNT) */
        nextValue = (float)(mpi_rank * COUNT);

        for (i = 0; i < COUNT; i++) {
            data_slice[i] = nextValue;
            nextValue += 1;
        }
    }

    /* Initialize a file creation template */
    if (pass) {
        if ((fctmpl = H5Pcreate(H5P_FILE_CREATE)) < 0) {
            pass         = false;
            failure_mssg = "H5Pcreate(H5P_FILE_CREATE) failed.\n";
        }
        else if (H5Pset_userblock(fctmpl, 512) != SUCCEED) {
            pass         = false;
            failure_mssg = "H5Pset_userblock(,size) failed.\n";
        }
    }
    /* setup FAPL */
    if (pass) {
        if ((fapl_id = H5Pcreate(H5P_FILE_ACCESS)) < 0) {
            pass         = false;
            failure_mssg = "H5Pcreate(H5P_FILE_ACCESS) failed.\n";
        }
    }

    if (pass) {
        if ((H5Pset_fapl_mpio(fapl_id, comm, MPI_INFO_NULL)) < 0) {
            pass         = false;
            failure_mssg = "H5Pset_fapl_mpio() failed\n";
        }
    }

    /* create the data file */
    if (pass) {
        if ((file_id = H5Fcreate(data_filename, H5F_ACC_TRUNC, fctmpl, fapl_id)) < 0) {
            pass         = false;
            failure_mssg = "H5Fcreate() failed.\n";
        }
    }

    /* create and write the dataset */
    if (pass) {
        if ((dxpl_id = H5Pcreate(H5P_DATASET_XFER)) < 0) {
            pass         = false;
            failure_mssg = "H5Pcreate(H5P_DATASET_XFER) failed.\n";
        }
    }

    if (pass) {
        if ((H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE)) < 0) {
            pass         = false;
            failure_mssg = "H5Pset_dxpl_mpio() failed.\n";
        }
    }

    /* memory dataspace: COUNT elements per process */
    if (pass) {
        dims[0] = COUNT;
        if ((memspace = H5Screate_simple(1, dims, NULL)) < 0) {
            pass         = false;
            failure_mssg = "H5Screate_simple(1, dims, NULL) failed (1).\n";
        }
    }

    /* file dataspace: COUNT elements for every process in the group */
    if (pass) {
        dims[0] *= (hsize_t)group_size;
        if ((filespace = H5Screate_simple(1, dims, NULL)) < 0) {
            pass         = false;
            failure_mssg = "H5Screate_simple(1, dims, NULL) failed (2).\n";
        }
    }

    /* each process selects its own COUNT-element slab, located by group rank */
    if (pass) {
        offset = (hsize_t)group_rank * (hsize_t)COUNT;
        if ((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, &offset, NULL, &count, NULL)) < 0) {
            pass         = false;
            failure_mssg = "H5Sselect_hyperslab() failed.\n";
        }
    }

    if (pass) {
        if ((dset_id = H5Dcreate2(file_id, "dataset0", H5T_NATIVE_FLOAT, filespace, H5P_DEFAULT, H5P_DEFAULT,
                                  H5P_DEFAULT)) < 0) {
            pass         = false;
            failure_mssg = "H5Dcreate2() failed.\n";
        }
    }

    if (pass) {
        if ((H5Dwrite(dset_id, H5T_NATIVE_FLOAT, memspace, filespace, dxpl_id, data_slice)) < 0) {
            pass         = false;
            failure_mssg = "H5Dwrite() failed.\n";
        }
    }

    /* create a chunked dataset */
    chunk[0] = COUNT / 8;

    if (pass) {
        if ((dcpl_id = H5Pcreate(H5P_DATASET_CREATE)) < 0) {
            pass         = false;
            failure_mssg = "H5Pcreate() failed.\n";
        }
    }

    if (pass) {
        if ((H5Pset_chunk(dcpl_id, 1, chunk)) < 0) {
            pass         = false;
            failure_mssg = "H5Pset_chunk() failed.\n";
        }
    }

    if (pass) {

        if ((dset_id_ch = H5Dcreate2(file_id, "dataset0_chunked", H5T_NATIVE_FLOAT, filespace, H5P_DEFAULT,
                                     dcpl_id, H5P_DEFAULT)) < 0) {
            pass         = false;
            failure_mssg = "H5Dcreate2() failed.\n";
        }
    }

    if (pass) {
        if ((H5Dwrite(dset_id_ch, H5T_NATIVE_FLOAT, memspace, filespace, dxpl_id, data_slice)) < 0) {
            pass         = false;
            failure_mssg = "H5Dwrite() failed.\n";
        }
    }

    /* Release every handle that was successfully created, whether or
     * not an earlier step failed.
     */
    if (pass || (dcpl_id != H5I_INVALID_HID)) {
        if (H5Pclose(dcpl_id) < 0) {
            pass         = false;
            failure_mssg = "H5Pclose(dcpl_id) failed.\n";
        }
    }

    if (pass || (dset_id_ch != H5I_INVALID_HID)) {
        if (H5Dclose(dset_id_ch) < 0) {
            pass         = false;
            failure_mssg = "H5Dclose(dset_id_ch) failed.\n";
        }
    }

    /* close file, etc. */
    if (pass || (dset_id != H5I_INVALID_HID)) {
        if (H5Dclose(dset_id) < 0) {
            pass         = false;
            failure_mssg = "H5Dclose(dset_id) failed.\n";
        }
    }

    if (pass || (memspace != H5I_INVALID_HID)) {
        if (H5Sclose(memspace) < 0) {
            pass         = false;
            failure_mssg = "H5Sclose(memspace) failed.\n";
        }
    }

    if (pass || (filespace != H5I_INVALID_HID)) {
        if (H5Sclose(filespace) < 0) {
            pass         = false;
            failure_mssg = "H5Sclose(filespace) failed.\n";
        }
    }

    if (pass || (file_id != H5I_INVALID_HID)) {
        if (H5Fclose(file_id) < 0) {
            pass         = false;
            failure_mssg = "H5Fclose(file_id) failed.\n";
        }
    }

    if (pass || (dxpl_id != H5I_INVALID_HID)) {
        if (H5Pclose(dxpl_id) < 0) {
            pass         = false;
            failure_mssg = "H5Pclose(dxpl_id) failed.\n";
        }
    }

    if (pass || (fapl_id != H5I_INVALID_HID)) {
        if (H5Pclose(fapl_id) < 0) {
            pass         = false;
            failure_mssg = "H5Pclose(fapl_id) failed.\n";
        }
    }

    if (pass || (fctmpl != H5I_INVALID_HID)) {
        if (H5Pclose(fctmpl) < 0) {
            pass         = false;
            failure_mssg = "H5Pclose(fctmpl) failed.\n";
        }
    }

    /* Add a userblock to the head of the datafile.
     * We will use this for a functional test of the
     * file open optimization.  This superblock
     * relocation is done by the rank 0 process associated
     * with the communicator being used.  For test 1, we
     * utilize MPI_COMM_WORLD, so group_rank 0 is the
     * same as mpi_rank 0.  For test 2 which utilizes
     * two groups resulting from an MPI_Comm_split, we
     * will have parallel groups and hence two
     * group_rank(0) processes. Each parallel group
     * will create a unique file with different text
     * headers and different data.
     */
    if (group_rank == 0) {
        const char *text_to_write;
        size_t      bytes_to_write;

        if (group_id == 0)
            text_to_write = random_hdf5_text;
        else
            text_to_write = hitchhiker_quote;

        bytes_to_write = strlen(text_to_write);

        if (pass) {
            if ((header = HDopen(data_filename, O_WRONLY)) < 0) {
                pass         = false;
                failure_mssg = "HDopen(data_filename, O_WRONLY) failed.\n";
            }
        }

        if (pass) {
            if (HDlseek(header, 0, SEEK_SET) < 0) {
                pass         = false;
                failure_mssg = "HDlseek() failed.\n";
            }
        }

        if (pass) {
            /* A short write would leave a truncated header behind, so
             * anything other than a complete write counts as a failure.
             */
            long long bytes_written = (long long)HDwrite(header, text_to_write, bytes_to_write);

            if ((bytes_written < 0) || ((size_t)bytes_written != bytes_to_write)) {
                pass         = false;
                failure_mssg = "Unable to write user text into file.\n";
            }
        }

        /* 0 is a valid descriptor; only a negative value means "not open" */
        if (header >= 0) {
            if (HDclose(header) < 0) {
                pass         = false;
                failure_mssg = "HDclose() failed.\n";
            }
        }
    }

    /* collect results from other processes.
     * Only overwrite the failure message if no previous error
     * has been detected
     */
    local_failure = (pass ? 0 : 1);

    /* This is a global all reduce (NOT group specific) */
    if (MPI_Allreduce(&local_failure, &global_failures, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) {
        if (pass) {
            pass         = false;
            failure_mssg = "MPI_Allreduce() failed.\n";
        }
    }
    else if ((pass) && (global_failures > 0)) {
        pass         = false;
        failure_mssg = "One or more processes report failure.\n";
    }

    /* report results */
    if (mpi_rank == 0) {
        if (pass) {
            fprintf(stdout, "Done.\n");
        }
        else {
            fprintf(stdout, "FAILED.\n");
            fprintf(stdout, "%s: failure_mssg = \"%s\"\n", fcn_name, failure_mssg);
        }
    }

    /* free data_slice if it has been allocated */
    if (data_slice != NULL) {
        free(data_slice);
        data_slice = NULL;
    }

    return (!pass);

} /* generate_test_file() */

/*-------------------------------------------------------------------------
 * Function:    test_parallel_read
 *
 * Purpose:     This actually tests the superblock optimization
 *              and covers the three primary cases we're interested in.
 *              1). That HDF5 files can be opened in parallel by
 *                  the rank 0 process and that the superblock
 *                  offset is correctly broadcast to the other
 *                  parallel file readers.
 *              2). That a parallel application can correctly
 *                  handle reading multiple files by using
 *                  subgroups of MPI_COMM_WORLD and that each
 *                  subgroup operates as described in (1) to
 *                  collectively read the data.
 *              3). Testing proc0-read-and-MPI_Bcast using
 *                  sub-communicators, and reading into
 *                  a memory space that is different from the
 *                  file space, and chunked datasets.
 *
 *              The global MPI rank is used for reading and
 *              writing data for process specific data in the
 *              dataset.  We do this rather simplistically, i.e.
 *               rank 0:  writes/reads 0-999
 *               rank 1:  writes/reads 1000-1999
 *               rank 2:  writes/reads 2000-2999
 *               ...
 *
 * Return:      Success: 0
 *
 *              Failure: 1
 *-------------------------------------------------------------------------
 */
static int
test_parallel_read(MPI_Comm comm, int mpi_rank, int mpi_size, int group_id)
{
    const char *failure_mssg;
    const char *fcn_name       = "test_parallel_read()";
    const char *group_filename = NULL;
    char        reloc_data_filename[FILENAME_BUF_SIZE];
    int         local_failure   = 0;
    int         global_failures = 0;
    int         group_size;
    int         group_rank;
    hid_t       fapl_id    = H5I_INVALID_HID;
    hid_t       file_id    = H5I_INVALID_HID;
    hid_t       dset_id    = H5I_INVALID_HID;
    hid_t       dset_id_ch = H5I_INVALID_HID;
    hid_t       dxpl_id    = H5P_DEFAULT;
    hid_t       memspace   = H5I_INVALID_HID;
    hid_t       filespace  = H5I_INVALID_HID;
    hid_t       filetype   = H5I_INVALID_HID;
    size_t      filetype_size;
    hssize_t    dset_size;
    hsize_t     i;
    hsize_t     offset;
    hsize_t     count   = COUNT;
    hsize_t     dims[1] = {0};
    float       nextValue;
    float      *data_slice = NULL;

    pass = true;

    assert(comm != MPI_COMM_NULL);

    if ((MPI_Comm_rank(comm, &group_rank)) != MPI_SUCCESS) {
        pass         = false;
        failure_mssg = "test_parallel_read: MPI_Comm_rank failed.\n";
    }

    if ((MPI_Comm_size(comm, &group_size)) != MPI_SUCCESS) {
        pass         = false;
        failure_mssg = "test_parallel_read: MPI_Comm_size failed.\n";
    }

    if (mpi_rank == 0) {
        if (comm == MPI_COMM_WORLD) {
            TESTING("parallel file open test 1");
        }
        else {
            TESTING("parallel file open test 2");
        }
    }

    /* allocate space for the data_slice array */
    if (pass) {
        if ((data_slice = (float *)malloc(COUNT * sizeof(float))) == NULL) {
            pass         = false;
            failure_mssg = "malloc of data_slice failed.\n";
        }
    }

    /* Select the file name to read
     * Please see the comments in the 'generate_test_file' function
     * for more details...
     */
    if (pass) {

        if (comm == MPI_COMM_WORLD) /* test 1 */
            group_filename = FILENAMES[0];
        else if (group_id == 0) /* test 2 group 0 */
            group_filename = FILENAMES[1];
        else /* test 2 group 1 */
            group_filename = FILENAMES[2];

        assert(group_filename);
        if (h5_fixname(group_filename, H5P_DEFAULT, reloc_data_filename, sizeof(reloc_data_filename)) ==
            NULL) {

            pass         = false;
            failure_mssg = "h5_fixname(1) failed.\n";
        }
    }

    /* setup FAPL */
    if (pass) {
        if ((fapl_id = H5Pcreate(H5P_FILE_ACCESS)) < 0) {
            pass         = false;
            failure_mssg = "H5Pcreate(H5P_FILE_ACCESS) failed.\n";
        }
    }

    if (pass) {
        if ((H5Pset_fapl_mpio(fapl_id, comm, MPI_INFO_NULL)) < 0) {
            pass         = false;
            failure_mssg = "H5Pset_fapl_mpio() failed\n";
        }
    }

    /* open the file -- should have user block, exercising the optimization */
    if (pass) {
        if ((file_id = H5Fopen(reloc_data_filename, H5F_ACC_RDONLY, fapl_id)) < 0) {
            pass         = false;
            failure_mssg = "H5Fopen() failed\n";
        }
    }

    /* open the data set */
    if (pass) {
        if ((dset_id = H5Dopen2(file_id, "dataset0", H5P_DEFAULT)) < 0) {
            pass         = false;
            failure_mssg = "H5Dopen2() failed\n";
        }
    }

    /* open the chunked data set */
    if (pass) {
        if ((dset_id_ch = H5Dopen2(file_id, "dataset0_chunked", H5P_DEFAULT)) < 0) {
            pass         = false;
            failure_mssg = "H5Dopen2() failed\n";
        }
    }

    /* setup memspace */
    if (pass) {
        dims[0] = count;
        if ((memspace = H5Screate_simple(1, dims, NULL)) < 0) {
            pass         = false;
            failure_mssg = "H5Screate_simple(1, dims, NULL) failed\n";
        }
    }

    /* setup filespace */
    if (pass) {
        if ((filespace = H5Dget_space(dset_id)) < 0) {
            pass         = false;
            failure_mssg = "H5Dget_space(dataset) failed\n";
        }
    }

    if (pass) {
        offset = (hsize_t)group_rank * count;
        if ((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, &offset, NULL, &count, NULL)) < 0) {
            pass         = false;
            failure_mssg = "H5Sselect_hyperslab() failed\n";
        }
    }

    /* read this processes section of the data */
    if (pass) {
        if ((H5Dread(dset_id, H5T_NATIVE_FLOAT, memspace, filespace, H5P_DEFAULT, data_slice)) < 0) {
            pass         = false;
            failure_mssg = "H5Dread() failed\n";
        }
    }

    /* verify the data */
    if (pass) {
        nextValue = (float)((hsize_t)mpi_rank * count);
        i         = 0;
        while ((pass) && (i < count)) {
            /* what we really want is data_slice[i] != nextValue --
             * the following is a circumlocution to shut up the
             * the compiler.
             */
            if ((data_slice[i] > nextValue) || (data_slice[i] < nextValue)) {
                pass         = false;
                failure_mssg = "Unexpected dset contents.\n";
            }
            nextValue += 1;
            i++;
        }
    }

    if (pass || (memspace != -1)) {
        if (H5Sclose(memspace) < 0) {
            pass         = false;
            failure_mssg = "H5Sclose(memspace) failed.\n";
        }
    }

    if (pass || (filespace != -1)) {
        if (H5Sclose(filespace) < 0) {
            pass         = false;
            failure_mssg = "H5Sclose(filespace) failed.\n";
        }
    }

    /* free data_slice if it has been allocated */
    if (data_slice != NULL) {
        free(data_slice);
        data_slice = NULL;
    }

    /*
     * Test reading proc0-read-and-bcast with sub-communicators
     */

    /* Don't test with more than LIMIT_NPROC processes to avoid memory issues */

    if (group_size <= LIMIT_NPROC) {
#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
        bool prop_value;
#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */

        if ((filespace = H5Dget_space(dset_id)) < 0) {
            pass         = false;
            failure_mssg = "H5Dget_space failed.\n";
        }

        if ((dset_size = H5Sget_simple_extent_npoints(filespace)) < 0) {
            pass         = false;
            failure_mssg = "H5Sget_simple_extent_npoints failed.\n";
        }

        if ((filetype = H5Dget_type(dset_id)) < 0) {
            pass         = false;
            failure_mssg = "H5Dget_type failed.\n";
        }

        if ((filetype_size = H5Tget_size(filetype)) == 0) {
            pass         = false;
            failure_mssg = "H5Tget_size failed.\n";
        }

        if (H5Tclose(filetype) < 0) {
            pass         = false;
            failure_mssg = "H5Tclose failed.\n";
        };

        if ((data_slice = (float *)malloc((size_t)dset_size * filetype_size)) == NULL) {
            pass         = false;
            failure_mssg = "malloc of data_slice failed.\n";
        }

        if (pass) {
            if ((dxpl_id = H5Pcreate(H5P_DATASET_XFER)) < 0) {
                pass         = false;
                failure_mssg = "H5Pcreate(H5P_DATASET_XFER) failed.\n";
            }
        }

        if (pass) {
            if ((H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE)) < 0) {
                pass         = false;
                failure_mssg = "H5Pset_dxpl_mpio() failed.\n";
            }
        }

#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
        if (pass) {
            prop_value = H5D_XFER_COLL_RANK0_BCAST_DEF;
            if (H5Pinsert2(dxpl_id, H5D_XFER_COLL_RANK0_BCAST_NAME, H5D_XFER_COLL_RANK0_BCAST_SIZE,
                           &prop_value, NULL, NULL, NULL, NULL, NULL, NULL) < 0) {
                pass         = false;
                failure_mssg = "H5Pinsert2() failed\n";
            }
        }
#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */

        /* read H5S_ALL section */
        if (pass) {
            if ((H5Dread(dset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, dxpl_id, data_slice)) < 0) {
                pass         = false;
                failure_mssg = "H5Dread() failed\n";
            }
        }

#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
        if (pass) {
            prop_value = false;
            if (H5Pget(dxpl_id, H5D_XFER_COLL_RANK0_BCAST_NAME, &prop_value) < 0) {
                pass         = false;
                failure_mssg = "H5Pget() failed\n";
            }
            if (pass) {
                if (prop_value != true) {
                    pass         = false;
                    failure_mssg = "rank 0 Bcast optimization was mistakenly not performed\n";
                }
            }
        }
#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */

        /* verify the data */
        if (pass) {

            if (comm == MPI_COMM_WORLD) /* test 1 */
                nextValue = 0;
            else if (group_id == 0) /* test 2 group 0 */
                nextValue = 0;
            else /* test 2 group 1 */
                nextValue = (float)((hsize_t)(mpi_size / 2) * count);

            i = 0;
            while ((pass) && (i < (hsize_t)dset_size)) {
                /* what we really want is data_slice[i] != nextValue --
                 * the following is a circumlocution to shut up the
                 * the compiler.
                 */
                if ((data_slice[i] > nextValue) || (data_slice[i] < nextValue)) {
                    pass         = false;
                    failure_mssg = "Unexpected dset contents.\n";
                }
                nextValue += 1;
                i++;
            }
        }

        /* read H5S_ALL section for the chunked dataset */

#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
        if (pass) {
            prop_value = H5D_XFER_COLL_RANK0_BCAST_DEF;
            if (H5Pset(dxpl_id, H5D_XFER_COLL_RANK0_BCAST_NAME, &prop_value) < 0) {
                pass         = false;
                failure_mssg = "H5Pset() failed\n";
            }
        }
#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */

        for (i = 0; i < (hsize_t)dset_size; i++) {
            data_slice[i] = 0;
        }
        if (pass) {
            if ((H5Dread(dset_id_ch, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, dxpl_id, data_slice)) < 0) {
                pass         = false;
                failure_mssg = "H5Dread() failed\n";
            }
        }

#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
        if (pass) {
            prop_value = false;
            if (H5Pget(dxpl_id, H5D_XFER_COLL_RANK0_BCAST_NAME, &prop_value) < 0) {
                pass         = false;
                failure_mssg = "H5Pget() failed\n";
            }
            if (pass) {
                if (prop_value == true) {
                    pass         = false;
                    failure_mssg = "rank 0 Bcast optimization was mistakenly performed for chunked dataset\n";
                }
            }
        }
#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */

        /* verify the data */
        if (pass) {

            if (comm == MPI_COMM_WORLD) /* test 1 */
                nextValue = 0;
            else if (group_id == 0) /* test 2 group 0 */
                nextValue = 0;
            else /* test 2 group 1 */
                nextValue = (float)((hsize_t)(mpi_size / 2) * count);

            i = 0;
            while ((pass) && (i < (hsize_t)dset_size)) {
                /* what we really want is data_slice[i] != nextValue --
                 * the following is a circumlocution to shut up the
                 * the compiler.
                 */
                if ((data_slice[i] > nextValue) || (data_slice[i] < nextValue)) {
                    pass         = false;
                    failure_mssg = "Unexpected chunked dset contents.\n";
                }
                nextValue += 1;
                i++;
            }
        }

        if (pass || (filespace != -1)) {
            if (H5Sclose(filespace) < 0) {
                pass         = false;
                failure_mssg = "H5Sclose(filespace) failed.\n";
            }
        }

        /* free data_slice if it has been allocated */
        if (data_slice != NULL) {
            free(data_slice);
            data_slice = NULL;
        }

        /*
         * Read an H5S_ALL filespace into a hyperslab defined memory space
         */

        if ((data_slice = (float *)malloc((size_t)(dset_size * 2) * filetype_size)) == NULL) {
            pass         = false;
            failure_mssg = "malloc of data_slice failed.\n";
        }

        /* setup memspace */
        if (pass) {
            dims[0] = (hsize_t)dset_size * 2;
            if ((memspace = H5Screate_simple(1, dims, NULL)) < 0) {
                pass         = false;
                failure_mssg = "H5Screate_simple(1, dims, NULL) failed\n";
            }
        }
        if (pass) {
            offset = (hsize_t)dset_size;
            if ((H5Sselect_hyperslab(memspace, H5S_SELECT_SET, &offset, NULL, &offset, NULL)) < 0) {
                pass         = false;
                failure_mssg = "H5Sselect_hyperslab() failed\n";
            }
        }

#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
        if (pass) {
            prop_value = H5D_XFER_COLL_RANK0_BCAST_DEF;
            if (H5Pset(dxpl_id, H5D_XFER_COLL_RANK0_BCAST_NAME, &prop_value) < 0) {
                pass         = false;
                failure_mssg = "H5Pset() failed\n";
            }
        }
#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */

        /* read this processes section of the data */
        if (pass) {
            if ((H5Dread(dset_id, H5T_NATIVE_FLOAT, memspace, H5S_ALL, dxpl_id, data_slice)) < 0) {
                pass         = false;
                failure_mssg = "H5Dread() failed\n";
            }
        }

#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
        if (pass) {
            prop_value = false;
            if (H5Pget(dxpl_id, H5D_XFER_COLL_RANK0_BCAST_NAME, &prop_value) < 0) {
                pass         = false;
                failure_mssg = "H5Pget() failed\n";
            }
            if (pass) {
                if (prop_value != true) {
                    pass         = false;
                    failure_mssg = "rank 0 Bcast optimization was mistakenly not performed\n";
                }
            }
        }
#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */

        /* verify the data */
        if (pass) {

            if (comm == MPI_COMM_WORLD) /* test 1 */
                nextValue = 0;
            else if (group_id == 0) /* test 2 group 0 */
                nextValue = 0;
            else /* test 2 group 1 */
                nextValue = (float)((hsize_t)(mpi_size / 2) * count);

            i = (hsize_t)dset_size;
            while ((pass) && (i < (hsize_t)dset_size)) {
                /* what we really want is data_slice[i] != nextValue --
                 * the following is a circumlocution to shut up
                 * the compiler.
                 */
                if ((data_slice[i] > nextValue) || (data_slice[i] < nextValue)) {
                    pass         = false;
                    failure_mssg = "Unexpected dset contents.\n";
                }
                nextValue += 1;
                i++;
            }
        }

        if (pass || (memspace != -1)) {
            if (H5Sclose(memspace) < 0) {
                pass         = false;
                failure_mssg = "H5Sclose(memspace) failed.\n";
            }
        }

        /* free data_slice if it has been allocated */
        if (data_slice != NULL) {
            free(data_slice);
            data_slice = NULL;
        }

        if (pass || (dxpl_id != -1)) {
            if (H5Pclose(dxpl_id) < 0) {
                pass         = false;
                failure_mssg = "H5Pclose(dxpl_id) failed.\n";
            }
        }
    }

    /* close file, etc. */
    if (pass || (dset_id != -1)) {
        if (H5Dclose(dset_id) < 0) {
            pass         = false;
            failure_mssg = "H5Dclose(dset_id) failed.\n";
        }
    }

    if (pass || (dset_id_ch != -1)) {
        if (H5Dclose(dset_id_ch) < 0) {
            pass         = false;
            failure_mssg = "H5Dclose(dset_id_ch) failed.\n";
        }
    }

    if (pass || (file_id != -1)) {
        if (H5Fclose(file_id) < 0) {
            pass         = false;
            failure_mssg = "H5Fclose(file_id) failed.\n";
        }
    }

    if (pass || (fapl_id != -1)) {
        if (H5Pclose(fapl_id) < 0) {
            pass         = false;
            failure_mssg = "H5Pclose(fapl_id) failed.\n";
        }
    }

    /* collect results from other processes.
     * Only overwrite the failure message if no previous error
     * has been detected
     */
    local_failure = (pass ? 0 : 1);

    if (MPI_Allreduce(&local_failure, &global_failures, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) {
        if (pass) {
            pass         = false;
            failure_mssg = "MPI_Allreduce() failed.\n";
        }
    }
    else if ((pass) && (global_failures > 0)) {
        pass         = false;
        failure_mssg = "One or more processes report failure.\n";
    }

    /* report results and finish cleanup */
    if (group_rank == 0) {
        if (pass) {
            PASSED();
        }
        else {
            H5_FAILED();
            fprintf(stdout, "%s: failure_mssg = \"%s\"\n", fcn_name, failure_mssg);
        }
        HDremove(reloc_data_filename);
    }

    return (!pass);

} /* test_parallel_read() */

/*-------------------------------------------------------------------------
 * Function:    main
 *
 * Purpose:     To implement a parallel test which validates whether the
 *              new superblock lookup functionality is working correctly.
 *
 *              The test consists of creating two separate HDF datasets
 *              in which random text is inserted at the start of each
 *              file using the 'h5jam' application.  This forces the
 *              HDF5 file superblock to a non-zero offset.
 *              Having created the two independent files, we create two
 *              non-overlapping MPI groups, each of which is then tasked
 *              with the opening and validation of the data contained
 *              therein.
 *
 *              Sequence implemented here:
 *                1. Save a copy of argv[0], initialize MPI and HDF5.
 *                2. If fewer than 3 processes are available, skip all
 *                   tests (this is reported as "no failures").
 *                3. Split MPI_COMM_WORLD into two groups by rank.
 *                4. Call generate_test_file() with MPI_COMM_WORLD and
 *                   then with the group communicator.
 *                5. Call test_parallel_read() with MPI_COMM_WORLD and
 *                   then with the group communicator.
 *                6. Free the group communicator, synchronize, let rank 0
 *                   print the summary, close HDF5 and finalize MPI.
 *
 *              Any step that records an error causes the remaining
 *              steps to be skipped (jump to the common cleanup code).
 *
 *              MPI failures detected after MPI_Init() has succeeded
 *              terminate the whole job through MPI_Abort(), so that the
 *              remaining ranks are not left waiting in a collective call.
 *
 * Return:      Success: EXIT_SUCCESS (also when the tests are skipped)
 *              Failure: EXIT_FAILURE
 *-------------------------------------------------------------------------
 */

int
main(int argc, char **argv)
{
    int      nerrs       = 0;
    int      which_group = 0;
    int      mpi_rank;
    int      mpi_size;
    int      split_size;
    MPI_Comm group_comm = MPI_COMM_NULL;

    /* I don't believe that argv[0] can ever be NULL.
     * It should thus be safe to duplicate and save as a check
     * for CMake testing. Note that in our CMake builds,
     * all executables are located in the same directory.
     * We assume (but we'll check) that the h5jam utility
     * is in the same directory as this executable.  If that
     * isn't true, then we can use a relative path that
     * should be valid for the Autotools environment.
     *
     * NOTE(review): the strdup() result is not checked here; confirm
     * that the users of test_argv0 tolerate a NULL value.
     */
    test_argv0 = strdup(argv[0]);

    /* MPI is not usable if MPI_Init fails, so a plain exit() is the only
     * option here.
     */
    if ((MPI_Init(&argc, &argv)) != MPI_SUCCESS) {
        fprintf(stderr, "FATAL: Unable to initialize MPI\n");
        exit(EXIT_FAILURE);
    }

    /* From here on MPI is initialized: fatal errors must take down the
     * whole job with MPI_Abort() rather than silently exiting one rank.
     * The exit() calls that follow MPI_Abort() are only a fallback in case
     * MPI_Abort() returns.
     */
    if ((MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank)) != MPI_SUCCESS) {
        fprintf(stderr, "FATAL: MPI_Comm_rank returned an error\n");
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        exit(EXIT_FAILURE);
    }

    if ((MPI_Comm_size(MPI_COMM_WORLD, &mpi_size)) != MPI_SUCCESS) {
        fprintf(stderr, "FATAL: MPI_Comm_size returned an error\n");
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        exit(EXIT_FAILURE);
    }

    H5open();

    if (mpi_rank == 0) {
        fprintf(stdout, "========================================\n");
        fprintf(stdout, "Collective file open optimization tests\n");
        fprintf(stdout, "        mpi_size     = %d\n", mpi_size);
        fprintf(stdout, "========================================\n");
    }

    /* Too few processes: skip the tests.  nerrs stays 0, so the run is
     * reported as having finished with no failures.
     */
    if (mpi_size < 3) {

        if (mpi_rank == 0) {

            printf("    Need at least 3 processes.  Exiting.\n");
        }
        goto finish;
    }

    /* ------  Create two (2) MPI groups  ------
     *
     * We split MPI_COMM_WORLD into 2 more or less equal sized
     * groups.  The resulting communicators will be used to generate
     * two HDF files which in turn will be opened in parallel and the
     * contents verified in the second read test below.
     */
    split_size  = mpi_size / 2;
    which_group = (mpi_rank < split_size ? 0 : 1);

    if ((MPI_Comm_split(MPI_COMM_WORLD, which_group, 0, &group_comm)) != MPI_SUCCESS) {

        fprintf(stderr, "FATAL: MPI_Comm_split returned an error\n");
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        exit(EXIT_FAILURE);
    }

    /* ------  Generate all files ------ */

    /* We generate the file used for test 1 */
    nerrs += generate_test_file(MPI_COMM_WORLD, mpi_rank, which_group);

    if (nerrs > 0) {
        if (mpi_rank == 0) {
            printf("    Test(1) file construction failed -- skipping tests.\n");
        }
        goto finish;
    }

    /* We generate the file used for test 2 */
    nerrs += generate_test_file(group_comm, mpi_rank, which_group);

    if (nerrs > 0) {
        if (mpi_rank == 0) {
            printf("    Test(2) file construction failed -- skipping tests.\n");
        }
        goto finish;
    }

    /* Now read the generated test file (still using MPI_COMM_WORLD) */
    nerrs += test_parallel_read(MPI_COMM_WORLD, mpi_rank, mpi_size, which_group);

    if (nerrs > 0) {
        if (mpi_rank == 0) {
            printf("    Parallel read test(1) failed -- skipping tests.\n");
        }
        goto finish;
    }

    /* Update the user on our progress so far. */
    if (mpi_rank == 0) {
        printf("    Test 1 of 2 succeeded\n");
        printf("    -- Starting multi-group parallel read test.\n");
    }

    /* run the 2nd set of tests */
    nerrs += test_parallel_read(group_comm, mpi_rank, mpi_size, which_group);

    if (nerrs > 0) {
        if (mpi_rank == 0) {
            printf("    Multi-group read test(2) failed\n");
        }
        goto finish;
    }

    if (mpi_rank == 0) {
        printf("    Test 2 of 2 succeeded\n");
    }

finish:

    /* group_comm is still MPI_COMM_NULL when the tests were skipped before
     * the split, in which case there is nothing to free.
     */
    if ((group_comm != MPI_COMM_NULL) && (MPI_Comm_free(&group_comm)) != MPI_SUCCESS) {
        fprintf(stderr, "MPI_Comm_free failed!\n");
    }

    /* make sure all processes are finished before final report, cleanup
     * and exit.
     */
    MPI_Barrier(MPI_COMM_WORLD);

    if (mpi_rank == 0) { /* only process 0 reports */
        const char *header = "Collective file open optimization tests";

        fprintf(stdout, "===================================\n");
        if (nerrs > 0) {
            fprintf(stdout, "***%s detected %d failures***\n", header, nerrs);
        }
        else {
            fprintf(stdout, "%s finished with no failures\n", header);
        }
        fprintf(stdout, "===================================\n");
    }

    /* close HDF5 library */
    if (H5close() != SUCCEED) {
        fprintf(stdout, "H5close() failed. (Ignoring)\n");
    }

    /* MPI_Finalize must be called AFTER H5close which may use MPI calls */
    MPI_Finalize();

    /* cannot just return (nerrs) because exit code is limited to 1byte */
    return ((nerrs > 0) ? EXIT_FAILURE : EXIT_SUCCESS);

} /* main() */