[svn-r24480] Description:

Bring r24479 from 1.8 branch to trunk:

    Fix ph5diff worker command handling so it can correctly respond to
situations where there's no work to perform.  Also, a few minor code cleanups.

Tested on:
    Mac OSX/64 10.9.0 (amazon) w/parallel
    (too minor to require h5committest)
This commit is contained in:
Quincey Koziol 2013-11-30 13:10:36 -05:00
parent 0a506a625b
commit 2d79ba29ed
3 changed files with 167 additions and 162 deletions

View File

@ -13,11 +13,12 @@
* access to either file, you may request a copy from help@hdfgroup.org. *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
#include "h5diff.h"
#include "ph5diff.h"
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "H5private.h"
#include "h5diff.h"
#include "ph5diff.h"
#include "h5diff_common.h"
#include "h5tools.h"
#include "h5tools_utils.h"
@ -30,9 +31,9 @@ static void ph5diff_worker(int );
/*-------------------------------------------------------------------------
* Function: main
*
* Purpose: h5diff/ph5diff main program
* Purpose: ph5diff main program
*
* Return: An exit status of 0 means no differences were found, 1 means some
* Return: An exit status of 0 means no differences were found, 1 means some
* differences were found.
*
* Programmer: Pedro Vicente, pvn@ncsa.uiuc.edu
@ -41,16 +42,6 @@ static void ph5diff_worker(int );
*
* Comments:
*
* Modifications: July 2004
* Introduced the four modes:
* Normal mode: print the number of differences found and where they occurred
* Report mode: print the above plus the differences
* Verbose mode: print the above plus a list of objects and warnings
* Quiet mode: do not print output
*
* November 2004: Leon Arber (larber@uiuc.edu)
* Additions that allow h5diff to be run in parallel
*
* This function drives the diff process and will do a serial or parallel diff depending
* on the value of the global variable g_Parallel (default is 0), set to 1 when the program
* is run as "ph5diff"
@ -140,112 +131,165 @@ int main(int argc, const char *argv[])
static void
ph5diff_worker(int nID)
{
struct diff_mpi_args args;
hid_t file1_id, file2_id;
char filenames[2][MAX_FILENAME];
char out_data[PRINT_DATA_MAX_SIZE] = {0};
struct diffs_found diffs;
int i;
MPI_Status Status;
hid_t file1_id = -1, file2_id = -1;
outBuffOffset = 0;
MPI_Recv(filenames, MAX_FILENAME*2, MPI_CHAR, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &Status);
if(Status.MPI_TAG == MPI_TAG_PARALLEL)
while(1)
{
/* disable error reporting */
H5E_BEGIN_TRY
MPI_Status Status;
MPI_Probe(0, MPI_ANY_TAG, MPI_COMM_WORLD, &Status);
/* Check for filenames */
if(Status.MPI_TAG == MPI_TAG_PARALLEL)
{
/* Open the files */
if ((file1_id = H5Fopen (filenames[0], H5F_ACC_RDONLY, H5P_DEFAULT)) < 0)
char filenames[2][MAX_FILENAME];
/* Retrieve filenames */
MPI_Recv(filenames, MAX_FILENAME*2, MPI_CHAR, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &Status);
/* disable error reporting */
H5E_BEGIN_TRY
{
printf ("h5diff Task [%d]: <%s>: unable to open file\n", nID, filenames[0]);
MPI_Abort(MPI_COMM_WORLD, 0);
}
if ((file2_id = H5Fopen (filenames[1], H5F_ACC_RDONLY, H5P_DEFAULT)) < 0)
{
printf ("h5diff Task [%d]: <%s>: unable to open file\n", nID, filenames[1]);
MPI_Abort(MPI_COMM_WORLD, 0);
}
/* enable error reporting */
}
H5E_END_TRY;
while(1)
{
MPI_Probe(0, MPI_ANY_TAG, MPI_COMM_WORLD, &Status);
if(Status.MPI_TAG == MPI_TAG_ARGS)
{
/*Recv parameters for diff from manager task */
MPI_Recv(&args, sizeof(args), MPI_BYTE, 0, MPI_TAG_ARGS, MPI_COMM_WORLD, &Status);
/*Do the diff */
diffs.nfound = diff(file1_id, args.name1, file2_id, args.name2, &(args.options), &(args.argdata));
diffs.not_cmp = args.options.not_cmp;
/*If print buffer has something in it, request print token.*/
if(outBuffOffset>0)
/* Open the files */
if ((file1_id = H5Fopen (filenames[0], H5F_ACC_RDONLY, H5P_DEFAULT)) < 0)
{
MPI_Send(NULL, 0, MPI_BYTE, 0, MPI_TAG_TOK_REQUEST, MPI_COMM_WORLD);
/*Wait for print token. */
MPI_Recv(NULL, 0, MPI_BYTE, 0, MPI_TAG_PRINT_TOK, MPI_COMM_WORLD, &Status);
/*When get token, send all of our output to the manager task and then return the token */
for(i=0; i<outBuffOffset; i+=PRINT_DATA_MAX_SIZE)
MPI_Send(outBuff+i, PRINT_DATA_MAX_SIZE, MPI_BYTE, 0, MPI_TAG_PRINT_DATA, MPI_COMM_WORLD);
/* An overflow file exists, so we send its output to the manager too and then delete it */
if(overflow_file)
{
int tmp;
memset(out_data, 0, PRINT_DATA_MAX_SIZE);
i=0;
rewind(overflow_file);
while((tmp = getc(overflow_file)) >= 0)
{
*(out_data + i++) = (char)tmp;
if(i==PRINT_DATA_MAX_SIZE)
{
MPI_Send(out_data, PRINT_DATA_MAX_SIZE, MPI_BYTE, 0, MPI_TAG_PRINT_DATA, MPI_COMM_WORLD);
i=0;
memset(out_data, 0, PRINT_DATA_MAX_SIZE);
}
}
if(i>0)
MPI_Send(out_data, PRINT_DATA_MAX_SIZE, MPI_BYTE, 0, MPI_TAG_PRINT_DATA, MPI_COMM_WORLD);
fclose(overflow_file);
overflow_file = NULL;
}
fflush(stdout);
memset(outBuff, 0, OUTBUFF_SIZE);
outBuffOffset = 0;
MPI_Send(&diffs, sizeof(diffs), MPI_BYTE, 0, MPI_TAG_TOK_RETURN, MPI_COMM_WORLD);
printf ("h5diff Task [%d]: <%s>: unable to open file\n", nID, filenames[0]);
MPI_Abort(MPI_COMM_WORLD, 0);
}
else
MPI_Send(&diffs, sizeof(diffs), MPI_BYTE, 0, MPI_TAG_DONE, MPI_COMM_WORLD);
if ((file2_id = H5Fopen (filenames[1], H5F_ACC_RDONLY, H5P_DEFAULT)) < 0)
{
printf ("h5diff Task [%d]: <%s>: unable to open file\n", nID, filenames[1]);
MPI_Abort(MPI_COMM_WORLD, 0);
}
/* enable error reporting */
}
else if(Status.MPI_TAG == MPI_TAG_END)
H5E_END_TRY;
}
/* Check for work */
else if(Status.MPI_TAG == MPI_TAG_ARGS)
{
struct diff_mpi_args args;
struct diffs_found diffs;
int i;
/* Make certain we've received the filenames and opened the files already */
if(file1_id < 0 || file2_id < 0)
{
MPI_Recv(NULL, 0, MPI_BYTE, 0, MPI_TAG_END, MPI_COMM_WORLD, &Status);
/* printf("exiting..., task: %d\n", nID);
fflush(stdout);*/
printf("ph5diff_worker: ERROR: work received before/without filenames\n");
break;
}
else
{
printf("ph5diff_worker: ERROR: invalid tag (%d) received\n", Status.MPI_TAG);
MPI_Abort(MPI_COMM_WORLD, 0);
}
/* Recv parameters for diff from manager task */
MPI_Recv(&args, sizeof(args), MPI_BYTE, 0, MPI_TAG_ARGS, MPI_COMM_WORLD, &Status);
/* Do the diff */
diffs.nfound = diff(file1_id, args.name1, file2_id, args.name2, &(args.options), &(args.argdata));
diffs.not_cmp = args.options.not_cmp;
/* If print buffer has something in it, request print token.*/
if(outBuffOffset>0)
{
MPI_Send(NULL, 0, MPI_BYTE, 0, MPI_TAG_TOK_REQUEST, MPI_COMM_WORLD);
/* Wait for print token. */
MPI_Recv(NULL, 0, MPI_BYTE, 0, MPI_TAG_PRINT_TOK, MPI_COMM_WORLD, &Status);
/* When get token, send all of our output to the manager task and then return the token */
for(i=0; i<outBuffOffset; i+=PRINT_DATA_MAX_SIZE)
MPI_Send(outBuff+i, PRINT_DATA_MAX_SIZE, MPI_BYTE, 0, MPI_TAG_PRINT_DATA, MPI_COMM_WORLD);
/* An overflow file exists, so we send its output to the manager too and then delete it */
if(overflow_file)
{
char out_data[PRINT_DATA_MAX_SIZE];
int tmp;
memset(out_data, 0, PRINT_DATA_MAX_SIZE);
i=0;
rewind(overflow_file);
while((tmp = getc(overflow_file)) >= 0)
{
*(out_data + i++) = (char)tmp;
if(i==PRINT_DATA_MAX_SIZE)
{
MPI_Send(out_data, PRINT_DATA_MAX_SIZE, MPI_BYTE, 0, MPI_TAG_PRINT_DATA, MPI_COMM_WORLD);
i=0;
memset(out_data, 0, PRINT_DATA_MAX_SIZE);
}
}
if(i>0)
MPI_Send(out_data, PRINT_DATA_MAX_SIZE, MPI_BYTE, 0, MPI_TAG_PRINT_DATA, MPI_COMM_WORLD);
fclose(overflow_file);
overflow_file = NULL;
}
fflush(stdout);
memset(outBuff, 0, OUTBUFF_SIZE);
outBuffOffset = 0;
MPI_Send(&diffs, sizeof(diffs), MPI_BYTE, 0, MPI_TAG_TOK_RETURN, MPI_COMM_WORLD);
}
else
MPI_Send(&diffs, sizeof(diffs), MPI_BYTE, 0, MPI_TAG_DONE, MPI_COMM_WORLD);
}
/* Check for leaving */
else if(Status.MPI_TAG == MPI_TAG_END)
{
MPI_Recv(NULL, 0, MPI_BYTE, 0, MPI_TAG_END, MPI_COMM_WORLD, &Status);
break;
}
else
{
printf("ph5diff_worker: ERROR: invalid tag (%d) received\n", Status.MPI_TAG);
break;
}
}
return;
}
/*-------------------------------------------------------------------------
 * Function: print_manager_output
 *
 * Purpose: Write any output the manager task has accumulated in outBuff
 *          (followed by the contents of the overflow file, when one is
 *          open) to stdout, then clear the buffer.  If output has been
 *          buffered while not running in parallel mode, an error message
 *          is written to stderr instead and the buffer is left untouched.
 *
 * Return: none
 *
 * Programmer: Leon Arber
 *
 * Date: Feb 7, 2005
 *
 *-------------------------------------------------------------------------
 */
void print_manager_output(void)
{
    /* Nothing buffered, nothing to do */
    if(outBuffOffset <= 0)
        return;

    /* Buffered output is only expected when running in parallel */
    if(!g_Parallel)
    {
        HDfprintf(stderr, "h5diff error: outBuffOffset>0, but we're not in parallel!\n");
        return;
    }

    /* Emit the in-memory buffer first */
    printf("%s", outBuff);

    /* Then copy the overflow file to stdout, close it and forget it */
    if(overflow_file != NULL)
    {
        int ch;

        rewind(overflow_file);
        for(ch = getc(overflow_file); ch >= 0; ch = getc(overflow_file))
            putchar(ch);
        fclose(overflow_file);
        overflow_file = NULL;
    }

    /* Push everything out and reset the buffer for the next round */
    HDfflush(stdout);
    HDmemset(outBuff, 0, OUTBUFF_SIZE);
    outBuffOffset = 0;
}
@ -268,12 +312,14 @@ ph5diff_worker(int nID)
void h5diff_exit(int status)
{
/* if in parallel mode, dismiss workers, close down MPI, then exit */
if((g_nTasks > 1) && g_Parallel) {
phdiff_dismiss_workers();
MPI_Barrier(MPI_COMM_WORLD);
}
if(g_Parallel)
if(g_Parallel) {
if(g_nTasks > 1) {
phdiff_dismiss_workers();
MPI_Barrier(MPI_COMM_WORLD);
}
MPI_Finalize();
status = EXIT_SUCCESS; /* Reset exit status, since some mpiexec commands generate output on failure status */
}
/* Always exit(0), since MPI implementations do weird stuff when they
* receive a non-zero exit value. - QAK

View File

@ -105,47 +105,6 @@ void phdiff_dismiss_workers(void)
}
/*-------------------------------------------------------------------------
 * Function: print_manager_output
 *
 * Purpose: Write any output the manager task has accumulated in outBuff
 *          (followed by the contents of the overflow file, when one is
 *          open) to stdout, then clear the buffer.  If output has been
 *          buffered while not running in parallel mode, an error message
 *          is written to stderr instead and the buffer is left untouched.
 *
 * Return: none
 *
 * Programmer: Leon Arber
 *
 * Date: Feb 7, 2005
 *
 *-------------------------------------------------------------------------
 */
void print_manager_output(void)
{
    /* Nothing buffered, nothing to do */
    if(outBuffOffset <= 0)
        return;

    /* Buffered output is only expected when running in parallel */
    if(!g_Parallel)
    {
        HDfprintf(stderr, "h5diff error: outBuffOffset>0, but we're not in parallel!\n");
        return;
    }

    /* Emit the in-memory buffer first */
    printf("%s", outBuff);

    /* Then copy the overflow file to stdout, close it and forget it */
    if(overflow_file != NULL)
    {
        int ch;

        rewind(overflow_file);
        for(ch = getc(overflow_file); ch >= 0; ch = getc(overflow_file))
            putchar(ch);
        fclose(overflow_file);
        overflow_file = NULL;
    }

    /* Push everything out and reset the buffer for the next round */
    HDfflush(stdout);
    HDmemset(outBuff, 0, OUTBUFF_SIZE);
    outBuffOffset = 0;
}
/*-------------------------------------------------------------------------
* Function: print_incoming_data
*

View File

@ -17,18 +17,18 @@
#define _PH5DIFF_H__
/* Sent from manager to workers */
#define MPI_TAG_ARGS 1
#define MPI_TAG_PRINT_TOK 2
#define MPI_TAG_ARGS 1
#define MPI_TAG_PRINT_TOK 2
/*Sent from workers to manager */
#define MPI_TAG_TOK_REQUEST 3
#define MPI_TAG_DONE 4
#define MPI_TAG_TOK_RETURN 5
#define MPI_TAG_PRINT_DATA 6
#define MPI_TAG_TOK_REQUEST 3
#define MPI_TAG_DONE 4
#define MPI_TAG_TOK_RETURN 5
#define MPI_TAG_PRINT_DATA 6
/* Operational tags used to init and complete diff */
#define MPI_TAG_END 7
#define MPI_TAG_PARALLEL 8
#define MPI_TAG_END 7
#define MPI_TAG_PARALLEL 8
struct diff_mpi_args
{