#!/usr/bin/env perl
##
# Copyright by The HDF Group.
# All rights reserved.
#
# This file is part of HDF5.  The full HDF5 copyright notice, including
# terms governing use, modification, and redistribution, is contained in
# the COPYING file, which can be found at the root of the source code
# distribution tree, or in https://www.hdfgroup.org/licenses.
# If you do not have access to either file, you may request a copy from
# help@hdfgroup.org.
##
require 5.003;
use warnings;

# Full text of the source file currently being processed.  It is a package
# global (declared with `our' so that it is strict-clean) because errmesg()
# searches it to work out the line number of the function it reports on.
our $Source = "";

##############################################################################
# A map from type name to type letter.  We use this map for two reasons:
#  1. We want the debugging stuff in the source code to be as unobtrusive as
#     possible, which means as compact as possible.
#  2. It's easier (faster) to parse these one and two-letter types in the C
#     functions that display debugging results.
#
# All type strings are one or two characters.  One-character strings
# are always lower case and should be used for common types.
# Two-character strings begin with an upper-case letter which is
# usually the same as the package name.
#
our %TypeString = (
    "haddr_t"                          => "a",
    "H5A_info_t"                       => "Ai",
    "H5A_operator1_t"                  => "Ao",
    "H5A_operator2_t"                  => "AO",
    "hbool_t"                          => "b",
    "H5AC_cache_config_t"              => "Cc",
    "H5AC_cache_image_config_t"        => "CC",
    "double"                           => "d",
    "H5D_alloc_time_t"                 => "Da",
    "H5D_append_cb_t"                  => "DA",
    "H5FD_mpio_collective_opt_t"       => "Dc",
    "H5D_selection_io_mode_t"          => "DC",
    "H5D_fill_time_t"                  => "Df",
    "H5D_fill_value_t"                 => "DF",
    "H5D_gather_func_t"                => "Dg",
    "H5FD_mpio_chunk_opt_t"            => "Dh",
    "H5D_mpio_actual_io_mode_t"        => "Di",
    "H5FD_file_image_callbacks_t"      => "DI",
    "H5D_chunk_index_t"                => "Dk",
    "H5D_layout_t"                     => "Dl",
    "H5D_mpio_no_collective_cause_t"   => "Dn",
    "H5D_mpio_actual_chunk_opt_mode_t" => "Do",
    "H5D_operator_t"                   => "DO",
    "H5D_space_status_t"               => "Ds",
    "H5D_scatter_func_t"               => "DS",
    "H5FD_mpio_xfer_t"                 => "Dt",
    "H5D_vds_view_t"                   => "Dv",
    "H5FD_class_value_t"               => "DV",
    "H5D_chunk_iter_op_t"              => "x",
    "herr_t"                           => "e",
    "H5E_auto1_t"                      => "Ea",
    "H5E_auto2_t"                      => "EA",
    "H5ES_event_complete_func_t"       => "EC",
    "H5E_direction_t"                  => "Ed",
    "H5E_error_t"                      => "Ee",
    "H5ES_event_insert_func_t"         => "EI",
    "H5ES_status_t"                    => "Es",
    "H5E_type_t"                       => "Et",
    "H5FD_class_t"                     => "FC",
    "H5F_close_degree_t"               => "Fd",
    "H5F_fspace_strategy_t"            => "Ff",
    "H5F_flush_cb_t"                   => "FF",
    "H5F_info2_t"                      => "FI",
    "H5F_mem_t"                        => "Fm",
    "H5F_scope_t"                      => "Fs",
    "H5F_file_space_type_t"            => "Ft",
    "H5F_libver_t"                     => "Fv",
    "H5G_iterate_t"                    => "Gi",
    "H5G_obj_t"                        => "Go",
    "H5G_stat_t"                       => "Gs",
    "hsize_t"                          => "h",
    "H5_atclose_func_t"                => "Hc",
    "hssize_t"                         => "Hs",
    "H5E_major_t"                      => "i",     # H5E_major_t is typedef'd to hid_t
    "H5E_minor_t"                      => "i",     # H5E_minor_t is typedef'd to hid_t
    "hid_t"                            => "i",
    "H5I_future_discard_func_t"        => "ID",
    "H5I_free_t"                       => "If",
    "H5_index_t"                       => "Ii",
    "H5I_iterate_func_t"               => "II",
    "H5_iter_order_t"                  => "Io",
    "H5FD_subfiling_ioc_select_t"      => "IO",
    "H5I_future_realize_func_t"        => "IR",
    "int"                              => "Is",
    "int32_t"                          => "Is",
    "H5I_search_func_t"                => "IS",
    "H5I_type_t"                       => "It",
    "unsigned"                         => "Iu",
    "unsigned int"                     => "Iu",
    "uint32_t"                         => "Iu",
    "H5O_token_t"                      => "k",
    "H5L_iterate1_t"                   => "Li",
    "H5L_iterate2_t"                   => "LI",
    "H5G_link_t"                       => "Ll",    # Same as H5L_type_t now
    "H5L_type_t"                       => "Ll",
    "H5L_elink_traverse_t"             => "Lt",
    "H5MM_allocate_t"                  => "Ma",
    "MPI_Comm"                         => "Mc",
    "H5MM_free_t"                      => "Mf",
    "MPI_Info"                         => "Mi",
    "H5M_iterate_t"                    => 'MI',
    "H5FD_mem_t"                       => "Mt",
    "off_t"                            => "o",
    "H5O_iterate1_t"                   => "Oi",
    "H5O_iterate2_t"                   => "OI",
    "H5O_mcdt_search_cb_t"             => "Os",
    "H5O_type_t"                       => "Ot",
    "H5P_class_t"                      => "p",
    "H5P_cls_create_func_t"            => "Pc",
    "H5P_prp_create_func_t"            => "PC",
    "H5P_prp_delete_func_t"            => "PD",
    "H5P_prp_get_func_t"               => "PG",
    "H5P_iterate_t"                    => "Pi",
    "H5P_cls_close_func_t"             => "Pl",
    "H5P_prp_close_func_t"             => "PL",
    "H5P_prp_compare_func_t"           => "PM",
    "H5P_cls_copy_func_t"              => "Po",
    "H5P_prp_copy_func_t"              => "PO",
    "H5P_prp_set_func_t"               => "PS",
    "hdset_reg_ref_t"                  => "Rd",
    "hobj_ref_t"                       => "Ro",
    "H5R_ref_t"                        => "Rr",
    "H5R_type_t"                       => "Rt",
    "char"                             => "s",
    "unsigned char"                    => "s",
    "H5S_class_t"                      => "Sc",
    "H5S_seloper_t"                    => "Ss",
    "H5S_sel_type"                     => "St",
    "htri_t"                           => "t",
    "H5T_cset_t"                       => "Tc",
    "H5T_conv_t"                       => "TC",
    "H5T_direction_t"                  => "Td",
    "H5T_pers_t"                       => "Te",
    "H5T_conv_except_func_t"           => "TE",
    "H5T_norm_t"                       => "Tn",
    "H5T_order_t"                      => "To",
    "H5T_pad_t"                        => "Tp",
    "H5T_sign_t"                       => "Ts",
    "H5T_class_t"                      => "Tt",
    "H5T_str_t"                        => "Tz",
    "unsigned long"                    => "Ul",
    "unsigned long long"               => "UL",
    "uint64_t"                         => "UL",
    "H5VL_attr_get_t"                  => "Va",
    "H5VL_blob_optional_t"             => "VA",
    "H5VL_attr_specific_t"             => "Vb",
    "H5VL_blob_specific_t"             => "VB",
    "H5VL_dataset_get_t"               => "Vc",
    "H5VL_class_value_t"               => "VC",
    "H5VL_dataset_specific_t"          => "Vd",
    "H5VL_datatype_get_t"              => "Ve",
    "H5VL_datatype_specific_t"         => "Vf",
    "H5VL_file_get_t"                  => "Vg",
    "H5VL_file_specific_t"             => "Vh",
    "H5VL_group_get_t"                 => "Vi",
    "H5VL_group_specific_t"            => "Vj",
    "H5VL_link_create_t"               => "Vk",
    "H5VL_link_get_t"                  => "Vl",
    "H5VL_get_conn_lvl_t"              => "VL",
    "H5VL_link_specific_t"             => "Vm",
    "H5VL_object_get_t"                => "Vn",
    "H5VL_request_notify_t"            => "VN",
    "H5VL_object_specific_t"           => "Vo",
    "H5VL_request_specific_t"          => "Vr",
    "H5VL_attr_optional_t"             => "Vs",
    "H5VL_subclass_t"                  => "VS",
    "H5VL_dataset_optional_t"          => "Vt",
    "H5VL_datatype_optional_t"         => "Vu",
    "H5VL_file_optional_t"             => "Vv",
    "H5VL_group_optional_t"            => "Vw",
    "H5VL_link_optional_t"             => "Vx",
    "H5VL_object_optional_t"           => "Vy",
    "H5VL_request_optional_t"          => "Vz",
    "va_list"                          => "x",
    "void"                             => "x",
    "size_t"                           => "z",
    "H5Z_SO_scale_type_t"              => "Za",
    "H5Z_class_t"                      => "Zc",
    "H5Z_EDC_t"                        => "Ze",
    "H5Z_filter_t"                     => "Zf",
    "H5Z_filter_func_t"                => "ZF",
    "ssize_t"                          => "Zs",

    # Types below must be defined here, as they appear in function arguments,
    # but they are not yet supported in the H5_trace_args() routine.  If
    # they are used as an actual parameter type (and not just as a pointer
    # to the type), they must have a "real" abbreviation added (like the ones
    # above), moved to the section of entries above, and support for displaying
    # the type must be added to H5_trace_args().
    "H5ES_err_info_t"                  => "#",
    "H5FD_t"                           => "#",
    "H5FD_hdfs_fapl_t"                 => "#",
    "H5FD_mirror_fapl_t"               => "#",
    "H5FD_onion_fapl_t"                => "#",
    "H5FD_ros3_fapl_t"                 => "#",
    "H5FD_splitter_vfd_config_t"       => "#",
    "H5L_class_t"                      => "#",
    "H5VL_class_t"                     => "#",
    "H5VL_loc_params_t"                => "#",
    "H5VL_request_status_t"            => "#",
);

##############################################################################
# Maximum length of H5TRACE macro line
# If the ColumnLimit in .clang-format is changed, this value will need to be updated
#
my $max_trace_macro_line_len = 110;

##############################################################################
# Print an error message.
#
# Arguments:
#   $file - name of the source file, used as the prefix of both output lines
#   $func - name of the function the message is about
#   @mesg - message fragments; they are concatenated without a separator
#
# Two lines are printed to the currently selected output handle, in the
# "file: in function `name':" / "file:line: message" layout.  The line number
# is the 1-based line of the first line in $Source that starts with $func as
# a whole word; it stays at 1 when no such line is found.  Calling this
# routine also sets $found_errors.
#
# The prototype is kept so that existing paren-less calls parse as before.
#
my $found_errors = 0;

sub errmesg ($$@) {
    my ($file, $func, @mesg) = @_;
    my $mesg   = join "", @mesg;
    my $lineno = 1;

    # \Q...\E keeps the name literal; \b stops "H5Xfoo" from matching an
    # earlier line that starts with "H5Xfoo_bar".
    if ($Source =~ /(.*?\n)\Q$func\E\b/s) {
        my $before = $1;

        # A name preceded by N newlines sits on line N + 1.
        $lineno = 1 + ($before =~ tr/\n//);
    }

    $found_errors = 1;

    print "$file: in function \`$func\':\n";
    print "$file:$lineno: $mesg\n";
}

##############################################################################
# Given a C data type return the type string that goes with it.
#
# Arguments:
#   $file  - name of the source file being processed (only referenced by the
#            disabled diagnostic below)
#   $func  - name of the function being processed (likewise)
#   $atype - C type text, e.g. "const char *"
#
# Returns the type string: one '*' per level of indirection left over after
# the table lookup, then "[dim]" if the type text ended in an array suffix,
# then the abbreviation from %TypeString.  A type that is missing from
# %TypeString yields "!" in place of the abbreviation; reporting that is left
# to the caller so that it only matters when the result is actually used.
#
sub argstring ($$$) {
    my ($file, $func, $atype) = @_;
    my ($ptr, $tstr, $array) = (0, "!", "");

    # Normalize the data type by removing redundant white space,
    # certain type qualifiers, and indirection.
    $atype =~ s/^\bconst\b//;     # Leading const
    $atype =~ s/\s*const\s*//;    # const after type, possibly in the middle of '*'s
    $atype =~ s/^\bstatic\b//;
    $atype =~ s/\bH5_ATTR_UNUSED\b//g;
    $atype =~ s/\bH5_ATTR_DEPRECATED_USED\b//g;
    $atype =~ s/\bH5_ATTR_NDEBUG_UNUSED\b//g;
    $atype =~ s/\bH5_ATTR_DEBUG_API_USED\b//g;
    $atype =~ s/\bH5_ATTR_PARALLEL_UNUSED\b//g;
    $atype =~ s/\bH5_ATTR_PARALLEL_USED\b//g;
    $atype =~ s/\s+/ /g;

    # The length of the first run of '*' is the indirection level.
    $ptr = length $1 if $atype =~ s/(\*+)//;
    $atype =~ s/^\s+//;
    $atype =~ s/\s+$//;

    # Split off a trailing "[...]" array suffix.
    if ($atype =~ /(.*)\[(.*)\]$/) {
        ($array, $atype) = ($2, $1);
        $atype =~ s/\s+$//;
    }
    my $fq_atype = $atype . ('*' x $ptr);

    if ($ptr > 0 && exists $TypeString{$fq_atype}) {
        # The fully qualified pointer type has its own table entry.
        $ptr  = 0;
        $tstr = $TypeString{$fq_atype};
    }
    elsif ($ptr > 0 && exists $TypeString{"$atype*"}) {
        # A single-pointer form of the type has its own table entry.
        --$ptr;
        $tstr = $TypeString{"$atype*"};
    }
    elsif (!exists $TypeString{$atype}) {
        # Defer throwing error until type is actually used
        # errmesg $file, $func, "untraceable type \`$atype", '*'x$ptr, "\'";
    }
    else {
        $tstr = $TypeString{$atype};
    }

    return ("*" x $ptr) . ($array ? "[$array]" : "") . $tstr;
}

##############################################################################
# Given information about an API function, rewrite that function with
# updated tracing information.
#
my $file_api   = 0;    # API functions handled in the current file
my $file_args  = 0;    # H5ARG_TRACE argument lists handled in the current file
my $total_api  = 0;    # running totals over all files
my $total_args = 0;

# Report, via errmesg(), the return type and every argument whose type string
# matches $flagged.
#
#   $flagged  - compiled regex that recognises the problem marker
#   $what     - "unsupported" or "unknown", used in the message
#   $advice   - second line of the message
#   $rettype  - type string of the return type
#   $type     - C text of the return type, printed for the user
#   $arg_str  - reference to the per-argument type strings
#   $arg_type - reference to the per-argument C type text, parallel to $arg_str
sub _report_flagged_types {
    my ($file, $name, $flagged, $what, $advice,
        $rettype, $type, $arg_str, $arg_type) = @_;

    # The marker lives in the type *string*; the C text in $type never
    # contains it, so the return type has to be tested through $rettype.
    if ($rettype =~ $flagged) {
        errmesg($file, $name, "$what type in return type\n$advice");
        print "type = '$type'\n";
    }

    for my $index (0 .. $#{$arg_str}) {
        if ($arg_str->[$index] =~ $flagged) {
            errmesg($file, $name, "$what type in args\n$advice");
            print "type = $arg_type->[$index]\n";
        }
    }
    return;
}

# Arguments:
#   $file - name of the source file, used in diagnostics
#   $type - C text of the function's return type
#   $name - function name
#   $args - text between the parentheses of the definition
#   $body - function body, from the opening '{' to the end of the closing line
#
# Returns the text of the whole function ("\n$type\n$name($args)\n$body") with
# the H5TRACE / H5ARG_TRACE macros in the body brought up to date.  When a
# problem is found the function is returned with its body as it stood at that
# point, after the problem has been reported.
sub rewrite_func ($$$$$) {
    my ($file, $type, $name, $args, $body) = @_;
    my ($trace, $argtrace);
    my (@arg_name, @arg_str, @arg_type);

    # A '#' marks a known-but-unsupported type and a '!' an unknown one;
    # neither is a problem when it directly follows a '*'.
    my $unsupported_re = qr/(^#)|([^*]#)/;
    my $unknown_re     = qr/(^!)|([^*]!)/;

    # Keep copy of original arguments
    my $orig_args = $args;

    # Parse return value
    my $rettype = argstring($file, $name, $type);

    # Every failure leaves this block early; the return statement after it
    # is shared by the success and failure paths.
    REWRITE: {
        # Parse arguments
        if ($args eq "void") {
            $trace    = "H5TRACE0(\"$rettype\", \"\");\n";
            $argtrace = "H5ARG_TRACE0(\"\")";
        }
        else {
            # Split arguments.  First convert `/*in,out*/' to get rid of the
            # comma and remove lines beginning with a '#', then split the
            # arguments on commas.
            $args =~ s/(\/\*\s*in),\s*(out\s*\*\/)/${1}_${2}/g;    # Get rid of comma in 'in,out'
            $args =~ s/H5FL_TRACK_PARAMS//g;                       # Remove free list macro
            $args =~ s/\n#.*?\n/\n/g;                              # Remove lines beginning with '#'
            my @args = split /,[\s\n]*/, $args;

            my $argno = 0;
            my %names;    # argument name => position, for array-size lookups

            for my $arg (@args) {
                # Skip "..." for varargs parameter
                next if $arg =~ /\w*\.{3}\w*/;

                unless ($arg =~ /^((\s*[a-z_A-Z](\w|\*)*\s+)+(\s*\*\s*|\s*const\s*|\s*volatile\s*)*)
                                 ([a-z_A-Z]\w*)(\[.*?\])?
                                 (\s*\/\*\s*(in|out|in_out)\s*\*\/)?\s*$/x) {
                    errmesg($file, $name, "unable to parse \`$arg\'");
                    last REWRITE;
                }

                my ($atype, $aname, $array, $adir) = ($1, $5, $6, $8);
                $names{$aname} = $argno++;
                $adir ||= "in";
                $atype =~ s/\s+$//;
                push @arg_name, $aname;
                push @arg_type, $atype;

                if ($adir eq "out") {
                    push @arg_str, "x";
                    next;
                }

                if (defined $array) {
                    # An array parameter is traced as a pointer.
                    $atype .= "*";
                    if ($array =~ /^\[\/\*([a-z_A-Z]\w*)\*\/\]$/) {
                        # The dimension names an earlier argument; record
                        # that argument's position.
                        my $asize = $1;
                        if (exists $names{$asize}) {
                            $atype .= '[a' . $names{$asize} . ']';
                        }
                        else {
                            warn "bad array size: $asize";
                            $atype .= "*";
                        }
                    }
                }
                push @arg_str, argstring($file, $name, $atype);
            }

            # Compose the trace macro
            $trace    = "H5TRACE" . scalar(@arg_str) . "(\"$rettype\", \"";
            $argtrace = "H5ARG_TRACE" . scalar(@arg_str) . "(__func__, \"";
            $trace    .= join("", @arg_str) . "\"";
            $argtrace .= join("", @arg_str) . "\"";

            # Add 4 for indenting the line
            my $len = 4 + length($trace);

            # Width of the continuation-line indent.  It has to be the same
            # number that $len is reset to after a wrap.
            my $wrap_indent = 13;

            for my $i (0 .. $#arg_name) {
                # Handle wrapping

                # Be VERY careful here!  clang-format and this script MUST agree
                # on which lines get wrapped or there will be churn as each tries
                # to undo the other's output.
                #
                # TWO cases must be handled:
                # 1) The argument is the last one and ');' will be appended
                # 2) The argument is NOT the last one and ',' will be appended
                #
                # NB: clang-format does NOT consider terminal newlines when
                #     counting columns for the ColumnLimit
                #
                # The extra '2' added after $len includes the ', ' that would be
                # added BEFORE the argument.
                #
                my $adjust       = ($i + 1 == scalar(@arg_str)) ? 2 : 1;
                my $len_if_added = $len + 2 + length($arg_name[$i]) + $adjust;

                # Wrap lines that will be longer than the limit
                if ($len_if_added > $max_trace_macro_line_len) {
                    # Wrap line, with indentation
                    $trace .= ",\n" . (" " x $wrap_indent);
                    $len = $wrap_indent;

                    # Indent an extra space to account for extra digit in 'H5TRACE' macro
                    if (scalar(@arg_str) >= 10) {
                        $trace .= " ";
                        $len++;
                    }
                }
                else {
                    $trace .= ", ";
                    $len += 2;    # Add 2, for ', '
                }

                # Append argument
                $trace    .= "$arg_name[$i]";
                $argtrace .= ", $arg_name[$i]";

                # Add length of appended argument name
                $len += length($arg_name[$i]);
            }

            # Append final ');' for macro
            $trace    .= ");\n";
            $argtrace .= ")";
        }

        # Check for API / non-API routine name
        if ($name =~ /H5[A-Z]{0,2}[a-z].*/) {
            # The H5TRACE() statement, for API routines
            if ($body =~ /\/\*[ \t]*NO[ \t]*TRACE[ \t]*\*\//) {
                # Ignored due to NO TRACE comment.
            }
            else {
                # Check for known, but unsupported type
                if ($trace =~ $unsupported_re) {
                    _report_flagged_types(
                        $file, $name, $unsupported_re, "unsupported",
                        "Add to TypeString hash in trace script and update H5_trace_args()",
                        $rettype, $type, \@arg_str, \@arg_type);
                    last REWRITE;
                }

                # Check for unknown (and therefore unsupported) type
                if ($trace =~ $unknown_re) {
                    _report_flagged_types(
                        $file, $name, $unknown_re, "unknown",
                        "Add to TypeString hash in trace script and also update H5_trace_args() if used by value",
                        $rettype, $type, \@arg_str, \@arg_type);
                    last REWRITE;
                }

                if ($body =~ s/((\n[ \t]*)H5TRACE\d+\s*\(.*?\);)\n/"$2$trace"/es) {
                    # Replaced an H5TRACE macro.
                }
                elsif ($body =~ s/((\n[ \t]*)FUNC_ENTER\w*[ \t]*(\(.*?\))?;??)\n/"$1$2$trace"/es) {
                    # Added an H5TRACE macro after a FUNC_ENTER macro.
                }
                else {
                    errmesg($file, $name, "unable to insert tracing information");
                    print "body = ", $body, "\n";
                    last REWRITE;
                }
            }

            # Increment # of API routines modified
            $file_api++;
        }

        # Check for H5ARG_TRACE macros in non-API routines
        if ($body =~ /H5ARG_TRACE/) {
            # Check for untraceable type (deferred until $argtrace used)
            if ($argtrace =~ $unknown_re) {
                errmesg($file, $name, "untraceable type in args");
                print "args = '$orig_args'\n";
                last REWRITE;
            }

            # Replace / update H5ARG_TRACE macro.
            $body =~ s/(H5ARG_TRACE(\d+\s*\(.*?\))?)/"$argtrace"/esg;

            # Increment # of non-API routines modified
            $file_args++;
        }
    }

    return "\n$type\n$name($orig_args)\n$body";
}

##############################################################################
# Process each source file, rewriting API functions with updated
# tracing information.
#
for my $file (@ARGV) {
    $file_api  = 0;
    $file_args = 0;

    # Ignore some files that do not need tracing macros
    next if $file eq "H5FDmulti.c"
        or $file eq "src/H5FDmulti.c"
        or $file eq "H5FDstdio.c"
        or $file eq "src/H5FDstdio.c"
        or $file eq "src/H5TS.c"
        or $file eq "src/H5FDperform.c";

    # Snarf up the entire file.  The three-argument form of open keeps
    # characters in the file name from being read as an open mode.
    open(my $in, '<', $file) or die "$file: $!\n";
    $Source = join "", <$in>;
    close $in;

    # Make a copy of the original data
    my $original = $Source;

    # Make modifications
    $Source =~ s/\n([A-Za-z]\w*(\s+[A-Za-z]\w*)*\s*\**)\n  #type
                 (H5[A-Z]{0,2}_?[a-zA-Z0-9_]\w*)           #name
                 \s*\((.*?)\)\s*                           #args
                 (\{.*?\n\}[^\n]*)                         #body
                /rewrite_func($file,$1,$3,$4,$5)/segx;

    # If the source changed then print out the new version
    next if $original eq $Source;

    printf "%s: instrumented %d API function%s and %d argument list%s\n",
        $file,
        $file_api,  (1 == $file_api  ? "" : "s"),
        $file_args, (1 == $file_args ? "" : "s");

    # Keep the previous contents as "<file>~" before overwriting.
    rename $file, "$file~" or die "unable to make backup";
    open(my $out, '>', $file) or die "unable to modify source";
    print {$out} $Source or die "$file: $!\n";

    # Buffered write errors only show up when the handle is closed.
    close $out or die "$file: $!\n";

    $total_api  += $file_api;
    $total_args += $file_args;
}

if ($found_errors) {
    printf "\n";
    printf "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n";
    printf "*** ERRORS FOUND *** ERRORS FOUND *** ERRORS FOUND ****\n";
    printf "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n";
    exit 1;
}
else {
    printf "Finished processing HDF5 API calls:\n";
    printf "\tinstrumented %d API function%s and %d argument list%s\n",
        $total_api,  (1 == $total_api  ? "" : "s"),
        $total_args, (1 == $total_args ? "" : "s");
}