#!/usr/bin/env perl
##
# Copyright by The HDF Group.
# All rights reserved.
#
# This file is part of HDF5.  The full HDF5 copyright notice, including
# terms governing use, modification, and redistribution, is contained in
# the COPYING file, which can be found at the root of the source code
# distribution tree, or in https://www.hdfgroup.org/licenses.
# If you do not have access to either file, you may request a copy from
# help@hdfgroup.org.
##
require 5.003;
use warnings;
# Complete text of the source file currently being processed.  The main
# loop at the bottom of this script loads each file into this variable
# before rewriting it, and errmesg() searches it to work out the line
# number to report for a function.
$Source = "";

##############################################################################
# A map from type name to type letter.  We use this map for two reasons:
#  1. We want the debugging stuff in the source code to be as unobtrusive as
#     possible, which means as compact as possible.
#  2. It's easier (faster) to parse these one and two-letter types in the C
#     functions that display debugging results.
#
# All type strings are one or two characters.  One-character strings
# are always lower case and should be used for common types.
# Two-character strings begin with an upper-case letter which is
# usually the same as the package name.
#
%TypeString = ("haddr_t"                    => "a",
               "H5A_info_t"                 => "Ai",
               "H5A_operator1_t"            => "Ao",
               "H5A_operator2_t"            => "AO",
               "hbool_t"                    => "b",
               "H5AC_cache_config_t"        => "Cc",
               "H5AC_cache_image_config_t"  => "CC",
               "double"                     => "d",
               "H5D_alloc_time_t"           => "Da",
               "H5D_append_cb_t"            => "DA",
               "H5FD_mpio_collective_opt_t" => "Dc",
               "H5D_selection_io_mode_t"    => "DC",
               "H5D_fill_time_t"            => "Df",
               "H5D_fill_value_t"           => "DF",
               "H5D_gather_func_t"          => "Dg",
               "H5FD_mpio_chunk_opt_t"      => "Dh",
               "H5D_mpio_actual_io_mode_t"  => "Di",
               "H5FD_file_image_callbacks_t" => "DI",
               "H5D_chunk_index_t"          => "Dk",
               "H5D_layout_t"               => "Dl",
               "H5D_mpio_no_collective_cause_t" => "Dn",
               "H5D_mpio_actual_chunk_opt_mode_t" => "Do",
               "H5D_operator_t"             => "DO",
               "H5D_space_status_t"         => "Ds",
               "H5D_scatter_func_t"         => "DS",
               "H5FD_mpio_xfer_t"           => "Dt",
               "H5D_vds_view_t"             => "Dv",
               "H5FD_class_value_t"         => "DV",
               "H5D_chunk_iter_op_t"        => "x",
               "herr_t"                     => "e",
               "H5E_auto1_t"                => "Ea",
               "H5E_auto2_t"                => "EA",
               "H5ES_event_complete_func_t" => "EC",
               "H5E_direction_t"            => "Ed",
               "H5E_error_t"                => "Ee",
               "H5ES_event_insert_func_t"   => "EI",
               "H5ES_status_t"              => "Es",
               "H5E_type_t"                 => "Et",
               "H5FD_class_t"               => "FC",
               "H5F_close_degree_t"         => "Fd",
               "H5F_fspace_strategy_t"      => "Ff",
               "H5F_flush_cb_t"             => "FF",
               "H5F_info2_t"                => "FI",
               "H5F_mem_t"                  => "Fm",
               "H5F_scope_t"                => "Fs",
               "H5F_file_space_type_t"      => "Ft",
               "H5F_libver_t"               => "Fv",
               "H5G_iterate_t"              => "Gi",
               "H5G_obj_t"                  => "Go",
               "H5G_stat_t"                 => "Gs",
               "hsize_t"                    => "h",
               "H5_atclose_func_t"          => "Hc",
               "hssize_t"                   => "Hs",
               "H5E_major_t"                => "i",     # H5E_major_t is typedef'd to hid_t
               "H5E_minor_t"                => "i",     # H5E_minor_t is typedef'd to hid_t
               "hid_t"                      => "i",
               "H5I_future_discard_func_t"  => "ID",
               "H5I_free_t"                 => "If",
               "H5_index_t"                 => "Ii",
               "H5I_iterate_func_t"         => "II",
               "H5_iter_order_t"            => "Io",
               "H5FD_subfiling_ioc_select_t" => "IO",
               "H5I_future_realize_func_t"  => "IR",
               "int"                        => "Is",
               "int32_t"                    => "Is",
               "H5I_search_func_t"          => "IS",
               "H5I_type_t"                 => "It",
               "unsigned"                   => "Iu",
               "unsigned int"               => "Iu",
               "uint32_t"                   => "Iu",
               "H5O_token_t"                => "k",
               "H5L_iterate1_t"             => "Li",
               "H5L_iterate2_t"             => "LI",
               "H5G_link_t"                 => "Ll", #Same as H5L_type_t now
               "H5L_type_t"                 => "Ll",
               "H5L_elink_traverse_t"       => "Lt",
               "H5MM_allocate_t"            => "Ma",
               "MPI_Comm"                   => "Mc",
               "H5MM_free_t"                => "Mf",
               "MPI_Info"                   => "Mi",
               "H5M_iterate_t"              => "MI",
               "H5FD_mem_t"                 => "Mt",
               "off_t"                      => "o",
               "H5O_iterate1_t"             => "Oi",
               "H5O_iterate2_t"             => "OI",
               "H5O_mcdt_search_cb_t"       => "Os",
               "H5O_type_t"                 => "Ot",
               "H5P_class_t"                => "p",
               "H5P_cls_create_func_t"      => "Pc",
               "H5P_prp_create_func_t"      => "PC",
               "H5P_prp_delete_func_t"      => "PD",
               "H5P_prp_get_func_t"         => "PG",
               "H5P_iterate_t"              => "Pi",
               "H5P_cls_close_func_t"       => "Pl",
               "H5P_prp_close_func_t"       => "PL",
               "H5P_prp_compare_func_t"     => "PM",
               "H5P_cls_copy_func_t"        => "Po",
               "H5P_prp_copy_func_t"        => "PO",
               "H5P_prp_set_func_t"         => "PS",
               "hdset_reg_ref_t"            => "Rd",
               "hobj_ref_t"                 => "Ro",
               "H5R_ref_t"                  => "Rr",
               "H5R_type_t"                 => "Rt",
               "char"                       => "s",
               "unsigned char"              => "s",
               "H5S_class_t"                => "Sc",
               "H5S_seloper_t"              => "Ss",
               "H5S_sel_type"               => "St",
               "htri_t"                     => "t",
               "H5T_cset_t"                 => "Tc",
               "H5T_conv_t"                 => "TC",
               "H5T_direction_t"            => "Td",
               "H5T_pers_t"                 => "Te",
               "H5T_conv_except_func_t"     => "TE",
               "H5T_norm_t"                 => "Tn",
               "H5T_order_t"                => "To",
               "H5T_pad_t"                  => "Tp",
               "H5T_sign_t"                 => "Ts",
               "H5T_class_t"                => "Tt",
               "H5T_str_t"                  => "Tz",
               "unsigned long"              => "Ul",
               "unsigned long long"         => "UL",
               "uint64_t"                   => "UL",
               "H5VL_attr_get_t"            => "Va",
               "H5VL_blob_optional_t"       => "VA",
               "H5VL_attr_specific_t"       => "Vb",
               "H5VL_blob_specific_t"       => "VB",
               "H5VL_dataset_get_t"         => "Vc",
               "H5VL_class_value_t"         => "VC",
               "H5VL_dataset_specific_t"    => "Vd",
               "H5VL_datatype_get_t"        => "Ve",
               "H5VL_datatype_specific_t"   => "Vf",
               "H5VL_file_get_t"            => "Vg",
               "H5VL_file_specific_t"       => "Vh",
               "H5VL_group_get_t"           => "Vi",
               "H5VL_group_specific_t"      => "Vj",
               "H5VL_link_create_t"         => "Vk",
               "H5VL_link_get_t"            => "Vl",
               "H5VL_get_conn_lvl_t"        => "VL",
               "H5VL_link_specific_t"       => "Vm",
               "H5VL_object_get_t"          => "Vn",
               "H5VL_request_notify_t"      => "VN",
               "H5VL_object_specific_t"     => "Vo",
               "H5VL_request_specific_t"    => "Vr",
               "H5VL_attr_optional_t"       => "Vs",
               "H5VL_subclass_t"            => "VS",
               "H5VL_dataset_optional_t"    => "Vt",
               "H5VL_datatype_optional_t"   => "Vu",
               "H5VL_file_optional_t"       => "Vv",
               "H5VL_group_optional_t"      => "Vw",
               "H5VL_link_optional_t"       => "Vx",
               "H5VL_object_optional_t"     => "Vy",
               "H5VL_request_optional_t"    => "Vz",
               "va_list"                    => "x",
               "void"                       => "x",
               "size_t"                     => "z",
               "H5Z_SO_scale_type_t"        => "Za",
               "H5Z_class_t"                => "Zc",
               "H5Z_EDC_t"                  => "Ze",
               "H5Z_filter_t"               => "Zf",
               "H5Z_filter_func_t"          => "ZF",
               "ssize_t"                    => "Zs",

# Types below must be defined here, as they appear in function arguments,
# but they are not yet supported in the H5_trace_args() routine.  If
# they are used as an actual parameter type (and not just as a pointer
# to the type), they must have a "real" abbreviation added (like the ones
# above), moved to the section of entries above, and support for displaying
# the type must be added to H5_trace_args().
#
# The "#" placeholder is what rewrite_func() looks for in order to report
# an "unsupported type" error when such a type is passed by value.
               "H5ES_err_info_t"            => "#",
               "H5FD_t"                     => "#",
               "H5FD_hdfs_fapl_t"           => "#",
               "H5FD_mirror_fapl_t"         => "#",
               "H5FD_onion_fapl_t"          => "#",
               "H5FD_ros3_fapl_t"           => "#",
               "H5FD_splitter_vfd_config_t" => "#",
               "H5L_class_t"                => "#",
               "H5VL_class_t"               => "#",
               "H5VL_loc_params_t"          => "#",
               "H5VL_request_status_t"      => "#",
              );


##############################################################################
# Maximum length of H5TRACE macro line
# If the ColumnLimit in .clang-format is changed, this value will need to be updated
#
# rewrite_func() compares the projected length of the H5TRACE line against
# this limit and starts a new, indented line before any argument that would
# push the line past it.
#
my $max_trace_macro_line_len = 110;


##############################################################################
# Report a problem found while processing a function.
#
# Arguments:
#   $file - name of the file being processed (used as the message prefix)
#   $func - name of the function the problem was found in
#   @mesg - message fragments; they are concatenated without a separator
#
# The line number printed is the number of newlines that precede the first
# place in $Source where $func appears at the start of a line; it stays at 1
# when no such place is found.  The two-line report goes to standard output.
# Calling this also sets $found_errors, which makes the script exit with a
# failure status once all files have been processed.
#
my $found_errors = 0;

sub errmesg ($$@) {
  my ($file, $func, @mesg) = @_;
  my $text = join("", @mesg);

  # Locate the function name and count the newlines in front of it.
  my $lineno = 1;
  if ($Source =~ /(.*?\n)($func)/s) {
    my $preceding = $1;
    $lineno = ($preceding =~ tr/\n//);
  }

  # Remember that at least one error has been reported.
  $found_errors = 1;

  print "$file: in function \`$func\':\n";
  print "$file:$lineno: $text\n";
}

##############################################################################
# Given a C data type return the type string that goes with it.
#
# Arguments:
#   $file  - name of the file being processed (currently unused; kept for the
#            deferred error report below)
#   $func  - name of the function being processed (likewise unused for now)
#   $atype - text of the C type, possibly with qualifiers, '*'s and a trailing
#            "[size]" suffix
#
# Returns the concatenation of:
#   - one '*' per level of indirection that is not already part of the
#     matching %TypeString key,
#   - "[size]" when a non-empty array suffix was present,
#   - the %TypeString abbreviation of the base type, or "!" when the type is
#     not in the table (the caller decides whether that is an error).
#
sub argstring ($$$) {
  my ($file, $func, $atype) = @_;
  my ($ptr, $tstr, $array) = (0, "!", "");
  my ($fq_atype);

  # Normalize the data type by removing redundant white space,
  # certain type qualifiers, and indirection.
  #
  # 'const' is matched as a whole word so identifiers that merely contain
  # the letters are left alone, every occurrence is removed, and it is
  # replaced by a space so the words on either side are not fused together.
  $atype =~ s/\bconst\b/ /g;
  $atype =~ s/^\s*static\b//;
  $atype =~ s/\bH5_ATTR_UNUSED\b//g;
  $atype =~ s/\bH5_ATTR_DEPRECATED_USED\b//g;
  $atype =~ s/\bH5_ATTR_NDEBUG_UNUSED\b//g;
  $atype =~ s/\bH5_ATTR_DEBUG_API_USED\b//g;
  $atype =~ s/\bH5_ATTR_PARALLEL_UNUSED\b//g;
  $atype =~ s/\bH5_ATTR_PARALLEL_USED\b//g;

  # Count and strip every '*', not just the first run, so that "T * *" and
  # "T *const *" are both seen as two levels of indirection.
  $ptr = ($atype =~ tr/*//d);

  $atype =~ s/\s+/ /g;
  $atype =~ s/^\s+//;
  $atype =~ s/\s+$//;

  # Split off a trailing "[size]" suffix, if any.
  if ($atype =~ /(.*)\[(.*)\]$/) {
    ($array, $atype) = ($2, $1);
    $atype =~ s/\s+$//;
  }
  $fq_atype = $atype . ('*' x $ptr);

  if ($ptr>0 && exists $TypeString{$fq_atype}) {
    # The table has an entry for the fully pointer-qualified type.
    $ptr = 0;
    $tstr = $TypeString{$fq_atype};
  } elsif ($ptr>0 && exists $TypeString{"$atype*"}) {
    # The table has an entry for a single pointer to the type.
    --$ptr;
    $tstr = $TypeString{"$atype*"};
  } elsif (!exists $TypeString{$atype}) {
# Defer throwing error until type is actually used
#    errmesg $file, $func, "untraceable type \`$atype", '*'x$ptr, "\'";
  } else {
    $tstr = $TypeString{$atype};
  }
  return ("*" x $ptr) . ($array ? "[$array]" : "") . $tstr;
}

##############################################################################
# Given information about an API function, rewrite that function with
# updated tracing information.
#
# Arguments:
#   $file - name of the file being processed (for error messages)
#   $type - text of the function's return type
#   $name - function name
#   $args - text between the parentheses of the parameter list
#   $body - function body text, starting at the opening '{'
#
# Returns the text that replaces the matched function definition:
# "\n$type\n$name(<original args>)\n<body>", where the body has had its
# H5TRACE / H5ARG_TRACE macros inserted or refreshed.  When anything goes
# wrong the error is reported through errmesg() and the body is returned
# as it was at that point.
#
# $file_api / $file_args count the routines touched in the current file;
# $total_api / $total_args accumulate those counts over files that changed.
#
my $file_api = 0;
my $file_args = 0;
my $total_api = 0;
my $total_args = 0;
sub rewrite_func ($$$$$) {
  my ($file, $type, $name, $args, $body) = @_;
  my ($arg, $trace, $argtrace);
  my (@arg_name, @arg_str, @arg_type);
  local $_;

  # Keep copy of original arguments
  my $orig_args = $args;

  # Parse return value
  my $rettype = argstring $file, $name, $type;

  # Parse arguments
  if ($args eq "void") {
    $trace = "H5TRACE0(\"$rettype\", \"\");\n";
    $argtrace = "H5ARG_TRACE0(\"\")";
  } else {
    # Split arguments.  First convert `/*in,out*/' to get rid of the
    # comma and remove lines beginning with a '#', then split the arguments
    # on commas.
    $args =~ s/(\/\*\s*in),\s*(out\s*\*\/)/$1_$2/g;     # Get rid of comma in 'in,out'
    $args =~ s/H5FL_TRACK_PARAMS//g; # Remove free list macro
    $args =~ s/\n#.*?\n/\n/g;        # Remove lines beginning with '#'
    my @args = split /,[\s\n]*/, $args;
    my $argno = 0;
    my %names;      # Maps each parameter name to its position in the list

    for $arg (@args) {
      if($arg=~/\w*\.{3}\w*/){  # Skip "..." for varargs parameter
        next;
      }
      unless ($arg=~/^((\s*[a-z_A-Z](\w|\*)*\s+)+(\s*\*\s*|\s*const\s*|\s*volatile\s*)*)
              ([a-z_A-Z]\w*)(\[.*?\])?
              (\s*\/\*\s*(in|out|in_out)\s*\*\/)?\s*$/x) {
        errmesg $file, $name, "unable to parse \`$arg\'";
        goto error;
      } else {
        # $1 = type, $5 = parameter name, $6 = array suffix, $8 = direction
        my ($atype, $aname, $array, $adir) = ($1, $5, $6, $8);
        $names{$aname} = $argno++;
        $adir ||= "in";
        $atype =~ s/\s+$//;
        push @arg_name, $aname;
        push @arg_type, $atype;

        if ($adir eq "out") {
          # Output-only parameters are traced with the "x" type string
          push @arg_str, "x";
        } else {
          if (defined $array) {
            # An array parameter is treated as a pointer to its element type
            $atype .= "*";
            if ($array =~ /^\[\/\*([a-z_A-Z]\w*)\*\/\]$/) {
              # `[/*name*/]' names the parameter holding the array size;
              # encode it as `[a<position of that parameter>]'
              my $asize = $1;
              if (exists $names{$asize}) {
                $atype .= '[a' . $names{$asize} . ']';
              } else {
                warn "bad array size: $asize";
                $atype .= "*";
              }
            }
          }
          push @arg_str, argstring $file, $name, $atype;
        }
      }
    }

    # Compose the trace macro
    $trace = "H5TRACE" . scalar(@arg_str) . "(\"$rettype\", \"";
    $argtrace = "H5ARG_TRACE" . scalar(@arg_str) . "(__func__, \"";
    $trace .= join("", @arg_str) . "\"";
    $argtrace .= join("", @arg_str) . "\"";

    # Add 4 for indenting the line
    my $len = 4 + length($trace);

    for my $i (0 .. $#arg_name) {
      # Handle wrapping

      # Be VERY careful here! clang-format and this script MUST agree
      # on which lines get wrapped or there will be churn as each tries
      # to undo the other's output.
      #
      # TWO cases must be handled:
      # 1) The argument is that last one and ');' will be appended
      # 2) The argument is NOT the last one and ',' will be appended
      #
      # NB: clang-format does NOT consider terminal newlines when
      #     counting columns for the ColumnLimit
      #
      # The extra '2' added after $len includes the ', ' that would be
      # added BEFORE the argument.
      #
      my $adjust = ($i + 1 == scalar(@arg_str)) ? 2 : 1;
      my $len_if_added = $len + 2 + length($arg_name[$i]) + $adjust;

      # Wrap lines that will be longer than the limit
      if ($len_if_added > $max_trace_macro_line_len) {
        # Wrap line, with indentation
        $trace .= ",\n             ";
        $len = 13;              # Set to 13, for indentation

        # Indent an extra space to account for extra digit in 'H5TRACE' macro
        if (scalar(@arg_str) >= 10) {
          $trace .= " ";
          $len++;
        }
      } else {
        $trace .= ", ";
        $len += 2;     # Add 2, for ', '
      }

      # Append argument
      $trace .= "$arg_name[$i]";
      $argtrace .= ", $arg_name[$i]";

      # Add length of appended argument name
      $len += length($arg_name[$i]);
    }

    # Append final ');' for macro
    $trace .= ");\n";
    $argtrace .= ")";
  }

  # Check for API / non-API routine name
  if( $name =~ /H5[A-Z]{0,2}[a-z].*/) {
      # The H5TRACE() statement, for API routines
      if ($body =~ /\/\*[ \t]*NO[ \t]*TRACE[ \t]*\*\//) {
        # Ignored due to NO TRACE comment.
      } else {
          # Check for known, but unsupported type
          if ( $trace =~ /(^#)|([^*]#)/ ) {
            # Check for unsupported return type.  The test is made on the
            # type string ($rettype); the C type text in $type never
            # contains the '#' placeholder.
            if ( $rettype =~ /(^#)|([^*]#)/ ) {
              errmesg $file, $name, "unsupported type in return type\nAdd to TypeString hash in trace script and update H5_trace_args()";
              print "type = '$type'\n";
            }

            # Check for unsupported argument type
            my $index = 0;
            for (@arg_str) {
              if ( $_ =~ /(^#)|([^*]#)/ ) {
                errmesg $file, $name, "unsupported type in args\nAdd to TypeString hash in trace script and update H5_trace_args()";
                print "type = $arg_type[$index]\n";
              }
              $index++;
            }
            goto error;
          }

          # Check for unknown (and therefore unsupported) type
          if ( $trace =~ /(^!)|([^*]!)/ ) {
            # Check for unknown return type.  As above, the test is made
            # on the type string, which is where the '!' placeholder lives.
            if ( $rettype =~ /(^!)|([^*]!)/ ) {
              errmesg $file, $name, "unknown type in return type\nAdd to TypeString hash in trace script and also update H5_trace_args() if used by value";
              print "type = '$type'\n";
            }

            # Check for unsupported argument type
            my $index = 0;
            for (@arg_str) {
              if ( $_ =~ /(^!)|([^*]!)/ ) {
                errmesg $file, $name, "unknown type in args\nAdd to TypeString hash in trace script and also update H5_trace_args() if used by value";
                print "type = $arg_type[$index]\n";
              }
              $index++;
            }
            goto error;
          }

          if ($body =~ s/((\n[ \t]*)H5TRACE\d+\s*\(.*?\);)\n/"$2$trace"/es) {
            # Replaced an H5TRACE macro.
          } elsif ($body=~s/((\n[ \t]*)FUNC_ENTER\w*[ \t]*(\(.*?\))?;??)\n/"$1$2$trace"/es) {
            # Added an H5TRACE macro after a FUNC_ENTER macro.
          } else {
            errmesg $file, $name, "unable to insert tracing information";
            print "body = ", $body, "\n";
            goto error;
          }
      }

    #Increment # of API routines modified
    # (this also counts routines skipped because of a NO TRACE comment)
    $file_api++;
  }

  # Check for H5ARG_TRACE macros in non-API routines
  if ( $body =~ /H5ARG_TRACE/ ) {
      # Check for untraceable type (deferred until $argtrace used)
      if ( $argtrace =~ /(^!)|([^*]!)/ ) {
        errmesg $file, $name, "untraceable type in args";
        print "args = '$orig_args'\n";
        goto error;
      }

      # Replace / update H5ARG_TRACE macro.
      $body =~ s/(H5ARG_TRACE(\d+\s*\(.*?\))?)/"$argtrace"/esg;

    #Increment # of non-API routines modified
    $file_args++;
  }

 error:
  return "\n$type\n$name($orig_args)\n$body";
}

##############################################################################
# Process each source file, rewriting API functions with updated
# tracing information.
#
# For every file named on the command line that is not in the skip list:
#   1. the whole file is read into $Source,
#   2. each function definition matched by the pattern below is replaced by
#      the result of rewrite_func(),
#   3. if the text changed, the original is renamed to "<file>~" and the new
#      text is written under the original name.
#
# Files are opened with the three-argument form of open() so that a file
# name is never interpreted as containing an open mode, and the write to
# the rewritten file is checked so that a short write is not silently left
# behind.
#

# Files that do not need tracing macros (matched against the name exactly
# as given on the command line)
my %untraced_file = map { $_ => 1 } (
  "H5FDmulti.c", "src/H5FDmulti.c",
  "H5FDstdio.c", "src/H5FDstdio.c",
  "src/H5TS.c",
  "src/H5FDperform.c",
);

for my $file (@ARGV) {
  $file_api = 0;
  $file_args = 0;

  # Ignore some files that do not need tracing macros
  next if exists $untraced_file{$file};

  # Snarf up the entire file
  open(my $in, '<', $file) or die "$file: $!\n";
  $Source = join "", <$in>;
  close $in;

  # Make a copy of the original data
  my $original = $Source;

  # Make modifications
  $Source =~ s/\n([A-Za-z]\w*(\s+[A-Za-z]\w*)*\s*\**)\n #type
                   (H5[A-Z]{0,2}_?[a-zA-Z0-9_]\w*)      #name
                   \s*\((.*?)\)\s*                      #args
                   (\{.*?\n\}[^\n]*)                    #body
                   /rewrite_func($file,$1,$3,$4,$5)/segx;

  # If the source changed then print out the new version
  next if $original eq $Source;

  printf "%s: instrumented %d API function%s and %d argument list%s\n",
         $file, $file_api, (1 == $file_api ? "" : "s"),
         $file_args, (1 == $file_args ? "" : "s");
  rename $file, "$file~" or die "unable to make backup";
  open(my $out, '>', $file) or die "unable to modify source";
  print {$out} $Source or die "$file: write failed: $!\n";
  close $out or die "$file: close failed: $!\n";

  $total_api += $file_api;
  $total_args += $file_args;
}

# Final report: when errmesg() was called at least once, print a banner
# and exit with status 1; otherwise print the totals accumulated over all
# files that were modified.
if ($found_errors == 1) {
    my $bang_line = "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n";

    print "\n";
    print $bang_line;
    print "*** ERRORS FOUND *** ERRORS FOUND *** ERRORS FOUND ****\n";
    print $bang_line;
    exit 1;
}

my $api_suffix  = (1 == $total_api)  ? "" : "s";
my $args_suffix = (1 == $total_args) ? "" : "s";

print "Finished processing HDF5 API calls:\n";
printf "\tinstrumented %d API function%s and %d argument list%s\n",
       $total_api, $api_suffix, $total_args, $args_suffix;