ITS#8731 Add the documentation and scripts

2025-01-30 13:30:57 +08:00 · 2019-02-15 16:47:33 +00:00 · 2019-02-15 16:47:33 +00:00 · e5b5232516
commit e5b5232516
parent a70b887e6c
8 changed files with 745 additions and 0 deletions
--- a/doc/devel/variadic_debug/03-libldap_Debug.cocci
+++ b/doc/devel/variadic_debug/03-libldap_Debug.cocci
@ -0,0 +1,70 @@
+using "equivalence.iso"
+
+@initialize:ocaml@
+@@
+// count the number of % characters in the format string
+let fmtn(fmt,n) =
+    List.length (Str.split_delim (Str.regexp_string "%") fmt) = n + 1
+
+# replace osip_debug/oslocal_debug with Debug() macros first
+@@
+expression E;
+expression list args;
+@@
+(
+-osip_debug
+|
+-oslocal_debug
+)
+Debug
+ (
+-E,
+LDAP_DEBUG_TRACE,
+ args );
+
+// replace Debug( ..., arg1, arg2, 0 ) with Debug2( ..., arg1, arg2 )
+@@
+char[] fmt : script:ocaml() { fmtn(fmt,2) };
+expression list[2] args;
+expression E;
+@@
+
+-Debug
+Debug2
+ ( E, _(fmt), args
+-, 0
+ );
+
+// replace Debug( ..., arg1, 0, 0 ) with Debug1()
+@@
+char[] fmt : script:ocaml() { fmtn(fmt,1) };
+expression list[1] args;
+expression E;
+@@
+
+-Debug
+Debug1
+ ( E, _(fmt), args
+-, 0, 0
+ );
+
+// Zero-argument Debug() -> Debug0()
+@@
+expression E, S;
+@@
+
+-Debug
+Debug0
+ ( E, S
+-, 0, 0, 0
+ );
+
+// everything else is a regular 3-argument debug macro, replace with Debug3()
+@@
+expression E, S;
+expression list[3] args;
+@@
+
+-Debug
+Debug3
+ ( E, S, args );
--- a/doc/devel/variadic_debug/04-variadic.cocci
+++ b/doc/devel/variadic_debug/04-variadic.cocci
@ -0,0 +1,165 @@
+@initialize:ocaml@
+@@
+// count the number of % characters in the format string
+let fmtn(fmt,n) =
+    List.length (Str.split_delim (Str.regexp_string "%") fmt) = n + 1
+
+@@
+identifier Logs =~ "Log[0-9]";
+@@
+-Logs
+Log
+
+@@
+@@
+-StatslogTest
+LogTest
+
+// Process two-argument Debug() macros with an extra zero
+@@
+char[] fmt : script:ocaml() { fmtn(fmt,2) };
+expression list[2] args;
+expression E;
+@@
+
+Debug( E, fmt, args
+-, 0
+ );
+
+@@
+char[] fmt : script:ocaml() { fmtn(fmt,2) };
+expression list[2] args;
+expression E;
+@@
+
+Debug( E, fmt, args
+-, NULL
+ );
+
+// Single argument Debug() macros with two extra zeroes
+@@
+char[] fmt : script:ocaml() { fmtn(fmt,1) };
+expression list[1] args;
+expression E;
+@@
+
+Debug( E, fmt, args
+-, 0, 0
+ );
+
+@@
+char[] fmt : script:ocaml() { fmtn(fmt,1) };
+expression list[1] args;
+expression E;
+@@
+
+Debug( E, fmt, args
+-, NULL, NULL
+ );
+
+// Debug() macros with no arguments just padded with zeroes
+@@
+expression E, S;
+@@
+
+Debug( E, S
+-, 0, 0, 0
+ );
+
+@@
+expression E, S;
+@@
+
+Debug( E, S
+-, NULL, NULL, NULL
+ );
+
+// Similar to above, just for Statslog
+@@
+char[] fmt : script:ocaml() { fmtn(fmt,5) };
+expression list[5] args;
+expression E;
+@@
+
+-Statslog
+Debug
+ ( E, fmt, args );
+
+@@
+char[] fmt : script:ocaml() { fmtn(fmt,4) };
+expression list[4] args;
+expression E;
+@@
+
+-Statslog
+Debug
+ ( E, fmt, args
+-, 0
+ );
+
+@@
+char[] fmt : script:ocaml() { fmtn(fmt,3) };
+expression list[3] args;
+expression E;
+@@
+
+-Statslog
+Debug
+ ( E, fmt, args
+-, 0, 0
+ );
+
+@@
+char[] fmt : script:ocaml() { fmtn(fmt,2) };
+expression list[2] args;
+expression E;
+@@
+
+-Statslog
+Debug
+ ( E, fmt, args
+-, 0, 0, 0
+ );
+
+@@
+char[] fmt : script:ocaml() { fmtn(fmt,1) };
+expression list[1] args;
+expression E;
+@@
+
+-Statslog
+Debug
+ ( E, fmt, args
+-, 0, 0, 0, 0
+ );
+
+@@
+expression E, S;
+@@
+
+-Statslog
+Debug
+ ( E, S
+-, 0, 0, 0, 0, 0
+ );
+
+// And StatslogEtime
+@@
+char[] fmt : script:ocaml() { fmtn(fmt,4) };
+expression list[4] args;
+expression E;
+@@
+
+StatslogEtime( E, fmt, args
+-, 0
+ );
+
+@@
+identifier Stats =~ "^Statslog";
+@@
+(
+ StatslogEtime
+|
+-Stats
+Debug
+)
--- a/doc/devel/variadic_debug/07-shortcut.cocci
+++ b/doc/devel/variadic_debug/07-shortcut.cocci
@ -0,0 +1,216 @@
+// Splice string `s` into the format string `fmtstring` replacing the
+// %-parameter at position `pos`
+@initialize:python@
+@@
+
+# regex from https://stackoverflow.com/questions/30011379/how-can-i-parse-a-c-format-string-in-python
+import re
+fmtstring = '''\
+(                                  # start of capture group 1
+%                                  # literal "%"
+(?:                                # first option
+(?:[-+0 #]{0,5})                   # optional flags
+(?:\d+|\*)?                        # width
+(?:\.(?:\d+|\*))?                  # precision
+(?:h|l|ll|w|I|I32|I64)?            # size
+[cCdiouxXeEfgGaAnpsSZ]             # type
+) |                                # OR
+%%)                                # literal "%%"
+'''
+
+regex = re.compile(fmtstring, re.X)
+
+def parse_format(f):
+    return tuple((m.span(), m.group()) for m in
+        regex.finditer(f))
+
+def insert_at_pos(fmt, s, pos):
+    formats = parse_format(fmt)
+    span, format = formats[pos]
+    acc = fmt[:span[0]]
+    if s.startswith('"'):
+        acc += s[1:]
+    else:
+        acc += '" '
+        acc += s
+    if acc.endswith('"'):
+        acc = acc[:-1] + fmt[span[1]:]
+    else:
+        acc += ' "'
+        acc += fmt[span[1]:]
+    return acc
+
+// rest of the file implements the same as 09-merge.cocci
+// The main difference is that we only match on snprintf and Debug that are
+// directly adjacent, not based on control flow information which trips
+// coccinelle's model-checker
+@shortcut@
+identifier buf;
+expression E, L;
+expression list args_before, args, args_after;
+expression format1, format2;
+position p1, p2;
+@@
+
+snprintf@p1( buf, E, format1, args );
+Debug@p2( L, format2, args_before, buf, args_after );
+
+// use insert_at_pos above to construct the new format-string
+@script:python shortcut_process@
+format1 << shortcut.format1;
+format2 << shortcut.format2;
+args_before << shortcut.args_before;
+merged;
+@@
+
+pos = len(args_before.elements)
+coccinelle.merged = insert_at_pos(format2, format1, pos)
+
+@shortcut_replace@
+position shortcut.p1, shortcut.p2;
+identifier shortcut_process.merged;
+
+identifier buf;
+expression E, L;
+expression list args_before, args, args_after;
+expression format1, format2;
+@@
+
+-snprintf@p1( buf, E, format1, args );
+-Debug@p2( L, format2, args_before, buf, args_after );
+Debug( L, merged, args_before, args, args_after );
+
+@shortcut_locked@
+identifier buf;
+expression E, L, lock;
+expression list args_before, args, args_after;
+expression format1, format2;
+position p1, p2;
+@@
+
+ldap_pvt_thread_mutex_lock(lock);
+snprintf@p1( buf, E, format1, args );
+ldap_pvt_thread_mutex_unlock(lock);
+Debug@p2( L, format2, args_before, buf, args_after );
+
+// use insert_at_pos above to construct the new format-string
+@script:python shortcut_locked_process@
+format1 << shortcut_locked.format1;
+format2 << shortcut_locked.format2;
+args_before << shortcut_locked.args_before;
+merged;
+@@
+
+pos = len(args_before.elements)
+coccinelle.merged = insert_at_pos(format2, format1, pos)
+
+@shortcut_locked_replace@
+position shortcut_locked.p1, shortcut_locked.p2;
+identifier shortcut_locked_process.merged;
+
+identifier buf;
+expression E, L, lock;
+expression list args_before, args, args_after;
+expression format1, format2;
+@@
+
+ldap_pvt_thread_mutex_lock(lock);
+-snprintf@p1( buf, E, format1, args );
+Debug( L, merged, args_before, args, args_after );
+ldap_pvt_thread_mutex_unlock(lock);
+-Debug@p2( L, format2, args_before, buf, args_after );
+
+// so long as we don't reference 'buf' afterwards, no need to keep it defined.
+// A lot of pattern-matching is spelled out explicitly to work around the fact
+// that the state space doesn't get compressed otherwise.
+@@
+type T;
+identifier buf, id;
+expression E, lock;
+initializer I;
+@@
+{
+-\( T buf = I; \| T buf; \)
+(
+ ldap_pvt_thread_mutex_lock(lock);
+|
+)
+(
+ Debug( ... );
+&
+ ... when != buf
+)
+(
+ ldap_pvt_thread_mutex_unlock(lock);
+|
+)
+(
+|
+ continue;
+|
+ break;
+|
+ goto id;
+|
+ \(
+  return E;
+ \&
+  ... when != buf
+ \)
+)
+}
+
+// the rest identifies and removes a (newly-)reduntant LogTest check
+@if_guard@
+position p;
+statement s;
+@@
+
+(
+ if ( ... ) {@p 
+  Debug( ... );
+ } else s
+|
+ if ( ... ) {@p
+  Debug( ... );
+ }
+)
+
+@else_guard@
+position p;
+statement s;
+@@
+
+if ( ... ) s
+else {@p
+ Debug( ... );
+}
+
+@loop_guard@
+position p;
+@@
+
+(
+ while ( ... ) {@p
+  Debug( ... );
+ }
+|
+ for ( ...;...;... ) {@p
+  Debug( ... );
+ }
+)
+
+@@
+position p != { if_guard.p , else_guard.p, loop_guard.p };
+@@
+-{@p
+ Debug( ... );
+-}
+
+@useless_if@
+expression L;
+@@
+
+-if ( LogTest( L ) ) {
+ Debug( L, ... );
+-}
--- a/doc/devel/variadic_debug/09-merge.cocci
+++ b/doc/devel/variadic_debug/09-merge.cocci
@ -0,0 +1,147 @@
+// Note that this file has not actually been used in the end, since
+// 07-shortcut.cocci covers everything we needed in the project, but being
+// simpler, it makes the intent of 07-shortcut.cocci clearer
+
+
+// Splice string `s` into the format string `fmtstring` replacing the
+// %-parameter at position `pos`
+@initialize:python@
+@@
+
+#regex from https://stackoverflow.com/questions/30011379/how-can-i-parse-a-c-format-string-in-python
+import re
+fmtstring = '''\
+(                                  # start of capture group 1
+%                                  # literal "%"
+(?:                                # first option
+(?:[-+0 #]{0,5})                   # optional flags
+(?:\d+|\*)?                        # width
+(?:\.(?:\d+|\*))?                  # precision
+(?:h|l|ll|w|I|I32|I64)?            # size
+[cCdiouxXeEfgGaAnpsSZ]             # type
+) |                                # OR
+%%)                                # literal "%%"
+'''
+
+regex = re.compile(fmtstring, re.X)
+
+def parse_format(f):
+    return tuple((m.span(), m.group()) for m in
+        regex.finditer(f))
+
+def insert_at_pos(fmt, s, pos):
+    formats = parse_format(fmt)
+    span, format = formats[pos]
+    acc = fmt[:span[0]]
+    if s.startswith('"'):
+        acc += s[1:]
+    else:
+        acc += '" '
+        acc += s
+    if acc.endswith('"'):
+        acc = acc[:-1] + fmt[span[1]:]
+    else:
+        acc += ' "'
+        acc += fmt[span[1]:]
+    return acc
+
+// Identify the redundant snprintfs (within a locked region)
+@a exists@
+expression lock, E, L;
+expression list args_before, args, args_after;
+identifier buf;
+expression format1, format2;
+type T;
+position p1, p2;
+@@
+
+{
+...
+T buf;
+...
+ldap_pvt_thread_mutex_lock(lock);
+...
+snprintf@p1( buf, E, format1, args );
+...
+ldap_pvt_thread_mutex_unlock(lock);
+...
+Debug@p2( L, format2, args_before, buf, args_after );
+...
+}
+
+// Merge the format strings with insert_at_pos above
+@script:python a_process@
+format1 << a.format1;
+format2 << a.format2;
+args_before << a.args_before;
+merged;
+@@
+
+pos = len(args_before.elements)
+coccinelle.merged = insert_at_pos(format2, format1, pos)
+
+// And merge the two together, replacing the extra buffer that's not used anymore
+@a_replace@
+position a.p1, a.p2;
+identifier a_process.merged;
+
+expression lock, E, L;
+expression list args_before, args, args_after;
+identifier buf;
+expression format1, format2;
+type T;
+@@
+
+{
+...
+-T buf;
+...
+ldap_pvt_thread_mutex_lock(lock);
+...
+-snprintf@p1( buf, E, format1, args );
+Debug( L, merged, args_before, args, args_after );
+...
+ldap_pvt_thread_mutex_unlock(lock);
+...
+-Debug@p2( L, format2, args_before, buf, args_after );
+...
+}
+
+// Once again (same as the 'a' series above, but those that remain to be sorted
+// now don't need to stay within a locked region
+@b exists@
+expression E, L;
+expression list args_before, args, args_after;
+identifier buf;
+expression format1, format2;
+position p1, p2;
+@@
+
+snprintf@p1( buf, E, format1, args );
+...
+Debug@p2( L, format2, args_before, buf, args_after );
+
+@script:python b_process@
+format1 << b.format1;
+format2 << b.format2;
+args_before << b.args_before;
+merged;
+@@
+
+pos = len(args_before.elements)
+coccinelle.merged = insert_at_pos(format2, format1, pos)
+
+@b_replace@
+position b.p1, b.p2;
+identifier b_process.merged;
+
+expression E, L;
+expression list args_before, args, args_after;
+identifier buf;
+expression format1, format2;
+@@
+
+-snprintf@p1( buf, E, format1, args );
+Debug( L, merged, args_before, args, args_after );
+...
+-Debug@p2( L, format2, args_before, buf, args_after );
--- a/doc/devel/variadic_debug/README
+++ b/doc/devel/variadic_debug/README
@ -0,0 +1,39 @@
+Most of the project now depends on the compiler supporting C99 variadic
+macros. This is used in the Debug() macro everywhere except libldap and
+its dependencies.
+
+From now on, any time Debug( level, fmt, args... ) is used, you can and
+should provide the appropriate number of arguments. The coccinelle
+patches in this directory implement the transformations used to bring
+the project in line with this.
+
+As we still aim to support libldap on platforms that only provide C89,
+Debug0/1/2/3 macros are used instead.
+
+If you need to adapt your own fork, see ITS#8731, the rest of this
+README and scripts in this directory on what you'll need to achieve
+this.
+
+Coccinelle as of git hash e65a7bdc04ac9122acdae2353422c5736b7998ba from
+https://github.com/coccinelle/coccinelle has been used to run the
+transformations performed. One notable limitation at the time of writing
+is that multi-part (format) strings are always merged onto the same line.
+
+Some sources cannot be processed, nssov overlay being a prime example,
+being wrapped in non-trivial macros.
+
+The following semantic patches are involved:
+- 03-libldap_Debug.cocci: converts the libraries to use the Debug[0123]
+  macros as appropriate
+- 04-variadic.cocci: converts the rest of the project to use the Debug
+  macro with the right number of arguments (as opposed to padding with
+  zeroes)
+- 09-merge.cocci will merge an 'snprintf(s, len, "fmt", args...);
+  Debug(level, "... %s ...", ..., s, ...);' sequence together
+- 07-shortcut.cocci is actually used to apply the above since
+  coccinelle's model-checker seems to struggle with state space
+  explosion in some of the very long and complex functions we have - 
+  09-merge.cocci doesn't finish in any reasonable time
+
+The equivalence.iso and macros.h files aid coccinelle to parse our
+sources correctly and simplify the semantic patches.
--- a/doc/devel/variadic_debug/equivalence.iso
+++ b/doc/devel/variadic_debug/equivalence.iso
@ -0,0 +1,12 @@
+Expression
+@ NULL @
+@@
+
+NULL <=> 0
+
+Expression
+@ underscore_func @
+expression E;
+@@
+
+_(E) => E
--- a/doc/devel/variadic_debug/macros.h
+++ b/doc/devel/variadic_debug/macros.h
@ -0,0 +1,23 @@
+#define LDAP_PF_LOCAL_SENDMSG_ARG(x)
+
+#define LDAP_P(x) x
+#define LDAP_F(x) extern x
+#define LDAP_V(x) extern x
+
+#define LDAP_GCCATTR(x)
+#define LDAP_XSTRING(x) ""
+#define LDAP_CONCAT(x,y) x
+
+#define LDAP_CONST const
+#define LDAP_BEGIN_DECL
+#define LDAP_END_DECL
+
+#define SLAP_EVENT_DECL
+#define SLAP_EVENT_FNAME
+
+/* contrib/slapd-modules/smbk5pwd/smbk5pwd.c */
+#define HDB int*
+
+#define BACKSQL_ARBITRARY_KEY
+#define BACKSQL_IDNUMFMT "%llu"
+#define BACKSQL_IDFMT "%s"
--- a/doc/devel/variadic_debug/script.sh
+++ b/doc/devel/variadic_debug/script.sh
@ -0,0 +1,73 @@
+#!/bin/bash
+
+set -e
+
+PATCH_DIR=doc/devel/variadic_debug
+
+SPATCH=${SPATCH:-spatch}
+SPATCH_OPTS=( --macro-file-builtins "$PATCH_DIR/macros.h" )
+#SPATCH_OPTS+=( --timeout 300 )
+
+SED_TRANSFORMATIONS=()
+
+# strip trailing whitespace copied from source on affected lines
+SED_TRANSFORMATIONS+=( -e 's/^\(+.*\)\s\+$/\1/' )
+
+# split out multipart strings back to original form (one per line)
+SED_TRANSFORMATIONS+=( -e 's/^\(+\s*\)\(.*"\) \(".*\)"$/\1\2\n+\1\3/' )
+
+# re-add whitespace around parentheses
+SED_TRANSFORMATIONS+=( -e 's/^\(+.*Debug[0-3]\?(\)\s*$/\1 /' )
+SED_TRANSFORMATIONS+=( -e 's/^\(+.*[^ ]\));$/\1 );/' )
+
+# fix whitespace errors in source we touch
+SED_TRANSFORMATIONS+=( -e 's/^\(+.*\)    \t/\1\t\t/' )
+SED_TRANSFORMATIONS+=( -e 's/^\(+\t*\) \{1,3\}\t/\1\t/' )
+
+normalise() {
+    patch="$1"
+    shift
+
+    # iterate until we've reached fixpoint
+    while ! cmp "$patch" "${patch}.new"; do
+        if [ -e "${patch}.new" ]; then
+            mv -- "${patch}.new" "$patch"
+        fi
+        sed "${SED_TRANSFORMATIONS[@]}" -- "$patch" >"${patch}.new"
+    done
+    rediff "$patch" >"${patch}.new"
+    mv -- "${patch}.new" "$patch"
+}
+
+git add "$PATCH_DIR"
+git commit -m "ITS#8731 Add the documentation and scripts"
+
+git am "$PATCH_DIR/00-fixes.patch"
+git am "$PATCH_DIR/01-logging.patch"
+git am "$PATCH_DIR/02-manual.patch"
+
+$SPATCH "${SPATCH_OPTS[@]}" -sp_file "$PATCH_DIR/03-libldap_Debug.cocci" \
+    -dir libraries/libldap \
+    >"$PATCH_DIR/03-libldap_Debug.patch"
+normalise "$PATCH_DIR/03-libldap_Debug.patch"
+git apply --index --directory libraries/libldap "$PATCH_DIR/03-libldap_Debug.patch"
+git commit -m "ITS#8731 Apply $PATCH_DIR/03-libldap_Debug.cocci"
+
+$SPATCH "${SPATCH_OPTS[@]}" -sp_file "$PATCH_DIR/04-variadic.cocci" \
+    -dir . \
+    >"$PATCH_DIR/04-variadic.patch"
+normalise "$PATCH_DIR/04-variadic.patch"
+git apply --index "$PATCH_DIR/04-variadic.patch"
+git commit -m "ITS#8731 Apply $PATCH_DIR/04-variadic.cocci"
+
+git am "$PATCH_DIR/05-back-sql.patch"
+git am "$PATCH_DIR/06-nssov.patch"
+
+$SPATCH "${SPATCH_OPTS[@]}" -sp_file "$PATCH_DIR/07-shortcut.cocci" \
+    -dir . \
+    >"$PATCH_DIR/07-shortcut.patch"
+normalise "$PATCH_DIR/07-shortcut.patch"
+git apply --index "$PATCH_DIR/07-shortcut.patch"
+git commit -m "ITS#8731 Apply $PATCH_DIR/07-shortcut.cocci"
+
+git am "$PATCH_DIR/08-snprintf-manual.patch"