ITS#8731 Add the documentation and scripts

This commit is contained in:
Ondřej Kuzník 2019-02-15 16:47:33 +00:00
parent a70b887e6c
commit e5b5232516
8 changed files with 745 additions and 0 deletions

View File

@ -0,0 +1,70 @@
using "equivalence.iso"
@initialize:ocaml@
@@
// count the number of % characters in the format string
let fmtn(fmt,n) =
List.length (Str.split_delim (Str.regexp_string "%") fmt) = n + 1
# replace osip_debug/oslocal_debug with Debug() macros first
@@
expression E;
expression list args;
@@
(
-osip_debug
|
-oslocal_debug
)
+Debug
(
-E,
+LDAP_DEBUG_TRACE,
args );
// replace Debug( ..., arg1, arg2, 0 ) with Debug2( ..., arg1, arg2 )
@@
char[] fmt : script:ocaml() { fmtn(fmt,2) };
expression list[2] args;
expression E;
@@
-Debug
+Debug2
( E, _(fmt), args
-, 0
);
// replace Debug( ..., arg1, 0, 0 ) with Debug1()
@@
char[] fmt : script:ocaml() { fmtn(fmt,1) };
expression list[1] args;
expression E;
@@
-Debug
+Debug1
( E, _(fmt), args
-, 0, 0
);
// Zero-argument Debug() -> Debug0()
@@
expression E, S;
@@
-Debug
+Debug0
( E, S
-, 0, 0, 0
);
// everything else is a regular 3-argument debug macro, replace with Debug3()
@@
expression E, S;
expression list[3] args;
@@
-Debug
+Debug3
( E, S, args );

View File

@ -0,0 +1,165 @@
@initialize:ocaml@
@@
// count the number of % characters in the format string
let fmtn(fmt,n) =
List.length (Str.split_delim (Str.regexp_string "%") fmt) = n + 1
@@
identifier Logs =~ "Log[0-9]";
@@
-Logs
+Log
@@
@@
-StatslogTest
+LogTest
// Process two-argument Debug() macros with an extra zero
@@
char[] fmt : script:ocaml() { fmtn(fmt,2) };
expression list[2] args;
expression E;
@@
Debug( E, fmt, args
-, 0
);
@@
char[] fmt : script:ocaml() { fmtn(fmt,2) };
expression list[2] args;
expression E;
@@
Debug( E, fmt, args
-, NULL
);
// Single argument Debug() macros with two extra zeroes
@@
char[] fmt : script:ocaml() { fmtn(fmt,1) };
expression list[1] args;
expression E;
@@
Debug( E, fmt, args
-, 0, 0
);
@@
char[] fmt : script:ocaml() { fmtn(fmt,1) };
expression list[1] args;
expression E;
@@
Debug( E, fmt, args
-, NULL, NULL
);
// Debug() macros with no arguments just padded with zeroes
@@
expression E, S;
@@
Debug( E, S
-, 0, 0, 0
);
@@
expression E, S;
@@
Debug( E, S
-, NULL, NULL, NULL
);
// Similar to above, just for Statslog
@@
char[] fmt : script:ocaml() { fmtn(fmt,5) };
expression list[5] args;
expression E;
@@
-Statslog
+Debug
( E, fmt, args );
@@
char[] fmt : script:ocaml() { fmtn(fmt,4) };
expression list[4] args;
expression E;
@@
-Statslog
+Debug
( E, fmt, args
-, 0
);
@@
char[] fmt : script:ocaml() { fmtn(fmt,3) };
expression list[3] args;
expression E;
@@
-Statslog
+Debug
( E, fmt, args
-, 0, 0
);
@@
char[] fmt : script:ocaml() { fmtn(fmt,2) };
expression list[2] args;
expression E;
@@
-Statslog
+Debug
( E, fmt, args
-, 0, 0, 0
);
@@
char[] fmt : script:ocaml() { fmtn(fmt,1) };
expression list[1] args;
expression E;
@@
-Statslog
+Debug
( E, fmt, args
-, 0, 0, 0, 0
);
@@
expression E, S;
@@
-Statslog
+Debug
( E, S
-, 0, 0, 0, 0, 0
);
// And StatslogEtime
@@
char[] fmt : script:ocaml() { fmtn(fmt,4) };
expression list[4] args;
expression E;
@@
StatslogEtime( E, fmt, args
-, 0
);
@@
identifier Stats =~ "^Statslog";
@@
(
StatslogEtime
|
-Stats
+Debug
)

View File

@ -0,0 +1,216 @@
// Splice string `s` into the format string `fmtstring` replacing the
// %-parameter at position `pos`
@initialize:python@
@@
# regex from https://stackoverflow.com/questions/30011379/how-can-i-parse-a-c-format-string-in-python
import re
fmtstring = '''\
( # start of capture group 1
% # literal "%"
(?: # first option
(?:[-+0 #]{0,5}) # optional flags
(?:\d+|\*)? # width
(?:\.(?:\d+|\*))? # precision
(?:h|l|ll|w|I|I32|I64)? # size
[cCdiouxXeEfgGaAnpsSZ] # type
) | # OR
%%) # literal "%%"
'''
regex = re.compile(fmtstring, re.X)
def parse_format(f):
return tuple((m.span(), m.group()) for m in
regex.finditer(f))
def insert_at_pos(fmt, s, pos):
formats = parse_format(fmt)
span, format = formats[pos]
acc = fmt[:span[0]]
if s.startswith('"'):
acc += s[1:]
else:
acc += '" '
acc += s
if acc.endswith('"'):
acc = acc[:-1] + fmt[span[1]:]
else:
acc += ' "'
acc += fmt[span[1]:]
return acc
// rest of the file implements the same as 09-merge.cocci
// The main difference is that we only match on snprintf and Debug that are
// directly adjacent, not based on control flow information which trips
// coccinelle's model-checker
@shortcut@
identifier buf;
expression E, L;
expression list args_before, args, args_after;
expression format1, format2;
position p1, p2;
@@
snprintf@p1( buf, E, format1, args );
Debug@p2( L, format2, args_before, buf, args_after );
// use insert_at_pos above to construct the new format-string
@script:python shortcut_process@
format1 << shortcut.format1;
format2 << shortcut.format2;
args_before << shortcut.args_before;
merged;
@@
pos = len(args_before.elements)
coccinelle.merged = insert_at_pos(format2, format1, pos)
@shortcut_replace@
position shortcut.p1, shortcut.p2;
identifier shortcut_process.merged;
identifier buf;
expression E, L;
expression list args_before, args, args_after;
expression format1, format2;
@@
-snprintf@p1( buf, E, format1, args );
-Debug@p2( L, format2, args_before, buf, args_after );
+Debug( L, merged, args_before, args, args_after );
@shortcut_locked@
identifier buf;
expression E, L, lock;
expression list args_before, args, args_after;
expression format1, format2;
position p1, p2;
@@
ldap_pvt_thread_mutex_lock(lock);
snprintf@p1( buf, E, format1, args );
ldap_pvt_thread_mutex_unlock(lock);
Debug@p2( L, format2, args_before, buf, args_after );
// use insert_at_pos above to construct the new format-string
@script:python shortcut_locked_process@
format1 << shortcut_locked.format1;
format2 << shortcut_locked.format2;
args_before << shortcut_locked.args_before;
merged;
@@
pos = len(args_before.elements)
coccinelle.merged = insert_at_pos(format2, format1, pos)
@shortcut_locked_replace@
position shortcut_locked.p1, shortcut_locked.p2;
identifier shortcut_locked_process.merged;
identifier buf;
expression E, L, lock;
expression list args_before, args, args_after;
expression format1, format2;
@@
ldap_pvt_thread_mutex_lock(lock);
-snprintf@p1( buf, E, format1, args );
+Debug( L, merged, args_before, args, args_after );
ldap_pvt_thread_mutex_unlock(lock);
-Debug@p2( L, format2, args_before, buf, args_after );
// so long as we don't reference 'buf' afterwards, no need to keep it defined.
// A lot of pattern-matching is spelled out explicitly to work around the fact
// that the state space doesn't get compressed otherwise.
@@
type T;
identifier buf, id;
expression E, lock;
initializer I;
@@
{
-\( T buf = I; \| T buf; \)
(
ldap_pvt_thread_mutex_lock(lock);
|
)
(
Debug( ... );
&
... when != buf
)
(
ldap_pvt_thread_mutex_unlock(lock);
|
)
(
|
continue;
|
break;
|
goto id;
|
\(
return E;
\&
... when != buf
\)
)
}
// the rest identifies and removes a (newly-)reduntant LogTest check
@if_guard@
position p;
statement s;
@@
(
if ( ... ) {@p
Debug( ... );
} else s
|
if ( ... ) {@p
Debug( ... );
}
)
@else_guard@
position p;
statement s;
@@
if ( ... ) s
else {@p
Debug( ... );
}
@loop_guard@
position p;
@@
(
while ( ... ) {@p
Debug( ... );
}
|
for ( ...;...;... ) {@p
Debug( ... );
}
)
@@
position p != { if_guard.p , else_guard.p, loop_guard.p };
@@
-{@p
Debug( ... );
-}
@useless_if@
expression L;
@@
-if ( LogTest( L ) ) {
Debug( L, ... );
-}

View File

@ -0,0 +1,147 @@
// Note that this file has not actually been used in the end, since
// 07-shortcut.cocci covers everything we needed in the project, but being
// simpler, it makes the intent of 07-shortcut.cocci clearer
// Splice string `s` into the format string `fmtstring` replacing the
// %-parameter at position `pos`
@initialize:python@
@@
#regex from https://stackoverflow.com/questions/30011379/how-can-i-parse-a-c-format-string-in-python
import re
fmtstring = '''\
( # start of capture group 1
% # literal "%"
(?: # first option
(?:[-+0 #]{0,5}) # optional flags
(?:\d+|\*)? # width
(?:\.(?:\d+|\*))? # precision
(?:h|l|ll|w|I|I32|I64)? # size
[cCdiouxXeEfgGaAnpsSZ] # type
) | # OR
%%) # literal "%%"
'''
regex = re.compile(fmtstring, re.X)
def parse_format(f):
return tuple((m.span(), m.group()) for m in
regex.finditer(f))
def insert_at_pos(fmt, s, pos):
formats = parse_format(fmt)
span, format = formats[pos]
acc = fmt[:span[0]]
if s.startswith('"'):
acc += s[1:]
else:
acc += '" '
acc += s
if acc.endswith('"'):
acc = acc[:-1] + fmt[span[1]:]
else:
acc += ' "'
acc += fmt[span[1]:]
return acc
// Identify the redundant snprintfs (within a locked region)
@a exists@
expression lock, E, L;
expression list args_before, args, args_after;
identifier buf;
expression format1, format2;
type T;
position p1, p2;
@@
{
...
T buf;
...
ldap_pvt_thread_mutex_lock(lock);
...
snprintf@p1( buf, E, format1, args );
...
ldap_pvt_thread_mutex_unlock(lock);
...
Debug@p2( L, format2, args_before, buf, args_after );
...
}
// Merge the format strings with insert_at_pos above
@script:python a_process@
format1 << a.format1;
format2 << a.format2;
args_before << a.args_before;
merged;
@@
pos = len(args_before.elements)
coccinelle.merged = insert_at_pos(format2, format1, pos)
// And merge the two together, replacing the extra buffer that's not used anymore
@a_replace@
position a.p1, a.p2;
identifier a_process.merged;
expression lock, E, L;
expression list args_before, args, args_after;
identifier buf;
expression format1, format2;
type T;
@@
{
...
-T buf;
...
ldap_pvt_thread_mutex_lock(lock);
...
-snprintf@p1( buf, E, format1, args );
+Debug( L, merged, args_before, args, args_after );
...
ldap_pvt_thread_mutex_unlock(lock);
...
-Debug@p2( L, format2, args_before, buf, args_after );
...
}
// Once again (same as the 'a' series above, but those that remain to be sorted
// now don't need to stay within a locked region
@b exists@
expression E, L;
expression list args_before, args, args_after;
identifier buf;
expression format1, format2;
position p1, p2;
@@
snprintf@p1( buf, E, format1, args );
...
Debug@p2( L, format2, args_before, buf, args_after );
@script:python b_process@
format1 << b.format1;
format2 << b.format2;
args_before << b.args_before;
merged;
@@
pos = len(args_before.elements)
coccinelle.merged = insert_at_pos(format2, format1, pos)
@b_replace@
position b.p1, b.p2;
identifier b_process.merged;
expression E, L;
expression list args_before, args, args_after;
identifier buf;
expression format1, format2;
@@
-snprintf@p1( buf, E, format1, args );
+Debug( L, merged, args_before, args, args_after );
...
-Debug@p2( L, format2, args_before, buf, args_after );

View File

@ -0,0 +1,39 @@
Most of the project now depends on the compiler supporting C99 variadic
macros. This is used in the Debug() macro everywhere except libldap and
its dependencies.
From now on, any time Debug( level, fmt, args... ) is used, you can and
should provide the appropriate number of arguments. The coccinelle
patches in this directory implement the transformations used to bring
the project in line with this.
As we still aim to support libldap on platforms that only provide C89,
Debug0/1/2/3 macros are used instead.
If you need to adapt your own fork, see ITS#8731, the rest of this
README and scripts in this directory on what you'll need to achieve
this.
Coccinelle as of git hash e65a7bdc04ac9122acdae2353422c5736b7998ba from
https://github.com/coccinelle/coccinelle has been used to run the
transformations performed. One notable limitation at the time of writing
is that multi-part (format) strings are always merged onto the same line.
Some sources cannot be processed, nssov overlay being a prime example,
being wrapped in non-trivial macros.
The following semantic patches are involved:
- 03-libldap_Debug.cocci: converts the libraries to use the Debug[0123]
macros as appropriate
- 04-variadic.cocci: converts the rest of the project to use the Debug
macro with the right number of arguments (as opposed to padding with
zeroes)
- 09-merge.cocci will merge an 'snprintf(s, len, "fmt", args...);
Debug(level, "... %s ...", ..., s, ...);' sequence together
- 07-shortcut.cocci is actually used to apply the above since
coccinelle's model-checker seems to struggle with state space
explosion in some of the very long and complex functions we have -
09-merge.cocci doesn't finish in any reasonable time
The equivalence.iso and macros.h files aid coccinelle to parse our
sources correctly and simplify the semantic patches.

View File

@ -0,0 +1,12 @@
Expression
@ NULL @
@@
NULL <=> 0
Expression
@ underscore_func @
expression E;
@@
_(E) => E

View File

@ -0,0 +1,23 @@
#define LDAP_PF_LOCAL_SENDMSG_ARG(x)
#define LDAP_P(x) x
#define LDAP_F(x) extern x
#define LDAP_V(x) extern x
#define LDAP_GCCATTR(x)
#define LDAP_XSTRING(x) ""
#define LDAP_CONCAT(x,y) x
#define LDAP_CONST const
#define LDAP_BEGIN_DECL
#define LDAP_END_DECL
#define SLAP_EVENT_DECL
#define SLAP_EVENT_FNAME
/* contrib/slapd-modules/smbk5pwd/smbk5pwd.c */
#define HDB int*
#define BACKSQL_ARBITRARY_KEY
#define BACKSQL_IDNUMFMT "%llu"
#define BACKSQL_IDFMT "%s"

View File

@ -0,0 +1,73 @@
#!/bin/bash
set -e
PATCH_DIR=doc/devel/variadic_debug
SPATCH=${SPATCH:-spatch}
SPATCH_OPTS=( --macro-file-builtins "$PATCH_DIR/macros.h" )
#SPATCH_OPTS+=( --timeout 300 )
SED_TRANSFORMATIONS=()
# strip trailing whitespace copied from source on affected lines
SED_TRANSFORMATIONS+=( -e 's/^\(+.*\)\s\+$/\1/' )
# split out multipart strings back to original form (one per line)
SED_TRANSFORMATIONS+=( -e 's/^\(+\s*\)\(.*"\) \(".*\)"$/\1\2\n+\1\3/' )
# re-add whitespace around parentheses
SED_TRANSFORMATIONS+=( -e 's/^\(+.*Debug[0-3]\?(\)\s*$/\1 /' )
SED_TRANSFORMATIONS+=( -e 's/^\(+.*[^ ]\));$/\1 );/' )
# fix whitespace errors in source we touch
SED_TRANSFORMATIONS+=( -e 's/^\(+.*\) \t/\1\t\t/' )
SED_TRANSFORMATIONS+=( -e 's/^\(+\t*\) \{1,3\}\t/\1\t/' )
normalise() {
patch="$1"
shift
# iterate until we've reached fixpoint
while ! cmp "$patch" "${patch}.new"; do
if [ -e "${patch}.new" ]; then
mv -- "${patch}.new" "$patch"
fi
sed "${SED_TRANSFORMATIONS[@]}" -- "$patch" >"${patch}.new"
done
rediff "$patch" >"${patch}.new"
mv -- "${patch}.new" "$patch"
}
git add "$PATCH_DIR"
git commit -m "ITS#8731 Add the documentation and scripts"
git am "$PATCH_DIR/00-fixes.patch"
git am "$PATCH_DIR/01-logging.patch"
git am "$PATCH_DIR/02-manual.patch"
$SPATCH "${SPATCH_OPTS[@]}" -sp_file "$PATCH_DIR/03-libldap_Debug.cocci" \
-dir libraries/libldap \
>"$PATCH_DIR/03-libldap_Debug.patch"
normalise "$PATCH_DIR/03-libldap_Debug.patch"
git apply --index --directory libraries/libldap "$PATCH_DIR/03-libldap_Debug.patch"
git commit -m "ITS#8731 Apply $PATCH_DIR/03-libldap_Debug.cocci"
$SPATCH "${SPATCH_OPTS[@]}" -sp_file "$PATCH_DIR/04-variadic.cocci" \
-dir . \
>"$PATCH_DIR/04-variadic.patch"
normalise "$PATCH_DIR/04-variadic.patch"
git apply --index "$PATCH_DIR/04-variadic.patch"
git commit -m "ITS#8731 Apply $PATCH_DIR/04-variadic.cocci"
git am "$PATCH_DIR/05-back-sql.patch"
git am "$PATCH_DIR/06-nssov.patch"
$SPATCH "${SPATCH_OPTS[@]}" -sp_file "$PATCH_DIR/07-shortcut.cocci" \
-dir . \
>"$PATCH_DIR/07-shortcut.patch"
normalise "$PATCH_DIR/07-shortcut.patch"
git apply --index "$PATCH_DIR/07-shortcut.patch"
git commit -m "ITS#8731 Apply $PATCH_DIR/07-shortcut.cocci"
git am "$PATCH_DIR/08-snprintf-manual.patch"