Import Solar Designer's public domain MD5 for use by localedef.

Locale archives contain embedded MD5 hashes for integrity protection.
glibc's locale-reading code does not check these, but localedef does
generate them.  It was reusing crypt/md5.c for the implementation.
Rather than moving that file over to locale/, import Alexander
Peslyak (aka Solar Designer)'s public domain MD5 implementation, which
is simpler, and in particular, completely agnostic to endianness.  The
API uses different names, because Peslyak wanted to be API-compatible
with openssl, but is otherwise equivalent.

glibc's *tests* of the MD5 core (crypt/md5test.c and crypt/md5test-giant.c)
are transferred to the locale directory, and the new implementation is
verified to pass both.  (The "giant" test takes 90 seconds to run on a
2018-era x86; it was in xtests in crypt and it remains in xtests after
this patch.)  I converted both of them to the new test driver while I
was in there.

crypt/md5c-test.c is a test of MD5 *password hashing*, not of the MD5
core, so it is not moved.

This patch was compile-tested with both --enable-crypt and the default
--disable-crypt.
This commit is contained in:
Zack Weinberg 2023-09-21 14:45:59 -04:00
parent 5d00c201b9
commit f59011763c
9 changed files with 378 additions and 59 deletions

View File

@ -42,11 +42,7 @@ LDLIBS-crypt.so = -lfreebl3
else
libcrypt-routines += md5 sha256 sha512
tests += md5test sha256test sha512test
# The test md5test-giant uses up to 400 MB of RSS and runs on a fast
# machine over a minute.
xtests = md5test-giant
tests += sha256test sha512test
endif
include ../Rules
@ -56,8 +52,6 @@ md5-routines := md5 $(filter md5%,$(libcrypt-sysdep_routines))
sha256-routines := sha256 $(filter sha256%,$(libcrypt-sysdep_routines))
sha512-routines := sha512 $(filter sha512%,$(libcrypt-sysdep_routines))
$(objpfx)md5test: $(patsubst %, $(objpfx)%.o,$(md5-routines))
$(objpfx)md5test-giant: $(patsubst %, $(objpfx)%.o,$(md5-routines))
$(objpfx)sha256test: $(patsubst %, $(objpfx)%.o,$(sha256-routines))
$(objpfx)sha512test: $(patsubst %, $(objpfx)%.o,$(sha512-routines))
endif

View File

@ -27,7 +27,7 @@ headers = langinfo.h locale.h bits/locale.h \
routines = setlocale findlocale loadlocale loadarchive \
localeconv nl_langinfo nl_langinfo_l mb_cur_max \
newlocale duplocale freelocale uselocale
tests = tst-C-locale tst-locname tst-duplocale
tests = tst-C-locale tst-locname tst-duplocale tst-md5
tests-container = tst-localedef-path-norm
categories = ctype messages monetary numeric time paper name \
address telephone measurement identification collate
@ -38,28 +38,31 @@ others = localedef locale
install-bin = localedef locale
extra-objs = $(localedef-modules:=.o) $(localedef-aux:=.o) \
$(locale-modules:=.o) $(lib-modules:=.o)
generated += C-translit.h
generated += C-translit.h
before-compile += $(objpfx)C-translit.h
extra-libs = libBrokenLocale
extra-libs-others = $(extra-libs)
# This test requires multiple gigabytes of address space (not necessarily
# committed RAM) and takes 90s to run on a workstation-grade x86-64 CPU
# from 2018.
xtests = tst-md5-giant
libBrokenLocale-routines = broken_cur_max
subdir-dirs = programs
vpath %.c programs ../crypt
vpath %.c programs
vpath %.h programs
vpath %.gperf programs
localedef-modules := localedef $(categories:%=ld-%) \
charmap linereader locfile \
repertoire locarchive
localedef-aux := md5
repertoire locarchive md5
locale-modules := locale locale-spec
lib-modules := charmap-dir simple-hash xmalloc xstrdup \
record-status xasprintf
GPERF = gperf
GPERFFLAGS = -acCgopt -k1,2,5,9,$$ -L ANSI-C
@ -69,8 +72,6 @@ endif
include ../Rules
CFLAGS-md5.c += -I../crypt
programs/%-kw.h: programs/%-kw.gperf
cd programs \
&& $(GPERF) $(GPERFFLAGS) -N $(@F:-kw.h=_hash) $(<F) > $(@F).new
@ -80,6 +81,7 @@ $(objpfx)localedef: $(localedef-modules:%=$(objpfx)%.o)
$(objpfx)localedef: $(localedef-aux:%=$(objpfx)%.o)
$(objpfx)locale: $(locale-modules:%=$(objpfx)%.o)
$(objpfx)localedef $(objpfx)locale: $(lib-modules:%=$(objpfx)%.o)
$(objpfx)tst-md5 $(objpfx)tst-md5-giant: $(objpfx)md5.o
$(objpfx)C-translit.h: C-translit.h.in gen-translit.py
$(make-target-directory)

View File

@ -62,7 +62,7 @@ struct namehashent
struct sumhashent
{
/* MD5 sum. */
char sum[16];
unsigned char sum[16];
/* Offset of the file in the archive. */
uint32_t file_offset;
};
@ -101,7 +101,7 @@ typedef struct locale_category_data
{
off64_t size;
void *addr;
char sum[16];
unsigned char sum[16];
} locale_data_t[__LC_LAST];
#endif /* locarchive.h */

281
locale/md5.c Normal file
View File

@ -0,0 +1,281 @@
/*
* This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
* MD5 Message-Digest Algorithm (RFC 1321).
*
* Homepage:
* http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
*
* Author:
* Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
*
* This software was written by Alexander Peslyak in 2001. No copyright is
* claimed, and the software is hereby placed in the public domain.
* In case this attempt to disclaim copyright and place the software in the
* public domain is deemed null and void, then the software is
* Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
* general public under the following terms:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted.
*
* There's ABSOLUTELY NO WARRANTY, express or implied.
*
* (This is a heavily cut-down "BSD license".)
*
* This differs from Colin Plumb's older public domain implementation in that
* no exactly 32-bit integer data type is required (any 32-bit or wider
* unsigned integer data type will do), there's no compile-time endianness
* configuration, and the function prototypes match OpenSSL's. No code from
* Colin Plumb's implementation has been reused; this comment merely compares
* the properties of the two independent implementations.
*
* The primary goals of this implementation are portability and ease of use.
* It is meant to be fast, but not as fast as possible. Some known
* optimizations are not included to reduce source code size and avoid
* compile-time configuration.
*/
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include "md5.h"
#include <stdlib.h>
#include <string.h>
/*
 * The MD5 round functions.  Each one combines three state words bitwise.
 *
 * F and G are written without an AND-NOT operation (the RFC 1321
 * definitions use one), as in Colin Plumb's implementation:
 *   F picks the bits of c where b is set and the bits of d elsewhere;
 *   G picks the bits of b where d is set and the bits of c elsewhere.
 * H and H2 both yield b ^ c ^ d and differ only in how the two XORs
 * are grouped.
 */
#define F(b, c, d)			((((c) ^ (d)) & (b)) ^ (d))
#define G(b, c, d)			((((b) ^ (c)) & (d)) ^ (c))
#define H(b, c, d)			(((b) ^ (c)) ^ (d))
#define H2(b, c, d)			((b) ^ ((c) ^ (d)))
#define I(b, c, d)			((~(d) | (b)) ^ (c))
/*
 * The MD5 transformation for all four rounds.
 *
 * One step: add f(b, c, d), the message word x and the constant t to a,
 * rotate a left by s bits, then add b.  The "& 0xffffffff" ahead of the
 * right shift drops anything above bit 31, so the rotate stays a 32-bit
 * rotate even if a is held in a type wider than 32 bits.
 *
 * The expansion is three complete statements, each with its own
 * semicolon, so invocations are written WITHOUT a trailing semicolon and
 * must not be used as the unbraced body of an if/else/loop.  x is
 * expanded exactly once (which matters when it is SET(n)); a, b and s
 * are expanded more than once.
 */
#define STEP(f, a, b, c, d, x, t, s) \
(a) += f((b), (c), (d)) + (x) + (t); \
(a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \
(a) += (b);
/*
 * SET reads 4 input bytes in little-endian byte order and stores them in a
 * properly aligned word in host byte order.
 *
 * SET(n) assembles input word n from ptr[4n .. 4n+3], stores it in
 * ctx->block[n], and evaluates to that word.  GET(n) reads ctx->block[n]
 * back, so it is only meaningful after SET(n) has run for the current
 * 64-byte block.
 *
 * Both macros use the identifiers `ctx' and `ptr' from the scope in which
 * they are expanded; they are not passed as arguments.
 */
#define SET(n) \
(ctx->block[(n)] = \
(uint32_t)ptr[(n) * 4] | \
((uint32_t)ptr[(n) * 4 + 1] << 8) | \
((uint32_t)ptr[(n) * 4 + 2] << 16) | \
((uint32_t)ptr[(n) * 4 + 3] << 24))
#define GET(n) \
(ctx->block[(n)])
/*
 * This processes one or more 64-byte data blocks, but does NOT update the
 * byte counter (ctx->buflen).  There are no alignment requirements: the
 * input is read one byte at a time through SET.
 *
 * ctx   hash state; a, b, c, d are updated and block[] is used as scratch
 *       space for the decoded message words.
 * data  first byte of the input.
 * size  number of bytes to process.  It MUST be a non-zero multiple of
 *       64: the loop runs once before testing, and stops only when
 *       "size -= 64" reaches exactly zero.  It is a size_t so that it
 *       has the same type as the length MD5_Update receives.
 *
 * Returns a pointer to the first byte after the processed input.
 */
static const void *body(MD5_CTX *ctx, const void *data, size_t size)
{
	const unsigned char *ptr;
	uint32_t a, b, c, d;
	uint32_t saved_a, saved_b, saved_c, saved_d;

	ptr = (const unsigned char *)data;

	/* Work on local copies of the state; written back after the loop. */
	a = ctx->a;
	b = ctx->b;
	c = ctx->c;
	d = ctx->d;

	do {
		saved_a = a;
		saved_b = b;
		saved_c = c;
		saved_d = d;

/* Round 1: decodes each input word with SET as it is first used. */
		STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
		STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
		STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
		STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
		STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
		STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
		STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
		STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
		STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
		STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
		STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
		STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
		STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
		STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
		STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
		STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)

/* Round 2: from here on the words are read back with GET. */
		STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
		STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
		STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
		STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
		STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
		STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
		STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
		STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
		STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
		STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
		STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
		STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
		STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
		STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
		STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
		STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)

/* Round 3: alternates the two groupings of the same XOR function. */
		STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
		STEP(H2, d, a, b, c, GET(8), 0x8771f681, 11)
		STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
		STEP(H2, b, c, d, a, GET(14), 0xfde5380c, 23)
		STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
		STEP(H2, d, a, b, c, GET(4), 0x4bdecfa9, 11)
		STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
		STEP(H2, b, c, d, a, GET(10), 0xbebfbc70, 23)
		STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
		STEP(H2, d, a, b, c, GET(0), 0xeaa127fa, 11)
		STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
		STEP(H2, b, c, d, a, GET(6), 0x04881d05, 23)
		STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
		STEP(H2, d, a, b, c, GET(12), 0xe6db99e5, 11)
		STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
		STEP(H2, b, c, d, a, GET(2), 0xc4ac5665, 23)

/* Round 4 */
		STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
		STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
		STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
		STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
		STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
		STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
		STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
		STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
		STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
		STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
		STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
		STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
		STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
		STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
		STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
		STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)

		/* Add this block's result to the state it started from. */
		a += saved_a;
		b += saved_b;
		c += saved_c;
		d += saved_d;

		ptr += 64;
	} while (size -= 64);

	ctx->a = a;
	ctx->b = b;
	ctx->c = c;
	ctx->d = d;

	return ptr;
}
/*
 * Prepare CTX for hashing a new message: load the four state words with
 * their fixed starting values and reset the count of bytes consumed.
 */
void MD5_Init(MD5_CTX *ctx)
{
	static const uint32_t initial_state[4] = {
		0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476
	};

	ctx->buflen = 0;

	ctx->a = initial_state[0];
	ctx->b = initial_state[1];
	ctx->c = initial_state[2];
	ctx->d = initial_state[3];
}
/*
 * Feed SIZE bytes starting at DATA into the hash state CTX.
 *
 * Input is consumed in 64-byte blocks.  Any bytes that do not complete a
 * block are kept in ctx->buffer and are combined with the data supplied
 * by the next call (or padded out by MD5_Final).  May be called any
 * number of times between MD5_Init and MD5_Final.
 *
 * A call with SIZE == 0 is a no-op and DATA is not examined, so a
 * (NULL, 0) pair is accepted.
 */
void MD5_Update(MD5_CTX *ctx, const void *data, size_t size)
{
	const unsigned char *src = data;
	size_t used, available;

	/*
	 * Nothing to add.  Returning here also keeps a null DATA away
	 * from memcpy, whose pointer arguments must be valid even when
	 * the length is zero.
	 */
	if (size == 0)
		return;

	/* Bytes already waiting in ctx->buffer from earlier calls. */
	used = ctx->buflen & 0x3f;

	/*
	 * Note: It does not matter if this addition overflows, because
	 * buflen is only used to compute the tail padding, and RFC 1321
	 * specifies that only the low 64 bits of the message length are
	 * used in the tail padding.
	 */
	ctx->buflen += size;

	if (used) {
		available = 64 - used;

		/* Still not a full block: just stash the new bytes. */
		if (size < available) {
			memcpy(&ctx->buffer[used], src, size);
			return;
		}

		/* Complete the pending block and process it. */
		memcpy(&ctx->buffer[used], src, available);
		src += available;
		size -= available;
		body(ctx, ctx->buffer, 64);
	}

	/* Process all remaining whole blocks straight from the input. */
	if (size >= 64) {
		src = body(ctx, src, size & ~(size_t)0x3f);
		size &= 0x3f;
	}

	/* Keep the trailing partial block (possibly empty) for later. */
	memcpy(ctx->buffer, src, size);
}
/*
 * OUT stores the low 32 bits of src into dst[0..3] in little-endian byte
 * order (least significant byte first).
 *
 * The expansion is four complete statements, each with its own semicolon,
 * so it must not be used as the unbraced body of an if/else/loop; a
 * trailing semicolon at the call site is harmless but not required.
 * Both dst and src are expanded four times.
 */
#define OUT(dst, src) \
(dst)[0] = (unsigned char)(src); \
(dst)[1] = (unsigned char)((src) >> 8); \
(dst)[2] = (unsigned char)((src) >> 16); \
(dst)[3] = (unsigned char)((src) >> 24);
/*
 * Finish the hash held in CTX and write the 16-byte digest to RESULT.
 *
 * Appends the padding (a single 0x80 byte followed by zeros) and the
 * message length in bits as a 64-bit little-endian value, so that the
 * padded message ends on a 64-byte boundary, processes the final
 * block(s), and then serialises the four state words little-endian.
 */
void MD5_Final(uint8_t result[16], MD5_CTX *ctx)
{
	size_t fill = ctx->buflen & 0x3f;
	uint64_t bit_count = ctx->buflen << 3;
	int i;

	ctx->buffer[fill++] = 0x80;

	/*
	 * If fewer than 8 bytes remain, the length field does not fit in
	 * this block: zero-fill it, process it, and start a fresh one.
	 */
	if (64 - fill < 8) {
		memset(&ctx->buffer[fill], 0, 64 - fill);
		body(ctx, ctx->buffer, 64);
		fill = 0;
	}

	/* Zero everything up to the length field at bytes 56..63. */
	memset(&ctx->buffer[fill], 0, 56 - fill);

	for (i = 0; i < 8; i++)
		ctx->buffer[56 + i] = (uint8_t)(bit_count >> (8 * i));

	body(ctx, ctx->buffer, 64);

	OUT(&result[0], ctx->a)
	OUT(&result[4], ctx->b)
	OUT(&result[8], ctx->c)
	OUT(&result[12], ctx->d)
}
/*
 * One-shot helper: compute the MD5 digest of the SIZE bytes at DATA and
 * store the 16 digest bytes in RESULT, using a context private to this
 * call.
 */
void MD5_Buffer(const void *data, size_t size, uint8_t result[16])
{
	MD5_CTX state;

	MD5_Init(&state);
	MD5_Update(&state, data, size);
	MD5_Final(result, &state);
}

45
locale/md5.h Normal file
View File

@ -0,0 +1,45 @@
/*
* This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
* MD5 Message-Digest Algorithm (RFC 1321).
*
* Homepage:
* http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
*
* Author:
* Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
*
* This software was written by Alexander Peslyak in 2001. No copyright is
* claimed, and the software is hereby placed in the public domain.
* In case this attempt to disclaim copyright and place the software in the
* public domain is deemed null and void, then the software is
* Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
* general public under the following terms:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted.
*
* There's ABSOLUTELY NO WARRANTY, express or implied.
*
* See md5.c for more information.
*/
#ifndef _LOCALE_PROGS_MD5_H
#define _LOCALE_PROGS_MD5_H 1

#include <stddef.h>
#include <stdint.h>

/* State of an MD5 computation in progress. */
typedef struct {
	/* Total number of message bytes passed to MD5_Update so far. */
	uint64_t buflen;
	/* The four 32-bit words of the running hash state. */
	uint32_t a;
	uint32_t b;
	uint32_t c;
	uint32_t d;
	/* Scratch: the 16 decoded words of the block being processed. */
	uint32_t block[16];
	/* Input bytes that do not yet make up a full 64-byte block. */
	uint8_t buffer[64];
} MD5_CTX;

/* Set CTX up to hash a new message. */
extern void MD5_Init(MD5_CTX *ctx);

/* Add the SIZE bytes at DATA to the message being hashed in CTX. */
extern void MD5_Update(MD5_CTX *ctx, const void *data, size_t size);

/* Finish the message in CTX and store its 16-byte digest in RESULT. */
extern void MD5_Final(uint8_t result[16], MD5_CTX *ctx);

/* Hash the SIZE bytes at DATA in one call; the digest goes to RESULT. */
extern void MD5_Buffer(const void *data, size_t size, uint8_t result[16]);

#endif /* _LOCALE_PROGS_MD5_H */

View File

@ -41,11 +41,11 @@
#include <libc-mmap.h>
#include <libc-pointer-arith.h>
#include "../../crypt/md5.h"
#include "../localeinfo.h"
#include "../locarchive.h"
#include "localedef.h"
#include "locfile.h"
#include "md5.h"
/* Define the hash function. We define the function as static inline.
We must change the name so as not to conflict with simple-hash.h. */
@ -499,8 +499,8 @@ enlarge_archive (struct locarhandle *ah, const struct locarhead *head)
old_data[idx].addr
= ((char *) ah->addr + GET (oldlocrec->record[idx].offset));
__md5_buffer (old_data[idx].addr, old_data[idx].size,
old_data[idx].sum);
MD5_Buffer (old_data[idx].addr, old_data[idx].size,
old_data[idx].sum);
}
if (cnt > 0 && oldlocrecarray[cnt - 1].locrec == oldlocrec)
@ -908,7 +908,7 @@ add_locale (struct locarhandle *ah,
memcpy (ptr, data[cnt].addr, data[cnt].size);
ptr += (data[cnt].size + 15) & -16;
}
__md5_buffer (data[LC_ALL].addr, data[LC_ALL].size, data[LC_ALL].sum);
MD5_Buffer (data[LC_ALL].addr, data[LC_ALL].size, data[LC_ALL].sum);
/* For each locale category data set determine whether the same data
is already somewhere in the archive. */
@ -1501,7 +1501,7 @@ add_locales_to_archive (size_t nlist, char *list[], bool replace)
}
data[cnt].size = st.st_size;
__md5_buffer (data[cnt].addr, st.st_size, data[cnt].sum);
MD5_Buffer (data[cnt].addr, st.st_size, data[cnt].sum);
/* We don't need the file descriptor anymore. */
close (fd);

View File

@ -30,11 +30,11 @@
#include <assert.h>
#include <wchar.h>
#include "../../crypt/md5.h"
#include "localedef.h"
#include "localeinfo.h"
#include "locfile.h"
#include "simple-hash.h"
#include "../md5.h"
#include "locfile-kw.h"
@ -738,8 +738,8 @@ write_locale_data (const char *output_path, int catidx, const char *category,
endp = mempcpy (endp, vec[cnt].iov_base, vec[cnt].iov_len);
/* Compute the MD5 sum for the data. */
__md5_buffer (to_archive[catidx].addr, to_archive[catidx].size,
to_archive[catidx].sum);
MD5_Buffer (to_archive[catidx].addr, to_archive[catidx].size,
to_archive[catidx].sum);
return;
}

View File

@ -21,15 +21,15 @@
#include <string.h>
#include <sys/mman.h>
#include <support/test-driver.h>
#include "md5.h"
/* This test will not work with 32-bit size_t, so let it succeed
there. */
/* This test will not work with 32-bit size_t. */
#if SIZE_MAX <= UINT32_MAX
static int
do_test (void)
{
return 0;
return EXIT_UNSUPPORTED;
}
#else
@ -58,7 +58,7 @@ static const struct test_data_s
};
static int
report (const char *id, const char *md5, size_t len, const char *ref)
report (const char *id, const uint8_t *md5, size_t len, const char *ref)
{
if (memcmp (md5, ref, 16))
{
@ -68,38 +68,34 @@ report (const char *id, const char *md5, size_t len, const char *ref)
return 0;
}
/* Test md5 in a single md5_process_bytes call. */
/* Test feeding the data to MD5_Update all at once. */
static int
test_single (void *buf, size_t len, const char *ref)
{
char sum[16];
struct md5_ctx ctx;
__md5_init_ctx (&ctx);
__md5_process_bytes (buf, len, &ctx);
__md5_finish_ctx (&ctx, sum);
uint8_t sum[16];
MD5_Buffer(buf, len, sum);
return report ("single", sum, len, ref);
}
/* Test md5 with two md5_process_bytes calls to trigger a
different path in md5_process_block for sizes > 2 GB. */
/* Test feeding the data in two chunks, first the initial 2GB and
then the rest. */
static int
test_double (void *buf, size_t len, const char *ref)
{
char sum[16];
struct md5_ctx ctx;
uint8_t sum[16];
MD5_CTX ctx;
__md5_init_ctx (&ctx);
MD5_Init (&ctx);
if (len >= CONST_2G)
{
__md5_process_bytes (buf, CONST_2G, &ctx);
__md5_process_bytes (buf + CONST_2G, len - CONST_2G, &ctx);
MD5_Update (&ctx, buf, CONST_2G);
MD5_Update (&ctx, buf + CONST_2G, len - CONST_2G);
}
else
__md5_process_bytes (buf, len, &ctx);
MD5_Update (&ctx, buf, len);
__md5_finish_ctx (&ctx, sum);
MD5_Final (sum, &ctx);
return report ("double", sum, len, ref);
}
@ -122,9 +118,9 @@ do_test (void)
for (j = 0; j < sizeof (test_data) / sizeof (struct test_data_s); j++)
{
if (test_single (buf, test_data[j].len, test_data[j].ref))
result = 1;
return 1;
if (test_double (buf, test_data[j].len, test_data[j].ref))
result = 1;
return 1;
}
return result;
@ -133,5 +129,4 @@ do_test (void)
/* This takes 90s on a fast machine.  */
#define TIMEOUT 480
#define TEST_FUNCTION do_test ()
#include "../test-skeleton.c"
#include <support/test-driver.c>

View File

@ -25,11 +25,11 @@ static const struct
};
int
main (int argc, char *argv[])
static int
do_test(void)
{
struct md5_ctx ctx;
char sum[16];
MD5_CTX ctx;
unsigned char sum[16];
int result = 0;
int cnt;
@ -37,17 +37,19 @@ main (int argc, char *argv[])
{
int i;
__md5_init_ctx (&ctx);
__md5_process_bytes (tests[cnt].input, strlen (tests[cnt].input), &ctx);
__md5_finish_ctx (&ctx, sum);
MD5_Init (&ctx);
MD5_Update (&ctx, tests[cnt].input, strlen (tests[cnt].input));
MD5_Final (sum, &ctx);
result |= memcmp (tests[cnt].result, sum, 16);
__md5_init_ctx (&ctx);
MD5_Init (&ctx);
for (i = 0; tests[cnt].input[i] != '\0'; ++i)
__md5_process_bytes (&tests[cnt].input[i], 1, &ctx);
__md5_finish_ctx (&ctx, sum);
MD5_Update (&ctx, &tests[cnt].input[i], 1);
MD5_Final (sum, &ctx);
result |= memcmp (tests[cnt].result, sum, 16);
}
return result;
}
#include <support/test-driver.c>