openldap/libraries/liblmdb/mdb_load.c
Orivej Desh 00f635d304 ITS#8320 mdb_load: fix loading data from simple text files
mdb_load -T was supposed to read escaped text, but 21b51cb7 "Add mdb_load"
made it read hex.
2015-11-23 02:00:49 +00:00

457 lines
10 KiB
C

/* mdb_load.c - memory-mapped database load tool */
/*
* Copyright 2011-2015 Howard Chu, Symas Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted only as authorized by the OpenLDAP
* Public License.
*
* A copy of this license is available in the file LICENSE in the
* top-level directory of the distribution or, alternatively, at
* <http://www.OpenLDAP.org/license.html>.
*/
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <ctype.h>
#include <unistd.h>
#include "lmdb.h"
#define PRINT 1
#define NOHDR 2
static int mode;
static char *subname = NULL;
static size_t lineno;
static int version;
static int flags;
static char *prog;
static int Eof;
static MDB_envinfo info;
static MDB_val kbuf, dbuf;
#ifdef _WIN32
#define Z "I"
#else
#define Z "z"
#endif
#define STRLENOF(s) (sizeof(s)-1)
typedef struct flagbit {
int bit;
char *name;
int len;
} flagbit;
#define S(s) s, STRLENOF(s)
flagbit dbflags[] = {
{ MDB_REVERSEKEY, S("reversekey") },
{ MDB_DUPSORT, S("dupsort") },
{ MDB_INTEGERKEY, S("integerkey") },
{ MDB_DUPFIXED, S("dupfixed") },
{ MDB_INTEGERDUP, S("integerdup") },
{ MDB_REVERSEDUP, S("reversedup") },
{ 0, NULL, 0 }
};
static void readhdr(void)
{
char *ptr;
while (fgets(dbuf.mv_data, dbuf.mv_size, stdin) != NULL) {
lineno++;
if (!strncmp(dbuf.mv_data, "VERSION=", STRLENOF("VERSION="))) {
version=atoi((char *)dbuf.mv_data+STRLENOF("VERSION="));
if (version > 3) {
fprintf(stderr, "%s: line %" Z "d: unsupported VERSION %d\n",
prog, lineno, version);
exit(EXIT_FAILURE);
}
} else if (!strncmp(dbuf.mv_data, "HEADER=END", STRLENOF("HEADER=END"))) {
break;
} else if (!strncmp(dbuf.mv_data, "format=", STRLENOF("format="))) {
if (!strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "print", STRLENOF("print")))
mode |= PRINT;
else if (strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "bytevalue", STRLENOF("bytevalue"))) {
fprintf(stderr, "%s: line %" Z "d: unsupported FORMAT %s\n",
prog, lineno, (char *)dbuf.mv_data+STRLENOF("FORMAT="));
exit(EXIT_FAILURE);
}
} else if (!strncmp(dbuf.mv_data, "database=", STRLENOF("database="))) {
ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
if (ptr) *ptr = '\0';
if (subname) free(subname);
subname = strdup((char *)dbuf.mv_data+STRLENOF("database="));
} else if (!strncmp(dbuf.mv_data, "type=", STRLENOF("type="))) {
if (strncmp((char *)dbuf.mv_data+STRLENOF("type="), "btree", STRLENOF("btree"))) {
fprintf(stderr, "%s: line %" Z "d: unsupported type %s\n",
prog, lineno, (char *)dbuf.mv_data+STRLENOF("type="));
exit(EXIT_FAILURE);
}
} else if (!strncmp(dbuf.mv_data, "mapaddr=", STRLENOF("mapaddr="))) {
int i;
ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
if (ptr) *ptr = '\0';
i = sscanf((char *)dbuf.mv_data+STRLENOF("mapaddr="), "%p", &info.me_mapaddr);
if (i != 1) {
fprintf(stderr, "%s: line %" Z "d: invalid mapaddr %s\n",
prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapaddr="));
exit(EXIT_FAILURE);
}
} else if (!strncmp(dbuf.mv_data, "mapsize=", STRLENOF("mapsize="))) {
int i;
ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
if (ptr) *ptr = '\0';
i = sscanf((char *)dbuf.mv_data+STRLENOF("mapsize="), "%" Z "u", &info.me_mapsize);
if (i != 1) {
fprintf(stderr, "%s: line %" Z "d: invalid mapsize %s\n",
prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapsize="));
exit(EXIT_FAILURE);
}
} else if (!strncmp(dbuf.mv_data, "maxreaders=", STRLENOF("maxreaders="))) {
int i;
ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
if (ptr) *ptr = '\0';
i = sscanf((char *)dbuf.mv_data+STRLENOF("maxreaders="), "%u", &info.me_maxreaders);
if (i != 1) {
fprintf(stderr, "%s: line %" Z "d: invalid maxreaders %s\n",
prog, lineno, (char *)dbuf.mv_data+STRLENOF("maxreaders="));
exit(EXIT_FAILURE);
}
} else {
int i;
for (i=0; dbflags[i].bit; i++) {
if (!strncmp(dbuf.mv_data, dbflags[i].name, dbflags[i].len) &&
((char *)dbuf.mv_data)[dbflags[i].len] == '=') {
flags |= dbflags[i].bit;
break;
}
}
if (!dbflags[i].bit) {
ptr = memchr(dbuf.mv_data, '=', dbuf.mv_size);
if (!ptr) {
fprintf(stderr, "%s: line %" Z "d: unexpected format\n",
prog, lineno);
exit(EXIT_FAILURE);
} else {
*ptr = '\0';
fprintf(stderr, "%s: line %" Z "d: unrecognized keyword ignored: %s\n",
prog, lineno, (char *)dbuf.mv_data);
}
}
}
}
}
static void badend(void)
{
fprintf(stderr, "%s: line %" Z "d: unexpected end of input\n",
prog, lineno);
}
static int unhex(unsigned char *c2)
{
int x, c;
x = *c2++ & 0x4f;
if (x & 0x40)
x -= 55;
c = x << 4;
x = *c2 & 0x4f;
if (x & 0x40)
x -= 55;
c |= x;
return c;
}
static int readline(MDB_val *out, MDB_val *buf)
{
unsigned char *c1, *c2, *end;
size_t len, l2;
int c;
if (!(mode & NOHDR)) {
c = fgetc(stdin);
if (c == EOF) {
Eof = 1;
return EOF;
}
if (c != ' ') {
lineno++;
if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) {
badend:
Eof = 1;
badend();
return EOF;
}
if (c == 'D' && !strncmp(buf->mv_data, "ATA=END", STRLENOF("ATA=END")))
return EOF;
goto badend;
}
}
if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) {
Eof = 1;
return EOF;
}
lineno++;
c1 = buf->mv_data;
len = strlen((char *)c1);
l2 = len;
/* Is buffer too short? */
while (c1[len-1] != '\n') {
buf->mv_data = realloc(buf->mv_data, buf->mv_size*2);
if (!buf->mv_data) {
Eof = 1;
fprintf(stderr, "%s: line %" Z "d: out of memory, line too long\n",
prog, lineno);
return EOF;
}
c1 = buf->mv_data;
c1 += l2;
if (fgets((char *)c1, buf->mv_size+1, stdin) == NULL) {
Eof = 1;
badend();
return EOF;
}
buf->mv_size *= 2;
len = strlen((char *)c1);
l2 += len;
}
c1 = c2 = buf->mv_data;
len = l2;
c1[--len] = '\0';
end = c1 + len;
if (mode & PRINT) {
while (c2 < end) {
if (*c2 == '\\') {
if (c2[1] == '\\') {
c1++; c2 += 2;
} else {
if (c2+3 > end || !isxdigit(c2[1]) || !isxdigit(c2[2])) {
Eof = 1;
badend();
return EOF;
}
*c1++ = unhex(++c2);
c2 += 2;
}
} else {
c1++; c2++;
}
}
} else {
/* odd length not allowed */
if (len & 1) {
Eof = 1;
badend();
return EOF;
}
while (c2 < end) {
if (!isxdigit(*c2) || !isxdigit(c2[1])) {
Eof = 1;
badend();
return EOF;
}
*c1++ = unhex(c2);
c2 += 2;
}
}
c2 = out->mv_data = buf->mv_data;
out->mv_size = c1 - c2;
return 0;
}
static void usage(void)
{
fprintf(stderr, "usage: %s [-V] [-f input] [-n] [-s name] [-N] [-T] dbpath\n", prog);
exit(EXIT_FAILURE);
}
int main(int argc, char *argv[])
{
int i, rc;
MDB_env *env;
MDB_txn *txn;
MDB_cursor *mc;
MDB_dbi dbi;
char *envname;
int envflags = 0, putflags = 0;
int dohdr = 0;
prog = argv[0];
if (argc < 2) {
usage();
}
/* -f: load file instead of stdin
* -n: use NOSUBDIR flag on env_open
* -s: load into named subDB
* -N: use NOOVERWRITE on puts
* -T: read plaintext
* -V: print version and exit
*/
while ((i = getopt(argc, argv, "f:ns:NTV")) != EOF) {
switch(i) {
case 'V':
printf("%s\n", MDB_VERSION_STRING);
exit(0);
break;
case 'f':
if (freopen(optarg, "r", stdin) == NULL) {
fprintf(stderr, "%s: %s: reopen: %s\n",
prog, optarg, strerror(errno));
exit(EXIT_FAILURE);
}
break;
case 'n':
envflags |= MDB_NOSUBDIR;
break;
case 's':
subname = strdup(optarg);
break;
case 'N':
putflags = MDB_NOOVERWRITE|MDB_NODUPDATA;
break;
case 'T':
mode |= NOHDR | PRINT;
break;
default:
usage();
}
}
if (optind != argc - 1)
usage();
dbuf.mv_size = 4096;
dbuf.mv_data = malloc(dbuf.mv_size);
if (!(mode & NOHDR))
readhdr();
envname = argv[optind];
rc = mdb_env_create(&env);
if (rc) {
fprintf(stderr, "mdb_env_create failed, error %d %s\n", rc, mdb_strerror(rc));
return EXIT_FAILURE;
}
mdb_env_set_maxdbs(env, 2);
if (info.me_maxreaders)
mdb_env_set_maxreaders(env, info.me_maxreaders);
if (info.me_mapsize)
mdb_env_set_mapsize(env, info.me_mapsize);
if (info.me_mapaddr)
envflags |= MDB_FIXEDMAP;
rc = mdb_env_open(env, envname, envflags, 0664);
if (rc) {
fprintf(stderr, "mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc));
goto env_close;
}
kbuf.mv_size = mdb_env_get_maxkeysize(env) * 2 + 2;
kbuf.mv_data = malloc(kbuf.mv_size);
while(!Eof) {
MDB_val key, data;
int batch = 0;
flags = 0;
if (!dohdr) {
dohdr = 1;
} else if (!(mode & NOHDR))
readhdr();
rc = mdb_txn_begin(env, NULL, 0, &txn);
if (rc) {
fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc));
goto env_close;
}
rc = mdb_open(txn, subname, flags|MDB_CREATE, &dbi);
if (rc) {
fprintf(stderr, "mdb_open failed, error %d %s\n", rc, mdb_strerror(rc));
goto txn_abort;
}
rc = mdb_cursor_open(txn, dbi, &mc);
if (rc) {
fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc));
goto txn_abort;
}
while(1) {
rc = readline(&key, &kbuf);
if (rc) /* rc == EOF */
break;
rc = readline(&data, &dbuf);
if (rc) {
fprintf(stderr, "%s: line %" Z "d: failed to read key value\n", prog, lineno);
goto txn_abort;
}
rc = mdb_cursor_put(mc, &key, &data, putflags);
if (rc == MDB_KEYEXIST && putflags)
continue;
if (rc) {
fprintf(stderr, "mdb_cursor_put failed, error %d %s\n", rc, mdb_strerror(rc));
goto txn_abort;
}
batch++;
if (batch == 100) {
rc = mdb_txn_commit(txn);
if (rc) {
fprintf(stderr, "%s: line %" Z "d: txn_commit: %s\n",
prog, lineno, mdb_strerror(rc));
goto env_close;
}
rc = mdb_txn_begin(env, NULL, 0, &txn);
if (rc) {
fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc));
goto env_close;
}
rc = mdb_cursor_open(txn, dbi, &mc);
if (rc) {
fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc));
goto txn_abort;
}
batch = 0;
}
}
rc = mdb_txn_commit(txn);
txn = NULL;
if (rc) {
fprintf(stderr, "%s: line %" Z "d: txn_commit: %s\n",
prog, lineno, mdb_strerror(rc));
goto env_close;
}
mdb_dbi_close(env, dbi);
}
txn_abort:
mdb_txn_abort(txn);
env_close:
mdb_env_close(env);
return rc ? EXIT_FAILURE : EXIT_SUCCESS;
}