Add mdb_load

This commit is contained in:
Howard Chu 2014-06-20 08:49:59 -07:00
parent 3e83b5b27b
commit 21b51cb746
4 changed files with 487 additions and 7 deletions

View File

@ -29,8 +29,8 @@ prefix = /usr/local
IHDRS = lmdb.h IHDRS = lmdb.h
ILIBS = liblmdb.a liblmdb.so ILIBS = liblmdb.a liblmdb.so
IPROGS = mdb_stat mdb_copy mdb_dump IPROGS = mdb_stat mdb_copy mdb_dump mdb_load
IDOCS = mdb_stat.1 mdb_copy.1 mdb_dump.1 IDOCS = mdb_stat.1 mdb_copy.1 mdb_dump.1 mdb_load.1
PROGS = $(IPROGS) mtest mtest2 mtest3 mtest4 mtest5 PROGS = $(IPROGS) mtest mtest2 mtest3 mtest4 mtest5
all: $(ILIBS) $(PROGS) all: $(ILIBS) $(PROGS)
@ -57,6 +57,7 @@ liblmdb.so: mdb.o midl.o
mdb_stat: mdb_stat.o liblmdb.a mdb_stat: mdb_stat.o liblmdb.a
mdb_copy: mdb_copy.o liblmdb.a mdb_copy: mdb_copy.o liblmdb.a
mdb_dump: mdb_dump.o liblmdb.a mdb_dump: mdb_dump.o liblmdb.a
mdb_load: mdb_load.o liblmdb.a
mtest: mtest.o liblmdb.a mtest: mtest.o liblmdb.a
mtest2: mtest2.o liblmdb.a mtest2: mtest2.o liblmdb.a
mtest3: mtest3.o liblmdb.a mtest3: mtest3.o liblmdb.a

View File

@ -43,9 +43,13 @@ names will be listed, no data will be output.
Dump an LMDB database which does not use subdirectories. Dump an LMDB database which does not use subdirectories.
.TP .TP
.BR \-p .BR \-p
If characters in either the key or data items are printing characters (as defined by isprint(3)), output them directly. This option permits users to use standard text editors and tools to modify the contents of databases. If characters in either the key or data items are printing characters (as
defined by isprint(3)), output them directly. This option permits users to
use standard text editors and tools to modify the contents of databases.
Note: different systems may have different notions about what characters are considered printing characters, and databases dumped in this manner may be less portable to external systems. Note: different systems may have different notions about what characters
are considered printing characters, and databases dumped in this manner may
be less portable to external systems.
.TP .TP
.BR \-a .BR \-a
Dump all of the subdatabases in the environment. Dump all of the subdatabases in the environment.
@ -57,9 +61,10 @@ Exit status is zero if no errors occur.
Errors result in a non-zero exit status and Errors result in a non-zero exit status and
a diagnostic message being written to standard error. a diagnostic message being written to standard error.
Dumping and reloading databases that use user-defined comparison functions will result in new databases that use the Dumping and reloading databases that use user-defined comparison functions
default comparison functions. \fB In this case it is quite likely that the reloaded database will be damaged beyond will result in new databases that use the default comparison functions.
repair permitting neither record storage nor retrieval.\fP \fBIn this case it is quite likely that the reloaded database will be
damaged beyond repair permitting neither record storage nor retrieval.\fP
The only available workaround is to modify the source for the The only available workaround is to modify the source for the
.BR mdb_load (1) .BR mdb_load (1)

View File

@ -0,0 +1,77 @@
.TH MDB_LOAD 1 "2014/06/20" "LMDB 0.9.14"
.\" Copyright 2014 Howard Chu, Symas Corp. All Rights Reserved.
.\" Copying restrictions apply. See COPYRIGHT/LICENSE.
.SH NAME
mdb_load \- LMDB environment import tool
.SH SYNOPSIS
.B mdb_load
.BR \ envpath
[\c
.BR \-V ]
[\c
.BI \-f \ file\fR]
[\c
.BR \-n ]
[\c
.BI \-s \ subdb\fR]
[\c
.BR \-N ]
[\c
.BR \-T ]
.SH DESCRIPTION
The
.B mdb_load
utility reads from the standard input and loads it into the
LMDB environment
.BR envpath .
The input to
.B mdb_load
must be in the output format specified by the
.BR mdb_dump (1)
utility or as specified by the
.B -T
option below.
.SH OPTIONS
.TP
.BR \-V
Write the library version number to the standard output, and exit.
.TP
.BR \-f \ file
Read from the specified file instead of from the standard input.
.TP
.BR \-n
Load an LMDB database which does not use subdirectories.
.TP
.BR \-s \ subdb
Load a specific subdatabase. If no database is specified, data is loaded into the main database.
.TP
.BR \-N
Don't overwrite existing records when loading into an already existing database; just skip them.
.TP
.BR \-T
Load data from simple text files. The input must be paired lines of text, where the first
line of the pair is the key item, and the second line of the pair is its corresponding
data item.
A simple escape mechanism, where newline and backslash (\\) characters are special, is
applied to the text input. Newline characters are interpreted as record separators.
Backslash characters in the text will be interpreted in one of two ways: If the backslash
character precedes another backslash character, the pair will be interpreted as a literal
backslash. If the backslash character precedes any other character, the two characters
following the backslash will be interpreted as a hexadecimal specification of a single
character; for example, \\0a is a newline character in the ASCII character set.
For this reason, any backslash or newline characters that naturally occur in the text
input must be escaped to avoid misinterpretation by
.BR mdb_load .
.SH DIAGNOSTICS
Exit status is zero if no errors occur.
Errors result in a non-zero exit status and
a diagnostic message being written to standard error.
.SH "SEE ALSO"
.BR mdb_dump (1)
.SH AUTHOR
Howard Chu of Symas Corporation <http://www.symas.com>

View File

@ -0,0 +1,397 @@
/* mdb_load.c - memory-mapped database load tool */
/*
* Copyright 2011-2014 Howard Chu, Symas Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted only as authorized by the OpenLDAP
* Public License.
*
* A copy of this license is available in the file LICENSE in the
* top-level directory of the distribution or, alternatively, at
* <http://www.OpenLDAP.org/license.html>.
*/
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <ctype.h>
#include <unistd.h>
#include "lmdb.h"
#define PRINT 1
#define NOHDR 2
static int mode;
static char *subname = NULL;
static size_t lineno;
static int version;
static int flags;
static char *prog;
static int eof;
static MDB_val kbuf, dbuf;
#define STRLENOF(s) (sizeof(s)-1)
typedef struct flagbit {
int bit;
char *name;
int len;
} flagbit;
#define S(s) s, STRLENOF(s)
flagbit dbflags[] = {
{ MDB_REVERSEKEY, S("reversekey") },
{ MDB_DUPSORT, S("dupsort") },
{ MDB_INTEGERKEY, S("integerkey") },
{ MDB_DUPFIXED, S("dupfixed") },
{ MDB_INTEGERDUP, S("integerdup") },
{ MDB_REVERSEDUP, S("reversedup") },
{ 0, NULL, 0 }
};
static const char hexc[] = "0123456789abcdef";
static void readhdr()
{
char *ptr;
while (fgets(dbuf.mv_data, dbuf.mv_size, stdin) != NULL) {
lineno++;
if (!strncmp(dbuf.mv_data, "VERSION=", STRLENOF("VERSION="))) {
version=atoi(dbuf.mv_data+STRLENOF("VERSION="));
if (version > 3) {
fprintf(stderr, "%s: line %zd: unsupported VERSION %d\n",
prog, lineno, version);
exit(EXIT_FAILURE);
}
} else if (!strncmp(dbuf.mv_data, "HEADER=END", STRLENOF("HEADER=END"))) {
break;
} else if (!strncmp(dbuf.mv_data, "format=", STRLENOF("format="))) {
if (!strncmp(dbuf.mv_data+STRLENOF("FORMAT="), "print", STRLENOF("print")))
mode |= PRINT;
else if (strncmp(dbuf.mv_data+STRLENOF("FORMAT="), "bytevalue", STRLENOF("bytevalue"))) {
fprintf(stderr, "%s: line %zd: unsupported FORMAT %s\n",
prog, lineno, (char *)dbuf.mv_data+STRLENOF("FORMAT="));
exit(EXIT_FAILURE);
}
} else if (!strncmp(dbuf.mv_data, "database=", STRLENOF("database="))) {
ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
if (ptr) *ptr = '\0';
if (subname) free(subname);
subname = strdup(dbuf.mv_data+STRLENOF("database="));
} else if (!strncmp(dbuf.mv_data, "type=", STRLENOF("type="))) {
if (strncmp(dbuf.mv_data+STRLENOF("type="), "btree", STRLENOF("btree"))) {
fprintf(stderr, "%s: line %zd: unsupported type %s\n",
prog, lineno, (char *)dbuf.mv_data+STRLENOF("type="));
exit(EXIT_FAILURE);
}
} else {
int i;
for (i=0; dbflags[i].bit; i++) {
if (!strncmp(dbuf.mv_data, dbflags[i].name, dbflags[i].len) &&
((char *)dbuf.mv_data)[dbflags[i].len] == '=') {
flags |= dbflags[i].bit;
break;
}
}
if (!dbflags[i].bit) {
ptr = memchr(dbuf.mv_data, '=', dbuf.mv_size);
if (!ptr) {
fprintf(stderr, "%s: line %zd: unexpected format\n",
prog, lineno);
exit(EXIT_FAILURE);
} else {
*ptr = '\0';
fprintf(stderr, "%s: line %zd: unrecognized keyword ignored: %s\n",
prog, lineno, (char *)dbuf.mv_data);
}
}
}
}
}
static void badend()
{
fprintf(stderr, "%s: line %zd: unexpected end of input\n",
prog, lineno);
}
static int unhex(unsigned char *c2)
{
int x, c;
x = *c2++ & 0x4f;
if (x & 0x40)
x -= 54;
c = x << 4;
x = *c2 & 0x4f;
if (x & 0x40)
x -= 54;
c |= x;
return c;
}
static int readline(MDB_val *out, MDB_val *buf)
{
unsigned char *c1, *c2, *end;
size_t len;
int c;
if (!(mode & NOHDR)) {
c = fgetc(stdin);
if (c == EOF) {
eof = 1;
return EOF;
}
if (c != ' ') {
if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) {
badend:
eof = 1;
badend();
return EOF;
}
if (c == 'D' && !strncmp(buf->mv_data, "ATA=END", STRLENOF("ATA=END")))
return EOF;
goto badend;
}
}
if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) {
eof = 1;
return EOF;
}
lineno++;
c1 = buf->mv_data;
len = strlen((char *)c1);
/* Is buffer too short? */
while (c1[len-1] != '\n') {
buf->mv_data = realloc(buf->mv_data, buf->mv_size*2);
if (!buf->mv_data) {
eof = 1;
fprintf(stderr, "%s: line %zd: out of memory, line too long\n",
prog, lineno);
return EOF;
}
c1 = buf->mv_data;
c1 += buf->mv_size;
if (fgets((char *)c1, buf->mv_size, stdin) == NULL) {
eof = 1;
badend();
return EOF;
}
buf->mv_size *= 2;
len = strlen((char *)c1);
}
c1 = c2 = buf->mv_data;
len = strlen((char *)c1);
c1[--len] = '\0';
end = c1 + len;
if (mode & PRINT) {
while (c2 < end) {
if (*c2 == '\\') {
if (c2[1] == '\\') {
c1++; c2 += 2;
} else {
if (c2+3 >= end || !isxdigit(c2[1]) || !isxdigit(c2[2])) {
eof = 1;
badend();
return EOF;
}
*c1++ = unhex(++c2);
c2 += 2;
}
} else {
c1++; c2++;
}
}
} else {
/* odd length not allowed */
if (len & 1) {
eof = 1;
badend();
return EOF;
}
while (c2 < end) {
if (!isxdigit(*c2) || !isxdigit(c2[1])) {
eof = 1;
badend();
return EOF;
}
*c1++ = unhex(c2);
c2 += 2;
}
}
c2 = out->mv_data = buf->mv_data;
out->mv_size = c1 - c2;
return 0;
}
static void usage()
{
fprintf(stderr, "usage: %s dbpath [-V] [-f input] [-n] [-s name] [-N] [-T]\n", prog);
exit(EXIT_FAILURE);
}
int main(int argc, char *argv[])
{
int i, rc;
MDB_env *env;
MDB_txn *txn;
MDB_cursor *mc;
MDB_dbi dbi;
char *envname;
int envflags = 0, putflags = 0;
prog = argv[0];
if (argc < 2) {
usage(prog);
}
/* -f: load file instead of stdin
* -n: use NOSUBDIR flag on env_open
* -s: load into named subDB
* -N: use NOOVERWRITE on puts
* -T: read plaintext
* -V: print version and exit
*/
while ((i = getopt(argc, argv, "f:ns:NTV")) != EOF) {
switch(i) {
case 'V':
printf("%s\n", MDB_VERSION_STRING);
exit(0);
break;
case 'f':
if (freopen(optarg, "r", stdin) == NULL) {
fprintf(stderr, "%s: %s: reopen: %s\n",
prog, optarg, strerror(errno));
exit(EXIT_FAILURE);
}
break;
case 'n':
envflags |= MDB_NOSUBDIR;
break;
case 's':
subname = strdup(optarg);
break;
case 'N':
putflags = MDB_NOOVERWRITE|MDB_NODUPDATA;
break;
case 'T':
mode |= NOHDR;
break;
default:
usage(prog);
}
}
if (optind != argc - 1)
usage(prog);
envname = argv[optind];
rc = mdb_env_create(&env);
if (subname) {
mdb_env_set_maxdbs(env, 2);
}
rc = mdb_env_open(env, envname, envflags, 0664);
if (rc) {
printf("mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc));
goto env_close;
}
kbuf.mv_size = mdb_env_get_maxkeysize(env) * 2 + 2;
kbuf.mv_data = malloc(kbuf.mv_size);
dbuf.mv_size = 4096;
dbuf.mv_data = malloc(dbuf.mv_size);
while(!eof) {
MDB_val key, data;
int batch = 0;
flags = 0;
if (!(mode & NOHDR))
readhdr();
rc = mdb_txn_begin(env, NULL, 0, &txn);
if (rc) {
printf("mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc));
goto env_close;
}
rc = mdb_open(txn, subname, flags, &dbi);
if (rc) {
printf("mdb_open failed, error %d %s\n", rc, mdb_strerror(rc));
goto txn_abort;
}
rc = mdb_cursor_open(txn, dbi, &mc);
if (rc) {
printf("mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc));
goto txn_abort;
}
while(1) {
rc = readline(&key, &kbuf);
if (rc == EOF)
break;
if (rc)
goto txn_abort;
rc = readline(&data, &dbuf);
if (rc)
goto txn_abort;
rc = mdb_cursor_put(mc, &key, &data, putflags);
if (rc == MDB_KEYEXIST && putflags)
continue;
if (rc)
goto txn_abort;
batch++;
if (batch == 100) {
rc = mdb_txn_commit(txn);
if (rc) {
fprintf(stderr, "%s: line %zd: txn_commit: %s\n",
prog, lineno, mdb_strerror(rc));
goto env_close;
}
rc = mdb_txn_begin(env, NULL, 0, &txn);
if (rc) {
printf("mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc));
goto env_close;
}
rc = mdb_cursor_open(txn, dbi, &mc);
if (rc) {
printf("mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc));
goto txn_abort;
}
batch = 0;
}
}
rc = mdb_txn_commit(txn);
txn = NULL;
if (rc) {
fprintf(stderr, "%s: line %zd: txn_commit: %s\n",
prog, lineno, mdb_strerror(rc));
goto env_close;
}
mdb_dbi_close(env, dbi);
}
txn_abort:
mdb_txn_abort(txn);
env_close:
mdb_env_close(env);
return rc ? EXIT_FAILURE : EXIT_SUCCESS;
}