2000-03-17 03:34:46 +08:00
|
|
|
/*
|
2000-05-27 00:03:32 +08:00
|
|
|
* Copyright 1999, Dmitry Kovalev <mit@openldap.org>, All rights reserved.
|
2000-03-17 03:34:46 +08:00
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms are permitted only
|
|
|
|
* as authorized by the OpenLDAP Public License. A copy of this
|
|
|
|
* license is available at http://www.OpenLDAP.org/license.html or
|
|
|
|
* in file LICENSE in the top-level directory of the distribution.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "portable.h"
|
|
|
|
|
2000-03-19 14:18:27 +08:00
|
|
|
#ifdef SLAPD_SQL
|
|
|
|
|
2000-03-17 03:34:46 +08:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include "slap.h"
|
|
|
|
#include "back-sql.h"
|
|
|
|
#include "sql-wrap.h"
|
|
|
|
#include "schema-map.h"
|
|
|
|
#include "entry-id.h"
|
|
|
|
#include "util.h"
|
|
|
|
|
|
|
|
int backsql_attrlist_add(backsql_srch_info *bsi,char *at_name)
|
|
|
|
{
|
|
|
|
char **p=bsi->attrs;
|
|
|
|
int n_attrs=0;
|
|
|
|
|
|
|
|
if (bsi->attrs==NULL)
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
while(*p)
|
|
|
|
{
|
|
|
|
Debug(LDAP_DEBUG_TRACE,"==>backsql_attrlist_add(): attribute '%s' is in list\n",*p,0,0);
|
|
|
|
if (!strcasecmp(*p,at_name))
|
|
|
|
return 1;
|
|
|
|
n_attrs++;
|
|
|
|
p++;
|
|
|
|
}
|
|
|
|
Debug(LDAP_DEBUG_TRACE,"==>backsql_attrlist_add(): adding '%s' to list\n",at_name,0,0);
|
|
|
|
bsi->attrs=(char**)ch_realloc(bsi->attrs,(n_attrs+2)*sizeof(char*));
|
2000-05-27 00:03:32 +08:00
|
|
|
bsi->attrs[n_attrs]=ch_strdup(at_name);
|
2000-03-17 03:34:46 +08:00
|
|
|
bsi->attrs[n_attrs+1]=NULL;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
void backsql_init_search(backsql_srch_info *bsi,backsql_info *bi,char *nbase,int scope,
|
|
|
|
int slimit,int tlimit,time_t stoptime,Filter *filter,
|
2001-12-31 19:35:52 +08:00
|
|
|
SQLHDBC dbh,BackendDB *be,Connection *conn,Operation *op,AttributeName *attrs)
|
2000-03-17 03:34:46 +08:00
|
|
|
{
|
2001-12-31 19:35:52 +08:00
|
|
|
AttributeName *p;
|
2000-03-17 03:34:46 +08:00
|
|
|
bsi->base_dn=nbase;
|
|
|
|
bsi->scope=scope;
|
|
|
|
bsi->slimit=slimit;
|
|
|
|
bsi->tlimit=tlimit;
|
|
|
|
bsi->filter=filter;
|
|
|
|
bsi->dbh=dbh;
|
|
|
|
bsi->be=be;
|
|
|
|
bsi->conn=conn;
|
|
|
|
bsi->op=op;
|
|
|
|
if (attrs!=NULL)
|
|
|
|
{
|
|
|
|
bsi->attrs=(char**)ch_calloc(1,sizeof(char*));
|
|
|
|
bsi->attrs[0]=NULL;
|
2002-01-03 13:41:50 +08:00
|
|
|
for(p=attrs;p->an_name.bv_val;p++)
|
2001-12-31 19:35:52 +08:00
|
|
|
backsql_attrlist_add(bsi,p->an_name.bv_val);
|
2000-03-17 03:34:46 +08:00
|
|
|
}
|
|
|
|
else
|
2001-12-31 19:35:52 +08:00
|
|
|
bsi->attrs=NULL;
|
2000-03-17 03:34:46 +08:00
|
|
|
bsi->abandon=0;
|
|
|
|
bsi->id_list=NULL;
|
|
|
|
bsi->stoptime=stoptime;
|
|
|
|
bsi->bi=bi;
|
|
|
|
bsi->sel=NULL; bsi->from=NULL; bsi->join_where=NULL; bsi->flt_where=NULL;
|
|
|
|
bsi->sel_len=0; bsi->from_len=0; bsi->jwhere_len=0; bsi->fwhere_len=0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int backsql_process_filter_list(backsql_srch_info *bsi,Filter *f,int op)
|
|
|
|
{
|
|
|
|
char *sub_clause=NULL;
|
|
|
|
int len=0,res;
|
|
|
|
|
2000-06-30 05:14:43 +08:00
|
|
|
if (!f)
|
|
|
|
return 0;
|
2000-03-17 03:34:46 +08:00
|
|
|
bsi->flt_where=backsql_strcat(bsi->flt_where,&bsi->fwhere_len,"(",NULL);
|
|
|
|
while(1)
|
|
|
|
{
|
|
|
|
res=backsql_process_filter(bsi,f);
|
A big bunch of improvements, contributed by Sam Drake and Raj Damani.
Summary of changes is cited below.
The patch still needs some cosmetic changes to be made, but is ready for testing.
-----Original Message-----
From: Sam Drake [mailto:drake@timesten.com]
Sent: Saturday, April 07, 2001 10:40 PM
To: 'mitya@seismic.ru'
Cc: openldap-devel@OpenLDAP.org
Subject: RE: Slapd frontend performance issues
FYI, here is a short description of the changes I made. I'll package up the
changes asap, but it may take a couple of days.
The performance numbers quoted in this report were seen at my location with
a 100,000 object database ... the slower numbers I mentioned earlier were
reported by a customer with a 1,000,000 object database.
I also can't explain the very poor performance I saw with OpenLDAP and LDBM
with a 100,000 object database.
...Sam Drake / TimesTen Performance Software
----------
Work Performed
OpenLDAP 2.0.9, including back-sql, was built successfully on Solaris
8 using gcc. The LDAP server itself, slapd, passed all tests bundled
with OpenLDAP. OpenLDAP was built using Sleepycat LDBM release 3.1.17
as the "native" storage manager.
The experimental back-sql facility in slapd was also built
successfully. It was built using Oracle release 8.1.7 and the Oracle
ODBC driver and ODBC Driver Manager from Merant. Rudimentary testing
was performed with the data and examples provided with back-sql, and
back-sql was found to be functional.
Slapd and back-sql were then tested with TimesTen, using TimesTen
4.1.1. Back-sql was not immediately functional with TimesTen due to a
number of SQL limitations in the TimesTen product.
Functional issues encountered were:
1. Back-sql issued SELECT statements including the construct,
"UPPER(?)". While TimesTen supports UPPER, it does not support the
use of parameters as input to builtin functions. Back-sql was
modified to convert the parameter to upper case prior to giving it
to the underlying database ... a change that is appropriate for all
databases.
2. Back-sql issued SELECT statements using the SQL CONCAT function.
TimesTen does not support this function. Back-sql was modified to
concatentate the necessary strings itself (in "C" code) prior to
passing the parameters to SQL. This change is also appropriate for
all databases, not just TimesTen.
Once these two issues were resolved, back-sql could successfully
process LDAP searches using the sample data and examples provided with
back-sql.
While performance was not measured at this point, numerous serious
performance problems were observed with the back-sql code and the
generated SQL. In particular:
1. In the process of implementing an LDAP search, back-sql will
generate and execute a SQL query for all object classes stored in
back-sql. During the source of generating each SQL query, it is
common for back-sql to determine that a particular object class can
not possibly have any members satisfying the search. For example,
this can occur if the query searches an attribute of the LDAP
object that does not exist in the SQL schema. In this case,
back-sql would generate and issue the SQL query anyway, including a
clause such as "WHERE 1=0" in the generated SELECT. The overhead
of parsing, optimizing and executing the query is non-trivial, and
the answer (the empty set) is known in advance. Solution: Back-sql
was modified to stop executing a SQL query when it can be
predetermined that the query will return no rows.
2. Searches in LDAP are fundamentally case-insensitive ("abc" is equal
to "aBc"). However, in SQL this is not normally the case.
Back-sql thus generated SQL SELECT statements including clauses of
the form, "WHERE UPPER(attribute) = 'JOE'". Even if an index is
defined on the attribute in the relational database, the index can
not be used to satisfy the query, as the index is case sensitive.
The relational database then is forced to scan all rows in the
table in order to satisfy the query ... an expensive and
non-scalable proposition. Solution: Back-sql was modified to allow
the schema designer to add additional "upper cased" columns to the
SQL schema. These columns, if present, contain an upper cased
version of the "standard" field, and will be used preferentially
for searching. Such columns can be provided for all searchable
columns, some columns, or no columns. An application using
database "triggers" or similar mechanisms can automatically
maintain these upper cased columns when the standard column is
changed.
3. In order to implement the hierarchical nature of LDAP object
hierarchies, OpenLDAP uses suffix searches in SQL. For example, to
find all objects in the subtree "o=TimesTen,c=us", a SQL SELECT
statement of the form, "WHERE UPPER(dn) LIKE '%O=TIMESTEN,C=US'"
would be employed. Aside from the UPPER issue discussed above, a
second performance problem in this query is the use of suffix
search. In TimesTen (and most relational databases), indexes can
be used to optimize exact-match searches and prefix searches.
However, suffix searches must be performed by scanning every row in
the table ... an expensive and non-scalable proposition. Solution:
Back-sql was modified to optionally add a new "dn_ru" column to the
ldap_entries table. This additional column, if present, contains a
byte-reversed and upper cased version of the DN. This allows
back-sql to generate indexable prefix searches. This column is
also easily maintained automatically through the use of triggers.
Results
A simple database schema was generated holding the LDAP objects and
attributes specified by our customer. An application was written to
generate test databases. Both TimesTen and Oracle 8.1.7 were
populated with 100,000 entry databases.
Load Times
Using "slapadd" followed by "slapindex", loading and indexing 100,000
entries in an LDBM database ran for 19 minutes 10 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
a disk based RDBMS took 17 minutes 53 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
TimesTen took 1 minute 40 seconds.
Search Times
The command, "timex timesearch.sh '(cn=fname210100*)'" was used to
test search times. This command issues the same LDAP search 4000
times over a single LDAP connection. Both the client and server
(slapd) were run on the same machine.
With TimesTen as the database, 4000 queries took 14.93 seconds, for a
rate of 267.9 per second.
With a disk based RDBMS as the database, 4000 queries took 77.79 seconds,
for a
rate of 51.42 per second.
With LDBM as the database, 1 query takes 76 seconds, or 0.076 per
second. Something is clearly broken.
2001-08-03 01:28:59 +08:00
|
|
|
if (res < 0)
|
|
|
|
return -1; /* TimesTen : If the query has no answers,
|
|
|
|
don't bother to run the query. */
|
|
|
|
|
2000-03-17 03:34:46 +08:00
|
|
|
f=f->f_next;
|
|
|
|
if (f==NULL)
|
|
|
|
break;
|
|
|
|
|
|
|
|
switch (op)
|
|
|
|
{
|
|
|
|
case LDAP_FILTER_AND:
|
|
|
|
bsi->flt_where=backsql_strcat(bsi->flt_where,&bsi->fwhere_len," AND ",NULL);
|
|
|
|
break;
|
|
|
|
case LDAP_FILTER_OR:
|
|
|
|
bsi->flt_where=backsql_strcat(bsi->flt_where,&bsi->fwhere_len," OR ",NULL);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
bsi->flt_where=backsql_strcat(bsi->flt_where,&bsi->fwhere_len,")",NULL);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
int backsql_process_sub_filter(backsql_srch_info *bsi,Filter *f)
|
|
|
|
{
|
|
|
|
int i;
|
2001-10-22 21:23:05 +08:00
|
|
|
backsql_at_map_rec *at=backsql_at_with_name(bsi->oc,f->f_sub_desc->ad_cname.bv_val);
|
2000-06-30 05:14:43 +08:00
|
|
|
|
|
|
|
if (!f)
|
|
|
|
return 0;
|
2000-03-17 03:34:46 +08:00
|
|
|
|
2000-05-27 00:03:32 +08:00
|
|
|
bsi->flt_where=backsql_strcat(bsi->flt_where,&bsi->fwhere_len,"(",NULL);
|
2001-09-07 21:04:11 +08:00
|
|
|
/* TimesTen*/
|
A big bunch of improvements, contributed by Sam Drake and Raj Damani.
Summary of changes is cited below.
The patch still needs some cosmetic changes to be made, but is ready for testing.
-----Original Message-----
From: Sam Drake [mailto:drake@timesten.com]
Sent: Saturday, April 07, 2001 10:40 PM
To: 'mitya@seismic.ru'
Cc: openldap-devel@OpenLDAP.org
Subject: RE: Slapd frontend performance issues
FYI, here is a short description of the changes I made. I'll package up the
changes asap, but it may take a couple of days.
The performance numbers quoted in this report were seen at my location with
a 100,000 object database ... the slower numbers I mentioned earlier were
reported by a customer with a 1,000,000 object database.
I also can't explain the very poor performance I saw with OpenLDAP and LDBM
with a 100,000 object database.
...Sam Drake / TimesTen Performance Software
----------
Work Performed
OpenLDAP 2.0.9, including back-sql, was built successfully on Solaris
8 using gcc. The LDAP server itself, slapd, passed all tests bundled
with OpenLDAP. OpenLDAP was built using Sleepycat LDBM release 3.1.17
as the "native" storage manager.
The experimental back-sql facility in slapd was also built
successfully. It was built using Oracle release 8.1.7 and the Oracle
ODBC driver and ODBC Driver Manager from Merant. Rudimentary testing
was performed with the data and examples provided with back-sql, and
back-sql was found to be functional.
Slapd and back-sql were then tested with TimesTen, using TimesTen
4.1.1. Back-sql was not immediately functional with TimesTen due to a
number of SQL limitations in the TimesTen product.
Functional issues encountered were:
1. Back-sql issued SELECT statements including the construct,
"UPPER(?)". While TimesTen supports UPPER, it does not support the
use of parameters as input to builtin functions. Back-sql was
modified to convert the parameter to upper case prior to giving it
to the underlying database ... a change that is appropriate for all
databases.
2. Back-sql issued SELECT statements using the SQL CONCAT function.
TimesTen does not support this function. Back-sql was modified to
concatentate the necessary strings itself (in "C" code) prior to
passing the parameters to SQL. This change is also appropriate for
all databases, not just TimesTen.
Once these two issues were resolved, back-sql could successfully
process LDAP searches using the sample data and examples provided with
back-sql.
While performance was not measured at this point, numerous serious
performance problems were observed with the back-sql code and the
generated SQL. In particular:
1. In the process of implementing an LDAP search, back-sql will
generate and execute a SQL query for all object classes stored in
back-sql. During the source of generating each SQL query, it is
common for back-sql to determine that a particular object class can
not possibly have any members satisfying the search. For example,
this can occur if the query searches an attribute of the LDAP
object that does not exist in the SQL schema. In this case,
back-sql would generate and issue the SQL query anyway, including a
clause such as "WHERE 1=0" in the generated SELECT. The overhead
of parsing, optimizing and executing the query is non-trivial, and
the answer (the empty set) is known in advance. Solution: Back-sql
was modified to stop executing a SQL query when it can be
predetermined that the query will return no rows.
2. Searches in LDAP are fundamentally case-insensitive ("abc" is equal
to "aBc"). However, in SQL this is not normally the case.
Back-sql thus generated SQL SELECT statements including clauses of
the form, "WHERE UPPER(attribute) = 'JOE'". Even if an index is
defined on the attribute in the relational database, the index can
not be used to satisfy the query, as the index is case sensitive.
The relational database then is forced to scan all rows in the
table in order to satisfy the query ... an expensive and
non-scalable proposition. Solution: Back-sql was modified to allow
the schema designer to add additional "upper cased" columns to the
SQL schema. These columns, if present, contain an upper cased
version of the "standard" field, and will be used preferentially
for searching. Such columns can be provided for all searchable
columns, some columns, or no columns. An application using
database "triggers" or similar mechanisms can automatically
maintain these upper cased columns when the standard column is
changed.
3. In order to implement the hierarchical nature of LDAP object
hierarchies, OpenLDAP uses suffix searches in SQL. For example, to
find all objects in the subtree "o=TimesTen,c=us", a SQL SELECT
statement of the form, "WHERE UPPER(dn) LIKE '%O=TIMESTEN,C=US'"
would be employed. Aside from the UPPER issue discussed above, a
second performance problem in this query is the use of suffix
search. In TimesTen (and most relational databases), indexes can
be used to optimize exact-match searches and prefix searches.
However, suffix searches must be performed by scanning every row in
the table ... an expensive and non-scalable proposition. Solution:
Back-sql was modified to optionally add a new "dn_ru" column to the
ldap_entries table. This additional column, if present, contains a
byte-reversed and upper cased version of the DN. This allows
back-sql to generate indexable prefix searches. This column is
also easily maintained automatically through the use of triggers.
Results
A simple database schema was generated holding the LDAP objects and
attributes specified by our customer. An application was written to
generate test databases. Both TimesTen and Oracle 8.1.7 were
populated with 100,000 entry databases.
Load Times
Using "slapadd" followed by "slapindex", loading and indexing 100,000
entries in an LDBM database ran for 19 minutes 10 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
a disk based RDBMS took 17 minutes 53 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
TimesTen took 1 minute 40 seconds.
Search Times
The command, "timex timesearch.sh '(cn=fname210100*)'" was used to
test search times. This command issues the same LDAP search 4000
times over a single LDAP connection. Both the client and server
(slapd) were run on the same machine.
With TimesTen as the database, 4000 queries took 14.93 seconds, for a
rate of 267.9 per second.
With a disk based RDBMS as the database, 4000 queries took 77.79 seconds,
for a
rate of 51.42 per second.
With LDBM as the database, 1 query takes 76 seconds, or 0.076 per
second. Something is clearly broken.
2001-08-03 01:28:59 +08:00
|
|
|
Debug(LDAP_DEBUG_TRACE,"expr: '%s' '%s'\n",at->sel_expr,
|
|
|
|
at->sel_expr_u?at->sel_expr_u:"<NULL>",0);
|
2000-05-27 00:03:32 +08:00
|
|
|
if (bsi->bi->upper_func)
|
|
|
|
{
|
A big bunch of improvements, contributed by Sam Drake and Raj Damani.
Summary of changes is cited below.
The patch still needs some cosmetic changes to be made, but is ready for testing.
-----Original Message-----
From: Sam Drake [mailto:drake@timesten.com]
Sent: Saturday, April 07, 2001 10:40 PM
To: 'mitya@seismic.ru'
Cc: openldap-devel@OpenLDAP.org
Subject: RE: Slapd frontend performance issues
FYI, here is a short description of the changes I made. I'll package up the
changes asap, but it may take a couple of days.
The performance numbers quoted in this report were seen at my location with
a 100,000 object database ... the slower numbers I mentioned earlier were
reported by a customer with a 1,000,000 object database.
I also can't explain the very poor performance I saw with OpenLDAP and LDBM
with a 100,000 object database.
...Sam Drake / TimesTen Performance Software
----------
Work Performed
OpenLDAP 2.0.9, including back-sql, was built successfully on Solaris
8 using gcc. The LDAP server itself, slapd, passed all tests bundled
with OpenLDAP. OpenLDAP was built using Sleepycat LDBM release 3.1.17
as the "native" storage manager.
The experimental back-sql facility in slapd was also built
successfully. It was built using Oracle release 8.1.7 and the Oracle
ODBC driver and ODBC Driver Manager from Merant. Rudimentary testing
was performed with the data and examples provided with back-sql, and
back-sql was found to be functional.
Slapd and back-sql were then tested with TimesTen, using TimesTen
4.1.1. Back-sql was not immediately functional with TimesTen due to a
number of SQL limitations in the TimesTen product.
Functional issues encountered were:
1. Back-sql issued SELECT statements including the construct,
"UPPER(?)". While TimesTen supports UPPER, it does not support the
use of parameters as input to builtin functions. Back-sql was
modified to convert the parameter to upper case prior to giving it
to the underlying database ... a change that is appropriate for all
databases.
2. Back-sql issued SELECT statements using the SQL CONCAT function.
TimesTen does not support this function. Back-sql was modified to
concatentate the necessary strings itself (in "C" code) prior to
passing the parameters to SQL. This change is also appropriate for
all databases, not just TimesTen.
Once these two issues were resolved, back-sql could successfully
process LDAP searches using the sample data and examples provided with
back-sql.
While performance was not measured at this point, numerous serious
performance problems were observed with the back-sql code and the
generated SQL. In particular:
1. In the process of implementing an LDAP search, back-sql will
generate and execute a SQL query for all object classes stored in
back-sql. During the source of generating each SQL query, it is
common for back-sql to determine that a particular object class can
not possibly have any members satisfying the search. For example,
this can occur if the query searches an attribute of the LDAP
object that does not exist in the SQL schema. In this case,
back-sql would generate and issue the SQL query anyway, including a
clause such as "WHERE 1=0" in the generated SELECT. The overhead
of parsing, optimizing and executing the query is non-trivial, and
the answer (the empty set) is known in advance. Solution: Back-sql
was modified to stop executing a SQL query when it can be
predetermined that the query will return no rows.
2. Searches in LDAP are fundamentally case-insensitive ("abc" is equal
to "aBc"). However, in SQL this is not normally the case.
Back-sql thus generated SQL SELECT statements including clauses of
the form, "WHERE UPPER(attribute) = 'JOE'". Even if an index is
defined on the attribute in the relational database, the index can
not be used to satisfy the query, as the index is case sensitive.
The relational database then is forced to scan all rows in the
table in order to satisfy the query ... an expensive and
non-scalable proposition. Solution: Back-sql was modified to allow
the schema designer to add additional "upper cased" columns to the
SQL schema. These columns, if present, contain an upper cased
version of the "standard" field, and will be used preferentially
for searching. Such columns can be provided for all searchable
columns, some columns, or no columns. An application using
database "triggers" or similar mechanisms can automatically
maintain these upper cased columns when the standard column is
changed.
3. In order to implement the hierarchical nature of LDAP object
hierarchies, OpenLDAP uses suffix searches in SQL. For example, to
find all objects in the subtree "o=TimesTen,c=us", a SQL SELECT
statement of the form, "WHERE UPPER(dn) LIKE '%O=TIMESTEN,C=US'"
would be employed. Aside from the UPPER issue discussed above, a
second performance problem in this query is the use of suffix
search. In TimesTen (and most relational databases), indexes can
be used to optimize exact-match searches and prefix searches.
However, suffix searches must be performed by scanning every row in
the table ... an expensive and non-scalable proposition. Solution:
Back-sql was modified to optionally add a new "dn_ru" column to the
ldap_entries table. This additional column, if present, contains a
byte-reversed and upper cased version of the DN. This allows
back-sql to generate indexable prefix searches. This column is
also easily maintained automatically through the use of triggers.
Results
A simple database schema was generated holding the LDAP objects and
attributes specified by our customer. An application was written to
generate test databases. Both TimesTen and Oracle 8.1.7 were
populated with 100,000 entry databases.
Load Times
Using "slapadd" followed by "slapindex", loading and indexing 100,000
entries in an LDBM database ran for 19 minutes 10 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
a disk based RDBMS took 17 minutes 53 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
TimesTen took 1 minute 40 seconds.
Search Times
The command, "timex timesearch.sh '(cn=fname210100*)'" was used to
test search times. This command issues the same LDAP search 4000
times over a single LDAP connection. Both the client and server
(slapd) were run on the same machine.
With TimesTen as the database, 4000 queries took 14.93 seconds, for a
rate of 267.9 per second.
With a disk based RDBMS as the database, 4000 queries took 77.79 seconds,
for a
rate of 51.42 per second.
With LDBM as the database, 1 query takes 76 seconds, or 0.076 per
second. Something is clearly broken.
2001-08-03 01:28:59 +08:00
|
|
|
/* If a pre-upper-cased version of the column exists, use it. */
|
|
|
|
if (at->sel_expr_u) {
|
|
|
|
bsi->flt_where=backsql_strcat(bsi->flt_where,&bsi->fwhere_len,
|
|
|
|
at->sel_expr_u," LIKE '",NULL);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
bsi->flt_where=backsql_strcat(bsi->flt_where,&bsi->fwhere_len,
|
|
|
|
bsi->bi->upper_func,"(",at->sel_expr,")",
|
|
|
|
" LIKE '",NULL);
|
|
|
|
}
|
2000-05-27 00:03:32 +08:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
bsi->flt_where=backsql_strcat(bsi->flt_where,&bsi->fwhere_len,at->sel_expr,
|
2000-03-17 03:34:46 +08:00
|
|
|
" LIKE '",NULL);
|
2000-05-27 00:03:32 +08:00
|
|
|
}
|
2000-03-17 03:34:46 +08:00
|
|
|
if (f->f_sub_initial!=NULL)
|
2000-05-27 00:03:32 +08:00
|
|
|
{
|
2000-10-26 05:23:04 +08:00
|
|
|
if (bsi->bi->upper_func)
|
|
|
|
{
|
2000-11-02 21:18:45 +08:00
|
|
|
bsi->flt_where=backsql_strcat(bsi->flt_where,&bsi->fwhere_len,ldap_pvt_str2upper(f->f_sub_initial->bv_val),NULL);
|
2000-10-26 05:23:04 +08:00
|
|
|
}
|
|
|
|
else
|
|
|
|
bsi->flt_where=backsql_strcat(bsi->flt_where,&bsi->fwhere_len,f->f_sub_initial->bv_val,NULL);
|
2000-05-27 00:03:32 +08:00
|
|
|
}
|
2000-03-17 03:34:46 +08:00
|
|
|
|
|
|
|
bsi->flt_where=backsql_strcat(bsi->flt_where,&bsi->fwhere_len,"%",NULL);
|
|
|
|
|
|
|
|
if (f->f_sub_any!=NULL)
|
|
|
|
for(i=0;f->f_sub_any[i]!=NULL;i++)
|
2000-05-27 00:03:32 +08:00
|
|
|
{
|
2001-09-07 21:04:11 +08:00
|
|
|
/*Debug(LDAP_DEBUG_TRACE,"==>backsql_process_sub_filter(): sub_any='%s'\n",f->f_sub_any[i]->bv_val,0,0);*/
|
2000-10-26 05:23:04 +08:00
|
|
|
if (bsi->bi->upper_func)
|
|
|
|
{
|
2000-11-02 21:18:45 +08:00
|
|
|
bsi->flt_where=backsql_strcat(bsi->flt_where,&bsi->fwhere_len,ldap_pvt_str2upper(f->f_sub_any[i]->bv_val),"%",NULL);
|
2000-10-26 05:23:04 +08:00
|
|
|
}
|
|
|
|
else
|
|
|
|
bsi->flt_where=backsql_strcat(bsi->flt_where,&bsi->fwhere_len,f->f_sub_any[i]->bv_val,"%",NULL);
|
2000-05-27 00:03:32 +08:00
|
|
|
}
|
2000-03-17 03:34:46 +08:00
|
|
|
|
|
|
|
if (f->f_sub_final!=NULL)
|
2000-10-26 05:23:04 +08:00
|
|
|
if (bsi->bi->upper_func)
|
|
|
|
{
|
2000-11-02 21:18:45 +08:00
|
|
|
bsi->flt_where=backsql_strcat(bsi->flt_where,&bsi->fwhere_len,ldap_pvt_str2upper(f->f_sub_final->bv_val),NULL);
|
2000-10-26 05:23:04 +08:00
|
|
|
}
|
|
|
|
else
|
|
|
|
bsi->flt_where=backsql_strcat(bsi->flt_where,&bsi->fwhere_len,f->f_sub_final->bv_val,NULL);
|
2000-03-17 03:34:46 +08:00
|
|
|
|
|
|
|
bsi->flt_where=backsql_strcat(bsi->flt_where,&bsi->fwhere_len,"')",NULL);
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
int backsql_process_filter(backsql_srch_info *bsi,Filter *f)
|
|
|
|
{
|
|
|
|
backsql_at_map_rec *at;
|
|
|
|
backsql_at_map_rec oc_attr={"objectClass","","",NULL,NULL,NULL,NULL};
|
|
|
|
char *at_name=NULL;
|
|
|
|
int done=0,len=0;
|
2001-09-07 21:04:11 +08:00
|
|
|
int rc=0; /* TimesTen */
|
2000-03-17 03:34:46 +08:00
|
|
|
|
|
|
|
Debug(LDAP_DEBUG_TRACE,"==>backsql_process_filter()\n",0,0,0);
|
2000-10-31 04:36:29 +08:00
|
|
|
if (f==NULL || f->f_choice==SLAPD_FILTER_COMPUTED)
|
2000-06-30 05:14:43 +08:00
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2000-03-17 03:34:46 +08:00
|
|
|
switch(f->f_choice)
|
|
|
|
{
|
|
|
|
case LDAP_FILTER_OR:
|
A big bunch of improvements, contributed by Sam Drake and Raj Damani.
Summary of changes is cited below.
The patch still needs some cosmetic changes to be made, but is ready for testing.
-----Original Message-----
From: Sam Drake [mailto:drake@timesten.com]
Sent: Saturday, April 07, 2001 10:40 PM
To: 'mitya@seismic.ru'
Cc: openldap-devel@OpenLDAP.org
Subject: RE: Slapd frontend performance issues
FYI, here is a short description of the changes I made. I'll package up the
changes asap, but it may take a couple of days.
The performance numbers quoted in this report were seen at my location with
a 100,000 object database ... the slower numbers I mentioned earlier were
reported by a customer with a 1,000,000 object database.
I also can't explain the very poor performance I saw with OpenLDAP and LDBM
with a 100,000 object database.
...Sam Drake / TimesTen Performance Software
----------
Work Performed
OpenLDAP 2.0.9, including back-sql, was built successfully on Solaris
8 using gcc. The LDAP server itself, slapd, passed all tests bundled
with OpenLDAP. OpenLDAP was built using Sleepycat LDBM release 3.1.17
as the "native" storage manager.
The experimental back-sql facility in slapd was also built
successfully. It was built using Oracle release 8.1.7 and the Oracle
ODBC driver and ODBC Driver Manager from Merant. Rudimentary testing
was performed with the data and examples provided with back-sql, and
back-sql was found to be functional.
Slapd and back-sql were then tested with TimesTen, using TimesTen
4.1.1. Back-sql was not immediately functional with TimesTen due to a
number of SQL limitations in the TimesTen product.
Functional issues encountered were:
1. Back-sql issued SELECT statements including the construct,
"UPPER(?)". While TimesTen supports UPPER, it does not support the
use of parameters as input to builtin functions. Back-sql was
modified to convert the parameter to upper case prior to giving it
to the underlying database ... a change that is appropriate for all
databases.
2. Back-sql issued SELECT statements using the SQL CONCAT function.
TimesTen does not support this function. Back-sql was modified to
concatentate the necessary strings itself (in "C" code) prior to
passing the parameters to SQL. This change is also appropriate for
all databases, not just TimesTen.
Once these two issues were resolved, back-sql could successfully
process LDAP searches using the sample data and examples provided with
back-sql.
While performance was not measured at this point, numerous serious
performance problems were observed with the back-sql code and the
generated SQL. In particular:
1. In the process of implementing an LDAP search, back-sql will
generate and execute a SQL query for all object classes stored in
back-sql. During the source of generating each SQL query, it is
common for back-sql to determine that a particular object class can
not possibly have any members satisfying the search. For example,
this can occur if the query searches an attribute of the LDAP
object that does not exist in the SQL schema. In this case,
back-sql would generate and issue the SQL query anyway, including a
clause such as "WHERE 1=0" in the generated SELECT. The overhead
of parsing, optimizing and executing the query is non-trivial, and
the answer (the empty set) is known in advance. Solution: Back-sql
was modified to stop executing a SQL query when it can be
predetermined that the query will return no rows.
2. Searches in LDAP are fundamentally case-insensitive ("abc" is equal
to "aBc"). However, in SQL this is not normally the case.
Back-sql thus generated SQL SELECT statements including clauses of
the form, "WHERE UPPER(attribute) = 'JOE'". Even if an index is
defined on the attribute in the relational database, the index can
not be used to satisfy the query, as the index is case sensitive.
The relational database then is forced to scan all rows in the
table in order to satisfy the query ... an expensive and
non-scalable proposition. Solution: Back-sql was modified to allow
the schema designer to add additional "upper cased" columns to the
SQL schema. These columns, if present, contain an upper cased
version of the "standard" field, and will be used preferentially
for searching. Such columns can be provided for all searchable
columns, some columns, or no columns. An application using
database "triggers" or similar mechanisms can automatically
maintain these upper cased columns when the standard column is
changed.
3. In order to implement the hierarchical nature of LDAP object
hierarchies, OpenLDAP uses suffix searches in SQL. For example, to
find all objects in the subtree "o=TimesTen,c=us", a SQL SELECT
statement of the form, "WHERE UPPER(dn) LIKE '%O=TIMESTEN,C=US'"
would be employed. Aside from the UPPER issue discussed above, a
second performance problem in this query is the use of suffix
search. In TimesTen (and most relational databases), indexes can
be used to optimize exact-match searches and prefix searches.
However, suffix searches must be performed by scanning every row in
the table ... an expensive and non-scalable proposition. Solution:
Back-sql was modified to optionally add a new "dn_ru" column to the
ldap_entries table. This additional column, if present, contains a
byte-reversed and upper cased version of the DN. This allows
back-sql to generate indexable prefix searches. This column is
also easily maintained automatically through the use of triggers.
Results
A simple database schema was generated holding the LDAP objects and
attributes specified by our customer. An application was written to
generate test databases. Both TimesTen and Oracle 8.1.7 were
populated with 100,000 entry databases.
Load Times
Using "slapadd" followed by "slapindex", loading and indexing 100,000
entries in an LDBM database ran for 19 minutes 10 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
a disk based RDBMS took 17 minutes 53 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
TimesTen took 1 minute 40 seconds.
Search Times
The command, "timex timesearch.sh '(cn=fname210100*)'" was used to
test search times. This command issues the same LDAP search 4000
times over a single LDAP connection. Both the client and server
(slapd) were run on the same machine.
With TimesTen as the database, 4000 queries took 14.93 seconds, for a
rate of 267.9 per second.
With a disk based RDBMS as the database, 4000 queries took 77.79 seconds,
for a
rate of 51.42 per second.
With LDBM as the database, 1 query takes 76 seconds, or 0.076 per
second. Something is clearly broken.
2001-08-03 01:28:59 +08:00
|
|
|
rc = backsql_process_filter_list(bsi,f->f_or,LDAP_FILTER_OR);
|
2000-03-17 03:34:46 +08:00
|
|
|
done=1;
|
|
|
|
break;
|
|
|
|
case LDAP_FILTER_AND:
|
A big bunch of improvements, contributed by Sam Drake and Raj Damani.
Summary of changes is cited below.
The patch still needs some cosmetic changes to be made, but is ready for testing.
-----Original Message-----
From: Sam Drake [mailto:drake@timesten.com]
Sent: Saturday, April 07, 2001 10:40 PM
To: 'mitya@seismic.ru'
Cc: openldap-devel@OpenLDAP.org
Subject: RE: Slapd frontend performance issues
FYI, here is a short description of the changes I made. I'll package up the
changes asap, but it may take a couple of days.
The performance numbers quoted in this report were seen at my location with
a 100,000 object database ... the slower numbers I mentioned earlier were
reported by a customer with a 1,000,000 object database.
I also can't explain the very poor performance I saw with OpenLDAP and LDBM
with a 100,000 object database.
...Sam Drake / TimesTen Performance Software
----------
Work Performed
OpenLDAP 2.0.9, including back-sql, was built successfully on Solaris
8 using gcc. The LDAP server itself, slapd, passed all tests bundled
with OpenLDAP. OpenLDAP was built using Sleepycat LDBM release 3.1.17
as the "native" storage manager.
The experimental back-sql facility in slapd was also built
successfully. It was built using Oracle release 8.1.7 and the Oracle
ODBC driver and ODBC Driver Manager from Merant. Rudimentary testing
was performed with the data and examples provided with back-sql, and
back-sql was found to be functional.
Slapd and back-sql were then tested with TimesTen, using TimesTen
4.1.1. Back-sql was not immediately functional with TimesTen due to a
number of SQL limitations in the TimesTen product.
Functional issues encountered were:
1. Back-sql issued SELECT statements including the construct,
"UPPER(?)". While TimesTen supports UPPER, it does not support the
use of parameters as input to builtin functions. Back-sql was
modified to convert the parameter to upper case prior to giving it
to the underlying database ... a change that is appropriate for all
databases.
2. Back-sql issued SELECT statements using the SQL CONCAT function.
TimesTen does not support this function. Back-sql was modified to
concatentate the necessary strings itself (in "C" code) prior to
passing the parameters to SQL. This change is also appropriate for
all databases, not just TimesTen.
Once these two issues were resolved, back-sql could successfully
process LDAP searches using the sample data and examples provided with
back-sql.
While performance was not measured at this point, numerous serious
performance problems were observed with the back-sql code and the
generated SQL. In particular:
1. In the process of implementing an LDAP search, back-sql will
generate and execute a SQL query for all object classes stored in
back-sql. During the source of generating each SQL query, it is
common for back-sql to determine that a particular object class can
not possibly have any members satisfying the search. For example,
this can occur if the query searches an attribute of the LDAP
object that does not exist in the SQL schema. In this case,
back-sql would generate and issue the SQL query anyway, including a
clause such as "WHERE 1=0" in the generated SELECT. The overhead
of parsing, optimizing and executing the query is non-trivial, and
the answer (the empty set) is known in advance. Solution: Back-sql
was modified to stop executing a SQL query when it can be
predetermined that the query will return no rows.
2. Searches in LDAP are fundamentally case-insensitive ("abc" is equal
to "aBc"). However, in SQL this is not normally the case.
Back-sql thus generated SQL SELECT statements including clauses of
the form, "WHERE UPPER(attribute) = 'JOE'". Even if an index is
defined on the attribute in the relational database, the index can
not be used to satisfy the query, as the index is case sensitive.
The relational database then is forced to scan all rows in the
table in order to satisfy the query ... an expensive and
non-scalable proposition. Solution: Back-sql was modified to allow
the schema designer to add additional "upper cased" columns to the
SQL schema. These columns, if present, contain an upper cased
version of the "standard" field, and will be used preferentially
for searching. Such columns can be provided for all searchable
columns, some columns, or no columns. An application using
database "triggers" or similar mechanisms can automatically
maintain these upper cased columns when the standard column is
changed.
3. In order to implement the hierarchical nature of LDAP object
hierarchies, OpenLDAP uses suffix searches in SQL. For example, to
find all objects in the subtree "o=TimesTen,c=us", a SQL SELECT
statement of the form, "WHERE UPPER(dn) LIKE '%O=TIMESTEN,C=US'"
would be employed. Aside from the UPPER issue discussed above, a
second performance problem in this query is the use of suffix
search. In TimesTen (and most relational databases), indexes can
be used to optimize exact-match searches and prefix searches.
However, suffix searches must be performed by scanning every row in
the table ... an expensive and non-scalable proposition. Solution:
Back-sql was modified to optionally add a new "dn_ru" column to the
ldap_entries table. This additional column, if present, contains a
byte-reversed and upper cased version of the DN. This allows
back-sql to generate indexable prefix searches. This column is
also easily maintained automatically through the use of triggers.
Results
A simple database schema was generated holding the LDAP objects and
attributes specified by our customer. An application was written to
generate test databases. Both TimesTen and Oracle 8.1.7 were
populated with 100,000 entry databases.
Load Times
Using "slapadd" followed by "slapindex", loading and indexing 100,000
entries in an LDBM database ran for 19 minutes 10 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
a disk based RDBMS took 17 minutes 53 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
TimesTen took 1 minute 40 seconds.
Search Times
The command, "timex timesearch.sh '(cn=fname210100*)'" was used to
test search times. This command issues the same LDAP search 4000
times over a single LDAP connection. Both the client and server
(slapd) were run on the same machine.
With TimesTen as the database, 4000 queries took 14.93 seconds, for a
rate of 267.9 per second.
With a disk based RDBMS as the database, 4000 queries took 77.79 seconds,
for a
rate of 51.42 per second.
With LDBM as the database, 1 query takes 76 seconds, or 0.076 per
second. Something is clearly broken.
2001-08-03 01:28:59 +08:00
|
|
|
rc = backsql_process_filter_list(bsi,f->f_and,LDAP_FILTER_AND);
|
2000-03-17 03:34:46 +08:00
|
|
|
done=1;
|
|
|
|
break;
|
|
|
|
case LDAP_FILTER_NOT:
|
|
|
|
bsi->flt_where=backsql_strcat(bsi->flt_where,&bsi->fwhere_len,"NOT (",NULL);
|
A big bunch of improvements, contributed by Sam Drake and Raj Damani.
Summary of changes is cited below.
The patch still needs some cosmetic changes to be made, but is ready for testing.
-----Original Message-----
From: Sam Drake [mailto:drake@timesten.com]
Sent: Saturday, April 07, 2001 10:40 PM
To: 'mitya@seismic.ru'
Cc: openldap-devel@OpenLDAP.org
Subject: RE: Slapd frontend performance issues
FYI, here is a short description of the changes I made. I'll package up the
changes asap, but it may take a couple of days.
The performance numbers quoted in this report were seen at my location with
a 100,000 object database ... the slower numbers I mentioned earlier were
reported by a customer with a 1,000,000 object database.
I also can't explain the very poor performance I saw with OpenLDAP and LDBM
with a 100,000 object database.
...Sam Drake / TimesTen Performance Software
----------
Work Performed
OpenLDAP 2.0.9, including back-sql, was built successfully on Solaris
8 using gcc. The LDAP server itself, slapd, passed all tests bundled
with OpenLDAP. OpenLDAP was built using Sleepycat LDBM release 3.1.17
as the "native" storage manager.
The experimental back-sql facility in slapd was also built
successfully. It was built using Oracle release 8.1.7 and the Oracle
ODBC driver and ODBC Driver Manager from Merant. Rudimentary testing
was performed with the data and examples provided with back-sql, and
back-sql was found to be functional.
Slapd and back-sql were then tested with TimesTen, using TimesTen
4.1.1. Back-sql was not immediately functional with TimesTen due to a
number of SQL limitations in the TimesTen product.
Functional issues encountered were:
1. Back-sql issued SELECT statements including the construct,
"UPPER(?)". While TimesTen supports UPPER, it does not support the
use of parameters as input to builtin functions. Back-sql was
modified to convert the parameter to upper case prior to giving it
to the underlying database ... a change that is appropriate for all
databases.
2. Back-sql issued SELECT statements using the SQL CONCAT function.
TimesTen does not support this function. Back-sql was modified to
concatentate the necessary strings itself (in "C" code) prior to
passing the parameters to SQL. This change is also appropriate for
all databases, not just TimesTen.
Once these two issues were resolved, back-sql could successfully
process LDAP searches using the sample data and examples provided with
back-sql.
While performance was not measured at this point, numerous serious
performance problems were observed with the back-sql code and the
generated SQL. In particular:
1. In the process of implementing an LDAP search, back-sql will
generate and execute a SQL query for all object classes stored in
back-sql. During the source of generating each SQL query, it is
common for back-sql to determine that a particular object class can
not possibly have any members satisfying the search. For example,
this can occur if the query searches an attribute of the LDAP
object that does not exist in the SQL schema. In this case,
back-sql would generate and issue the SQL query anyway, including a
clause such as "WHERE 1=0" in the generated SELECT. The overhead
of parsing, optimizing and executing the query is non-trivial, and
the answer (the empty set) is known in advance. Solution: Back-sql
was modified to stop executing a SQL query when it can be
predetermined that the query will return no rows.
2. Searches in LDAP are fundamentally case-insensitive ("abc" is equal
to "aBc"). However, in SQL this is not normally the case.
Back-sql thus generated SQL SELECT statements including clauses of
the form, "WHERE UPPER(attribute) = 'JOE'". Even if an index is
defined on the attribute in the relational database, the index can
not be used to satisfy the query, as the index is case sensitive.
The relational database then is forced to scan all rows in the
table in order to satisfy the query ... an expensive and
non-scalable proposition. Solution: Back-sql was modified to allow
the schema designer to add additional "upper cased" columns to the
SQL schema. These columns, if present, contain an upper cased
version of the "standard" field, and will be used preferentially
for searching. Such columns can be provided for all searchable
columns, some columns, or no columns. An application using
database "triggers" or similar mechanisms can automatically
maintain these upper cased columns when the standard column is
changed.
3. In order to implement the hierarchical nature of LDAP object
hierarchies, OpenLDAP uses suffix searches in SQL. For example, to
find all objects in the subtree "o=TimesTen,c=us", a SQL SELECT
statement of the form, "WHERE UPPER(dn) LIKE '%O=TIMESTEN,C=US'"
would be employed. Aside from the UPPER issue discussed above, a
second performance problem in this query is the use of suffix
search. In TimesTen (and most relational databases), indexes can
be used to optimize exact-match searches and prefix searches.
However, suffix searches must be performed by scanning every row in
the table ... an expensive and non-scalable proposition. Solution:
Back-sql was modified to optionally add a new "dn_ru" column to the
ldap_entries table. This additional column, if present, contains a
byte-reversed and upper cased version of the DN. This allows
back-sql to generate indexable prefix searches. This column is
also easily maintained automatically through the use of triggers.
Results
A simple database schema was generated holding the LDAP objects and
attributes specified by our customer. An application was written to
generate test databases. Both TimesTen and Oracle 8.1.7 were
populated with 100,000 entry databases.
Load Times
Using "slapadd" followed by "slapindex", loading and indexing 100,000
entries in an LDBM database ran for 19 minutes 10 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
a disk based RDBMS took 17 minutes 53 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
TimesTen took 1 minute 40 seconds.
Search Times
The command, "timex timesearch.sh '(cn=fname210100*)'" was used to
test search times. This command issues the same LDAP search 4000
times over a single LDAP connection. Both the client and server
(slapd) were run on the same machine.
With TimesTen as the database, 4000 queries took 14.93 seconds, for a
rate of 267.9 per second.
With a disk based RDBMS as the database, 4000 queries took 77.79 seconds,
for a
rate of 51.42 per second.
With LDBM as the database, 1 query takes 76 seconds, or 0.076 per
second. Something is clearly broken.
2001-08-03 01:28:59 +08:00
|
|
|
rc = backsql_process_filter(bsi,f->f_not);
|
2000-03-17 03:34:46 +08:00
|
|
|
bsi->flt_where=backsql_strcat(bsi->flt_where,&bsi->fwhere_len,")",NULL);
|
|
|
|
done=1;
|
|
|
|
break;
|
|
|
|
case LDAP_FILTER_PRESENT:
|
2001-10-22 21:23:05 +08:00
|
|
|
at_name=f->f_desc->ad_cname.bv_val;
|
2000-03-17 03:34:46 +08:00
|
|
|
break;
|
|
|
|
default:
|
2001-10-22 21:23:05 +08:00
|
|
|
at_name=f->f_av_desc->ad_cname.bv_val;
|
2000-03-17 03:34:46 +08:00
|
|
|
break;
|
|
|
|
}
|
A big bunch of improvements, contributed by Sam Drake and Raj Damani.
Summary of changes is cited below.
The patch still needs some cosmetic changes to be made, but is ready for testing.
-----Original Message-----
From: Sam Drake [mailto:drake@timesten.com]
Sent: Saturday, April 07, 2001 10:40 PM
To: 'mitya@seismic.ru'
Cc: openldap-devel@OpenLDAP.org
Subject: RE: Slapd frontend performance issues
FYI, here is a short description of the changes I made. I'll package up the
changes asap, but it may take a couple of days.
The performance numbers quoted in this report were seen at my location with
a 100,000 object database ... the slower numbers I mentioned earlier were
reported by a customer with a 1,000,000 object database.
I also can't explain the very poor performance I saw with OpenLDAP and LDBM
with a 100,000 object database.
...Sam Drake / TimesTen Performance Software
----------
Work Performed
OpenLDAP 2.0.9, including back-sql, was built successfully on Solaris
8 using gcc. The LDAP server itself, slapd, passed all tests bundled
with OpenLDAP. OpenLDAP was built using Sleepycat LDBM release 3.1.17
as the "native" storage manager.
The experimental back-sql facility in slapd was also built
successfully. It was built using Oracle release 8.1.7 and the Oracle
ODBC driver and ODBC Driver Manager from Merant. Rudimentary testing
was performed with the data and examples provided with back-sql, and
back-sql was found to be functional.
Slapd and back-sql were then tested with TimesTen, using TimesTen
4.1.1. Back-sql was not immediately functional with TimesTen due to a
number of SQL limitations in the TimesTen product.
Functional issues encountered were:
1. Back-sql issued SELECT statements including the construct,
"UPPER(?)". While TimesTen supports UPPER, it does not support the
use of parameters as input to builtin functions. Back-sql was
modified to convert the parameter to upper case prior to giving it
to the underlying database ... a change that is appropriate for all
databases.
2. Back-sql issued SELECT statements using the SQL CONCAT function.
TimesTen does not support this function. Back-sql was modified to
concatentate the necessary strings itself (in "C" code) prior to
passing the parameters to SQL. This change is also appropriate for
all databases, not just TimesTen.
Once these two issues were resolved, back-sql could successfully
process LDAP searches using the sample data and examples provided with
back-sql.
While performance was not measured at this point, numerous serious
performance problems were observed with the back-sql code and the
generated SQL. In particular:
1. In the process of implementing an LDAP search, back-sql will
generate and execute a SQL query for all object classes stored in
back-sql. During the source of generating each SQL query, it is
common for back-sql to determine that a particular object class can
not possibly have any members satisfying the search. For example,
this can occur if the query searches an attribute of the LDAP
object that does not exist in the SQL schema. In this case,
back-sql would generate and issue the SQL query anyway, including a
clause such as "WHERE 1=0" in the generated SELECT. The overhead
of parsing, optimizing and executing the query is non-trivial, and
the answer (the empty set) is known in advance. Solution: Back-sql
was modified to stop executing a SQL query when it can be
predetermined that the query will return no rows.
2. Searches in LDAP are fundamentally case-insensitive ("abc" is equal
to "aBc"). However, in SQL this is not normally the case.
Back-sql thus generated SQL SELECT statements including clauses of
the form, "WHERE UPPER(attribute) = 'JOE'". Even if an index is
defined on the attribute in the relational database, the index can
not be used to satisfy the query, as the index is case sensitive.
The relational database then is forced to scan all rows in the
table in order to satisfy the query ... an expensive and
non-scalable proposition. Solution: Back-sql was modified to allow
the schema designer to add additional "upper cased" columns to the
SQL schema. These columns, if present, contain an upper cased
version of the "standard" field, and will be used preferentially
for searching. Such columns can be provided for all searchable
columns, some columns, or no columns. An application using
database "triggers" or similar mechanisms can automatically
maintain these upper cased columns when the standard column is
changed.
3. In order to implement the hierarchical nature of LDAP object
hierarchies, OpenLDAP uses suffix searches in SQL. For example, to
find all objects in the subtree "o=TimesTen,c=us", a SQL SELECT
statement of the form, "WHERE UPPER(dn) LIKE '%O=TIMESTEN,C=US'"
would be employed. Aside from the UPPER issue discussed above, a
second performance problem in this query is the use of suffix
search. In TimesTen (and most relational databases), indexes can
be used to optimize exact-match searches and prefix searches.
However, suffix searches must be performed by scanning every row in
the table ... an expensive and non-scalable proposition. Solution:
Back-sql was modified to optionally add a new "dn_ru" column to the
ldap_entries table. This additional column, if present, contains a
byte-reversed and upper cased version of the DN. This allows
back-sql to generate indexable prefix searches. This column is
also easily maintained automatically through the use of triggers.
Results
A simple database schema was generated holding the LDAP objects and
attributes specified by our customer. An application was written to
generate test databases. Both TimesTen and Oracle 8.1.7 were
populated with 100,000 entry databases.
Load Times
Using "slapadd" followed by "slapindex", loading and indexing 100,000
entries in an LDBM database ran for 19 minutes 10 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
a disk based RDBMS took 17 minutes 53 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
TimesTen took 1 minute 40 seconds.
Search Times
The command, "timex timesearch.sh '(cn=fname210100*)'" was used to
test search times. This command issues the same LDAP search 4000
times over a single LDAP connection. Both the client and server
(slapd) were run on the same machine.
With TimesTen as the database, 4000 queries took 14.93 seconds, for a
rate of 267.9 per second.
With a disk based RDBMS as the database, 4000 queries took 77.79 seconds,
for a
rate of 51.42 per second.
With LDBM as the database, 1 query takes 76 seconds, or 0.076 per
second. Something is clearly broken.
2001-08-03 01:28:59 +08:00
|
|
|
|
|
|
|
if (rc == -1)
|
|
|
|
goto impossible; /* TimesTen : Don't run the query */
|
2000-03-17 03:34:46 +08:00
|
|
|
|
|
|
|
if (done)
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
if (strcasecmp(at_name,"objectclass"))
|
|
|
|
at=backsql_at_with_name(bsi->oc,at_name);
|
|
|
|
else
|
|
|
|
{
|
|
|
|
at=&oc_attr;
|
|
|
|
at->sel_expr=backsql_strcat(at->sel_expr,&len,"'",bsi->oc->name,"'",NULL);
|
|
|
|
}
|
|
|
|
if (at==NULL)
|
|
|
|
{
|
|
|
|
Debug(LDAP_DEBUG_TRACE,"backsql_process_filter(): attribute '%s' is not defined for objectclass '%s'\n",
|
|
|
|
at_name,bsi->oc->name,0);
|
2000-05-27 00:03:32 +08:00
|
|
|
bsi->flt_where=backsql_strcat(bsi->flt_where,&bsi->fwhere_len," 1=0 ",NULL);
|
A big bunch of improvements, contributed by Sam Drake and Raj Damani.
Summary of changes is cited below.
The patch still needs some cosmetic changes to be made, but is ready for testing.
-----Original Message-----
From: Sam Drake [mailto:drake@timesten.com]
Sent: Saturday, April 07, 2001 10:40 PM
To: 'mitya@seismic.ru'
Cc: openldap-devel@OpenLDAP.org
Subject: RE: Slapd frontend performance issues
FYI, here is a short description of the changes I made. I'll package up the
changes asap, but it may take a couple of days.
The performance numbers quoted in this report were seen at my location with
a 100,000 object database ... the slower numbers I mentioned earlier were
reported by a customer with a 1,000,000 object database.
I also can't explain the very poor performance I saw with OpenLDAP and LDBM
with a 100,000 object database.
...Sam Drake / TimesTen Performance Software
----------
Work Performed
OpenLDAP 2.0.9, including back-sql, was built successfully on Solaris
8 using gcc. The LDAP server itself, slapd, passed all tests bundled
with OpenLDAP. OpenLDAP was built using Sleepycat LDBM release 3.1.17
as the "native" storage manager.
The experimental back-sql facility in slapd was also built
successfully. It was built using Oracle release 8.1.7 and the Oracle
ODBC driver and ODBC Driver Manager from Merant. Rudimentary testing
was performed with the data and examples provided with back-sql, and
back-sql was found to be functional.
Slapd and back-sql were then tested with TimesTen, using TimesTen
4.1.1. Back-sql was not immediately functional with TimesTen due to a
number of SQL limitations in the TimesTen product.
Functional issues encountered were:
1. Back-sql issued SELECT statements including the construct,
"UPPER(?)". While TimesTen supports UPPER, it does not support the
use of parameters as input to builtin functions. Back-sql was
modified to convert the parameter to upper case prior to giving it
to the underlying database ... a change that is appropriate for all
databases.
2. Back-sql issued SELECT statements using the SQL CONCAT function.
TimesTen does not support this function. Back-sql was modified to
concatentate the necessary strings itself (in "C" code) prior to
passing the parameters to SQL. This change is also appropriate for
all databases, not just TimesTen.
Once these two issues were resolved, back-sql could successfully
process LDAP searches using the sample data and examples provided with
back-sql.
While performance was not measured at this point, numerous serious
performance problems were observed with the back-sql code and the
generated SQL. In particular:
1. In the process of implementing an LDAP search, back-sql will
generate and execute a SQL query for all object classes stored in
back-sql. During the source of generating each SQL query, it is
common for back-sql to determine that a particular object class can
not possibly have any members satisfying the search. For example,
this can occur if the query searches an attribute of the LDAP
object that does not exist in the SQL schema. In this case,
back-sql would generate and issue the SQL query anyway, including a
clause such as "WHERE 1=0" in the generated SELECT. The overhead
of parsing, optimizing and executing the query is non-trivial, and
the answer (the empty set) is known in advance. Solution: Back-sql
was modified to stop executing a SQL query when it can be
predetermined that the query will return no rows.
2. Searches in LDAP are fundamentally case-insensitive ("abc" is equal
to "aBc"). However, in SQL this is not normally the case.
Back-sql thus generated SQL SELECT statements including clauses of
the form, "WHERE UPPER(attribute) = 'JOE'". Even if an index is
defined on the attribute in the relational database, the index can
not be used to satisfy the query, as the index is case sensitive.
The relational database then is forced to scan all rows in the
table in order to satisfy the query ... an expensive and
non-scalable proposition. Solution: Back-sql was modified to allow
the schema designer to add additional "upper cased" columns to the
SQL schema. These columns, if present, contain an upper cased
version of the "standard" field, and will be used preferentially
for searching. Such columns can be provided for all searchable
columns, some columns, or no columns. An application using
database "triggers" or similar mechanisms can automatically
maintain these upper cased columns when the standard column is
changed.
3. In order to implement the hierarchical nature of LDAP object
hierarchies, OpenLDAP uses suffix searches in SQL. For example, to
find all objects in the subtree "o=TimesTen,c=us", a SQL SELECT
statement of the form, "WHERE UPPER(dn) LIKE '%O=TIMESTEN,C=US'"
would be employed. Aside from the UPPER issue discussed above, a
second performance problem in this query is the use of suffix
search. In TimesTen (and most relational databases), indexes can
be used to optimize exact-match searches and prefix searches.
However, suffix searches must be performed by scanning every row in
the table ... an expensive and non-scalable proposition. Solution:
Back-sql was modified to optionally add a new "dn_ru" column to the
ldap_entries table. This additional column, if present, contains a
byte-reversed and upper cased version of the DN. This allows
back-sql to generate indexable prefix searches. This column is
also easily maintained automatically through the use of triggers.
Results
A simple database schema was generated holding the LDAP objects and
attributes specified by our customer. An application was written to
generate test databases. Both TimesTen and Oracle 8.1.7 were
populated with 100,000 entry databases.
Load Times
Using "slapadd" followed by "slapindex", loading and indexing 100,000
entries in an LDBM database ran for 19 minutes 10 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
a disk based RDBMS took 17 minutes 53 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
TimesTen took 1 minute 40 seconds.
Search Times
The command, "timex timesearch.sh '(cn=fname210100*)'" was used to
test search times. This command issues the same LDAP search 4000
times over a single LDAP connection. Both the client and server
(slapd) were run on the same machine.
With TimesTen as the database, 4000 queries took 14.93 seconds, for a
rate of 267.9 per second.
With a disk based RDBMS as the database, 4000 queries took 77.79 seconds,
for a
rate of 51.42 per second.
With LDBM as the database, 1 query takes 76 seconds, or 0.076 per
second. Something is clearly broken.
2001-08-03 01:28:59 +08:00
|
|
|
goto impossible;
|
2000-03-17 03:34:46 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
backsql_merge_from_clause(&bsi->from,&bsi->from_len,at->from_tbls);
|
2001-09-07 21:04:11 +08:00
|
|
|
/*need to add this attribute to list of attrs to load, so that we could do test_filter() later*/
|
2000-03-17 03:34:46 +08:00
|
|
|
backsql_attrlist_add(bsi,at_name);
|
|
|
|
|
|
|
|
if (at->join_where != NULL && strstr(bsi->join_where,at->join_where)==NULL)
|
|
|
|
bsi->join_where=backsql_strcat(bsi->join_where,&bsi->jwhere_len," AND ",at->join_where,NULL);
|
|
|
|
|
2001-09-07 21:04:11 +08:00
|
|
|
/*if (at!=&oc_attr)
|
|
|
|
bsi->sel=backsql_strcat(bsi->sel,&bsi->sel_len,",",at->sel_expr," AS ",at->name,NULL);
|
|
|
|
*/
|
2000-03-17 03:34:46 +08:00
|
|
|
|
|
|
|
switch(f->f_choice)
|
|
|
|
{
|
|
|
|
case LDAP_FILTER_EQUALITY:
|
2001-09-07 21:04:11 +08:00
|
|
|
/*maybe we should check type of at->sel_expr here somehow,
|
|
|
|
* to know whether upper_func is applicable, but for now
|
|
|
|
* upper_func stuff is made for Oracle, where UPPER is
|
|
|
|
* safely applicable to NUMBER etc.
|
|
|
|
*/
|
A big bunch of improvements, contributed by Sam Drake and Raj Damani.
Summary of changes is cited below.
The patch still needs some cosmetic changes to be made, but is ready for testing.
-----Original Message-----
From: Sam Drake [mailto:drake@timesten.com]
Sent: Saturday, April 07, 2001 10:40 PM
To: 'mitya@seismic.ru'
Cc: openldap-devel@OpenLDAP.org
Subject: RE: Slapd frontend performance issues
FYI, here is a short description of the changes I made. I'll package up the
changes asap, but it may take a couple of days.
The performance numbers quoted in this report were seen at my location with
a 100,000 object database ... the slower numbers I mentioned earlier were
reported by a customer with a 1,000,000 object database.
I also can't explain the very poor performance I saw with OpenLDAP and LDBM
with a 100,000 object database.
...Sam Drake / TimesTen Performance Software
----------
Work Performed
OpenLDAP 2.0.9, including back-sql, was built successfully on Solaris
8 using gcc. The LDAP server itself, slapd, passed all tests bundled
with OpenLDAP. OpenLDAP was built using Sleepycat LDBM release 3.1.17
as the "native" storage manager.
The experimental back-sql facility in slapd was also built
successfully. It was built using Oracle release 8.1.7 and the Oracle
ODBC driver and ODBC Driver Manager from Merant. Rudimentary testing
was performed with the data and examples provided with back-sql, and
back-sql was found to be functional.
Slapd and back-sql were then tested with TimesTen, using TimesTen
4.1.1. Back-sql was not immediately functional with TimesTen due to a
number of SQL limitations in the TimesTen product.
Functional issues encountered were:
1. Back-sql issued SELECT statements including the construct,
"UPPER(?)". While TimesTen supports UPPER, it does not support the
use of parameters as input to builtin functions. Back-sql was
modified to convert the parameter to upper case prior to giving it
to the underlying database ... a change that is appropriate for all
databases.
2. Back-sql issued SELECT statements using the SQL CONCAT function.
TimesTen does not support this function. Back-sql was modified to
concatentate the necessary strings itself (in "C" code) prior to
passing the parameters to SQL. This change is also appropriate for
all databases, not just TimesTen.
Once these two issues were resolved, back-sql could successfully
process LDAP searches using the sample data and examples provided with
back-sql.
While performance was not measured at this point, numerous serious
performance problems were observed with the back-sql code and the
generated SQL. In particular:
1. In the process of implementing an LDAP search, back-sql will
generate and execute a SQL query for all object classes stored in
back-sql. During the source of generating each SQL query, it is
common for back-sql to determine that a particular object class can
not possibly have any members satisfying the search. For example,
this can occur if the query searches an attribute of the LDAP
object that does not exist in the SQL schema. In this case,
back-sql would generate and issue the SQL query anyway, including a
clause such as "WHERE 1=0" in the generated SELECT. The overhead
of parsing, optimizing and executing the query is non-trivial, and
the answer (the empty set) is known in advance. Solution: Back-sql
was modified to stop executing a SQL query when it can be
predetermined that the query will return no rows.
2. Searches in LDAP are fundamentally case-insensitive ("abc" is equal
to "aBc"). However, in SQL this is not normally the case.
Back-sql thus generated SQL SELECT statements including clauses of
the form, "WHERE UPPER(attribute) = 'JOE'". Even if an index is
defined on the attribute in the relational database, the index can
not be used to satisfy the query, as the index is case sensitive.
The relational database then is forced to scan all rows in the
table in order to satisfy the query ... an expensive and
non-scalable proposition. Solution: Back-sql was modified to allow
the schema designer to add additional "upper cased" columns to the
SQL schema. These columns, if present, contain an upper cased
version of the "standard" field, and will be used preferentially
for searching. Such columns can be provided for all searchable
columns, some columns, or no columns. An application using
database "triggers" or similar mechanisms can automatically
maintain these upper cased columns when the standard column is
changed.
3. In order to implement the hierarchical nature of LDAP object
hierarchies, OpenLDAP uses suffix searches in SQL. For example, to
find all objects in the subtree "o=TimesTen,c=us", a SQL SELECT
statement of the form, "WHERE UPPER(dn) LIKE '%O=TIMESTEN,C=US'"
would be employed. Aside from the UPPER issue discussed above, a
second performance problem in this query is the use of suffix
search. In TimesTen (and most relational databases), indexes can
be used to optimize exact-match searches and prefix searches.
However, suffix searches must be performed by scanning every row in
the table ... an expensive and non-scalable proposition. Solution:
Back-sql was modified to optionally add a new "dn_ru" column to the
ldap_entries table. This additional column, if present, contains a
byte-reversed and upper cased version of the DN. This allows
back-sql to generate indexable prefix searches. This column is
also easily maintained automatically through the use of triggers.
Results
A simple database schema was generated holding the LDAP objects and
attributes specified by our customer. An application was written to
generate test databases. Both TimesTen and Oracle 8.1.7 were
populated with 100,000 entry databases.
Load Times
Using "slapadd" followed by "slapindex", loading and indexing 100,000
entries in an LDBM database ran for 19 minutes 10 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
a disk based RDBMS took 17 minutes 53 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
TimesTen took 1 minute 40 seconds.
Search Times
The command, "timex timesearch.sh '(cn=fname210100*)'" was used to
test search times. This command issues the same LDAP search 4000
times over a single LDAP connection. Both the client and server
(slapd) were run on the same machine.
With TimesTen as the database, 4000 queries took 14.93 seconds, for a
rate of 267.9 per second.
With a disk based RDBMS as the database, 4000 queries took 77.79 seconds,
for a
rate of 51.42 per second.
With LDBM as the database, 1 query takes 76 seconds, or 0.076 per
second. Something is clearly broken.
2001-08-03 01:28:59 +08:00
|
|
|
if (bsi->bi->upper_func) {
|
|
|
|
if (at->sel_expr_u)
|
|
|
|
bsi->flt_where=backsql_strcat(bsi->flt_where,&bsi->fwhere_len,"(",
|
|
|
|
at->sel_expr_u,"='",
|
|
|
|
ldap_pvt_str2upper(f->f_av_value->bv_val),"')",
|
|
|
|
NULL);
|
|
|
|
else
|
|
|
|
bsi->flt_where=backsql_strcat(bsi->flt_where,&bsi->fwhere_len,"(",
|
2000-05-27 00:03:32 +08:00
|
|
|
bsi->bi->upper_func,"(",at->sel_expr,")='",
|
A big bunch of improvements, contributed by Sam Drake and Raj Damani.
Summary of changes is cited below.
The patch still needs some cosmetic changes to be made, but is ready for testing.
-----Original Message-----
From: Sam Drake [mailto:drake@timesten.com]
Sent: Saturday, April 07, 2001 10:40 PM
To: 'mitya@seismic.ru'
Cc: openldap-devel@OpenLDAP.org
Subject: RE: Slapd frontend performance issues
FYI, here is a short description of the changes I made. I'll package up the
changes asap, but it may take a couple of days.
The performance numbers quoted in this report were seen at my location with
a 100,000 object database ... the slower numbers I mentioned earlier were
reported by a customer with a 1,000,000 object database.
I also can't explain the very poor performance I saw with OpenLDAP and LDBM
with a 100,000 object database.
...Sam Drake / TimesTen Performance Software
----------
Work Performed
OpenLDAP 2.0.9, including back-sql, was built successfully on Solaris
8 using gcc. The LDAP server itself, slapd, passed all tests bundled
with OpenLDAP. OpenLDAP was built using Sleepycat LDBM release 3.1.17
as the "native" storage manager.
The experimental back-sql facility in slapd was also built
successfully. It was built using Oracle release 8.1.7 and the Oracle
ODBC driver and ODBC Driver Manager from Merant. Rudimentary testing
was performed with the data and examples provided with back-sql, and
back-sql was found to be functional.
Slapd and back-sql were then tested with TimesTen, using TimesTen
4.1.1. Back-sql was not immediately functional with TimesTen due to a
number of SQL limitations in the TimesTen product.
Functional issues encountered were:
1. Back-sql issued SELECT statements including the construct,
"UPPER(?)". While TimesTen supports UPPER, it does not support the
use of parameters as input to builtin functions. Back-sql was
modified to convert the parameter to upper case prior to giving it
to the underlying database ... a change that is appropriate for all
databases.
2. Back-sql issued SELECT statements using the SQL CONCAT function.
TimesTen does not support this function. Back-sql was modified to
concatentate the necessary strings itself (in "C" code) prior to
passing the parameters to SQL. This change is also appropriate for
all databases, not just TimesTen.
Once these two issues were resolved, back-sql could successfully
process LDAP searches using the sample data and examples provided with
back-sql.
While performance was not measured at this point, numerous serious
performance problems were observed with the back-sql code and the
generated SQL. In particular:
1. In the process of implementing an LDAP search, back-sql will
generate and execute a SQL query for all object classes stored in
back-sql. During the source of generating each SQL query, it is
common for back-sql to determine that a particular object class can
not possibly have any members satisfying the search. For example,
this can occur if the query searches an attribute of the LDAP
object that does not exist in the SQL schema. In this case,
back-sql would generate and issue the SQL query anyway, including a
clause such as "WHERE 1=0" in the generated SELECT. The overhead
of parsing, optimizing and executing the query is non-trivial, and
the answer (the empty set) is known in advance. Solution: Back-sql
was modified to stop executing a SQL query when it can be
predetermined that the query will return no rows.
2. Searches in LDAP are fundamentally case-insensitive ("abc" is equal
to "aBc"). However, in SQL this is not normally the case.
Back-sql thus generated SQL SELECT statements including clauses of
the form, "WHERE UPPER(attribute) = 'JOE'". Even if an index is
defined on the attribute in the relational database, the index can
not be used to satisfy the query, as the index is case sensitive.
The relational database then is forced to scan all rows in the
table in order to satisfy the query ... an expensive and
non-scalable proposition. Solution: Back-sql was modified to allow
the schema designer to add additional "upper cased" columns to the
SQL schema. These columns, if present, contain an upper cased
version of the "standard" field, and will be used preferentially
for searching. Such columns can be provided for all searchable
columns, some columns, or no columns. An application using
database "triggers" or similar mechanisms can automatically
maintain these upper cased columns when the standard column is
changed.
3. In order to implement the hierarchical nature of LDAP object
hierarchies, OpenLDAP uses suffix searches in SQL. For example, to
find all objects in the subtree "o=TimesTen,c=us", a SQL SELECT
statement of the form, "WHERE UPPER(dn) LIKE '%O=TIMESTEN,C=US'"
would be employed. Aside from the UPPER issue discussed above, a
second performance problem in this query is the use of suffix
search. In TimesTen (and most relational databases), indexes can
be used to optimize exact-match searches and prefix searches.
However, suffix searches must be performed by scanning every row in
the table ... an expensive and non-scalable proposition. Solution:
Back-sql was modified to optionally add a new "dn_ru" column to the
ldap_entries table. This additional column, if present, contains a
byte-reversed and upper cased version of the DN. This allows
back-sql to generate indexable prefix searches. This column is
also easily maintained automatically through the use of triggers.
Results
A simple database schema was generated holding the LDAP objects and
attributes specified by our customer. An application was written to
generate test databases. Both TimesTen and Oracle 8.1.7 were
populated with 100,000 entry databases.
Load Times
Using "slapadd" followed by "slapindex", loading and indexing 100,000
entries in an LDBM database ran for 19 minutes 10 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
a disk based RDBMS took 17 minutes 53 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
TimesTen took 1 minute 40 seconds.
Search Times
The command, "timex timesearch.sh '(cn=fname210100*)'" was used to
test search times. This command issues the same LDAP search 4000
times over a single LDAP connection. Both the client and server
(slapd) were run on the same machine.
With TimesTen as the database, 4000 queries took 14.93 seconds, for a
rate of 267.9 per second.
With a disk based RDBMS as the database, 4000 queries took 77.79 seconds,
for a
rate of 51.42 per second.
With LDBM as the database, 1 query takes 76 seconds, or 0.076 per
second. Something is clearly broken.
2001-08-03 01:28:59 +08:00
|
|
|
ldap_pvt_str2upper(f->f_av_value->bv_val),"')",NULL);
|
|
|
|
}
|
2000-05-27 00:03:32 +08:00
|
|
|
else
|
|
|
|
bsi->flt_where=backsql_strcat(bsi->flt_where,&bsi->fwhere_len,"(",at->sel_expr,"='",
|
2000-06-30 05:14:43 +08:00
|
|
|
f->f_av_value->bv_val,"')",NULL);
|
|
|
|
|
2000-03-17 03:34:46 +08:00
|
|
|
break;
|
|
|
|
case LDAP_FILTER_GE:
|
|
|
|
bsi->flt_where=backsql_strcat(bsi->flt_where,&bsi->fwhere_len,"(",at->sel_expr,">=",
|
2000-06-30 05:14:43 +08:00
|
|
|
f->f_av_value->bv_val,")",NULL);
|
|
|
|
|
2000-03-17 03:34:46 +08:00
|
|
|
break;
|
|
|
|
case LDAP_FILTER_LE:
|
|
|
|
bsi->flt_where=backsql_strcat(bsi->flt_where,&bsi->fwhere_len,"(",at->sel_expr,"<=",
|
2000-06-30 05:14:43 +08:00
|
|
|
f->f_av_value->bv_val,")",NULL);
|
2000-03-17 03:34:46 +08:00
|
|
|
break;
|
|
|
|
case LDAP_FILTER_PRESENT:
|
|
|
|
bsi->flt_where=backsql_strcat(bsi->flt_where,&bsi->fwhere_len,"NOT (",at->sel_expr,
|
|
|
|
" IS NULL)",NULL);
|
|
|
|
break;
|
|
|
|
case LDAP_FILTER_SUBSTRINGS:
|
|
|
|
backsql_process_sub_filter(bsi,f);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
done:
|
|
|
|
if (oc_attr.sel_expr!=NULL)
|
|
|
|
free(oc_attr.sel_expr);
|
|
|
|
Debug(LDAP_DEBUG_TRACE,"<==backsql_process_filter()\n",0,0,0);
|
|
|
|
return 1;
|
A big bunch of improvements, contributed by Sam Drake and Raj Damani.
Summary of changes is cited below.
The patch still needs some cosmetic changes to be made, but is ready for testing.
-----Original Message-----
From: Sam Drake [mailto:drake@timesten.com]
Sent: Saturday, April 07, 2001 10:40 PM
To: 'mitya@seismic.ru'
Cc: openldap-devel@OpenLDAP.org
Subject: RE: Slapd frontend performance issues
FYI, here is a short description of the changes I made. I'll package up the
changes asap, but it may take a couple of days.
The performance numbers quoted in this report were seen at my location with
a 100,000 object database ... the slower numbers I mentioned earlier were
reported by a customer with a 1,000,000 object database.
I also can't explain the very poor performance I saw with OpenLDAP and LDBM
with a 100,000 object database.
...Sam Drake / TimesTen Performance Software
----------
Work Performed
OpenLDAP 2.0.9, including back-sql, was built successfully on Solaris
8 using gcc. The LDAP server itself, slapd, passed all tests bundled
with OpenLDAP. OpenLDAP was built using Sleepycat LDBM release 3.1.17
as the "native" storage manager.
The experimental back-sql facility in slapd was also built
successfully. It was built using Oracle release 8.1.7 and the Oracle
ODBC driver and ODBC Driver Manager from Merant. Rudimentary testing
was performed with the data and examples provided with back-sql, and
back-sql was found to be functional.
Slapd and back-sql were then tested with TimesTen, using TimesTen
4.1.1. Back-sql was not immediately functional with TimesTen due to a
number of SQL limitations in the TimesTen product.
Functional issues encountered were:
1. Back-sql issued SELECT statements including the construct,
"UPPER(?)". While TimesTen supports UPPER, it does not support the
use of parameters as input to builtin functions. Back-sql was
modified to convert the parameter to upper case prior to giving it
to the underlying database ... a change that is appropriate for all
databases.
2. Back-sql issued SELECT statements using the SQL CONCAT function.
TimesTen does not support this function. Back-sql was modified to
concatentate the necessary strings itself (in "C" code) prior to
passing the parameters to SQL. This change is also appropriate for
all databases, not just TimesTen.
Once these two issues were resolved, back-sql could successfully
process LDAP searches using the sample data and examples provided with
back-sql.
While performance was not measured at this point, numerous serious
performance problems were observed with the back-sql code and the
generated SQL. In particular:
1. In the process of implementing an LDAP search, back-sql will
generate and execute a SQL query for all object classes stored in
back-sql. During the source of generating each SQL query, it is
common for back-sql to determine that a particular object class can
not possibly have any members satisfying the search. For example,
this can occur if the query searches an attribute of the LDAP
object that does not exist in the SQL schema. In this case,
back-sql would generate and issue the SQL query anyway, including a
clause such as "WHERE 1=0" in the generated SELECT. The overhead
of parsing, optimizing and executing the query is non-trivial, and
the answer (the empty set) is known in advance. Solution: Back-sql
was modified to stop executing a SQL query when it can be
predetermined that the query will return no rows.
2. Searches in LDAP are fundamentally case-insensitive ("abc" is equal
to "aBc"). However, in SQL this is not normally the case.
Back-sql thus generated SQL SELECT statements including clauses of
the form, "WHERE UPPER(attribute) = 'JOE'". Even if an index is
defined on the attribute in the relational database, the index can
not be used to satisfy the query, as the index is case sensitive.
The relational database then is forced to scan all rows in the
table in order to satisfy the query ... an expensive and
non-scalable proposition. Solution: Back-sql was modified to allow
the schema designer to add additional "upper cased" columns to the
SQL schema. These columns, if present, contain an upper cased
version of the "standard" field, and will be used preferentially
for searching. Such columns can be provided for all searchable
columns, some columns, or no columns. An application using
database "triggers" or similar mechanisms can automatically
maintain these upper cased columns when the standard column is
changed.
3. In order to implement the hierarchical nature of LDAP object
hierarchies, OpenLDAP uses suffix searches in SQL. For example, to
find all objects in the subtree "o=TimesTen,c=us", a SQL SELECT
statement of the form, "WHERE UPPER(dn) LIKE '%O=TIMESTEN,C=US'"
would be employed. Aside from the UPPER issue discussed above, a
second performance problem in this query is the use of suffix
search. In TimesTen (and most relational databases), indexes can
be used to optimize exact-match searches and prefix searches.
However, suffix searches must be performed by scanning every row in
the table ... an expensive and non-scalable proposition. Solution:
Back-sql was modified to optionally add a new "dn_ru" column to the
ldap_entries table. This additional column, if present, contains a
byte-reversed and upper cased version of the DN. This allows
back-sql to generate indexable prefix searches. This column is
also easily maintained automatically through the use of triggers.
Results
A simple database schema was generated holding the LDAP objects and
attributes specified by our customer. An application was written to
generate test databases. Both TimesTen and Oracle 8.1.7 were
populated with 100,000 entry databases.
Load Times
Using "slapadd" followed by "slapindex", loading and indexing 100,000
entries in an LDBM database ran for 19 minutes 10 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
a disk based RDBMS took 17 minutes 53 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
TimesTen took 1 minute 40 seconds.
Search Times
The command, "timex timesearch.sh '(cn=fname210100*)'" was used to
test search times. This command issues the same LDAP search 4000
times over a single LDAP connection. Both the client and server
(slapd) were run on the same machine.
With TimesTen as the database, 4000 queries took 14.93 seconds, for a
rate of 267.9 per second.
With a disk based RDBMS as the database, 4000 queries took 77.79 seconds,
for a
rate of 51.42 per second.
With LDBM as the database, 1 query takes 76 seconds, or 0.076 per
second. Something is clearly broken.
2001-08-03 01:28:59 +08:00
|
|
|
|
|
|
|
impossible:
|
|
|
|
if (oc_attr.sel_expr!=NULL)
|
|
|
|
free(oc_attr.sel_expr);
|
|
|
|
Debug(LDAP_DEBUG_TRACE,"<==backsql_process_filter() returns -1\n",0,0,0);
|
|
|
|
return -1;
|
|
|
|
|
2000-03-17 03:34:46 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
char* backsql_srch_query(backsql_srch_info *bsi)
|
|
|
|
{
|
|
|
|
char *query=NULL;
|
|
|
|
int q_len=0;
|
A big bunch of improvements, contributed by Sam Drake and Raj Damani.
Summary of changes is cited below.
The patch still needs some cosmetic changes to be made, but is ready for testing.
-----Original Message-----
From: Sam Drake [mailto:drake@timesten.com]
Sent: Saturday, April 07, 2001 10:40 PM
To: 'mitya@seismic.ru'
Cc: openldap-devel@OpenLDAP.org
Subject: RE: Slapd frontend performance issues
FYI, here is a short description of the changes I made. I'll package up the
changes asap, but it may take a couple of days.
The performance numbers quoted in this report were seen at my location with
a 100,000 object database ... the slower numbers I mentioned earlier were
reported by a customer with a 1,000,000 object database.
I also can't explain the very poor performance I saw with OpenLDAP and LDBM
with a 100,000 object database.
...Sam Drake / TimesTen Performance Software
----------
Work Performed
OpenLDAP 2.0.9, including back-sql, was built successfully on Solaris
8 using gcc. The LDAP server itself, slapd, passed all tests bundled
with OpenLDAP. OpenLDAP was built using Sleepycat LDBM release 3.1.17
as the "native" storage manager.
The experimental back-sql facility in slapd was also built
successfully. It was built using Oracle release 8.1.7 and the Oracle
ODBC driver and ODBC Driver Manager from Merant. Rudimentary testing
was performed with the data and examples provided with back-sql, and
back-sql was found to be functional.
Slapd and back-sql were then tested with TimesTen, using TimesTen
4.1.1. Back-sql was not immediately functional with TimesTen due to a
number of SQL limitations in the TimesTen product.
Functional issues encountered were:
1. Back-sql issued SELECT statements including the construct,
"UPPER(?)". While TimesTen supports UPPER, it does not support the
use of parameters as input to builtin functions. Back-sql was
modified to convert the parameter to upper case prior to giving it
to the underlying database ... a change that is appropriate for all
databases.
2. Back-sql issued SELECT statements using the SQL CONCAT function.
TimesTen does not support this function. Back-sql was modified to
concatentate the necessary strings itself (in "C" code) prior to
passing the parameters to SQL. This change is also appropriate for
all databases, not just TimesTen.
Once these two issues were resolved, back-sql could successfully
process LDAP searches using the sample data and examples provided with
back-sql.
While performance was not measured at this point, numerous serious
performance problems were observed with the back-sql code and the
generated SQL. In particular:
1. In the process of implementing an LDAP search, back-sql will
generate and execute a SQL query for all object classes stored in
back-sql. During the source of generating each SQL query, it is
common for back-sql to determine that a particular object class can
not possibly have any members satisfying the search. For example,
this can occur if the query searches an attribute of the LDAP
object that does not exist in the SQL schema. In this case,
back-sql would generate and issue the SQL query anyway, including a
clause such as "WHERE 1=0" in the generated SELECT. The overhead
of parsing, optimizing and executing the query is non-trivial, and
the answer (the empty set) is known in advance. Solution: Back-sql
was modified to stop executing a SQL query when it can be
predetermined that the query will return no rows.
2. Searches in LDAP are fundamentally case-insensitive ("abc" is equal
to "aBc"). However, in SQL this is not normally the case.
Back-sql thus generated SQL SELECT statements including clauses of
the form, "WHERE UPPER(attribute) = 'JOE'". Even if an index is
defined on the attribute in the relational database, the index can
not be used to satisfy the query, as the index is case sensitive.
The relational database then is forced to scan all rows in the
table in order to satisfy the query ... an expensive and
non-scalable proposition. Solution: Back-sql was modified to allow
the schema designer to add additional "upper cased" columns to the
SQL schema. These columns, if present, contain an upper cased
version of the "standard" field, and will be used preferentially
for searching. Such columns can be provided for all searchable
columns, some columns, or no columns. An application using
database "triggers" or similar mechanisms can automatically
maintain these upper cased columns when the standard column is
changed.
3. In order to implement the hierarchical nature of LDAP object
hierarchies, OpenLDAP uses suffix searches in SQL. For example, to
find all objects in the subtree "o=TimesTen,c=us", a SQL SELECT
statement of the form, "WHERE UPPER(dn) LIKE '%O=TIMESTEN,C=US'"
would be employed. Aside from the UPPER issue discussed above, a
second performance problem in this query is the use of suffix
search. In TimesTen (and most relational databases), indexes can
be used to optimize exact-match searches and prefix searches.
However, suffix searches must be performed by scanning every row in
the table ... an expensive and non-scalable proposition. Solution:
Back-sql was modified to optionally add a new "dn_ru" column to the
ldap_entries table. This additional column, if present, contains a
byte-reversed and upper cased version of the DN. This allows
back-sql to generate indexable prefix searches. This column is
also easily maintained automatically through the use of triggers.
Results
A simple database schema was generated holding the LDAP objects and
attributes specified by our customer. An application was written to
generate test databases. Both TimesTen and Oracle 8.1.7 were
populated with 100,000 entry databases.
Load Times
Using "slapadd" followed by "slapindex", loading and indexing 100,000
entries in an LDBM database ran for 19 minutes 10 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
a disk based RDBMS took 17 minutes 53 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
TimesTen took 1 minute 40 seconds.
Search Times
The command, "timex timesearch.sh '(cn=fname210100*)'" was used to
test search times. This command issues the same LDAP search 4000
times over a single LDAP connection. Both the client and server
(slapd) were run on the same machine.
With TimesTen as the database, 4000 queries took 14.93 seconds, for a
rate of 267.9 per second.
With a disk based RDBMS as the database, 4000 queries took 77.79 seconds,
for a
rate of 51.42 per second.
With LDBM as the database, 1 query takes 76 seconds, or 0.076 per
second. Something is clearly broken.
2001-08-03 01:28:59 +08:00
|
|
|
int rc;
|
2000-03-17 03:34:46 +08:00
|
|
|
|
|
|
|
Debug(LDAP_DEBUG_TRACE,"==>backsql_srch_query()\n",0,0,0);
|
|
|
|
bsi->sel=NULL;
|
|
|
|
bsi->from=NULL;
|
|
|
|
bsi->join_where=NULL;
|
|
|
|
bsi->flt_where=NULL;
|
|
|
|
bsi->sel_len=bsi->from_len=bsi->jwhere_len=bsi->fwhere_len=0;
|
|
|
|
|
|
|
|
bsi->sel=backsql_strcat(bsi->sel,&bsi->sel_len,
|
2000-11-04 20:52:06 +08:00
|
|
|
"SELECT DISTINCT ldap_entries.id,",bsi->oc->keytbl,".",bsi->oc->keycol,
|
2000-03-17 03:34:46 +08:00
|
|
|
", '",bsi->oc->name,"' AS objectClass",
|
|
|
|
", ldap_entries.dn AS dn",
|
|
|
|
NULL);
|
|
|
|
bsi->from=backsql_strcat(bsi->from,&bsi->from_len," FROM ldap_entries,",bsi->oc->keytbl,NULL);
|
|
|
|
bsi->join_where=backsql_strcat(bsi->join_where,&bsi->jwhere_len," WHERE ",
|
|
|
|
bsi->oc->keytbl,".",bsi->oc->keycol,"=ldap_entries.keyval AND ",
|
2000-06-30 05:14:43 +08:00
|
|
|
"ldap_entries.oc_map_id=? AND ",NULL);
|
2000-03-17 03:34:46 +08:00
|
|
|
|
|
|
|
switch(bsi->scope)
|
|
|
|
{
|
|
|
|
case LDAP_SCOPE_BASE:
|
2000-05-27 00:03:32 +08:00
|
|
|
if (bsi->bi->upper_func)
|
|
|
|
{
|
|
|
|
bsi->join_where=backsql_strcat(bsi->join_where,&bsi->jwhere_len,
|
|
|
|
bsi->bi->upper_func,"(","ldap_entries.dn)=(?)",NULL);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
bsi->join_where=backsql_strcat(bsi->join_where,&bsi->jwhere_len,
|
2000-03-17 03:34:46 +08:00
|
|
|
"ldap_entries.dn=?",NULL);
|
2000-05-27 00:03:32 +08:00
|
|
|
}
|
2000-03-17 03:34:46 +08:00
|
|
|
break;
|
|
|
|
case LDAP_SCOPE_ONELEVEL:
|
|
|
|
bsi->join_where=backsql_strcat(bsi->join_where,&bsi->jwhere_len,
|
|
|
|
"ldap_entries.parent=?",NULL);
|
|
|
|
break;
|
|
|
|
case LDAP_SCOPE_SUBTREE:
|
|
|
|
bsi->join_where=backsql_strcat(bsi->join_where,&bsi->jwhere_len,
|
|
|
|
bsi->bi->subtree_cond,NULL);
|
|
|
|
break;
|
|
|
|
}
|
A big bunch of improvements, contributed by Sam Drake and Raj Damani.
Summary of changes is cited below.
The patch still needs some cosmetic changes to be made, but is ready for testing.
-----Original Message-----
From: Sam Drake [mailto:drake@timesten.com]
Sent: Saturday, April 07, 2001 10:40 PM
To: 'mitya@seismic.ru'
Cc: openldap-devel@OpenLDAP.org
Subject: RE: Slapd frontend performance issues
FYI, here is a short description of the changes I made. I'll package up the
changes asap, but it may take a couple of days.
The performance numbers quoted in this report were seen at my location with
a 100,000 object database ... the slower numbers I mentioned earlier were
reported by a customer with a 1,000,000 object database.
I also can't explain the very poor performance I saw with OpenLDAP and LDBM
with a 100,000 object database.
...Sam Drake / TimesTen Performance Software
----------
Work Performed
OpenLDAP 2.0.9, including back-sql, was built successfully on Solaris
8 using gcc. The LDAP server itself, slapd, passed all tests bundled
with OpenLDAP. OpenLDAP was built using Sleepycat LDBM release 3.1.17
as the "native" storage manager.
The experimental back-sql facility in slapd was also built
successfully. It was built using Oracle release 8.1.7 and the Oracle
ODBC driver and ODBC Driver Manager from Merant. Rudimentary testing
was performed with the data and examples provided with back-sql, and
back-sql was found to be functional.
Slapd and back-sql were then tested with TimesTen, using TimesTen
4.1.1. Back-sql was not immediately functional with TimesTen due to a
number of SQL limitations in the TimesTen product.
Functional issues encountered were:
1. Back-sql issued SELECT statements including the construct,
"UPPER(?)". While TimesTen supports UPPER, it does not support the
use of parameters as input to builtin functions. Back-sql was
modified to convert the parameter to upper case prior to giving it
to the underlying database ... a change that is appropriate for all
databases.
2. Back-sql issued SELECT statements using the SQL CONCAT function.
TimesTen does not support this function. Back-sql was modified to
concatentate the necessary strings itself (in "C" code) prior to
passing the parameters to SQL. This change is also appropriate for
all databases, not just TimesTen.
Once these two issues were resolved, back-sql could successfully
process LDAP searches using the sample data and examples provided with
back-sql.
While performance was not measured at this point, numerous serious
performance problems were observed with the back-sql code and the
generated SQL. In particular:
1. In the process of implementing an LDAP search, back-sql will
generate and execute a SQL query for all object classes stored in
back-sql. During the source of generating each SQL query, it is
common for back-sql to determine that a particular object class can
not possibly have any members satisfying the search. For example,
this can occur if the query searches an attribute of the LDAP
object that does not exist in the SQL schema. In this case,
back-sql would generate and issue the SQL query anyway, including a
clause such as "WHERE 1=0" in the generated SELECT. The overhead
of parsing, optimizing and executing the query is non-trivial, and
the answer (the empty set) is known in advance. Solution: Back-sql
was modified to stop executing a SQL query when it can be
predetermined that the query will return no rows.
2. Searches in LDAP are fundamentally case-insensitive ("abc" is equal
to "aBc"). However, in SQL this is not normally the case.
Back-sql thus generated SQL SELECT statements including clauses of
the form, "WHERE UPPER(attribute) = 'JOE'". Even if an index is
defined on the attribute in the relational database, the index can
not be used to satisfy the query, as the index is case sensitive.
The relational database then is forced to scan all rows in the
table in order to satisfy the query ... an expensive and
non-scalable proposition. Solution: Back-sql was modified to allow
the schema designer to add additional "upper cased" columns to the
SQL schema. These columns, if present, contain an upper cased
version of the "standard" field, and will be used preferentially
for searching. Such columns can be provided for all searchable
columns, some columns, or no columns. An application using
database "triggers" or similar mechanisms can automatically
maintain these upper cased columns when the standard column is
changed.
3. In order to implement the hierarchical nature of LDAP object
hierarchies, OpenLDAP uses suffix searches in SQL. For example, to
find all objects in the subtree "o=TimesTen,c=us", a SQL SELECT
statement of the form, "WHERE UPPER(dn) LIKE '%O=TIMESTEN,C=US'"
would be employed. Aside from the UPPER issue discussed above, a
second performance problem in this query is the use of suffix
search. In TimesTen (and most relational databases), indexes can
be used to optimize exact-match searches and prefix searches.
However, suffix searches must be performed by scanning every row in
the table ... an expensive and non-scalable proposition. Solution:
Back-sql was modified to optionally add a new "dn_ru" column to the
ldap_entries table. This additional column, if present, contains a
byte-reversed and upper cased version of the DN. This allows
back-sql to generate indexable prefix searches. This column is
also easily maintained automatically through the use of triggers.
Results
A simple database schema was generated holding the LDAP objects and
attributes specified by our customer. An application was written to
generate test databases. Both TimesTen and Oracle 8.1.7 were
populated with 100,000 entry databases.
Load Times
Using "slapadd" followed by "slapindex", loading and indexing 100,000
entries in an LDBM database ran for 19 minutes 10 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
a disk based RDBMS took 17 minutes 53 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
TimesTen took 1 minute 40 seconds.
Search Times
The command, "timex timesearch.sh '(cn=fname210100*)'" was used to
test search times. This command issues the same LDAP search 4000
times over a single LDAP connection. Both the client and server
(slapd) were run on the same machine.
With TimesTen as the database, 4000 queries took 14.93 seconds, for a
rate of 267.9 per second.
With a disk based RDBMS as the database, 4000 queries took 77.79 seconds,
for a
rate of 51.42 per second.
With LDBM as the database, 1 query takes 76 seconds, or 0.076 per
second. Something is clearly broken.
2001-08-03 01:28:59 +08:00
|
|
|
|
|
|
|
rc = backsql_process_filter(bsi, bsi->filter);
|
|
|
|
if (rc>0) {
|
2000-03-17 03:34:46 +08:00
|
|
|
query=backsql_strcat(query,&q_len,bsi->sel,bsi->from,bsi->join_where," AND ",bsi->flt_where,NULL);
|
A big bunch of improvements, contributed by Sam Drake and Raj Damani.
Summary of changes is cited below.
The patch still needs some cosmetic changes to be made, but is ready for testing.
-----Original Message-----
From: Sam Drake [mailto:drake@timesten.com]
Sent: Saturday, April 07, 2001 10:40 PM
To: 'mitya@seismic.ru'
Cc: openldap-devel@OpenLDAP.org
Subject: RE: Slapd frontend performance issues
FYI, here is a short description of the changes I made. I'll package up the
changes asap, but it may take a couple of days.
The performance numbers quoted in this report were seen at my location with
a 100,000 object database ... the slower numbers I mentioned earlier were
reported by a customer with a 1,000,000 object database.
I also can't explain the very poor performance I saw with OpenLDAP and LDBM
with a 100,000 object database.
...Sam Drake / TimesTen Performance Software
----------
Work Performed
OpenLDAP 2.0.9, including back-sql, was built successfully on Solaris
8 using gcc. The LDAP server itself, slapd, passed all tests bundled
with OpenLDAP. OpenLDAP was built using Sleepycat LDBM release 3.1.17
as the "native" storage manager.
The experimental back-sql facility in slapd was also built
successfully. It was built using Oracle release 8.1.7 and the Oracle
ODBC driver and ODBC Driver Manager from Merant. Rudimentary testing
was performed with the data and examples provided with back-sql, and
back-sql was found to be functional.
Slapd and back-sql were then tested with TimesTen, using TimesTen
4.1.1. Back-sql was not immediately functional with TimesTen due to a
number of SQL limitations in the TimesTen product.
Functional issues encountered were:
1. Back-sql issued SELECT statements including the construct,
"UPPER(?)". While TimesTen supports UPPER, it does not support the
use of parameters as input to builtin functions. Back-sql was
modified to convert the parameter to upper case prior to giving it
to the underlying database ... a change that is appropriate for all
databases.
2. Back-sql issued SELECT statements using the SQL CONCAT function.
TimesTen does not support this function. Back-sql was modified to
concatentate the necessary strings itself (in "C" code) prior to
passing the parameters to SQL. This change is also appropriate for
all databases, not just TimesTen.
Once these two issues were resolved, back-sql could successfully
process LDAP searches using the sample data and examples provided with
back-sql.
While performance was not measured at this point, numerous serious
performance problems were observed with the back-sql code and the
generated SQL. In particular:
1. In the process of implementing an LDAP search, back-sql will
generate and execute a SQL query for all object classes stored in
back-sql. During the source of generating each SQL query, it is
common for back-sql to determine that a particular object class can
not possibly have any members satisfying the search. For example,
this can occur if the query searches an attribute of the LDAP
object that does not exist in the SQL schema. In this case,
back-sql would generate and issue the SQL query anyway, including a
clause such as "WHERE 1=0" in the generated SELECT. The overhead
of parsing, optimizing and executing the query is non-trivial, and
the answer (the empty set) is known in advance. Solution: Back-sql
was modified to stop executing a SQL query when it can be
predetermined that the query will return no rows.
2. Searches in LDAP are fundamentally case-insensitive ("abc" is equal
to "aBc"). However, in SQL this is not normally the case.
Back-sql thus generated SQL SELECT statements including clauses of
the form, "WHERE UPPER(attribute) = 'JOE'". Even if an index is
defined on the attribute in the relational database, the index can
not be used to satisfy the query, as the index is case sensitive.
The relational database then is forced to scan all rows in the
table in order to satisfy the query ... an expensive and
non-scalable proposition. Solution: Back-sql was modified to allow
the schema designer to add additional "upper cased" columns to the
SQL schema. These columns, if present, contain an upper cased
version of the "standard" field, and will be used preferentially
for searching. Such columns can be provided for all searchable
columns, some columns, or no columns. An application using
database "triggers" or similar mechanisms can automatically
maintain these upper cased columns when the standard column is
changed.
3. In order to implement the hierarchical nature of LDAP object
hierarchies, OpenLDAP uses suffix searches in SQL. For example, to
find all objects in the subtree "o=TimesTen,c=us", a SQL SELECT
statement of the form, "WHERE UPPER(dn) LIKE '%O=TIMESTEN,C=US'"
would be employed. Aside from the UPPER issue discussed above, a
second performance problem in this query is the use of suffix
search. In TimesTen (and most relational databases), indexes can
be used to optimize exact-match searches and prefix searches.
However, suffix searches must be performed by scanning every row in
the table ... an expensive and non-scalable proposition. Solution:
Back-sql was modified to optionally add a new "dn_ru" column to the
ldap_entries table. This additional column, if present, contains a
byte-reversed and upper cased version of the DN. This allows
back-sql to generate indexable prefix searches. This column is
also easily maintained automatically through the use of triggers.
Results
A simple database schema was generated holding the LDAP objects and
attributes specified by our customer. An application was written to
generate test databases. Both TimesTen and Oracle 8.1.7 were
populated with 100,000 entry databases.
Load Times
Using "slapadd" followed by "slapindex", loading and indexing 100,000
entries in an LDBM database ran for 19 minutes 10 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
a disk based RDBMS took 17 minutes 53 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
TimesTen took 1 minute 40 seconds.
Search Times
The command, "timex timesearch.sh '(cn=fname210100*)'" was used to
test search times. This command issues the same LDAP search 4000
times over a single LDAP connection. Both the client and server
(slapd) were run on the same machine.
With TimesTen as the database, 4000 queries took 14.93 seconds, for a
rate of 267.9 per second.
With a disk based RDBMS as the database, 4000 queries took 77.79 seconds,
for a
rate of 51.42 per second.
With LDBM as the database, 1 query takes 76 seconds, or 0.076 per
second. Something is clearly broken.
2001-08-03 01:28:59 +08:00
|
|
|
}
|
|
|
|
else if (rc < 0) {
|
|
|
|
/* Indicates that there's no possible way the filter matches
|
|
|
|
anything. No need to issue the query. */
|
2000-03-17 03:34:46 +08:00
|
|
|
|
A big bunch of improvements, contributed by Sam Drake and Raj Damani.
Summary of changes is cited below.
The patch still needs some cosmetic changes to be made, but is ready for testing.
-----Original Message-----
From: Sam Drake [mailto:drake@timesten.com]
Sent: Saturday, April 07, 2001 10:40 PM
To: 'mitya@seismic.ru'
Cc: openldap-devel@OpenLDAP.org
Subject: RE: Slapd frontend performance issues
FYI, here is a short description of the changes I made. I'll package up the
changes asap, but it may take a couple of days.
The performance numbers quoted in this report were seen at my location with
a 100,000 object database ... the slower numbers I mentioned earlier were
reported by a customer with a 1,000,000 object database.
I also can't explain the very poor performance I saw with OpenLDAP and LDBM
with a 100,000 object database.
...Sam Drake / TimesTen Performance Software
----------
Work Performed
OpenLDAP 2.0.9, including back-sql, was built successfully on Solaris
8 using gcc. The LDAP server itself, slapd, passed all tests bundled
with OpenLDAP. OpenLDAP was built using Sleepycat LDBM release 3.1.17
as the "native" storage manager.
The experimental back-sql facility in slapd was also built
successfully. It was built using Oracle release 8.1.7 and the Oracle
ODBC driver and ODBC Driver Manager from Merant. Rudimentary testing
was performed with the data and examples provided with back-sql, and
back-sql was found to be functional.
Slapd and back-sql were then tested with TimesTen, using TimesTen
4.1.1. Back-sql was not immediately functional with TimesTen due to a
number of SQL limitations in the TimesTen product.
Functional issues encountered were:
1. Back-sql issued SELECT statements including the construct,
"UPPER(?)". While TimesTen supports UPPER, it does not support the
use of parameters as input to builtin functions. Back-sql was
modified to convert the parameter to upper case prior to giving it
to the underlying database ... a change that is appropriate for all
databases.
2. Back-sql issued SELECT statements using the SQL CONCAT function.
TimesTen does not support this function. Back-sql was modified to
concatentate the necessary strings itself (in "C" code) prior to
passing the parameters to SQL. This change is also appropriate for
all databases, not just TimesTen.
Once these two issues were resolved, back-sql could successfully
process LDAP searches using the sample data and examples provided with
back-sql.
While performance was not measured at this point, numerous serious
performance problems were observed with the back-sql code and the
generated SQL. In particular:
1. In the process of implementing an LDAP search, back-sql will
generate and execute a SQL query for all object classes stored in
back-sql. During the source of generating each SQL query, it is
common for back-sql to determine that a particular object class can
not possibly have any members satisfying the search. For example,
this can occur if the query searches an attribute of the LDAP
object that does not exist in the SQL schema. In this case,
back-sql would generate and issue the SQL query anyway, including a
clause such as "WHERE 1=0" in the generated SELECT. The overhead
of parsing, optimizing and executing the query is non-trivial, and
the answer (the empty set) is known in advance. Solution: Back-sql
was modified to stop executing a SQL query when it can be
predetermined that the query will return no rows.
2. Searches in LDAP are fundamentally case-insensitive ("abc" is equal
to "aBc"). However, in SQL this is not normally the case.
Back-sql thus generated SQL SELECT statements including clauses of
the form, "WHERE UPPER(attribute) = 'JOE'". Even if an index is
defined on the attribute in the relational database, the index can
not be used to satisfy the query, as the index is case sensitive.
The relational database then is forced to scan all rows in the
table in order to satisfy the query ... an expensive and
non-scalable proposition. Solution: Back-sql was modified to allow
the schema designer to add additional "upper cased" columns to the
SQL schema. These columns, if present, contain an upper cased
version of the "standard" field, and will be used preferentially
for searching. Such columns can be provided for all searchable
columns, some columns, or no columns. An application using
database "triggers" or similar mechanisms can automatically
maintain these upper cased columns when the standard column is
changed.
3. In order to implement the hierarchical nature of LDAP object
hierarchies, OpenLDAP uses suffix searches in SQL. For example, to
find all objects in the subtree "o=TimesTen,c=us", a SQL SELECT
statement of the form, "WHERE UPPER(dn) LIKE '%O=TIMESTEN,C=US'"
would be employed. Aside from the UPPER issue discussed above, a
second performance problem in this query is the use of suffix
search. In TimesTen (and most relational databases), indexes can
be used to optimize exact-match searches and prefix searches.
However, suffix searches must be performed by scanning every row in
the table ... an expensive and non-scalable proposition. Solution:
Back-sql was modified to optionally add a new "dn_ru" column to the
ldap_entries table. This additional column, if present, contains a
byte-reversed and upper cased version of the DN. This allows
back-sql to generate indexable prefix searches. This column is
also easily maintained automatically through the use of triggers.
Results
A simple database schema was generated holding the LDAP objects and
attributes specified by our customer. An application was written to
generate test databases. Both TimesTen and Oracle 8.1.7 were
populated with 100,000 entry databases.
Load Times
Using "slapadd" followed by "slapindex", loading and indexing 100,000
entries in an LDBM database ran for 19 minutes 10 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
a disk based RDBMS took 17 minutes 53 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
TimesTen took 1 minute 40 seconds.
Search Times
The command, "timex timesearch.sh '(cn=fname210100*)'" was used to
test search times. This command issues the same LDAP search 4000
times over a single LDAP connection. Both the client and server
(slapd) were run on the same machine.
With TimesTen as the database, 4000 queries took 14.93 seconds, for a
rate of 267.9 per second.
With a disk based RDBMS as the database, 4000 queries took 77.79 seconds,
for a
rate of 51.42 per second.
With LDBM as the database, 1 query takes 76 seconds, or 0.076 per
second. Something is clearly broken.
2001-08-03 01:28:59 +08:00
|
|
|
Debug(LDAP_DEBUG_TRACE,"<==backsql_srch_query() returns NULL\n",0,0,0);
|
|
|
|
free(query);
|
|
|
|
query = NULL;
|
|
|
|
}
|
2000-03-17 03:34:46 +08:00
|
|
|
|
|
|
|
free(bsi->sel);
|
|
|
|
free(bsi->from);
|
|
|
|
free(bsi->join_where);
|
|
|
|
free(bsi->flt_where);
|
|
|
|
bsi->sel_len=bsi->from_len=bsi->jwhere_len=bsi->fwhere_len=0;
|
|
|
|
Debug(LDAP_DEBUG_TRACE,"<==backsql_srch_query()\n",0,0,0);
|
|
|
|
return query;
|
|
|
|
}
|
|
|
|
|
|
|
|
int backsql_oc_get_candidates(backsql_oc_map_rec *oc,backsql_srch_info *bsi)
|
|
|
|
{
|
|
|
|
char *query=NULL;
|
|
|
|
SQLHSTMT sth;
|
|
|
|
RETCODE rc;
|
|
|
|
backsql_entryID base_id,*res,*c_id;
|
2001-09-08 02:50:52 +08:00
|
|
|
/*Entry *e;*/
|
2000-03-17 03:34:46 +08:00
|
|
|
BACKSQL_ROW_NTS row;
|
A big bunch of improvements, contributed by Sam Drake and Raj Damani.
Summary of changes is cited below.
The patch still needs some cosmetic changes to be made, but is ready for testing.
-----Original Message-----
From: Sam Drake [mailto:drake@timesten.com]
Sent: Saturday, April 07, 2001 10:40 PM
To: 'mitya@seismic.ru'
Cc: openldap-devel@OpenLDAP.org
Subject: RE: Slapd frontend performance issues
FYI, here is a short description of the changes I made. I'll package up the
changes asap, but it may take a couple of days.
The performance numbers quoted in this report were seen at my location with
a 100,000 object database ... the slower numbers I mentioned earlier were
reported by a customer with a 1,000,000 object database.
I also can't explain the very poor performance I saw with OpenLDAP and LDBM
with a 100,000 object database.
...Sam Drake / TimesTen Performance Software
----------
Work Performed
OpenLDAP 2.0.9, including back-sql, was built successfully on Solaris
8 using gcc. The LDAP server itself, slapd, passed all tests bundled
with OpenLDAP. OpenLDAP was built using Sleepycat LDBM release 3.1.17
as the "native" storage manager.
The experimental back-sql facility in slapd was also built
successfully. It was built using Oracle release 8.1.7 and the Oracle
ODBC driver and ODBC Driver Manager from Merant. Rudimentary testing
was performed with the data and examples provided with back-sql, and
back-sql was found to be functional.
Slapd and back-sql were then tested with TimesTen, using TimesTen
4.1.1. Back-sql was not immediately functional with TimesTen due to a
number of SQL limitations in the TimesTen product.
Functional issues encountered were:
1. Back-sql issued SELECT statements including the construct,
"UPPER(?)". While TimesTen supports UPPER, it does not support the
use of parameters as input to builtin functions. Back-sql was
modified to convert the parameter to upper case prior to giving it
to the underlying database ... a change that is appropriate for all
databases.
2. Back-sql issued SELECT statements using the SQL CONCAT function.
TimesTen does not support this function. Back-sql was modified to
concatentate the necessary strings itself (in "C" code) prior to
passing the parameters to SQL. This change is also appropriate for
all databases, not just TimesTen.
Once these two issues were resolved, back-sql could successfully
process LDAP searches using the sample data and examples provided with
back-sql.
While performance was not measured at this point, numerous serious
performance problems were observed with the back-sql code and the
generated SQL. In particular:
1. In the process of implementing an LDAP search, back-sql will
generate and execute a SQL query for all object classes stored in
back-sql. During the source of generating each SQL query, it is
common for back-sql to determine that a particular object class can
not possibly have any members satisfying the search. For example,
this can occur if the query searches an attribute of the LDAP
object that does not exist in the SQL schema. In this case,
back-sql would generate and issue the SQL query anyway, including a
clause such as "WHERE 1=0" in the generated SELECT. The overhead
of parsing, optimizing and executing the query is non-trivial, and
the answer (the empty set) is known in advance. Solution: Back-sql
was modified to stop executing a SQL query when it can be
predetermined that the query will return no rows.
2. Searches in LDAP are fundamentally case-insensitive ("abc" is equal
to "aBc"). However, in SQL this is not normally the case.
Back-sql thus generated SQL SELECT statements including clauses of
the form, "WHERE UPPER(attribute) = 'JOE'". Even if an index is
defined on the attribute in the relational database, the index can
not be used to satisfy the query, as the index is case sensitive.
The relational database then is forced to scan all rows in the
table in order to satisfy the query ... an expensive and
non-scalable proposition. Solution: Back-sql was modified to allow
the schema designer to add additional "upper cased" columns to the
SQL schema. These columns, if present, contain an upper cased
version of the "standard" field, and will be used preferentially
for searching. Such columns can be provided for all searchable
columns, some columns, or no columns. An application using
database "triggers" or similar mechanisms can automatically
maintain these upper cased columns when the standard column is
changed.
3. In order to implement the hierarchical nature of LDAP object
hierarchies, OpenLDAP uses suffix searches in SQL. For example, to
find all objects in the subtree "o=TimesTen,c=us", a SQL SELECT
statement of the form, "WHERE UPPER(dn) LIKE '%O=TIMESTEN,C=US'"
would be employed. Aside from the UPPER issue discussed above, a
second performance problem in this query is the use of suffix
search. In TimesTen (and most relational databases), indexes can
be used to optimize exact-match searches and prefix searches.
However, suffix searches must be performed by scanning every row in
the table ... an expensive and non-scalable proposition. Solution:
Back-sql was modified to optionally add a new "dn_ru" column to the
ldap_entries table. This additional column, if present, contains a
byte-reversed and upper cased version of the DN. This allows
back-sql to generate indexable prefix searches. This column is
also easily maintained automatically through the use of triggers.
Results
A simple database schema was generated holding the LDAP objects and
attributes specified by our customer. An application was written to
generate test databases. Both TimesTen and Oracle 8.1.7 were
populated with 100,000 entry databases.
Load Times
Using "slapadd" followed by "slapindex", loading and indexing 100,000
entries in an LDBM database ran for 19 minutes 10 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
a disk based RDBMS took 17 minutes 53 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
TimesTen took 1 minute 40 seconds.
Search Times
The command, "timex timesearch.sh '(cn=fname210100*)'" was used to
test search times. This command issues the same LDAP search 4000
times over a single LDAP connection. Both the client and server
(slapd) were run on the same machine.
With TimesTen as the database, 4000 queries took 14.93 seconds, for a
rate of 267.9 per second.
With a disk based RDBMS as the database, 4000 queries took 77.79 seconds,
for a
rate of 51.42 per second.
With LDBM as the database, 1 query takes 76 seconds, or 0.076 per
second. Something is clearly broken.
2001-08-03 01:28:59 +08:00
|
|
|
int i;
|
|
|
|
int j;
|
2001-09-08 02:50:52 +08:00
|
|
|
char temp_base_dn[BACKSQL_MAX_DN_LEN+1]; /* TimesTen*/
|
2000-03-17 03:34:46 +08:00
|
|
|
|
|
|
|
Debug(LDAP_DEBUG_TRACE,"==>backsql_oc_get_candidates(): oc='%s'\n",oc->name,0,0);
|
|
|
|
bsi->oc=oc;
|
|
|
|
query=backsql_srch_query(bsi);
|
|
|
|
if (query==NULL)
|
|
|
|
{
|
|
|
|
Debug(LDAP_DEBUG_TRACE,"backsql_oc_get_candidates(): could not construct query for objectclass\n",0,0,0);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
Debug(LDAP_DEBUG_TRACE,"Constructed query: %s\n",query,0,0);
|
A big bunch of improvements, contributed by Sam Drake and Raj Damani.
Summary of changes is cited below.
The patch still needs some cosmetic changes to be made, but is ready for testing.
-----Original Message-----
From: Sam Drake [mailto:drake@timesten.com]
Sent: Saturday, April 07, 2001 10:40 PM
To: 'mitya@seismic.ru'
Cc: openldap-devel@OpenLDAP.org
Subject: RE: Slapd frontend performance issues
FYI, here is a short description of the changes I made. I'll package up the
changes asap, but it may take a couple of days.
The performance numbers quoted in this report were seen at my location with
a 100,000 object database ... the slower numbers I mentioned earlier were
reported by a customer with a 1,000,000 object database.
I also can't explain the very poor performance I saw with OpenLDAP and LDBM
with a 100,000 object database.
...Sam Drake / TimesTen Performance Software
----------
Work Performed
OpenLDAP 2.0.9, including back-sql, was built successfully on Solaris
8 using gcc. The LDAP server itself, slapd, passed all tests bundled
with OpenLDAP. OpenLDAP was built using Sleepycat LDBM release 3.1.17
as the "native" storage manager.
The experimental back-sql facility in slapd was also built
successfully. It was built using Oracle release 8.1.7 and the Oracle
ODBC driver and ODBC Driver Manager from Merant. Rudimentary testing
was performed with the data and examples provided with back-sql, and
back-sql was found to be functional.
Slapd and back-sql were then tested with TimesTen, using TimesTen
4.1.1. Back-sql was not immediately functional with TimesTen due to a
number of SQL limitations in the TimesTen product.
Functional issues encountered were:
1. Back-sql issued SELECT statements including the construct,
"UPPER(?)". While TimesTen supports UPPER, it does not support the
use of parameters as input to builtin functions. Back-sql was
modified to convert the parameter to upper case prior to giving it
to the underlying database ... a change that is appropriate for all
databases.
2. Back-sql issued SELECT statements using the SQL CONCAT function.
TimesTen does not support this function. Back-sql was modified to
concatentate the necessary strings itself (in "C" code) prior to
passing the parameters to SQL. This change is also appropriate for
all databases, not just TimesTen.
Once these two issues were resolved, back-sql could successfully
process LDAP searches using the sample data and examples provided with
back-sql.
While performance was not measured at this point, numerous serious
performance problems were observed with the back-sql code and the
generated SQL. In particular:
1. In the process of implementing an LDAP search, back-sql will
generate and execute a SQL query for all object classes stored in
back-sql. During the source of generating each SQL query, it is
common for back-sql to determine that a particular object class can
not possibly have any members satisfying the search. For example,
this can occur if the query searches an attribute of the LDAP
object that does not exist in the SQL schema. In this case,
back-sql would generate and issue the SQL query anyway, including a
clause such as "WHERE 1=0" in the generated SELECT. The overhead
of parsing, optimizing and executing the query is non-trivial, and
the answer (the empty set) is known in advance. Solution: Back-sql
was modified to stop executing a SQL query when it can be
predetermined that the query will return no rows.
2. Searches in LDAP are fundamentally case-insensitive ("abc" is equal
to "aBc"). However, in SQL this is not normally the case.
Back-sql thus generated SQL SELECT statements including clauses of
the form, "WHERE UPPER(attribute) = 'JOE'". Even if an index is
defined on the attribute in the relational database, the index can
not be used to satisfy the query, as the index is case sensitive.
The relational database then is forced to scan all rows in the
table in order to satisfy the query ... an expensive and
non-scalable proposition. Solution: Back-sql was modified to allow
the schema designer to add additional "upper cased" columns to the
SQL schema. These columns, if present, contain an upper cased
version of the "standard" field, and will be used preferentially
for searching. Such columns can be provided for all searchable
columns, some columns, or no columns. An application using
database "triggers" or similar mechanisms can automatically
maintain these upper cased columns when the standard column is
changed.
3. In order to implement the hierarchical nature of LDAP object
hierarchies, OpenLDAP uses suffix searches in SQL. For example, to
find all objects in the subtree "o=TimesTen,c=us", a SQL SELECT
statement of the form, "WHERE UPPER(dn) LIKE '%O=TIMESTEN,C=US'"
would be employed. Aside from the UPPER issue discussed above, a
second performance problem in this query is the use of suffix
search. In TimesTen (and most relational databases), indexes can
be used to optimize exact-match searches and prefix searches.
However, suffix searches must be performed by scanning every row in
the table ... an expensive and non-scalable proposition. Solution:
Back-sql was modified to optionally add a new "dn_ru" column to the
ldap_entries table. This additional column, if present, contains a
byte-reversed and upper cased version of the DN. This allows
back-sql to generate indexable prefix searches. This column is
also easily maintained automatically through the use of triggers.
Results
A simple database schema was generated holding the LDAP objects and
attributes specified by our customer. An application was written to
generate test databases. Both TimesTen and Oracle 8.1.7 were
populated with 100,000 entry databases.
Load Times
Using "slapadd" followed by "slapindex", loading and indexing 100,000
entries in an LDBM database ran for 19 minutes 10 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
a disk based RDBMS took 17 minutes 53 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
TimesTen took 1 minute 40 seconds.
Search Times
The command, "timex timesearch.sh '(cn=fname210100*)'" was used to
test search times. This command issues the same LDAP search 4000
times over a single LDAP connection. Both the client and server
(slapd) were run on the same machine.
With TimesTen as the database, 4000 queries took 14.93 seconds, for a
rate of 267.9 per second.
With a disk based RDBMS as the database, 4000 queries took 77.79 seconds,
for a
rate of 51.42 per second.
With LDBM as the database, 1 query takes 76 seconds, or 0.076 per
second. Something is clearly broken.
2001-08-03 01:28:59 +08:00
|
|
|
|
2000-03-17 03:34:46 +08:00
|
|
|
if ((rc=backsql_Prepare(bsi->dbh,&sth,query,0)) != SQL_SUCCESS)
|
|
|
|
{
|
|
|
|
Debug(LDAP_DEBUG_TRACE,"backsql_oc_get_candidates(): error preparing query\n",0,0,0);
|
|
|
|
backsql_PrintErrors(bsi->bi->db_env,bsi->dbh,sth,rc);
|
|
|
|
free(query);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
free(query);
|
|
|
|
|
|
|
|
if (backsql_BindParamID(sth,1,&bsi->oc->id) != SQL_SUCCESS)
|
|
|
|
{
|
|
|
|
Debug(LDAP_DEBUG_TRACE,"backsql_oc_get_candidates(): error binding objectclass id parameter\n",0,0,0);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
switch(bsi->scope)
|
|
|
|
{
|
|
|
|
case LDAP_SCOPE_BASE:
|
|
|
|
if ((rc=backsql_BindParamStr(sth,2,bsi->base_dn,BACKSQL_MAX_DN_LEN)) != SQL_SUCCESS)
|
|
|
|
{
|
|
|
|
Debug(LDAP_DEBUG_TRACE,"backsql_oc_get_candidates(): error binding base_dn parameter\n",0,0,0);
|
|
|
|
backsql_PrintErrors(bsi->bi->db_env,bsi->dbh,sth,rc);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
break;
|
A big bunch of improvements, contributed by Sam Drake and Raj Damani.
Summary of changes is cited below.
The patch still needs some cosmetic changes to be made, but is ready for testing.
-----Original Message-----
From: Sam Drake [mailto:drake@timesten.com]
Sent: Saturday, April 07, 2001 10:40 PM
To: 'mitya@seismic.ru'
Cc: openldap-devel@OpenLDAP.org
Subject: RE: Slapd frontend performance issues
FYI, here is a short description of the changes I made. I'll package up the
changes asap, but it may take a couple of days.
The performance numbers quoted in this report were seen at my location with
a 100,000 object database ... the slower numbers I mentioned earlier were
reported by a customer with a 1,000,000 object database.
I also can't explain the very poor performance I saw with OpenLDAP and LDBM
with a 100,000 object database.
...Sam Drake / TimesTen Performance Software
----------
Work Performed
OpenLDAP 2.0.9, including back-sql, was built successfully on Solaris
8 using gcc. The LDAP server itself, slapd, passed all tests bundled
with OpenLDAP. OpenLDAP was built using Sleepycat LDBM release 3.1.17
as the "native" storage manager.
The experimental back-sql facility in slapd was also built
successfully. It was built using Oracle release 8.1.7 and the Oracle
ODBC driver and ODBC Driver Manager from Merant. Rudimentary testing
was performed with the data and examples provided with back-sql, and
back-sql was found to be functional.
Slapd and back-sql were then tested with TimesTen, using TimesTen
4.1.1. Back-sql was not immediately functional with TimesTen due to a
number of SQL limitations in the TimesTen product.
Functional issues encountered were:
1. Back-sql issued SELECT statements including the construct,
"UPPER(?)". While TimesTen supports UPPER, it does not support the
use of parameters as input to builtin functions. Back-sql was
modified to convert the parameter to upper case prior to giving it
to the underlying database ... a change that is appropriate for all
databases.
2. Back-sql issued SELECT statements using the SQL CONCAT function.
TimesTen does not support this function. Back-sql was modified to
concatentate the necessary strings itself (in "C" code) prior to
passing the parameters to SQL. This change is also appropriate for
all databases, not just TimesTen.
Once these two issues were resolved, back-sql could successfully
process LDAP searches using the sample data and examples provided with
back-sql.
While performance was not measured at this point, numerous serious
performance problems were observed with the back-sql code and the
generated SQL. In particular:
1. In the process of implementing an LDAP search, back-sql will
generate and execute a SQL query for all object classes stored in
back-sql. During the source of generating each SQL query, it is
common for back-sql to determine that a particular object class can
not possibly have any members satisfying the search. For example,
this can occur if the query searches an attribute of the LDAP
object that does not exist in the SQL schema. In this case,
back-sql would generate and issue the SQL query anyway, including a
clause such as "WHERE 1=0" in the generated SELECT. The overhead
of parsing, optimizing and executing the query is non-trivial, and
the answer (the empty set) is known in advance. Solution: Back-sql
was modified to stop executing a SQL query when it can be
predetermined that the query will return no rows.
2. Searches in LDAP are fundamentally case-insensitive ("abc" is equal
to "aBc"). However, in SQL this is not normally the case.
Back-sql thus generated SQL SELECT statements including clauses of
the form, "WHERE UPPER(attribute) = 'JOE'". Even if an index is
defined on the attribute in the relational database, the index can
not be used to satisfy the query, as the index is case sensitive.
The relational database then is forced to scan all rows in the
table in order to satisfy the query ... an expensive and
non-scalable proposition. Solution: Back-sql was modified to allow
the schema designer to add additional "upper cased" columns to the
SQL schema. These columns, if present, contain an upper cased
version of the "standard" field, and will be used preferentially
for searching. Such columns can be provided for all searchable
columns, some columns, or no columns. An application using
database "triggers" or similar mechanisms can automatically
maintain these upper cased columns when the standard column is
changed.
3. In order to implement the hierarchical nature of LDAP object
hierarchies, OpenLDAP uses suffix searches in SQL. For example, to
find all objects in the subtree "o=TimesTen,c=us", a SQL SELECT
statement of the form, "WHERE UPPER(dn) LIKE '%O=TIMESTEN,C=US'"
would be employed. Aside from the UPPER issue discussed above, a
second performance problem in this query is the use of suffix
search. In TimesTen (and most relational databases), indexes can
be used to optimize exact-match searches and prefix searches.
However, suffix searches must be performed by scanning every row in
the table ... an expensive and non-scalable proposition. Solution:
Back-sql was modified to optionally add a new "dn_ru" column to the
ldap_entries table. This additional column, if present, contains a
byte-reversed and upper cased version of the DN. This allows
back-sql to generate indexable prefix searches. This column is
also easily maintained automatically through the use of triggers.
Results
A simple database schema was generated holding the LDAP objects and
attributes specified by our customer. An application was written to
generate test databases. Both TimesTen and Oracle 8.1.7 were
populated with 100,000 entry databases.
Load Times
Using "slapadd" followed by "slapindex", loading and indexing 100,000
entries in an LDBM database ran for 19 minutes 10 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
a disk based RDBMS took 17 minutes 53 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
TimesTen took 1 minute 40 seconds.
Search Times
The command, "timex timesearch.sh '(cn=fname210100*)'" was used to
test search times. This command issues the same LDAP search 4000
times over a single LDAP connection. Both the client and server
(slapd) were run on the same machine.
With TimesTen as the database, 4000 queries took 14.93 seconds, for a
rate of 267.9 per second.
With a disk based RDBMS as the database, 4000 queries took 77.79 seconds,
for a
rate of 51.42 per second.
With LDBM as the database, 1 query takes 76 seconds, or 0.076 per
second. Something is clearly broken.
2001-08-03 01:28:59 +08:00
|
|
|
|
|
|
|
case LDAP_SCOPE_SUBTREE:
|
|
|
|
/* Sets the parameters for the SQL built earlier */
|
|
|
|
/* NOTE that all the databases could actually use the TimesTen version,
|
|
|
|
which would be cleaner and would also eliminate the need for the
|
|
|
|
subtree_cond line in the configuration file. For now, I'm leaving
|
|
|
|
it the way it is, so non-TimesTen databases use the original code.
|
|
|
|
But at some point this should get cleaned up. */
|
|
|
|
/* If "dn" is being used, do a suffix search.
|
|
|
|
If "dn_ru" is being used, do a prefix search. */
|
|
|
|
|
|
|
|
if (bsi->bi->has_ldapinfo_dn_ru) {
|
|
|
|
temp_base_dn[0] = '\0';
|
|
|
|
for ((i=0, j=strlen(bsi->base_dn)-1); j >= 0; (i++, j--)) {
|
|
|
|
*(temp_base_dn+i) = toupper(*(bsi->base_dn+j));
|
|
|
|
}
|
|
|
|
*(temp_base_dn+i) = '%';
|
|
|
|
*(temp_base_dn+i+1) = '\0';
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
strcpy(temp_base_dn, "%");
|
|
|
|
for (i = 0; *(bsi->base_dn+i); i++) {
|
|
|
|
*(temp_base_dn+i+1) = toupper(*(bsi->base_dn+i));
|
|
|
|
}
|
|
|
|
*(temp_base_dn+i+1) = '\0';
|
|
|
|
}
|
|
|
|
Debug(LDAP_DEBUG_TRACE, "dn '%s'\n", temp_base_dn, 0, 0);
|
|
|
|
|
|
|
|
if ((rc=backsql_BindParamStr(sth,2,temp_base_dn,BACKSQL_MAX_DN_LEN)) !=
|
|
|
|
SQL_SUCCESS)
|
|
|
|
{
|
2001-09-07 21:04:11 +08:00
|
|
|
Debug(LDAP_DEBUG_TRACE,"backsql_oc_get_candidates(): error binding base_dn parameter (2)\n",0,0,0);
|
A big bunch of improvements, contributed by Sam Drake and Raj Damani.
Summary of changes is cited below.
The patch still needs some cosmetic changes to be made, but is ready for testing.
-----Original Message-----
From: Sam Drake [mailto:drake@timesten.com]
Sent: Saturday, April 07, 2001 10:40 PM
To: 'mitya@seismic.ru'
Cc: openldap-devel@OpenLDAP.org
Subject: RE: Slapd frontend performance issues
FYI, here is a short description of the changes I made. I'll package up the
changes asap, but it may take a couple of days.
The performance numbers quoted in this report were seen at my location with
a 100,000 object database ... the slower numbers I mentioned earlier were
reported by a customer with a 1,000,000 object database.
I also can't explain the very poor performance I saw with OpenLDAP and LDBM
with a 100,000 object database.
...Sam Drake / TimesTen Performance Software
----------
Work Performed
OpenLDAP 2.0.9, including back-sql, was built successfully on Solaris
8 using gcc. The LDAP server itself, slapd, passed all tests bundled
with OpenLDAP. OpenLDAP was built using Sleepycat LDBM release 3.1.17
as the "native" storage manager.
The experimental back-sql facility in slapd was also built
successfully. It was built using Oracle release 8.1.7 and the Oracle
ODBC driver and ODBC Driver Manager from Merant. Rudimentary testing
was performed with the data and examples provided with back-sql, and
back-sql was found to be functional.
Slapd and back-sql were then tested with TimesTen, using TimesTen
4.1.1. Back-sql was not immediately functional with TimesTen due to a
number of SQL limitations in the TimesTen product.
Functional issues encountered were:
1. Back-sql issued SELECT statements including the construct,
"UPPER(?)". While TimesTen supports UPPER, it does not support the
use of parameters as input to builtin functions. Back-sql was
modified to convert the parameter to upper case prior to giving it
to the underlying database ... a change that is appropriate for all
databases.
2. Back-sql issued SELECT statements using the SQL CONCAT function.
TimesTen does not support this function. Back-sql was modified to
concatentate the necessary strings itself (in "C" code) prior to
passing the parameters to SQL. This change is also appropriate for
all databases, not just TimesTen.
Once these two issues were resolved, back-sql could successfully
process LDAP searches using the sample data and examples provided with
back-sql.
While performance was not measured at this point, numerous serious
performance problems were observed with the back-sql code and the
generated SQL. In particular:
1. In the process of implementing an LDAP search, back-sql will
generate and execute a SQL query for all object classes stored in
back-sql. During the source of generating each SQL query, it is
common for back-sql to determine that a particular object class can
not possibly have any members satisfying the search. For example,
this can occur if the query searches an attribute of the LDAP
object that does not exist in the SQL schema. In this case,
back-sql would generate and issue the SQL query anyway, including a
clause such as "WHERE 1=0" in the generated SELECT. The overhead
of parsing, optimizing and executing the query is non-trivial, and
the answer (the empty set) is known in advance. Solution: Back-sql
was modified to stop executing a SQL query when it can be
predetermined that the query will return no rows.
2. Searches in LDAP are fundamentally case-insensitive ("abc" is equal
to "aBc"). However, in SQL this is not normally the case.
Back-sql thus generated SQL SELECT statements including clauses of
the form, "WHERE UPPER(attribute) = 'JOE'". Even if an index is
defined on the attribute in the relational database, the index can
not be used to satisfy the query, as the index is case sensitive.
The relational database then is forced to scan all rows in the
table in order to satisfy the query ... an expensive and
non-scalable proposition. Solution: Back-sql was modified to allow
the schema designer to add additional "upper cased" columns to the
SQL schema. These columns, if present, contain an upper cased
version of the "standard" field, and will be used preferentially
for searching. Such columns can be provided for all searchable
columns, some columns, or no columns. An application using
database "triggers" or similar mechanisms can automatically
maintain these upper cased columns when the standard column is
changed.
3. In order to implement the hierarchical nature of LDAP object
hierarchies, OpenLDAP uses suffix searches in SQL. For example, to
find all objects in the subtree "o=TimesTen,c=us", a SQL SELECT
statement of the form, "WHERE UPPER(dn) LIKE '%O=TIMESTEN,C=US'"
would be employed. Aside from the UPPER issue discussed above, a
second performance problem in this query is the use of suffix
search. In TimesTen (and most relational databases), indexes can
be used to optimize exact-match searches and prefix searches.
However, suffix searches must be performed by scanning every row in
the table ... an expensive and non-scalable proposition. Solution:
Back-sql was modified to optionally add a new "dn_ru" column to the
ldap_entries table. This additional column, if present, contains a
byte-reversed and upper cased version of the DN. This allows
back-sql to generate indexable prefix searches. This column is
also easily maintained automatically through the use of triggers.
Results
A simple database schema was generated holding the LDAP objects and
attributes specified by our customer. An application was written to
generate test databases. Both TimesTen and Oracle 8.1.7 were
populated with 100,000 entry databases.
Load Times
Using "slapadd" followed by "slapindex", loading and indexing 100,000
entries in an LDBM database ran for 19 minutes 10 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
a disk based RDBMS took 17 minutes 53 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
TimesTen took 1 minute 40 seconds.
Search Times
The command, "timex timesearch.sh '(cn=fname210100*)'" was used to
test search times. This command issues the same LDAP search 4000
times over a single LDAP connection. Both the client and server
(slapd) were run on the same machine.
With TimesTen as the database, 4000 queries took 14.93 seconds, for a
rate of 267.9 per second.
With a disk based RDBMS as the database, 4000 queries took 77.79 seconds,
for a
rate of 51.42 per second.
With LDBM as the database, 1 query takes 76 seconds, or 0.076 per
second. Something is clearly broken.
2001-08-03 01:28:59 +08:00
|
|
|
backsql_PrintErrors(bsi->bi->db_env,bsi->dbh,sth,rc);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
2000-03-17 03:34:46 +08:00
|
|
|
case LDAP_SCOPE_ONELEVEL:
|
2000-05-27 00:03:32 +08:00
|
|
|
res=backsql_dn2id(bsi->bi,&base_id,bsi->dbh,bsi->base_dn);
|
2000-03-17 03:34:46 +08:00
|
|
|
if (res==NULL)
|
|
|
|
{
|
|
|
|
Debug(LDAP_DEBUG_TRACE,"backsql_oc_get_candidates(): could not retrieve base_dn id - no such entry\n",0,0,0);
|
|
|
|
bsi->status=LDAP_NO_SUCH_OBJECT;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (backsql_BindParamID(sth,2,&base_id.id) != SQL_SUCCESS)
|
|
|
|
{
|
|
|
|
Debug(LDAP_DEBUG_TRACE,"backsql_oc_get_candidates(): error binding base id parameter\n",0,0,0);
|
|
|
|
free(base_id.dn);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
free(base_id.dn);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((rc=SQLExecute(sth)) != SQL_SUCCESS && rc!= SQL_SUCCESS_WITH_INFO)
|
|
|
|
{
|
|
|
|
Debug(LDAP_DEBUG_TRACE,"backsql_oc_get_candidates(): error executing query\n",0,0,0);
|
|
|
|
backsql_PrintErrors(bsi->bi->db_env,bsi->dbh,sth,rc);
|
|
|
|
SQLFreeStmt(sth,SQL_DROP);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
backsql_BindRowAsStrings(sth,&row);
|
|
|
|
while ((rc=SQLFetch(sth)) == SQL_SUCCESS || rc==SQL_SUCCESS_WITH_INFO)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
e=(Entry*)ch_calloc(1,sizeof(Entry));
|
|
|
|
for (i=1;i<row.ncols;i++)
|
|
|
|
{
|
|
|
|
if (row.is_null[i]>0)
|
|
|
|
{
|
|
|
|
backsql_entry_addattr(e,row.col_names[i],row.cols[i],row.col_prec[i]);
|
2001-09-08 02:50:52 +08:00
|
|
|
Debug(LDAP_DEBUG_TRACE,"prec=%d\n",(int)row.col_prec[i],0,0);
|
2000-03-17 03:34:46 +08:00
|
|
|
}
|
2001-09-08 02:50:52 +08:00
|
|
|
else
|
|
|
|
Debug(LDAP_DEBUG_TRACE,"NULL value in this row for attribute '%s'\n",row.col_names[i],0,0);
|
2000-03-17 03:34:46 +08:00
|
|
|
}
|
|
|
|
*/
|
|
|
|
|
|
|
|
c_id=(backsql_entryID*)ch_calloc(1,sizeof(backsql_entryID));
|
|
|
|
c_id->id=atoi(row.cols[0]);
|
|
|
|
c_id->keyval=atoi(row.cols[1]);
|
|
|
|
c_id->oc_id=bsi->oc->id;
|
2000-05-27 00:03:32 +08:00
|
|
|
c_id->dn=ch_strdup(row.cols[3]);
|
2000-03-17 03:34:46 +08:00
|
|
|
c_id->next=bsi->id_list;
|
|
|
|
bsi->id_list=c_id;
|
2000-05-27 00:03:32 +08:00
|
|
|
Debug(LDAP_DEBUG_TRACE,"backsql_oc_get_candidates(): added entry id=%d, keyval=%d dn='%s'\n",
|
|
|
|
c_id->id,c_id->keyval,row.cols[3]);
|
2000-03-17 03:34:46 +08:00
|
|
|
}
|
|
|
|
backsql_FreeRow(&row);
|
|
|
|
SQLFreeStmt(sth,SQL_DROP);
|
A big bunch of improvements, contributed by Sam Drake and Raj Damani.
Summary of changes is cited below.
The patch still needs some cosmetic changes to be made, but is ready for testing.
-----Original Message-----
From: Sam Drake [mailto:drake@timesten.com]
Sent: Saturday, April 07, 2001 10:40 PM
To: 'mitya@seismic.ru'
Cc: openldap-devel@OpenLDAP.org
Subject: RE: Slapd frontend performance issues
FYI, here is a short description of the changes I made. I'll package up the
changes asap, but it may take a couple of days.
The performance numbers quoted in this report were seen at my location with
a 100,000 object database ... the slower numbers I mentioned earlier were
reported by a customer with a 1,000,000 object database.
I also can't explain the very poor performance I saw with OpenLDAP and LDBM
with a 100,000 object database.
...Sam Drake / TimesTen Performance Software
----------
Work Performed
OpenLDAP 2.0.9, including back-sql, was built successfully on Solaris
8 using gcc. The LDAP server itself, slapd, passed all tests bundled
with OpenLDAP. OpenLDAP was built using Sleepycat LDBM release 3.1.17
as the "native" storage manager.
The experimental back-sql facility in slapd was also built
successfully. It was built using Oracle release 8.1.7 and the Oracle
ODBC driver and ODBC Driver Manager from Merant. Rudimentary testing
was performed with the data and examples provided with back-sql, and
back-sql was found to be functional.
Slapd and back-sql were then tested with TimesTen, using TimesTen
4.1.1. Back-sql was not immediately functional with TimesTen due to a
number of SQL limitations in the TimesTen product.
Functional issues encountered were:
1. Back-sql issued SELECT statements including the construct,
"UPPER(?)". While TimesTen supports UPPER, it does not support the
use of parameters as input to builtin functions. Back-sql was
modified to convert the parameter to upper case prior to giving it
to the underlying database ... a change that is appropriate for all
databases.
2. Back-sql issued SELECT statements using the SQL CONCAT function.
TimesTen does not support this function. Back-sql was modified to
concatentate the necessary strings itself (in "C" code) prior to
passing the parameters to SQL. This change is also appropriate for
all databases, not just TimesTen.
Once these two issues were resolved, back-sql could successfully
process LDAP searches using the sample data and examples provided with
back-sql.
While performance was not measured at this point, numerous serious
performance problems were observed with the back-sql code and the
generated SQL. In particular:
1. In the process of implementing an LDAP search, back-sql will
generate and execute a SQL query for all object classes stored in
back-sql. During the source of generating each SQL query, it is
common for back-sql to determine that a particular object class can
not possibly have any members satisfying the search. For example,
this can occur if the query searches an attribute of the LDAP
object that does not exist in the SQL schema. In this case,
back-sql would generate and issue the SQL query anyway, including a
clause such as "WHERE 1=0" in the generated SELECT. The overhead
of parsing, optimizing and executing the query is non-trivial, and
the answer (the empty set) is known in advance. Solution: Back-sql
was modified to stop executing a SQL query when it can be
predetermined that the query will return no rows.
2. Searches in LDAP are fundamentally case-insensitive ("abc" is equal
to "aBc"). However, in SQL this is not normally the case.
Back-sql thus generated SQL SELECT statements including clauses of
the form, "WHERE UPPER(attribute) = 'JOE'". Even if an index is
defined on the attribute in the relational database, the index can
not be used to satisfy the query, as the index is case sensitive.
The relational database then is forced to scan all rows in the
table in order to satisfy the query ... an expensive and
non-scalable proposition. Solution: Back-sql was modified to allow
the schema designer to add additional "upper cased" columns to the
SQL schema. These columns, if present, contain an upper cased
version of the "standard" field, and will be used preferentially
for searching. Such columns can be provided for all searchable
columns, some columns, or no columns. An application using
database "triggers" or similar mechanisms can automatically
maintain these upper cased columns when the standard column is
changed.
3. In order to implement the hierarchical nature of LDAP object
hierarchies, OpenLDAP uses suffix searches in SQL. For example, to
find all objects in the subtree "o=TimesTen,c=us", a SQL SELECT
statement of the form, "WHERE UPPER(dn) LIKE '%O=TIMESTEN,C=US'"
would be employed. Aside from the UPPER issue discussed above, a
second performance problem in this query is the use of suffix
search. In TimesTen (and most relational databases), indexes can
be used to optimize exact-match searches and prefix searches.
However, suffix searches must be performed by scanning every row in
the table ... an expensive and non-scalable proposition. Solution:
Back-sql was modified to optionally add a new "dn_ru" column to the
ldap_entries table. This additional column, if present, contains a
byte-reversed and upper cased version of the DN. This allows
back-sql to generate indexable prefix searches. This column is
also easily maintained automatically through the use of triggers.
Results
A simple database schema was generated holding the LDAP objects and
attributes specified by our customer. An application was written to
generate test databases. Both TimesTen and Oracle 8.1.7 were
populated with 100,000 entry databases.
Load Times
Using "slapadd" followed by "slapindex", loading and indexing 100,000
entries in an LDBM database ran for 19 minutes 10 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
a disk based RDBMS took 17 minutes 53 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
TimesTen took 1 minute 40 seconds.
Search Times
The command, "timex timesearch.sh '(cn=fname210100*)'" was used to
test search times. This command issues the same LDAP search 4000
times over a single LDAP connection. Both the client and server
(slapd) were run on the same machine.
With TimesTen as the database, 4000 queries took 14.93 seconds, for a
rate of 267.9 per second.
With a disk based RDBMS as the database, 4000 queries took 77.79 seconds,
for a
rate of 51.42 per second.
With LDBM as the database, 1 query takes 76 seconds, or 0.076 per
second. Something is clearly broken.
2001-08-03 01:28:59 +08:00
|
|
|
|
2000-03-17 03:34:46 +08:00
|
|
|
Debug(LDAP_DEBUG_TRACE,"<==backsql_oc_get_candidates()\n",0,0,0);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2000-05-27 00:03:32 +08:00
|
|
|
int backsql_search(BackendDB *be,Connection *conn,Operation *op,
|
|
|
|
const char *base, const char *nbase, int scope,int deref,int slimit,int tlimit,
|
2001-12-31 19:35:52 +08:00
|
|
|
Filter *filter, const char *filterstr,AttributeName *attrs,int attrsonly)
|
2000-03-17 03:34:46 +08:00
|
|
|
{
|
|
|
|
backsql_info *bi=(backsql_info*)be->be_private;
|
|
|
|
SQLHDBC dbh;
|
|
|
|
int sres;
|
|
|
|
int nentries;
|
2000-06-30 05:14:43 +08:00
|
|
|
Entry *entry,*res;
|
2000-03-17 03:34:46 +08:00
|
|
|
int manageDSAit = get_manageDSAit( op );
|
|
|
|
struct berval **v2refs = NULL;
|
|
|
|
time_t stoptime;
|
|
|
|
backsql_srch_info srch_info;
|
|
|
|
backsql_entryID *eid=NULL;
|
|
|
|
|
|
|
|
Debug(LDAP_DEBUG_TRACE,"==>backsql_search(): base='%s', filter='%s', scope=%d,",
|
2000-05-27 00:03:32 +08:00
|
|
|
nbase,filterstr,scope);
|
2000-03-17 03:34:46 +08:00
|
|
|
Debug(LDAP_DEBUG_TRACE," deref=%d, attrsonly=%d, attributes to load: %s\n",
|
|
|
|
deref,attrsonly,attrs==NULL?"all":"custom list");
|
|
|
|
dbh=backsql_get_db_conn(be,conn);
|
|
|
|
|
|
|
|
if (!dbh)
|
|
|
|
{
|
|
|
|
Debug(LDAP_DEBUG_TRACE,"backsql_search(): could not get connection handle - exiting\n",0,0,0);
|
|
|
|
send_ldap_result(conn,op,LDAP_OTHER,"","SQL-backend error",NULL,NULL);
|
|
|
|
return 1;
|
|
|
|
}
|
A big bunch of improvements, contributed by Sam Drake and Raj Damani.
Summary of changes is cited below.
The patch still needs some cosmetic changes to be made, but is ready for testing.
-----Original Message-----
From: Sam Drake [mailto:drake@timesten.com]
Sent: Saturday, April 07, 2001 10:40 PM
To: 'mitya@seismic.ru'
Cc: openldap-devel@OpenLDAP.org
Subject: RE: Slapd frontend performance issues
FYI, here is a short description of the changes I made. I'll package up the
changes asap, but it may take a couple of days.
The performance numbers quoted in this report were seen at my location with
a 100,000 object database ... the slower numbers I mentioned earlier were
reported by a customer with a 1,000,000 object database.
I also can't explain the very poor performance I saw with OpenLDAP and LDBM
with a 100,000 object database.
...Sam Drake / TimesTen Performance Software
----------
Work Performed
OpenLDAP 2.0.9, including back-sql, was built successfully on Solaris
8 using gcc. The LDAP server itself, slapd, passed all tests bundled
with OpenLDAP. OpenLDAP was built using Sleepycat LDBM release 3.1.17
as the "native" storage manager.
The experimental back-sql facility in slapd was also built
successfully. It was built using Oracle release 8.1.7 and the Oracle
ODBC driver and ODBC Driver Manager from Merant. Rudimentary testing
was performed with the data and examples provided with back-sql, and
back-sql was found to be functional.
Slapd and back-sql were then tested with TimesTen, using TimesTen
4.1.1. Back-sql was not immediately functional with TimesTen due to a
number of SQL limitations in the TimesTen product.
Functional issues encountered were:
1. Back-sql issued SELECT statements including the construct,
"UPPER(?)". While TimesTen supports UPPER, it does not support the
use of parameters as input to builtin functions. Back-sql was
modified to convert the parameter to upper case prior to giving it
to the underlying database ... a change that is appropriate for all
databases.
2. Back-sql issued SELECT statements using the SQL CONCAT function.
TimesTen does not support this function. Back-sql was modified to
concatentate the necessary strings itself (in "C" code) prior to
passing the parameters to SQL. This change is also appropriate for
all databases, not just TimesTen.
Once these two issues were resolved, back-sql could successfully
process LDAP searches using the sample data and examples provided with
back-sql.
While performance was not measured at this point, numerous serious
performance problems were observed with the back-sql code and the
generated SQL. In particular:
1. In the process of implementing an LDAP search, back-sql will
generate and execute a SQL query for all object classes stored in
back-sql. During the source of generating each SQL query, it is
common for back-sql to determine that a particular object class can
not possibly have any members satisfying the search. For example,
this can occur if the query searches an attribute of the LDAP
object that does not exist in the SQL schema. In this case,
back-sql would generate and issue the SQL query anyway, including a
clause such as "WHERE 1=0" in the generated SELECT. The overhead
of parsing, optimizing and executing the query is non-trivial, and
the answer (the empty set) is known in advance. Solution: Back-sql
was modified to stop executing a SQL query when it can be
predetermined that the query will return no rows.
2. Searches in LDAP are fundamentally case-insensitive ("abc" is equal
to "aBc"). However, in SQL this is not normally the case.
Back-sql thus generated SQL SELECT statements including clauses of
the form, "WHERE UPPER(attribute) = 'JOE'". Even if an index is
defined on the attribute in the relational database, the index can
not be used to satisfy the query, as the index is case sensitive.
The relational database then is forced to scan all rows in the
table in order to satisfy the query ... an expensive and
non-scalable proposition. Solution: Back-sql was modified to allow
the schema designer to add additional "upper cased" columns to the
SQL schema. These columns, if present, contain an upper cased
version of the "standard" field, and will be used preferentially
for searching. Such columns can be provided for all searchable
columns, some columns, or no columns. An application using
database "triggers" or similar mechanisms can automatically
maintain these upper cased columns when the standard column is
changed.
3. In order to implement the hierarchical nature of LDAP object
hierarchies, OpenLDAP uses suffix searches in SQL. For example, to
find all objects in the subtree "o=TimesTen,c=us", a SQL SELECT
statement of the form, "WHERE UPPER(dn) LIKE '%O=TIMESTEN,C=US'"
would be employed. Aside from the UPPER issue discussed above, a
second performance problem in this query is the use of suffix
search. In TimesTen (and most relational databases), indexes can
be used to optimize exact-match searches and prefix searches.
However, suffix searches must be performed by scanning every row in
the table ... an expensive and non-scalable proposition. Solution:
Back-sql was modified to optionally add a new "dn_ru" column to the
ldap_entries table. This additional column, if present, contains a
byte-reversed and upper cased version of the DN. This allows
back-sql to generate indexable prefix searches. This column is
also easily maintained automatically through the use of triggers.
Results
A simple database schema was generated holding the LDAP objects and
attributes specified by our customer. An application was written to
generate test databases. Both TimesTen and Oracle 8.1.7 were
populated with 100,000 entry databases.
Load Times
Using "slapadd" followed by "slapindex", loading and indexing 100,000
entries in an LDBM database ran for 19 minutes 10 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
a disk based RDBMS took 17 minutes 53 seconds.
Using a C++ application that used ODBC, loading 100,000 entries into
TimesTen took 1 minute 40 seconds.
Search Times
The command, "timex timesearch.sh '(cn=fname210100*)'" was used to
test search times. This command issues the same LDAP search 4000
times over a single LDAP connection. Both the client and server
(slapd) were run on the same machine.
With TimesTen as the database, 4000 queries took 14.93 seconds, for a
rate of 267.9 per second.
With a disk based RDBMS as the database, 4000 queries took 77.79 seconds,
for a
rate of 51.42 per second.
With LDBM as the database, 1 query takes 76 seconds, or 0.076 per
second. Something is clearly broken.
2001-08-03 01:28:59 +08:00
|
|
|
|
|
|
|
/* TimesTen : Pass it along to the lower level routines */
|
|
|
|
srch_info.isTimesTen = bi->isTimesTen;
|
2000-03-17 03:34:46 +08:00
|
|
|
|
2001-12-26 03:48:26 +08:00
|
|
|
if (tlimit == 0 && be_isroot(be,&op->o_ndn))
|
2000-03-17 03:34:46 +08:00
|
|
|
{
|
|
|
|
tlimit = -1; /* allow root to set no limit */
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
tlimit = (tlimit > be->be_timelimit || tlimit < 1) ?
|
|
|
|
be->be_timelimit : tlimit;
|
|
|
|
stoptime = op->o_time + tlimit;
|
|
|
|
}
|
|
|
|
|
2001-12-26 03:48:26 +08:00
|
|
|
if (slimit == 0 && be_isroot(be,&op->o_ndn))
|
2000-03-17 03:34:46 +08:00
|
|
|
{
|
|
|
|
slimit = -1; /* allow root to set no limit */
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
slimit = (slimit > be->be_sizelimit || slimit < 1) ?
|
|
|
|
be->be_sizelimit : slimit;
|
|
|
|
}
|
|
|
|
|
2000-05-27 00:03:32 +08:00
|
|
|
backsql_init_search(&srch_info,bi,(char*)nbase,scope,slimit,tlimit,stoptime,filter,dbh,
|
|
|
|
be,conn,op,attrs);
|
2000-03-17 03:34:46 +08:00
|
|
|
|
2001-09-08 02:50:52 +08:00
|
|
|
/*for each objectclass we try to construct query which gets IDs
|
|
|
|
*of entries matching LDAP query filter and scope (or at least candidates),
|
|
|
|
*and get the IDs
|
|
|
|
*/
|
2000-03-17 03:34:46 +08:00
|
|
|
avl_apply(bi->oc_by_name,(AVL_APPLY)backsql_oc_get_candidates,&srch_info,0,AVL_INORDER);
|
|
|
|
|
|
|
|
nentries=0;
|
2001-09-08 02:50:52 +08:00
|
|
|
/*now we load candidate entries (only those attrubutes mentioned in attrs and filter),
|
|
|
|
*test it against full filter and then send to client
|
|
|
|
*/
|
2000-03-17 03:34:46 +08:00
|
|
|
for(eid=srch_info.id_list;eid!=NULL;eid=eid->next)
|
|
|
|
{
|
|
|
|
/* check for abandon */
|
|
|
|
if (op->o_abandon)
|
|
|
|
{
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check time limit */
|
|
|
|
if ( tlimit != -1 && slap_get_time() > stoptime)
|
|
|
|
{
|
|
|
|
send_search_result( conn, op, LDAP_TIMELIMIT_EXCEEDED,
|
|
|
|
NULL, NULL, v2refs, NULL, nentries );
|
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
Debug(LDAP_DEBUG_TRACE,"backsql_search(): loading data for entry id=%d, oc_id=%d, keyval=%d\n",
|
|
|
|
eid->id,eid->oc_id,eid->keyval);
|
|
|
|
|
2000-06-30 05:14:43 +08:00
|
|
|
entry=(Entry *)ch_calloc(sizeof(Entry),1);
|
|
|
|
res=backsql_id2entry(&srch_info,entry,eid);
|
2000-03-17 03:34:46 +08:00
|
|
|
if (res==NULL)
|
|
|
|
{
|
|
|
|
Debug(LDAP_DEBUG_TRACE,"backsql_search(): error in backsql_id2entry() - skipping entry\n",0,0,0);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if ( !manageDSAit && scope != LDAP_SCOPE_BASE &&
|
2000-06-30 05:14:43 +08:00
|
|
|
is_entry_referral( entry ) )
|
2000-03-17 03:34:46 +08:00
|
|
|
{
|
2000-06-30 05:14:43 +08:00
|
|
|
struct berval **refs = get_entry_referrals(be,conn,op,entry);
|
2000-03-17 03:34:46 +08:00
|
|
|
|
2000-06-30 05:14:43 +08:00
|
|
|
send_search_reference( be, conn, op, entry, refs, scope, NULL, &v2refs );
|
2000-03-17 03:34:46 +08:00
|
|
|
ber_bvecfree( refs );
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2000-06-30 05:14:43 +08:00
|
|
|
if (test_filter(be,conn,op,entry,filter)==LDAP_COMPARE_TRUE)
|
2000-03-17 03:34:46 +08:00
|
|
|
{
|
2000-06-30 05:14:43 +08:00
|
|
|
if ((sres=send_search_entry(be,conn,op,entry,attrs,attrsonly,NULL))==-1)
|
2000-03-17 03:34:46 +08:00
|
|
|
{
|
|
|
|
Debug(LDAP_DEBUG_TRACE,"backsql_search(): connection lost\n",0,0,0);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
nentries+=!sres;
|
|
|
|
}
|
2000-06-30 05:14:43 +08:00
|
|
|
entry_free(entry);
|
2000-03-17 03:34:46 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
for(eid=srch_info.id_list;eid!=NULL;eid=backsql_free_entryID(eid));
|
|
|
|
|
2000-05-27 00:03:32 +08:00
|
|
|
charray_free(srch_info.attrs);
|
2000-03-17 03:34:46 +08:00
|
|
|
|
|
|
|
if (nentries>0)
|
|
|
|
send_search_result( conn, op,
|
|
|
|
v2refs == NULL ? LDAP_SUCCESS : LDAP_REFERRAL,
|
|
|
|
NULL, NULL, v2refs, NULL, nentries );
|
|
|
|
else
|
|
|
|
send_ldap_result(conn,op,LDAP_NO_SUCH_OBJECT,NULL,NULL,NULL,0);
|
|
|
|
|
|
|
|
Debug(LDAP_DEBUG_TRACE,"<==backsql_search()\n",0,0,0);
|
|
|
|
return 0;
|
|
|
|
}
|
2000-03-19 14:18:27 +08:00
|
|
|
|
2000-05-27 00:03:32 +08:00
|
|
|
#endif /* SLAPD_SQL */
|