mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-02-23 19:39:53 +08:00
Assorted minor cleanups in the test_json_parser module
Per gripes from Michael Paquier Discussion: https://postgr.es/m/ZhTQ6_w1vwOhqTQI@paquier.xyz Along the way, also clean up a handful of typos in3311ea86ed
andea7b4e9a2a
, found by Alexander Lakhin, and a couple of stylistic snafus noted by Daniel Westermann and Daniel Gustafsson.
This commit is contained in:
parent
b8a7bfa333
commit
42fa4b6601
@ -34,7 +34,7 @@
|
||||
#define BLOCKS_PER_READ 512
|
||||
|
||||
/*
|
||||
* we expect the find the last lines of the manifest, including the checksum,
|
||||
* We expect to find the last lines of the manifest, including the checksum,
|
||||
* in the last MIN_CHUNK bytes of the manifest. We trigger an incremental
|
||||
* parse step if we are about to overflow MAX_CHUNK bytes.
|
||||
*/
|
||||
@ -88,8 +88,8 @@ struct IncrementalBackupInfo
|
||||
* Files extracted from the backup manifest.
|
||||
*
|
||||
* We don't really need this information, because we use WAL summaries to
|
||||
* figure what's changed. It would be unsafe to just rely on the list of
|
||||
* files that existed before, because it's possible for a file to be
|
||||
* figure out what's changed. It would be unsafe to just rely on the list
|
||||
* of files that existed before, because it's possible for a file to be
|
||||
* removed and a new one created with the same name and different
|
||||
* contents. In such cases, the whole file must still be sent. We can tell
|
||||
* from the WAL summaries whether that happened, but not from the file
|
||||
|
@ -356,7 +356,7 @@ makeJsonLexContextCstringLen(JsonLexContext *lex, char *json,
|
||||
* need explicit stacks for predictions, field names and null indicators, but
|
||||
* we don't need the input, that will be handed in bit by bit to the
|
||||
* parse routine. We also need an accumulator for partial tokens in case
|
||||
* the boundary between chunks happns to fall in the middle of a token.
|
||||
* the boundary between chunks happens to fall in the middle of a token.
|
||||
*/
|
||||
#define JS_STACK_CHUNK_SIZE 64
|
||||
#define JS_MAX_PROD_LEN 10 /* more than we need */
|
||||
@ -1414,9 +1414,9 @@ json_lex(JsonLexContext *lex)
|
||||
}
|
||||
|
||||
/*
|
||||
* Add any remaining alpha_numeric chars. This takes care of the
|
||||
* Add any remaining alphanumeric chars. This takes care of the
|
||||
* {null, false, true} literals as well as any trailing
|
||||
* alpha-numeric junk on non-string tokens.
|
||||
* alphanumeric junk on non-string tokens.
|
||||
*/
|
||||
for (int i = added; i < lex->input_length; i++)
|
||||
{
|
||||
|
@ -806,7 +806,7 @@ json_manifest_finalize_wal_range(JsonManifestParseState *parse)
|
||||
* the rest of the file.
|
||||
*
|
||||
* For an incremental parse, this will just be called on the last chunk of the
|
||||
* manifest, and the cryptohash context paswed in. For a non-incremental
|
||||
* manifest, and the cryptohash context passed in. For a non-incremental
|
||||
* parse incr_ctx will be NULL.
|
||||
*/
|
||||
static void
|
||||
|
@ -4,22 +4,21 @@ Module `test_json_parser`
|
||||
This module contains two programs for testing the json parsers.
|
||||
|
||||
- `test_json_parser_incremental` is for testing the incremental parser, It
|
||||
reads in a file and pases it in very small chunks (60 bytes at a time) to
|
||||
the incremental parser. It's not meant to be a speed test but to test the
|
||||
accuracy of the incremental parser. It takes one argument: the name of the
|
||||
input file.
|
||||
reads in a file and passes it in very small chunks (default is 60 bytes at a
|
||||
time) to the incremental parser. It's not meant to be a speed test but to
|
||||
test the accuracy of the incremental parser. There are two option arguments,
|
||||
"-c nn" specifies an alternative chunk size, and "-s" specifies using
|
||||
semantic routines. The semantic routines re-output the json, although not in
|
||||
a very pretty form. The required non-option argument is the input file name.
|
||||
- `test_json_parser_perf` is for speed testing both the standard
|
||||
recursive descent parser and the non-recursive incremental
|
||||
parser. If given the `-i` flag it uses the non-recursive parser,
|
||||
otherwise the stardard parser. The remaining flags are the number of
|
||||
otherwise the standard parser. The remaining flags are the number of
|
||||
parsing iterations and the file containing the input. Even when
|
||||
using the non-recursive parser, the input is passed to the parser in a
|
||||
single chunk. The results are thus comparable to those of the
|
||||
standard parser.
|
||||
|
||||
The easiest way to use these is to run `make check` and `make speed-check`
|
||||
|
||||
The sample input file is a small extract from a list of `delicious`
|
||||
The sample input file is a small, sanitized extract from a list of `delicious`
|
||||
bookmarks taken some years ago, all wrapped in a single json
|
||||
array. 10,000 iterations of parsing this file gives a reasonable
|
||||
benchmark, and that is what the `speed-check` target does.
|
||||
array.
|
||||
|
@ -3,16 +3,17 @@
|
||||
* test_json_parser_incremental.c
|
||||
* Test program for incremental JSON parser
|
||||
*
|
||||
* Copyright (c) 2023, PostgreSQL Global Development Group
|
||||
* Copyright (c) 2024, PostgreSQL Global Development Group
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* src/test/modules/test_json_parser/test_json_parser_incremental.c
|
||||
*
|
||||
* This progam tests incremental parsing of json. The input is fed into
|
||||
* This program tests incremental parsing of json. The input is fed into
|
||||
* the parser in very small chunks. In practice you would normally use
|
||||
* much larger chunks, but doing this makes it more likely that the
|
||||
* full range of incement handling, especially in the lexer, is exercised.
|
||||
* If the "-c SIZE" option is provided, that chunk size is used instead.
|
||||
* full range of increment handling, especially in the lexer, is exercised.
|
||||
* If the "-c SIZE" option is provided, that chunk size is used instead
|
||||
* of the default of 60.
|
||||
*
|
||||
* The argument specifies the file containing the JSON input.
|
||||
*
|
||||
@ -31,6 +32,9 @@
|
||||
#include "mb/pg_wchar.h"
|
||||
#include "pg_getopt.h"
|
||||
|
||||
#define BUFSIZE 6000
|
||||
#define DEFAULT_CHUNK_SIZE 60
|
||||
|
||||
typedef struct DoState
|
||||
{
|
||||
JsonLexContext *lex;
|
||||
@ -67,14 +71,13 @@ JsonSemAction sem = {
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
/* max delicious line length is less than this */
|
||||
char buff[6001];
|
||||
char buff[BUFSIZE];
|
||||
FILE *json_file;
|
||||
JsonParseErrorType result;
|
||||
JsonLexContext lex;
|
||||
StringInfoData json;
|
||||
int n_read;
|
||||
size_t chunk_size = 60;
|
||||
size_t chunk_size = DEFAULT_CHUNK_SIZE;
|
||||
struct stat statbuf;
|
||||
off_t bytes_left;
|
||||
JsonSemAction *testsem = &nullSemAction;
|
||||
@ -88,6 +91,11 @@ main(int argc, char **argv)
|
||||
{
|
||||
case 'c': /* chunksize */
|
||||
sscanf(optarg, "%zu", &chunk_size);
|
||||
if (chunk_size > BUFSIZE)
|
||||
{
|
||||
fprintf(stderr, "chunk size cannot exceed %d\n", BUFSIZE);
|
||||
exit(1);
|
||||
}
|
||||
break;
|
||||
case 's': /* do semantic processing */
|
||||
testsem = &sem;
|
||||
@ -121,6 +129,12 @@ main(int argc, char **argv)
|
||||
{
|
||||
n_read = fread(buff, 1, chunk_size, json_file);
|
||||
appendBinaryStringInfo(&json, buff, n_read);
|
||||
|
||||
/*
|
||||
* Append some trailing junk to the buffer passed to the parser. This
|
||||
* helps us ensure that the parser does the right thing even if the
|
||||
* chunk isn't terminated with a '\0'.
|
||||
*/
|
||||
appendStringInfoString(&json, "1+23 trailing junk");
|
||||
bytes_left -= n_read;
|
||||
if (bytes_left > 0)
|
||||
|
@ -1,14 +1,14 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* test_json_parser_perf.c
|
||||
* Performancet est program for both flavors of the JSON parser
|
||||
* Performance test program for both flavors of the JSON parser
|
||||
*
|
||||
* Copyright (c) 2023, PostgreSQL Global Development Group
|
||||
* Copyright (c) 2024, PostgreSQL Global Development Group
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* src/test/modules/test_json_parser/test_json_parser_perf.c
|
||||
*
|
||||
* This progam tests either the standard (recursive descent) JSON parser
|
||||
* This program tests either the standard (recursive descent) JSON parser
|
||||
* or the incremental (table driven) parser, but without breaking the input
|
||||
* into chunks in the latter case. Thus it can be used to compare the pure
|
||||
* parsing speed of the two parsers. If the "-i" option is used, then the
|
||||
@ -28,11 +28,12 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#define BUFSIZE 6000
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
/* max delicious line length is less than this */
|
||||
char buff[6001];
|
||||
char buff[BUFSIZE];
|
||||
FILE *json_file;
|
||||
JsonParseErrorType result;
|
||||
JsonLexContext *lex;
|
||||
|
Loading…
Reference in New Issue
Block a user