diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index a0ecc4f568..b5d9160040 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -9846,17 +9846,75 @@ table2-mapping Functions and operators + + shows the operators that are + available for use with JSON (see ) data. + + + + JSON Operators + + + + Operator + Right Operand Type + Description + Example + + + + + -> + int + Get JSON array element + '[1,2,3]'::json->2 + + + -> + text + Get JSON object field + '{"a":1,"b":2}'::json->'b' + + + ->> + int + Get JSON array element as text + '[1,2,3]'::json->>2 + + + ->> + text + Get JSON object field as text + '{"a":1,"b":2}'::json->>'b' + + + #> + array of text + Get JSON object at specified path + '{"a":[1,2,3],"b":[4,5,6]}'::json#>'{a,2}' + + + #>> + array of text + Get JSON object at specified path as text + '{"a":[1,2,3],"b":[4,5,6]}'::json#>>'{a,2}' + + + +
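+ A quick illustration of how these operators compose (editor's sketch, not part of the original patch; the sample value is invented):
+ SELECT '{"a": {"b": ["x","y"]}}'::json -> 'a' -> 'b' ->> 1;   -- yields: y
+ SELECT '{"a": {"b": ["x","y"]}}'::json #>> '{a,b,0}';         -- yields: x
+ Note that -> and ->> select the same element; ->> merely delivers it as text rather than as json.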
+ + shows the functions that are - for creating JSON (see ) data. + for creating and manipulating JSON (see ) data. JSON Support Functions - + Function + Return Type Description Example Example Result @@ -9870,6 +9928,7 @@ table2-mapping array_to_json(anyarray [, pretty_bool]) + json Returns the array as JSON. A PostgreSQL multidimensional array becomes a JSON array of arrays. Line feeds will be added between @@ -9885,6 +9944,7 @@ table2-mapping row_to_json(record [, pretty_bool]) + json Returns the row as JSON. Line feeds will be added between level 1 elements if pretty_bool is true. @@ -9899,6 +9959,7 @@ table2-mapping to_json(anyelement) + json Returns the value as JSON. If the data type is not built-in, and there is a cast from the type to json, the cast function will be used to @@ -9909,6 +9970,182 @@ table2-mapping to_json('Fred said "Hi."' "Fred said \"Hi.\"" + + + + json_array_length + + json_array_length(json) + + int + + Returns the number of elements in the outermost json array. + + json_array_length('[1,2,3,{"f1":1,"f2":[5,6]},4]') + 5 + + + + + json_each + + json_each(json) + + SETOF key text, value json + + Expands the outermost json object into a set of key/value pairs. + + select * from json_each('{"a":"foo", "b":"bar"}') + + + key | value +-----+------- + a | "foo" + b | "bar" + + + + + + + json_each_text + + json_each_text(from_json json) + + SETOF key text, value text + + Expands the outermost json object into a set of key/value pairs. The + returned value will be of type text. + + select * from json_each_text('{"a":"foo", "b":"bar"}') + + + key | value +-----+------- + a | foo + b | bar + + + + + + + json_extract_path + + json_extract_path(from_json json, VARIADIC path_elems text[]) + + json + + Returns the json value pointed to by path_elems. + + json_extract_path('{"f2":{"f3":1},"f4":{"f5":99,"f6":"foo"}}','f4') + {"f5":99,"f6":"foo"} + + + + + json_extract_path_text + + json_extract_path_text(from_json json, VARIADIC path_elems text[]) + + text + + Returns the json value pointed to by path_elems as text. + + json_extract_path_text('{"f2":{"f3":1},"f4":{"f5":99,"f6":"foo"}}','f4', 'f6') + foo + + + + + json_object_keys + + json_object_keys(json) + + SETOF text + + Returns the set of keys in the json object. Only keys of the outermost object are returned. + + json_object_keys('{"f1":"abc","f2":{"f3":"a", "f4":"b"}}') + + + json_object_keys +------------------ + f1 + f2 + + + + + + + json_populate_record + + json_populate_record(base anyelement, from_json json [, use_json_as_text bool=false]) + + anyelement + + Expands the object in from_json to a row whose columns match + the record type defined by base. Conversion will be best + effort; columns in base with no corresponding key in from_json + will be left null. A column may only be specified once. + + json_populate_record(null::x, '{"a":1,"b":2}') + + + a | b +---+--- + 1 | 2 + + + + + + + json_populate_recordset + + json_populate_recordset(base anyelement, from_json json [, use_json_as_text bool=false]) + + SETOF anyelement + + Expands the outermost set of objects in from_json to a set + whose columns match the record type defined by base. + Conversion will be best effort; columns in base with no + corresponding key in from_json will be left null. A column + may only be specified once. + + json_populate_recordset(null::x, '[{"a":1,"b":2},{"a":3,"b":4}]') + + + a | b +---+--- + 1 | 2 + 3 | 4 + + + + + + + json_array_elements + + json_array_elements(json) + + SETOF json + + Expands a json array to a set of json elements. 
+ + json_array_elements('[1,true, [2,false]]') + + + value +----------- + 1 + true + [2,false] + + +
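For illustration (editor's sketch, not part of the original patch; the row type x mirrors the table's examples and must exist first):

CREATE TYPE x AS (a int, b text);
SELECT * FROM json_populate_record(null::x, '{"a":1,"b":"two"}');
--  a |  b
-- ---+-----
--  1 | two

As the table notes, columns of x with no corresponding key in the JSON are simply left null.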
@@ -9926,7 +10163,6 @@ table2-mapping function json_agg which aggregates record values as json efficiently. - diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index f727acd68f..57adbf607d 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -787,3 +787,11 @@ COMMENT ON FUNCTION ts_debug(text) IS CREATE OR REPLACE FUNCTION pg_start_backup(label text, fast boolean DEFAULT false) RETURNS text STRICT VOLATILE LANGUAGE internal AS 'pg_start_backup'; + +CREATE OR REPLACE FUNCTION + json_populate_record(base anyelement, from_json json, use_json_as_text boolean DEFAULT false) + RETURNS anyelement LANGUAGE internal STABLE AS 'json_populate_record'; + +CREATE OR REPLACE FUNCTION + json_populate_recordset(base anyelement, from_json json, use_json_as_text boolean DEFAULT false) + RETURNS SETOF anyelement LANGUAGE internal STABLE ROWS 100 AS 'json_populate_recordset'; diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index a929f4ae19..41a8982368 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -19,8 +19,8 @@ OBJS = acl.o arrayfuncs.o array_selfuncs.o array_typanalyze.o \ array_userfuncs.o arrayutils.o bool.o \ cash.o char.o date.o datetime.o datum.o domains.o \ enum.o float.o format_type.o \ - geo_ops.o geo_selfuncs.o int.o int8.o json.o like.o lockfuncs.o \ - misc.o nabstime.o name.o numeric.o numutils.o \ + geo_ops.o geo_selfuncs.o int.o int8.o json.o jsonfuncs.o like.o \ + lockfuncs.o misc.o nabstime.o name.o numeric.o numutils.o \ oid.o oracle_compat.o pseudotypes.o rangetypes.o rangetypes_gist.o \ rowtypes.o regexp.o regproc.o ruleutils.o selfuncs.o \ tid.o timestamp.o varbit.o varchar.o varlena.o version.o xid.o \ diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c index 82be9a1b66..90404f34ef 100644 --- a/src/backend/utils/adt/json.c +++ b/src/backend/utils/adt/json.c @@ -26,59 +26,37 @@ #include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/json.h" -#include "utils/syscache.h" +#include "utils/jsonapi.h" #include "utils/typcache.h" +#include "utils/syscache.h" -typedef enum /* types of JSON values */ -{ - JSON_VALUE_INVALID, /* non-value tokens are reported as this */ - JSON_VALUE_STRING, - JSON_VALUE_NUMBER, - JSON_VALUE_OBJECT, - JSON_VALUE_ARRAY, - JSON_VALUE_TRUE, - JSON_VALUE_FALSE, - JSON_VALUE_NULL -} JsonValueType; - -typedef struct /* state of JSON lexer */ -{ - char *input; /* whole string being parsed */ - char *token_start; /* start of current token within input */ - char *token_terminator; /* end of previous or current token */ - JsonValueType token_type; /* type of current token, once it's known */ -} JsonLexContext; - -typedef enum /* states of JSON parser */ +/* + * The context of the parser is maintained by the recursive descent + * mechanism, but is passed explicitly to the error reporting routine + * for better diagnostics. 
+ */ +typedef enum /* contexts of JSON parser */ { JSON_PARSE_VALUE, /* expecting a value */ + JSON_PARSE_STRING, /* expecting a string (for a field name) */ JSON_PARSE_ARRAY_START, /* saw '[', expecting value or ']' */ JSON_PARSE_ARRAY_NEXT, /* saw array element, expecting ',' or ']' */ JSON_PARSE_OBJECT_START, /* saw '{', expecting label or '}' */ JSON_PARSE_OBJECT_LABEL, /* saw object label, expecting ':' */ JSON_PARSE_OBJECT_NEXT, /* saw object value, expecting ',' or '}' */ - JSON_PARSE_OBJECT_COMMA /* saw object ',', expecting next label */ -} JsonParseState; + JSON_PARSE_OBJECT_COMMA, /* saw object ',', expecting next label */ + JSON_PARSE_END /* saw the end of a document, expect nothing */ +} JsonParseContext; -typedef struct JsonParseStack /* the parser state has to be stackable */ -{ - JsonParseState state; - /* currently only need the state enum, but maybe someday more stuff */ -} JsonParseStack; - -typedef enum /* required operations on state stack */ -{ - JSON_STACKOP_NONE, /* no-op */ - JSON_STACKOP_PUSH, /* push new JSON_PARSE_VALUE stack item */ - JSON_STACKOP_PUSH_WITH_PUSHBACK, /* push, then rescan current token */ - JSON_STACKOP_POP /* pop, or expect end of input if no stack */ -} JsonStackOp; - -static void json_validate_cstring(char *input); -static void json_lex(JsonLexContext *lex); -static void json_lex_string(JsonLexContext *lex); -static void json_lex_number(JsonLexContext *lex, char *s); -static void report_parse_error(JsonParseStack *stack, JsonLexContext *lex); +static inline void json_lex(JsonLexContext *lex); +static inline void json_lex_string(JsonLexContext *lex); +static inline void json_lex_number(JsonLexContext *lex, char *s); +static inline void parse_scalar(JsonLexContext *lex, JsonSemAction sem); +static void parse_object_field(JsonLexContext *lex, JsonSemAction sem); +static void parse_object(JsonLexContext *lex, JsonSemAction sem); +static void parse_array_element(JsonLexContext *lex, JsonSemAction sem); +static void parse_array(JsonLexContext *lex, JsonSemAction sem); +static void report_parse_error(JsonParseContext ctx, JsonLexContext *lex); static void report_invalid_token(JsonLexContext *lex); static int report_json_context(JsonLexContext *lex); static char *extract_mb_char(char *s); @@ -91,6 +69,77 @@ static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, static void array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds); +/* the null action object used for pure validation */ +static jsonSemAction nullSemAction = +{ + NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL +}; +static JsonSemAction NullSemAction = &nullSemAction; + +/* Recursive Descent parser support routines */ + +/* + * lex_peek + * + * what is the current look_ahead token? +*/ +static inline JsonTokenType +lex_peek(JsonLexContext *lex) +{ + return lex->token_type; +} + +/* + * lex_accept + * + * accept the look_ahead token and move the lexer to the next token if the + * look_ahead token matches the token parameter. In that case, and if required, + * also hand back the de-escaped lexeme. + * + * returns true if the token matched, false otherwise. 
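+ *
+ * For example (editor's illustrative sketch, not code from this patch), a
+ * caller wanting the de-escaped lexeme of an optional string token might do:
+ *
+ *     char *str;
+ *
+ *     if (lex_accept(lex, JSON_TOKEN_STRING, &str))
+ *         ... use str, which was palloc'd on our behalf ...
+ *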
+ */ +static inline bool +lex_accept(JsonLexContext *lex, JsonTokenType token, char **lexeme) +{ + if (lex->token_type == token) + { + if (lexeme != NULL) + { + if (lex->token_type == JSON_TOKEN_STRING) + { + if (lex->strval != NULL) + *lexeme = pstrdup(lex->strval->data); + } + else + { + int len = (lex->token_terminator - lex->token_start); + char *tokstr = palloc(len + 1); + + memcpy(tokstr, lex->token_start, len); + tokstr[len] = '\0'; + *lexeme = tokstr; + } + } + json_lex(lex); + return true; + } + return false; +} + +/* + * lex_expect + * + * move the lexer to the next token if the current look_ahead token matches + * the parameter token. Otherwise, report an error. + */ +static inline void +lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token) +{ + if (!lex_accept(lex, token, NULL)) + report_parse_error(ctx, lex); +} + /* * All the defined type categories are upper case, so use lower case here * so we avoid any possible clash. @@ -109,19 +158,22 @@ static void array_to_json_internal(Datum array, StringInfo result, (c) == '_' || \ IS_HIGHBIT_SET(c)) - /* * Input. */ Datum json_in(PG_FUNCTION_ARGS) { - char *text = PG_GETARG_CSTRING(0); + char *json = PG_GETARG_CSTRING(0); + text *result = cstring_to_text(json); + JsonLexContext *lex; - json_validate_cstring(text); + /* validate it */ + lex = makeJsonLexContext(result, false); + pg_parse_json(lex, NullSemAction); /* Internal representation is the same as text, for now */ - PG_RETURN_TEXT_P(cstring_to_text(text)); + PG_RETURN_TEXT_P(result); } /* @@ -160,293 +212,457 @@ json_recv(PG_FUNCTION_ARGS) text *result; char *str; int nbytes; + JsonLexContext *lex; str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); - /* - * We need a null-terminated string to pass to json_validate_cstring(). - * Rather than make a separate copy, make the temporary result one byte - * bigger than it needs to be. - */ - result = palloc(nbytes + 1 + VARHDRSZ); + result = palloc(nbytes + VARHDRSZ); SET_VARSIZE(result, nbytes + VARHDRSZ); memcpy(VARDATA(result), str, nbytes); - str = VARDATA(result); - str[nbytes] = '\0'; /* Validate it. */ - json_validate_cstring(str); + lex = makeJsonLexContext(result, false); + pg_parse_json(lex, NullSemAction); PG_RETURN_TEXT_P(result); } /* - * Check whether supplied input is valid JSON. + * makeJsonLexContext + * + * lex constructor, with or without StringInfo object + * for de-escaped lexemes. + * + * Without one, processing is faster, so only make one + * if really required. */ -static void -json_validate_cstring(char *input) +JsonLexContext * +makeJsonLexContext(text *json, bool need_escapes) { - JsonLexContext lex; - JsonParseStack *stack, - *stacktop; - int stacksize; + JsonLexContext *lex = palloc0(sizeof(JsonLexContext)); - /* Set up lexing context. */ - lex.input = input; - lex.token_terminator = lex.input; + lex->input = lex->token_terminator = lex->line_start = VARDATA(json); + lex->line_number = 1; + lex->input_length = VARSIZE(json) - VARHDRSZ; + if (need_escapes) + lex->strval = makeStringInfo(); + return lex; +} - /* Set up parse stack. */ - stacksize = 32; - stacktop = (JsonParseStack *) palloc(sizeof(JsonParseStack) * stacksize); - stack = stacktop; - stack->state = JSON_PARSE_VALUE; +/* + * pg_parse_json + * + * Publicly visible entry point for the JSON parser. + * + * lex is a lexing context, set up for the json to be processed by calling + * makeJsonLexContext(). 
sem is a structure of function pointers to semantic + * action routines to be called at appropriate spots during parsing, and a + * pointer to a state object to be passed to those routines. + */ +void +pg_parse_json(JsonLexContext *lex, JsonSemAction sem) +{ + JsonTokenType tok; - /* Main parsing loop. */ - for (;;) + /* get the initial token */ + json_lex(lex); + + tok = lex_peek(lex); + + /* parse by recursive descent */ + switch (tok) { - JsonStackOp op; - - /* Fetch next token. */ - json_lex(&lex); - - /* Check for unexpected end of input. */ - if (lex.token_start == NULL) - report_parse_error(stack, &lex); - -redo: - /* Figure out what to do with this token. */ - op = JSON_STACKOP_NONE; - switch (stack->state) - { - case JSON_PARSE_VALUE: - if (lex.token_type != JSON_VALUE_INVALID) - op = JSON_STACKOP_POP; - else if (lex.token_start[0] == '[') - stack->state = JSON_PARSE_ARRAY_START; - else if (lex.token_start[0] == '{') - stack->state = JSON_PARSE_OBJECT_START; - else - report_parse_error(stack, &lex); - break; - case JSON_PARSE_ARRAY_START: - if (lex.token_type != JSON_VALUE_INVALID) - stack->state = JSON_PARSE_ARRAY_NEXT; - else if (lex.token_start[0] == ']') - op = JSON_STACKOP_POP; - else if (lex.token_start[0] == '[' || - lex.token_start[0] == '{') - { - stack->state = JSON_PARSE_ARRAY_NEXT; - op = JSON_STACKOP_PUSH_WITH_PUSHBACK; - } - else - report_parse_error(stack, &lex); - break; - case JSON_PARSE_ARRAY_NEXT: - if (lex.token_type != JSON_VALUE_INVALID) - report_parse_error(stack, &lex); - else if (lex.token_start[0] == ']') - op = JSON_STACKOP_POP; - else if (lex.token_start[0] == ',') - op = JSON_STACKOP_PUSH; - else - report_parse_error(stack, &lex); - break; - case JSON_PARSE_OBJECT_START: - if (lex.token_type == JSON_VALUE_STRING) - stack->state = JSON_PARSE_OBJECT_LABEL; - else if (lex.token_type == JSON_VALUE_INVALID && - lex.token_start[0] == '}') - op = JSON_STACKOP_POP; - else - report_parse_error(stack, &lex); - break; - case JSON_PARSE_OBJECT_LABEL: - if (lex.token_type == JSON_VALUE_INVALID && - lex.token_start[0] == ':') - { - stack->state = JSON_PARSE_OBJECT_NEXT; - op = JSON_STACKOP_PUSH; - } - else - report_parse_error(stack, &lex); - break; - case JSON_PARSE_OBJECT_NEXT: - if (lex.token_type != JSON_VALUE_INVALID) - report_parse_error(stack, &lex); - else if (lex.token_start[0] == '}') - op = JSON_STACKOP_POP; - else if (lex.token_start[0] == ',') - stack->state = JSON_PARSE_OBJECT_COMMA; - else - report_parse_error(stack, &lex); - break; - case JSON_PARSE_OBJECT_COMMA: - if (lex.token_type == JSON_VALUE_STRING) - stack->state = JSON_PARSE_OBJECT_LABEL; - else - report_parse_error(stack, &lex); - break; - default: - elog(ERROR, "unexpected json parse state: %d", - (int) stack->state); - } - - /* Push or pop the state stack, if needed. */ - switch (op) - { - case JSON_STACKOP_PUSH: - case JSON_STACKOP_PUSH_WITH_PUSHBACK: - stack++; - if (stack >= &stacktop[stacksize]) - { - /* Need to enlarge the stack. */ - int stackoffset = stack - stacktop; - - stacksize += 32; - stacktop = (JsonParseStack *) - repalloc(stacktop, - sizeof(JsonParseStack) * stacksize); - stack = stacktop + stackoffset; - } - stack->state = JSON_PARSE_VALUE; - if (op == JSON_STACKOP_PUSH_WITH_PUSHBACK) - goto redo; - break; - case JSON_STACKOP_POP: - if (stack == stacktop) - { - /* Expect end of input. 
*/ - json_lex(&lex); - if (lex.token_start != NULL) - report_parse_error(NULL, &lex); - return; - } - stack--; - break; - case JSON_STACKOP_NONE: - /* nothing to do */ - break; - } + case JSON_TOKEN_OBJECT_START: + parse_object(lex, sem); + break; + case JSON_TOKEN_ARRAY_START: + parse_array(lex, sem); + break; + default: + parse_scalar(lex, sem); /* json can be a bare scalar */ } + + lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END); + +} + +/* + * Recursive Descent parse routines. There is one for each structural + * element in a json document: + * - scalar (string, number, true, false, null) + * - array ( [ ] ) + * - array element + * - object ( { } ) + * - object field + */ +static inline void +parse_scalar(JsonLexContext *lex, JsonSemAction sem) +{ + char *val = NULL; + json_scalar_action sfunc = sem->scalar; + char **valaddr; + JsonTokenType tok = lex_peek(lex); + + valaddr = sfunc == NULL ? NULL : &val; + + /* a scalar must be a string, a number, true, false, or null */ + switch (tok) + { + case JSON_TOKEN_TRUE: + lex_accept(lex, JSON_TOKEN_TRUE, valaddr); + break; + case JSON_TOKEN_FALSE: + lex_accept(lex, JSON_TOKEN_FALSE, valaddr); + break; + case JSON_TOKEN_NULL: + lex_accept(lex, JSON_TOKEN_NULL, valaddr); + break; + case JSON_TOKEN_NUMBER: + lex_accept(lex, JSON_TOKEN_NUMBER, valaddr); + break; + case JSON_TOKEN_STRING: + lex_accept(lex, JSON_TOKEN_STRING, valaddr); + break; + default: + report_parse_error(JSON_PARSE_VALUE, lex); + } + + if (sfunc != NULL) + (*sfunc) (sem->semstate, val, tok); +} + +static void +parse_object_field(JsonLexContext *lex, JsonSemAction sem) +{ + /* + * an object field is "fieldname" : value where value can be a scalar, + * object or array + */ + + char *fname = NULL; /* keep compiler quiet */ + json_ofield_action ostart = sem->object_field_start; + json_ofield_action oend = sem->object_field_end; + bool isnull; + char **fnameaddr = NULL; + JsonTokenType tok; + + if (ostart != NULL || oend != NULL) + fnameaddr = &fname; + + if (!lex_accept(lex, JSON_TOKEN_STRING, fnameaddr)) + report_parse_error(JSON_PARSE_STRING, lex); + + lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON); + + tok = lex_peek(lex); + isnull = tok == JSON_TOKEN_NULL; + + if (ostart != NULL) + (*ostart) (sem->semstate, fname, isnull); + + switch (tok) + { + case JSON_TOKEN_OBJECT_START: + parse_object(lex, sem); + break; + case JSON_TOKEN_ARRAY_START: + parse_array(lex, sem); + break; + default: + parse_scalar(lex, sem); + } + + if (oend != NULL) + (*oend) (sem->semstate, fname, isnull); + + if (fname != NULL) + pfree(fname); +} + +static void +parse_object(JsonLexContext *lex, JsonSemAction sem) +{ + /* + * an object is a possibly empty sequence of object fields, separated by + * commas and surrounded by curly braces. + */ + json_struct_action ostart = sem->object_start; + json_struct_action oend = sem->object_end; + JsonTokenType tok; + + if (ostart != NULL) + (*ostart) (sem->semstate); + + /* + * Data inside an object is at a higher nesting level than the object + * itself. Note that we increment this after we call the semantic routine + * for the object start and restore it before we call the routine for the + * object end. 
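+ * For example, the fields of a top-level object are reported at lex_level 1,
+ * while the object_start/object_end callbacks for that object itself run at
+ * lex_level 0; the semantic routines in jsonfuncs.c rely on this.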
+ */ + lex->lex_level++; + + /* we know this will succeed, just clearing the token */ + lex_expect(JSON_PARSE_OBJECT_START, lex, JSON_TOKEN_OBJECT_START); + + tok = lex_peek(lex); + switch (tok) + { + case JSON_TOKEN_STRING: + parse_object_field(lex, sem); + while (lex_accept(lex, JSON_TOKEN_COMMA, NULL)) + parse_object_field(lex, sem); + break; + case JSON_TOKEN_OBJECT_END: + break; + default: + /* case of an invalid initial token inside the object */ + report_parse_error(JSON_PARSE_OBJECT_START, lex); + } + + lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END); + + lex->lex_level--; + + if (oend != NULL) + (*oend) (sem->semstate); +} + +static void +parse_array_element(JsonLexContext *lex, JsonSemAction sem) +{ + json_aelem_action astart = sem->array_element_start; + json_aelem_action aend = sem->array_element_end; + JsonTokenType tok = lex_peek(lex); + + bool isnull; + + isnull = tok == JSON_TOKEN_NULL; + + if (astart != NULL) + (*astart) (sem->semstate, isnull); + + /* an array element is any object, array or scalar */ + switch (tok) + { + case JSON_TOKEN_OBJECT_START: + parse_object(lex, sem); + break; + case JSON_TOKEN_ARRAY_START: + parse_array(lex, sem); + break; + default: + parse_scalar(lex, sem); + } + + if (aend != NULL) + (*aend) (sem->semstate, isnull); +} + +static void +parse_array(JsonLexContext *lex, JsonSemAction sem) +{ + /* + * an array is a possibly empty sequence of array elements, separated by + * commas and surrounded by square brackets. + */ + json_struct_action astart = sem->array_start; + json_struct_action aend = sem->array_end; + + if (astart != NULL) + (*astart) (sem->semstate); + + /* + * Data inside an array is at a higher nesting level than the array + * itself. Note that we increment this after we call the semantic routine + * for the array start and restore it before we call the routine for the + * array end. + */ + lex->lex_level++; + + lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START); + if (lex_peek(lex) != JSON_TOKEN_ARRAY_END) + { + + parse_array_element(lex, sem); + + while (lex_accept(lex, JSON_TOKEN_COMMA, NULL)) + parse_array_element(lex, sem); + } + + lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END); + + lex->lex_level--; + + if (aend != NULL) + (*aend) (sem->semstate); } /* * Lex one token from the input stream. */ -static void +static inline void json_lex(JsonLexContext *lex) { char *s; + int len; /* Skip leading whitespace. */ s = lex->token_terminator; - while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r') - s++; + len = s - lex->input; + while (len < lex->input_length && + (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r')) + { + if (*s == '\n') + ++lex->line_number; + ++s; + ++len; + } lex->token_start = s; /* Determine token type. */ - if (strchr("{}[],:", s[0]) != NULL) + if (len >= lex->input_length) { - /* strchr() is willing to match a zero byte, so test for that. */ - if (s[0] == '\0') - { - /* End of string. */ - lex->token_start = NULL; - lex->token_terminator = s; - } - else - { - /* Single-character token, some kind of punctuation mark. */ - lex->token_terminator = s + 1; - } - lex->token_type = JSON_VALUE_INVALID; - } - else if (*s == '"') - { - /* String. */ - json_lex_string(lex); - lex->token_type = JSON_VALUE_STRING; - } - else if (*s == '-') - { - /* Negative number. */ - json_lex_number(lex, s + 1); - lex->token_type = JSON_VALUE_NUMBER; - } - else if (*s >= '0' && *s <= '9') - { - /* Positive number. 
*/ - json_lex_number(lex, s); - lex->token_type = JSON_VALUE_NUMBER; + lex->token_start = NULL; + lex->prev_token_terminator = lex->token_terminator; + lex->token_terminator = s; + lex->token_type = JSON_TOKEN_END; } else - { - char *p; - - /* - * We're not dealing with a string, number, legal punctuation mark, or - * end of string. The only legal tokens we might find here are true, - * false, and null, but for error reporting purposes we scan until we - * see a non-alphanumeric character. That way, we can report the - * whole word as an unexpected token, rather than just some - * unintuitive prefix thereof. - */ - for (p = s; JSON_ALPHANUMERIC_CHAR(*p); p++) - /* skip */ ; - - if (p == s) + switch (*s) { - /* - * We got some sort of unexpected punctuation or an otherwise - * unexpected character, so just complain about that one - * character. (It can't be multibyte because the above loop will - * advance over any multibyte characters.) - */ - lex->token_terminator = s + 1; - report_invalid_token(lex); - } + /* Single-character token, some kind of punctuation mark. */ + case '{': + lex->prev_token_terminator = lex->token_terminator; + lex->token_terminator = s + 1; + lex->token_type = JSON_TOKEN_OBJECT_START; + break; + case '}': + lex->prev_token_terminator = lex->token_terminator; + lex->token_terminator = s + 1; + lex->token_type = JSON_TOKEN_OBJECT_END; + break; + case '[': + lex->prev_token_terminator = lex->token_terminator; + lex->token_terminator = s + 1; + lex->token_type = JSON_TOKEN_ARRAY_START; + break; + case ']': + lex->prev_token_terminator = lex->token_terminator; + lex->token_terminator = s + 1; + lex->token_type = JSON_TOKEN_ARRAY_END; + break; + case ',': + lex->prev_token_terminator = lex->token_terminator; + lex->token_terminator = s + 1; + lex->token_type = JSON_TOKEN_COMMA; + break; + case ':': + lex->prev_token_terminator = lex->token_terminator; + lex->token_terminator = s + 1; + lex->token_type = JSON_TOKEN_COLON; + break; + case '"': + /* string */ + json_lex_string(lex); + lex->token_type = JSON_TOKEN_STRING; + break; + case '-': + /* Negative number. */ + json_lex_number(lex, s + 1); + lex->token_type = JSON_TOKEN_NUMBER; + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + /* Positive number. */ + json_lex_number(lex, s); + lex->token_type = JSON_TOKEN_NUMBER; + break; + default: + { + char *p; - /* - * We've got a real alphanumeric token here. If it happens to be - * true, false, or null, all is well. If not, error out. - */ - lex->token_terminator = p; - if (p - s == 4) - { - if (memcmp(s, "true", 4) == 0) - lex->token_type = JSON_VALUE_TRUE; - else if (memcmp(s, "null", 4) == 0) - lex->token_type = JSON_VALUE_NULL; - else - report_invalid_token(lex); - } - else if (p - s == 5 && memcmp(s, "false", 5) == 0) - lex->token_type = JSON_VALUE_FALSE; - else - report_invalid_token(lex); - } + /* + * We're not dealing with a string, number, legal + * punctuation mark, or end of string. The only legal + * tokens we might find here are true, false, and null, + * but for error reporting purposes we scan until we see a + * non-alphanumeric character. That way, we can report + * the whole word as an unexpected token, rather than just + * some unintuitive prefix thereof. 
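+ * (For instance, for the invalid input "tru" we report the whole token
+ * "tru", not just its first character.)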
+ */ + for (p = s; JSON_ALPHANUMERIC_CHAR(*p) && p - s < lex->input_length - len; p++) + /* skip */ ; + + /* + * We got some sort of unexpected punctuation or an + * otherwise unexpected character, so just complain about + * that one character. + */ + if (p == s) + { + lex->prev_token_terminator = lex->token_terminator; + lex->token_terminator = s + 1; + report_invalid_token(lex); + } + + /* + * We've got a real alphanumeric token here. If it + * happens to be true, false, or null, all is well. If + * not, error out. + */ + lex->prev_token_terminator = lex->token_terminator; + lex->token_terminator = p; + if (p - s == 4) + { + if (memcmp(s, "true", 4) == 0) + lex->token_type = JSON_TOKEN_TRUE; + else if (memcmp(s, "null", 4) == 0) + lex->token_type = JSON_TOKEN_NULL; + else + report_invalid_token(lex); + } + else if (p - s == 5 && memcmp(s, "false", 5) == 0) + lex->token_type = JSON_TOKEN_FALSE; + else + report_invalid_token(lex); + + } + } /* end of switch */ } /* * The next token in the input stream is known to be a string; lex it. */ -static void +static inline void json_lex_string(JsonLexContext *lex) { char *s; + int len; - for (s = lex->token_start + 1; *s != '"'; s++) + if (lex->strval != NULL) + resetStringInfo(lex->strval); + + len = lex->token_start - lex->input; + len++; + for (s = lex->token_start + 1; *s != '"'; s++, len++) { - /* Per RFC4627, these characters MUST be escaped. */ - if ((unsigned char) *s < 32) + /* Premature end of the string. */ + if (len >= lex->input_length) { - /* A NUL byte marks the (premature) end of the string. */ - if (*s == '\0') - { - lex->token_terminator = s; - report_invalid_token(lex); - } + lex->token_terminator = s; + report_invalid_token(lex); + } + else if ((unsigned char) *s < 32) + { + /* Per RFC4627, these characters MUST be escaped. */ /* Since *s isn't printable, exclude it from the context string */ lex->token_terminator = s; ereport(ERROR, @@ -460,7 +676,8 @@ json_lex_string(JsonLexContext *lex) { /* OK, we have an escape character. */ s++; - if (*s == '\0') + len++; + if (len >= lex->input_length) { lex->token_terminator = s; report_invalid_token(lex); @@ -473,7 +690,8 @@ json_lex_string(JsonLexContext *lex) for (i = 1; i <= 4; i++) { s++; - if (*s == '\0') + len++; + if (len >= lex->input_length) { lex->token_terminator = s; report_invalid_token(lex); @@ -494,10 +712,66 @@ json_lex_string(JsonLexContext *lex) report_json_context(lex))); } } + if (lex->strval != NULL) + { + char utf8str[5]; + int utf8len; + char *converted; + + unicode_to_utf8(ch, (unsigned char *) utf8str); + utf8len = pg_utf_mblen((unsigned char *) utf8str); + utf8str[utf8len] = '\0'; + converted = pg_any_to_server(utf8str, 1, PG_UTF8); + appendStringInfoString(lex->strval, converted); + if (converted != utf8str) + pfree(converted); + + } + } + else if (lex->strval != NULL) + { + switch (*s) + { + case '"': + case '\\': + case '/': + appendStringInfoChar(lex->strval, *s); + break; + case 'b': + appendStringInfoChar(lex->strval, '\b'); + break; + case 'f': + appendStringInfoChar(lex->strval, '\f'); + break; + case 'n': + appendStringInfoChar(lex->strval, '\n'); + break; + case 'r': + appendStringInfoChar(lex->strval, '\r'); + break; + case 't': + appendStringInfoChar(lex->strval, '\t'); + break; + default: + /* Not a valid string escape, so error out. 
*/ + lex->token_terminator = s + pg_mblen(s); + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Escape sequence \"\\%s\" is invalid.", + extract_mb_char(s)), + report_json_context(lex))); + } } else if (strchr("\"\\/bfnrt", *s) == NULL) { - /* Not a valid string escape, so error out. */ + /* + * Simpler processing if we're not bothered about de-escaping + * + * It's very tempting to remove the strchr() call here and + * replace it with a switch statement, but testing so far has + * shown it's not a performance win. + */ lex->token_terminator = s + pg_mblen(s); ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), @@ -506,10 +780,17 @@ json_lex_string(JsonLexContext *lex) extract_mb_char(s)), report_json_context(lex))); } + } + else if (lex->strval != NULL) + { + appendStringInfoChar(lex->strval, *s); + } + } /* Hooray, we found the end of the string! */ + lex->prev_token_terminator = lex->token_terminator; lex->token_terminator = s + 1; } @@ -539,57 +820,70 @@ json_lex_string(JsonLexContext *lex) * *------------------------------------------------------------------------- */ -static void +static inline void json_lex_number(JsonLexContext *lex, char *s) { bool error = false; char *p; + int len; + len = s - lex->input; /* Part (1): leading sign indicator. */ /* Caller already did this for us; so do nothing. */ /* Part (2): parse main digit string. */ if (*s == '0') + { s++; + len++; + } else if (*s >= '1' && *s <= '9') { do { s++; - } while (*s >= '0' && *s <= '9'); + len++; + } while (*s >= '0' && *s <= '9' && len < lex->input_length); } else error = true; /* Part (3): parse optional decimal portion. */ - if (*s == '.') + if (len < lex->input_length && *s == '.') { s++; - if (*s < '0' || *s > '9') + len++; + if (len == lex->input_length || *s < '0' || *s > '9') error = true; else { do { s++; - } while (*s >= '0' && *s <= '9'); + len++; + } while (*s >= '0' && *s <= '9' && len < lex->input_length); } } /* Part (4): parse optional exponent. */ - if (*s == 'e' || *s == 'E') + if (len < lex->input_length && (*s == 'e' || *s == 'E')) { s++; - if (*s == '+' || *s == '-') + len++; + if (len < lex->input_length && (*s == '+' || *s == '-')) + { s++; - if (*s < '0' || *s > '9') + len++; + } + if (len == lex->input_length || *s < '0' || *s > '9') error = true; else { do { s++; - } while (*s >= '0' && *s <= '9'); + len++; + } while (len < lex->input_length && *s >= '0' && *s <= '9'); } } @@ -598,8 +892,9 @@ json_lex_number(JsonLexContext *lex, char *s) * here should be considered part of the token for error-reporting * purposes. */ - for (p = s; JSON_ALPHANUMERIC_CHAR(*p); p++) + for (p = s; JSON_ALPHANUMERIC_CHAR(*p) && len < lex->input_length; p++, len++) error = true; + lex->prev_token_terminator = lex->token_terminator; lex->token_terminator = p; if (error) report_invalid_token(lex); @@ -611,13 +906,13 @@ json_lex_number(JsonLexContext *lex, char *s) * lex->token_start and lex->token_terminator must identify the current token. */ static void -report_parse_error(JsonParseStack *stack, JsonLexContext *lex) +report_parse_error(JsonParseContext ctx, JsonLexContext *lex) { char *token; int toklen; /* Handle case where the input ended prematurely. 
*/ - if (lex->token_start == NULL) + if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type json"), @@ -631,7 +926,7 @@ report_parse_error(JsonParseStack *stack, JsonLexContext *lex) token[toklen] = '\0'; /* Complain, with the appropriate detail message. */ - if (stack == NULL) + if (ctx == JSON_PARSE_END) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type json"), @@ -640,7 +935,7 @@ report_parse_error(JsonParseStack *stack, JsonLexContext *lex) report_json_context(lex))); else { - switch (stack->state) + switch (ctx) { case JSON_PARSE_VALUE: ereport(ERROR, @@ -650,6 +945,14 @@ report_parse_error(JsonParseStack *stack, JsonLexContext *lex) token), report_json_context(lex))); break; + case JSON_PARSE_STRING: + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Expected string, but found \"%s\".", + token), + report_json_context(lex))); + break; case JSON_PARSE_ARRAY_START: ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), @@ -699,8 +1002,7 @@ report_parse_error(JsonParseStack *stack, JsonLexContext *lex) report_json_context(lex))); break; default: - elog(ERROR, "unexpected json parse state: %d", - (int) stack->state); + elog(ERROR, "unexpected json parse state: %d", ctx); } } } @@ -795,7 +1097,7 @@ report_json_context(JsonLexContext *lex) * suffixing "..." if not ending at end of line. */ prefix = (context_start > line_start) ? "..." : ""; - suffix = (*context_end != '\0' && *context_end != '\n' && *context_end != '\r') ? "..." : ""; + suffix = (lex->token_type != JSON_TOKEN_END && context_end - lex->input < lex->input_length && *context_end != '\n' && *context_end != '\r') ? "..." : ""; return errcontext("JSON data, line %d: %s%s%s", line_number, prefix, ctxt, suffix); diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c new file mode 100644 index 0000000000..63df1ac677 --- /dev/null +++ b/src/backend/utils/adt/jsonfuncs.c @@ -0,0 +1,1919 @@ +/*------------------------------------------------------------------------- + * + * jsonfuncs.c + * Functions to process JSON data type. 
+ * + * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/adt/jsonfuncs.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include + +#include "fmgr.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "access/htup_details.h" +#include "catalog/pg_type.h" +#include "lib/stringinfo.h" +#include "mb/pg_wchar.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/hsearch.h" +#include "utils/json.h" +#include "utils/jsonapi.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/typcache.h" + +/* semantic action functions for json_object_keys */ +static void okeys_object_field_start(void *state, char *fname, bool isnull); +static void okeys_array_start(void *state); +static void okeys_scalar(void *state, char *token, JsonTokenType tokentype); + +/* semantic action functions for json_get* functions */ +static void get_object_start(void *state); +static void get_object_field_start(void *state, char *fname, bool isnull); +static void get_object_field_end(void *state, char *fname, bool isnull); +static void get_array_start(void *state); +static void get_array_element_start(void *state, bool isnull); +static void get_array_element_end(void *state, bool isnull); +static void get_scalar(void *state, char *token, JsonTokenType tokentype); + +/* common worker function for json getter functions */ +static inline Datum get_path_all(PG_FUNCTION_ARGS, bool as_text); +static inline text *get_worker(text *json, char *field, int elem_index, + char **tpath, int *ipath, int npath, + bool normalize_results); + +/* semantic action functions for json_array_length */ +static void alen_object_start(void *state); +static void alen_scalar(void *state, char *token, JsonTokenType tokentype); +static void alen_array_element_start(void *state, bool isnull); + +/* common worker for json_each* functions */ +static inline Datum each_worker(PG_FUNCTION_ARGS, bool as_text); + +/* semantic action functions for json_each */ +static void each_object_field_start(void *state, char *fname, bool isnull); +static void each_object_field_end(void *state, char *fname, bool isnull); +static void each_array_start(void *state); +static void each_scalar(void *state, char *token, JsonTokenType tokentype); + +/* semantic action functions for json_array_elements */ +static void elements_object_start(void *state); +static void elements_array_element_start(void *state, bool isnull); +static void elements_array_element_end(void *state, bool isnull); +static void elements_scalar(void *state, char *token, JsonTokenType tokentype); + +/* turn a json object into a hash table */ +static HTAB *get_json_object_as_hash(text *json, char *funcname, bool use_json_as_text); + +/* semantic action functions for get_json_object_as_hash */ +static void hash_object_field_start(void *state, char *fname, bool isnull); +static void hash_object_field_end(void *state, char *fname, bool isnull); +static void hash_array_start(void *state); +static void hash_scalar(void *state, char *token, JsonTokenType tokentype); + +/* semantic action functions for populate_recordset */ +static void populate_recordset_object_field_start(void *state, char *fname, bool isnull); +static void populate_recordset_object_field_end(void *state, char *fname, bool isnull); +static void populate_recordset_scalar(void *state, char *token, 
JsonTokenType tokentype); +static void populate_recordset_object_start(void *state); +static void populate_recordset_object_end(void *state); +static void populate_recordset_array_start(void *state); +static void populate_recordset_array_element_start(void *state, bool isnull); + +/* search type classification for json_get* functions */ +typedef enum +{ + JSON_SEARCH_OBJECT = 1, + JSON_SEARCH_ARRAY, + JSON_SEARCH_PATH +} JsonSearch; + +/* state for json_object_keys */ +typedef struct okeysState +{ + JsonLexContext *lex; + char **result; + int result_size; + int result_count; + int sent_count; +} okeysState, *OkeysState; + +/* state for json_get* functions */ +typedef struct getState +{ + JsonLexContext *lex; + JsonSearch search_type; + int search_index; + int array_index; + char *search_term; + char *result_start; + text *tresult; + bool result_is_null; + bool normalize_results; + bool next_scalar; + char **path; + int npath; + char **current_path; + bool *pathok; + int *array_level_index; + int *path_level_index; +} getState, *GetState; + +/* state for json_array_length */ +typedef struct alenState +{ + JsonLexContext *lex; + int count; +} alenState, *AlenState; + +/* state for json_each */ +typedef struct eachState +{ + JsonLexContext *lex; + Tuplestorestate *tuple_store; + TupleDesc ret_tdesc; + MemoryContext tmp_cxt; + char *result_start; + bool normalize_results; + bool next_scalar; + char *normalized_scalar; +} eachState, *EachState; + +/* state for json_array_elements */ +typedef struct elementsState +{ + JsonLexContext *lex; + Tuplestorestate *tuple_store; + TupleDesc ret_tdesc; + MemoryContext tmp_cxt; + char *result_start; +} elementsState, *ElementsState; + +/* state for get_json_object_as_hash */ +typedef struct jhashState +{ + JsonLexContext *lex; + HTAB *hash; + char *saved_scalar; + char *save_json_start; + bool use_json_as_text; + char *function_name; +} jhashState, *JHashState; + +/* used to build the hashtable */ +typedef struct jsonHashEntry +{ + char fname[NAMEDATALEN]; + char *val; + char *json; + bool isnull; +} jsonHashEntry, *JsonHashEntry; + +/* these two are stolen from hstore / record_out, used in populate_record* */ +typedef struct ColumnIOData +{ + Oid column_type; + Oid typiofunc; + Oid typioparam; + FmgrInfo proc; +} ColumnIOData; + +typedef struct RecordIOData +{ + Oid record_type; + int32 record_typmod; + int ncolumns; + ColumnIOData columns[1]; /* VARIABLE LENGTH ARRAY */ +} RecordIOData; + +/* state for populate_recordset */ +typedef struct populateRecordsetState +{ + JsonLexContext *lex; + HTAB *json_hash; + char *saved_scalar; + char *save_json_start; + bool use_json_as_text; + Tuplestorestate *tuple_store; + TupleDesc ret_tdesc; + HeapTupleHeader rec; + RecordIOData *my_extra; + MemoryContext fn_mcxt; /* used to stash IO funcs */ +} populateRecordsetState, *PopulateRecordsetState; + +/* + * SQL function json_object_keys + * + * Returns the set of keys for the object argument. + * + * This SRF operates in value-per-call mode. It processes the + * object during the first call, and the keys are simply stashed + * in an array, whose size is expanded as necessary. This is probably + * safe enough for a list of keys of a single object, since they are + * limited in size to NAMEDATALEN and the number of keys is unlikely to + * be so huge that it has major memory implications. 
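+ *
+ * For example, per the documentation table above,
+ * json_object_keys('{"f1":"abc","f2":{"f3":"a", "f4":"b"}}')
+ * returns just two rows, f1 and f2; nested keys are not visited.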
+ */ + + +Datum +json_object_keys(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + OkeysState state; + int i; + + if (SRF_IS_FIRSTCALL()) + { + text *json = PG_GETARG_TEXT_P(0); + JsonLexContext *lex = makeJsonLexContext(json, true); + JsonSemAction sem; + + MemoryContext oldcontext; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + state = palloc(sizeof(okeysState)); + sem = palloc0(sizeof(jsonSemAction)); + + state->lex = lex; + state->result_size = 256; + state->result_count = 0; + state->sent_count = 0; + state->result = palloc(256 * sizeof(char *)); + + sem->semstate = (void *) state; + sem->array_start = okeys_array_start; + sem->scalar = okeys_scalar; + sem->object_field_start = okeys_object_field_start; + /* remainder are all NULL, courtesy of palloc0 above */ + + pg_parse_json(lex, sem); + /* keys are now in state->result */ + + pfree(lex->strval->data); + pfree(lex->strval); + pfree(lex); + pfree(sem); + + MemoryContextSwitchTo(oldcontext); + funcctx->user_fctx = (void *) state; + + } + + funcctx = SRF_PERCALL_SETUP(); + state = (OkeysState) funcctx->user_fctx; + + if (state->sent_count < state->result_count) + { + char *nxt = state->result[state->sent_count++]; + + SRF_RETURN_NEXT(funcctx, CStringGetTextDatum(nxt)); + } + + /* cleanup to reduce or eliminate memory leaks */ + for (i = 0; i < state->result_count; i++) + pfree(state->result[i]); + pfree(state->result); + pfree(state); + + SRF_RETURN_DONE(funcctx); +} + +static void +okeys_object_field_start(void *state, char *fname, bool isnull) +{ + OkeysState _state = (OkeysState) state; + + /* only collecting keys for the top level object */ + if (_state->lex->lex_level != 1) + return; + + /* enlarge result array if necessary */ + if (_state->result_count >= _state->result_size) + { + _state->result_size *= 2; + _state->result = + repalloc(_state->result, sizeof(char *) * _state->result_size); + } + + /* save a copy of the field name */ + _state->result[_state->result_count++] = pstrdup(fname); +} + +static void +okeys_array_start(void *state) +{ + OkeysState _state = (OkeysState) state; + + /* top level must be a json object */ + if (_state->lex->lex_level == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call json_object_keys on an array"))); +} + +static void +okeys_scalar(void *state, char *token, JsonTokenType tokentype) +{ + OkeysState _state = (OkeysState) state; + + /* top level must be a json object */ + if (_state->lex->lex_level == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call json_object_keys on a scalar"))); +} + +/* + * json getter functions + * these implement the -> ->> #> and #>> operators + * and the json_extract_path*(json, text, ...) 
functions + */ + + +Datum +json_object_field(PG_FUNCTION_ARGS) +{ + text *json = PG_GETARG_TEXT_P(0); + text *result; + text *fname = PG_GETARG_TEXT_P(1); + char *fnamestr = text_to_cstring(fname); + + result = get_worker(json, fnamestr, -1, NULL, NULL, -1, false); + + if (result != NULL) + PG_RETURN_TEXT_P(result); + else + PG_RETURN_NULL(); +} + +Datum +json_object_field_text(PG_FUNCTION_ARGS) +{ + text *json = PG_GETARG_TEXT_P(0); + text *result; + text *fname = PG_GETARG_TEXT_P(1); + char *fnamestr = text_to_cstring(fname); + + result = get_worker(json, fnamestr, -1, NULL, NULL, -1, true); + + if (result != NULL) + PG_RETURN_TEXT_P(result); + else + PG_RETURN_NULL(); +} + +Datum +json_array_element(PG_FUNCTION_ARGS) +{ + text *json = PG_GETARG_TEXT_P(0); + text *result; + int element = PG_GETARG_INT32(1); + + result = get_worker(json, NULL, element, NULL, NULL, -1, false); + + if (result != NULL) + PG_RETURN_TEXT_P(result); + else + PG_RETURN_NULL(); +} + +Datum +json_array_element_text(PG_FUNCTION_ARGS) +{ + text *json = PG_GETARG_TEXT_P(0); + text *result; + int element = PG_GETARG_INT32(1); + + result = get_worker(json, NULL, element, NULL, NULL, -1, true); + + if (result != NULL) + PG_RETURN_TEXT_P(result); + else + PG_RETURN_NULL(); +} + +Datum +json_extract_path(PG_FUNCTION_ARGS) +{ + return get_path_all(fcinfo, false); +} + +Datum +json_extract_path_text(PG_FUNCTION_ARGS) +{ + return get_path_all(fcinfo, true); +} + +/* + * common routine for extract_path functions + */ +static inline Datum +get_path_all(PG_FUNCTION_ARGS, bool as_text) +{ + text *json = PG_GETARG_TEXT_P(0); + ArrayType *path = PG_GETARG_ARRAYTYPE_P(1); + text *result; + Datum *pathtext; + bool *pathnulls; + int npath; + char **tpath; + int *ipath; + int i; + long ind; + char *endptr; + + if (array_contains_nulls(path)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call function with null path elements"))); + + + deconstruct_array(path, TEXTOID, -1, false, 'i', + &pathtext, &pathnulls, &npath); + + tpath = palloc(npath * sizeof(char *)); + ipath = palloc(npath * sizeof(int)); + + + for (i = 0; i < npath; i++) + { + tpath[i] = TextDatumGetCString(pathtext[i]); + if (*tpath[i] == '\0') + ereport( + ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call function with empty path elements"))); + + /* + * we have no idea at this stage what structure the document is so + * just convert anything in the path that we can to an integer and set + * all the other integers to -1 which will never match. + */ + ind = strtol(tpath[i], &endptr, 10); + if (*endptr == '\0' && ind <= INT_MAX && ind >= 0) + ipath[i] = (int) ind; + else + ipath[i] = -1; + } + + + result = get_worker(json, NULL, -1, tpath, ipath, npath, as_text); + + if (result != NULL) + PG_RETURN_TEXT_P(result); + else + PG_RETURN_NULL(); +} + +/* + * get_worker + * + * common worker for all the json getter functions + */ +static inline text * +get_worker(text *json, + char *field, + int elem_index, + char **tpath, + int *ipath, + int npath, + bool normalize_results) +{ + GetState state; + JsonLexContext *lex = makeJsonLexContext(json, true); + JsonSemAction sem; + + /* only allowed to use one of these */ + Assert(elem_index < 0 || (tpath == NULL && ipath == NULL && field == NULL)); + Assert(tpath == NULL || field == NULL); + + state = palloc0(sizeof(getState)); + sem = palloc0(sizeof(jsonSemAction)); + + state->lex = lex; + /* is it "_as_text" variant? 
*/ + state->normalize_results = normalize_results; + if (field != NULL) + { + /* single text argument */ + state->search_type = JSON_SEARCH_OBJECT; + state->search_term = field; + } + else if (tpath != NULL) + { + /* path array argument */ + state->search_type = JSON_SEARCH_PATH; + state->path = tpath; + state->npath = npath; + state->current_path = palloc(sizeof(char *) * npath); + state->pathok = palloc0(sizeof(bool) * npath); + state->pathok[0] = true; + state->array_level_index = palloc(sizeof(int) * npath); + state->path_level_index = ipath; + + } + else + { + /* single integer argument */ + state->search_type = JSON_SEARCH_ARRAY; + state->search_index = elem_index; + state->array_index = -1; + } + + sem->semstate = (void *) state; + + /* + * Not all variants need all the semantic routines. Only set the ones + * that are actually needed, for maximum efficiency. + */ + sem->object_start = get_object_start; + sem->array_start = get_array_start; + sem->scalar = get_scalar; + if (field != NULL || tpath != NULL) + { + sem->object_field_start = get_object_field_start; + sem->object_field_end = get_object_field_end; + } + if (field == NULL) + { + sem->array_element_start = get_array_element_start; + sem->array_element_end = get_array_element_end; + } + + pg_parse_json(lex, sem); + + return state->tresult; +} + +static void +get_object_start(void *state) +{ + GetState _state = (GetState) state; + + /* json structure check */ + if (_state->lex->lex_level == 0 && _state->search_type == JSON_SEARCH_ARRAY) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot extract array element from a non-array"))); +} + +static void +get_object_field_start(void *state, char *fname, bool isnull) +{ + GetState _state = (GetState) state; + bool get_next = false; + int lex_level = _state->lex->lex_level; + + if (lex_level == 1 && _state->search_type == JSON_SEARCH_OBJECT && + strcmp(fname, _state->search_term) == 0) + { + + _state->tresult = NULL; + _state->result_start = NULL; + get_next = true; + } + else if (_state->search_type == JSON_SEARCH_PATH && + lex_level <= _state->npath && + _state->pathok[_state->lex->lex_level - 1] && + strcmp(fname, _state->path[lex_level - 1]) == 0) + { + /* path search, path so far is ok, and we have a match */ + + /* this object overrides any previous matching object */ + + _state->tresult = NULL; + _state->result_start = NULL; + + /* if not at end of path just mark path ok */ + if (lex_level < _state->npath) + _state->pathok[lex_level] = true; + + /* end of path, so we want this value */ + if (lex_level == _state->npath) + get_next = true; + } + + if (get_next) + { + if (_state->normalize_results && + _state->lex->token_type == JSON_TOKEN_STRING) + { + /* for as_text variants, tell get_scalar to set it for us */ + _state->next_scalar = true; + } + else + { + /* for non-as_text variants, just note the json starting point */ + _state->result_start = _state->lex->token_start; + } + } +} + +static void +get_object_field_end(void *state, char *fname, bool isnull) +{ + GetState _state = (GetState) state; + bool get_last = false; + int lex_level = _state->lex->lex_level; + + + /* same tests as in get_object_field_start, mutatis mutandis */ + if (lex_level == 1 && _state->search_type == JSON_SEARCH_OBJECT && + strcmp(fname, _state->search_term) == 0) + { + get_last = true; + } + else if (_state->search_type == JSON_SEARCH_PATH && + lex_level <= _state->npath && + _state->pathok[lex_level - 1] && + strcmp(fname, _state->path[lex_level - 1]) == 0) + { + /* done 
with this field so reset pathok */ + if (lex_level < _state->npath) + _state->pathok[lex_level] = false; + + if (lex_level == _state->npath) + get_last = true; + } + + /* for as_text variants our work is already done */ + if (get_last && _state->result_start != NULL) + { + /* + * make a text object from the string from the previously noted json + * start up to the end of the previous token (the lexer is by now + * ahead of us on whatever came after what we're interested in). + */ + int len = _state->lex->prev_token_terminator - _state->result_start; + + if (isnull && _state->normalize_results) + _state->tresult = (text *) NULL; + else + _state->tresult = cstring_to_text_with_len(_state->result_start, len); + } + + /* + * don't need to reset _state->result_start b/c we're only returning one + * datum, the conditions should not occur more than once, and this lets us + * check cheaply that they don't (see get_object_field_start()) + */ +} + +static void +get_array_start(void *state) +{ + GetState _state = (GetState) state; + int lex_level = _state->lex->lex_level; + + /* json structure check */ + if (lex_level == 0 && _state->search_type == JSON_SEARCH_OBJECT) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot extract field from a non-object"))); + /* initialize array count for this nesting level */ + if (_state->search_type == JSON_SEARCH_PATH && + lex_level <= _state->npath) + _state->array_level_index[lex_level] = -1; +} + +static void +get_array_element_start(void *state, bool isnull) +{ + GetState _state = (GetState) state; + bool get_next = false; + int lex_level = _state->lex->lex_level; + + if (lex_level == 1 && _state->search_type == JSON_SEARCH_ARRAY) + { + /* single integer search */ + _state->array_index++; + if (_state->array_index == _state->search_index) + get_next = true; + } + else if (_state->search_type == JSON_SEARCH_PATH && + lex_level <= _state->npath && + _state->pathok[lex_level - 1]) + { + /* + * path search, path so far is ok + * + * increment the array counter. no point doing this if we already know + * the path is bad. + * + * then check if we have a match. 
+ */ + + if (++_state->array_level_index[lex_level - 1] == + _state->path_level_index[lex_level - 1]) + { + if (lex_level == _state->npath) + { + /* match and at end of path, so get value */ + get_next = true; + } + else + { + /* not at end of path just mark path ok */ + _state->pathok[lex_level] = true; + } + } + + } + + /* same logic as for objects */ + if (get_next) + { + if (_state->normalize_results && + _state->lex->token_type == JSON_TOKEN_STRING) + { + _state->next_scalar = true; + } + else + { + _state->result_start = _state->lex->token_start; + } + } +} + +static void +get_array_element_end(void *state, bool isnull) +{ + GetState _state = (GetState) state; + bool get_last = false; + int lex_level = _state->lex->lex_level; + + /* same logic as in get_object_field_end, modified for arrays */ + + if (lex_level == 1 && _state->search_type == JSON_SEARCH_ARRAY && + _state->array_index == _state->search_index) + { + get_last = true; + } + else if (_state->search_type == JSON_SEARCH_PATH && + lex_level <= _state->npath && + _state->pathok[lex_level - 1] && + _state->array_level_index[lex_level - 1] == + _state->path_level_index[lex_level - 1]) + { + /* done with this element so reset pathok */ + if (lex_level < _state->npath) + _state->pathok[lex_level] = false; + + if (lex_level == _state->npath) + get_last = true; + } + if (get_last && _state->result_start != NULL) + { + int len = _state->lex->prev_token_terminator - _state->result_start; + + if (isnull && _state->normalize_results) + _state->tresult = (text *) NULL; + else + _state->tresult = cstring_to_text_with_len(_state->result_start, len); + } +} + +static void +get_scalar(void *state, char *token, JsonTokenType tokentype) +{ + GetState _state = (GetState) state; + + if (_state->lex->lex_level == 0 && _state->search_type != JSON_SEARCH_PATH) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot extract element from a scalar"))); + if (_state->next_scalar) + { + /* a de-escaped text value is wanted, so supply it */ + _state->tresult = cstring_to_text(token); + /* make sure the next call to get_scalar doesn't overwrite it */ + _state->next_scalar = false; + } + +} + +/* + * SQL function json_array_length(json) -> int + */ +Datum +json_array_length(PG_FUNCTION_ARGS) +{ + text *json = PG_GETARG_TEXT_P(0); + + AlenState state; + JsonLexContext *lex = makeJsonLexContext(json, false); + JsonSemAction sem; + + state = palloc0(sizeof(alenState)); + sem = palloc0(sizeof(jsonSemAction)); + + /* palloc0 does this for us */ +#if 0 + state->count = 0; +#endif + state->lex = lex; + + sem->semstate = (void *) state; + sem->object_start = alen_object_start; + sem->scalar = alen_scalar; + sem->array_element_start = alen_array_element_start; + + pg_parse_json(lex, sem); + + PG_RETURN_INT32(state->count); +} + +/* + * These next two checks ensure that the json is an array (since it can't be + * a scalar or an object). 
+ */ + +static void +alen_object_start(void *state) +{ + AlenState _state = (AlenState) state; + + /* json structure check */ + if (_state->lex->lex_level == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot get array length of a non-array"))); +} + +static void +alen_scalar(void *state, char *token, JsonTokenType tokentype) +{ + AlenState _state = (AlenState) state; + + /* json structure check */ + if (_state->lex->lex_level == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot get array length of a scalar"))); +} + +static void +alen_array_element_start(void *state, bool isnull) +{ + AlenState _state = (AlenState) state; + + /* just count up all the level 1 elements */ + if (_state->lex->lex_level == 1) + _state->count++; +} + +/* + * SQL function json_each and json_each_text + * + * decompose a json object into key value pairs. + * + * Unlike json_object_keys() these SRFs operate in materialize mode, + * stashing results into a Tuplestore object as they go. + * The construction of tuples is done using a temporary memory context + * that is cleared out after each tuple is built. + */ +Datum +json_each(PG_FUNCTION_ARGS) +{ + return each_worker(fcinfo, false); +} + +Datum +json_each_text(PG_FUNCTION_ARGS) +{ + return each_worker(fcinfo, true); +} + +static inline Datum +each_worker(PG_FUNCTION_ARGS, bool as_text) +{ + text *json = PG_GETARG_TEXT_P(0); + JsonLexContext *lex = makeJsonLexContext(json, true); + JsonSemAction sem; + ReturnSetInfo *rsi; + MemoryContext old_cxt; + TupleDesc tupdesc; + EachState state; + + state = palloc0(sizeof(eachState)); + sem = palloc0(sizeof(jsonSemAction)); + + rsi = (ReturnSetInfo *) fcinfo->resultinfo; + + if (!rsi || !IsA(rsi, ReturnSetInfo) || + (rsi->allowedModes & SFRM_Materialize) == 0 || + rsi->expectedDesc == NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that " + "cannot accept a set"))); + + + rsi->returnMode = SFRM_Materialize; + + (void) get_call_result_type(fcinfo, NULL, &tupdesc); + + /* make these in a sufficiently long-lived memory context */ + old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory); + + state->ret_tdesc = CreateTupleDescCopy(tupdesc); + BlessTupleDesc(state->ret_tdesc); + state->tuple_store = + tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random, + false, work_mem); + + MemoryContextSwitchTo(old_cxt); + + sem->semstate = (void *) state; + sem->array_start = each_array_start; + sem->scalar = each_scalar; + sem->object_field_start = each_object_field_start; + sem->object_field_end = each_object_field_end; + + state->normalize_results = as_text; + state->next_scalar = false; + + state->lex = lex; + state->tmp_cxt = AllocSetContextCreate(CurrentMemoryContext, + "json_each temporary cxt", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + + pg_parse_json(lex, sem); + + rsi->setResult = state->tuple_store; + rsi->setDesc = state->ret_tdesc; + + PG_RETURN_NULL(); +} + + +static void +each_object_field_start(void *state, char *fname, bool isnull) +{ + EachState _state = (EachState) state; + + /* save a pointer to where the value starts */ + if (_state->lex->lex_level == 1) + { + /* + * next_scalar will be reset in the object_field_end handler, and + * since we know the value is a scalar there is no danger of it being + * on while recursing down the tree. 
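+		 * (For non-string values, and when no de-escaping is wanted, we
+		 * instead note where the value's unparsed text starts; see
+		 * each_object_field_end.)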
+ */ + if (_state->normalize_results && _state->lex->token_type == JSON_TOKEN_STRING) + _state->next_scalar = true; + else + _state->result_start = _state->lex->token_start; + } +} + +static void +each_object_field_end(void *state, char *fname, bool isnull) +{ + EachState _state = (EachState) state; + MemoryContext old_cxt; + int len; + text *val; + HeapTuple tuple; + Datum values[2]; + bool nulls[2] = {false, false}; + + /* skip over nested objects */ + if (_state->lex->lex_level != 1) + return; + + /* use the tmp context so we can clean up after each tuple is done */ + old_cxt = MemoryContextSwitchTo(_state->tmp_cxt); + + values[0] = CStringGetTextDatum(fname); + + if (isnull && _state->normalize_results) + { + nulls[1] = true; + values[1] = (Datum) NULL; + } + else if (_state->next_scalar) + { + values[1] = CStringGetTextDatum(_state->normalized_scalar); + _state->next_scalar = false; + } + else + { + len = _state->lex->prev_token_terminator - _state->result_start; + val = cstring_to_text_with_len(_state->result_start, len); + values[1] = PointerGetDatum(val); + } + + + tuple = heap_form_tuple(_state->ret_tdesc, values, nulls); + + tuplestore_puttuple(_state->tuple_store, tuple); + + /* clean up and switch back */ + MemoryContextSwitchTo(old_cxt); + MemoryContextReset(_state->tmp_cxt); +} + +static void +each_array_start(void *state) +{ + EachState _state = (EachState) state; + + /* json structure check */ + if (_state->lex->lex_level == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot deconstruct an array as an object"))); +} + +static void +each_scalar(void *state, char *token, JsonTokenType tokentype) +{ + EachState _state = (EachState) state; + + /* json structure check */ + if (_state->lex->lex_level == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot deconstruct a scalar"))); + + /* supply de-escaped value if required */ + if (_state->next_scalar) + _state->normalized_scalar = token; +} + +/* + * SQL function json_array_elements + * + * get the elements from a json array + * + * a lot of this processing is similar to the json_each* functions + */ +Datum +json_array_elements(PG_FUNCTION_ARGS) +{ + text *json = PG_GETARG_TEXT_P(0); + + /* elements doesn't need any escaped strings, so use false here */ + JsonLexContext *lex = makeJsonLexContext(json, false); + JsonSemAction sem; + ReturnSetInfo *rsi; + MemoryContext old_cxt; + TupleDesc tupdesc; + ElementsState state; + + state = palloc0(sizeof(elementsState)); + sem = palloc0(sizeof(jsonSemAction)); + + rsi = (ReturnSetInfo *) fcinfo->resultinfo; + + if (!rsi || !IsA(rsi, ReturnSetInfo) || + (rsi->allowedModes & SFRM_Materialize) == 0 || + rsi->expectedDesc == NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that " + "cannot accept a set"))); + + + rsi->returnMode = SFRM_Materialize; + + /* it's a simple type, so don't use get_call_result_type() */ + tupdesc = rsi->expectedDesc; + + /* make these in a sufficiently long-lived memory context */ + old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory); + + state->ret_tdesc = CreateTupleDescCopy(tupdesc); + BlessTupleDesc(state->ret_tdesc); + state->tuple_store = + tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random, + false, work_mem); + + MemoryContextSwitchTo(old_cxt); + + sem->semstate = (void *) state; + sem->object_start = elements_object_start; + sem->scalar = elements_scalar; + sem->array_element_start = 
elements_array_element_start; + sem->array_element_end = elements_array_element_end; + + state->lex = lex; + state->tmp_cxt = AllocSetContextCreate(CurrentMemoryContext, + "json_array_elements temporary cxt", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + + pg_parse_json(lex, sem); + + rsi->setResult = state->tuple_store; + rsi->setDesc = state->ret_tdesc; + + PG_RETURN_NULL(); +} + +static void +elements_array_element_start(void *state, bool isnull) +{ + ElementsState _state = (ElementsState) state; + + /* save a pointer to where the value starts */ + if (_state->lex->lex_level == 1) + _state->result_start = _state->lex->token_start; +} + +static void +elements_array_element_end(void *state, bool isnull) +{ + ElementsState _state = (ElementsState) state; + MemoryContext old_cxt; + int len; + text *val; + HeapTuple tuple; + Datum values[1]; + static bool nulls[1] = {false}; + + /* skip over nested objects */ + if (_state->lex->lex_level != 1) + return; + + /* use the tmp context so we can clean up after each tuple is done */ + old_cxt = MemoryContextSwitchTo(_state->tmp_cxt); + + len = _state->lex->prev_token_terminator - _state->result_start; + val = cstring_to_text_with_len(_state->result_start, len); + + values[0] = PointerGetDatum(val); + + tuple = heap_form_tuple(_state->ret_tdesc, values, nulls); + + tuplestore_puttuple(_state->tuple_store, tuple); + + /* clean up and switch back */ + MemoryContextSwitchTo(old_cxt); + MemoryContextReset(_state->tmp_cxt); +} + +static void +elements_object_start(void *state) +{ + ElementsState _state = (ElementsState) state; + + /* json structure check */ + if (_state->lex->lex_level == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call json_array_elements on a non-array"))); +} + +static void +elements_scalar(void *state, char *token, JsonTokenType tokentype) +{ + ElementsState _state = (ElementsState) state; + + /* json structure check */ + if (_state->lex->lex_level == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call json_array_elements on a scalar"))); + + /* + * json_array_elements always returns json, so there's no need to think + * about de-escaped values here. + */ +} + +/* + * SQL function json_populate_record + * + * set fields in a record from the argument json + * + * Code adapted shamelessly from hstore's populate_record + * which is in turn partly adapted from record_out. + * + * The json is decomposed into a hash table, in which each + * field in the record is then looked up by name. + */ +Datum +json_populate_record(PG_FUNCTION_ARGS) +{ + Oid argtype = get_fn_expr_argtype(fcinfo->flinfo, 0); + text *json = PG_GETARG_TEXT_P(1); + bool use_json_as_text = PG_GETARG_BOOL(2); + HTAB *json_hash; + HeapTupleHeader rec; + Oid tupType; + int32 tupTypmod; + TupleDesc tupdesc; + HeapTupleData tuple; + HeapTuple rettuple; + RecordIOData *my_extra; + int ncolumns; + int i; + Datum *values; + bool *nulls; + char fname[NAMEDATALEN]; + JsonHashEntry hashentry; + + + if (!type_is_rowtype(argtype)) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("first argument must be a rowtype"))); + + if (PG_ARGISNULL(0)) + { + if (PG_ARGISNULL(1)) + PG_RETURN_NULL(); + + rec = NULL; + + /* + * have no tuple to look at, so the only source of type info is the + * argtype. The lookup_rowtype_tupdesc call below will error out if we + * don't have a known composite type oid here. 
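+		 * (In particular, an anonymous record type cannot be resolved this
+		 * way, so the caller must supply a named composite type.)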
+ */ + tupType = argtype; + tupTypmod = -1; + } + else + { + rec = PG_GETARG_HEAPTUPLEHEADER(0); + + if (PG_ARGISNULL(1)) + PG_RETURN_POINTER(rec); + + /* Extract type info from the tuple itself */ + tupType = HeapTupleHeaderGetTypeId(rec); + tupTypmod = HeapTupleHeaderGetTypMod(rec); + } + + json_hash = get_json_object_as_hash(json, "json_populate_record", use_json_as_text); + + /* + * if the input json is empty, we can only skip the rest if we were passed + * in a non-null record, since otherwise there may be issues with domain + * nulls. + */ + if (hash_get_num_entries(json_hash) == 0 && rec) + PG_RETURN_POINTER(rec); + + + tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod); + ncolumns = tupdesc->natts; + + if (rec) + { + /* Build a temporary HeapTuple control structure */ + tuple.t_len = HeapTupleHeaderGetDatumLength(rec); + ItemPointerSetInvalid(&(tuple.t_self)); + tuple.t_tableOid = InvalidOid; + tuple.t_data = rec; + } + + /* + * We arrange to look up the needed I/O info just once per series of + * calls, assuming the record type doesn't change underneath us. + */ + my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL || + my_extra->ncolumns != ncolumns) + { + fcinfo->flinfo->fn_extra = + MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + sizeof(RecordIOData) - sizeof(ColumnIOData) + + ncolumns * sizeof(ColumnIOData)); + my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra; + my_extra->record_type = InvalidOid; + my_extra->record_typmod = 0; + } + + if (my_extra->record_type != tupType || + my_extra->record_typmod != tupTypmod) + { + MemSet(my_extra, 0, + sizeof(RecordIOData) - sizeof(ColumnIOData) + + ncolumns * sizeof(ColumnIOData)); + my_extra->record_type = tupType; + my_extra->record_typmod = tupTypmod; + my_extra->ncolumns = ncolumns; + } + + values = (Datum *) palloc(ncolumns * sizeof(Datum)); + nulls = (bool *) palloc(ncolumns * sizeof(bool)); + + if (rec) + { + /* Break down the tuple into fields */ + heap_deform_tuple(&tuple, tupdesc, values, nulls); + } + else + { + for (i = 0; i < ncolumns; ++i) + { + values[i] = (Datum) 0; + nulls[i] = true; + } + } + + for (i = 0; i < ncolumns; ++i) + { + ColumnIOData *column_info = &my_extra->columns[i]; + Oid column_type = tupdesc->attrs[i]->atttypid; + char *value; + + /* Ignore dropped columns in datatype */ + if (tupdesc->attrs[i]->attisdropped) + { + nulls[i] = true; + continue; + } + + memset(fname, 0, NAMEDATALEN); + strncpy(fname, NameStr(tupdesc->attrs[i]->attname), NAMEDATALEN); + hashentry = hash_search(json_hash, fname, HASH_FIND, NULL); + + /* + * we can't just skip here if the key wasn't found since we might have + * a domain to deal with. If we were passed in a non-null record + * datum, we assume that the existing values are valid (if they're + * not, then it's not our fault), but if we were passed in a null, + * then every field which we don't populate needs to be run through + * the input function just in case it's a domain type. 
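+		 * (For example, a domain over text declared NOT NULL must reject
+		 * the null we would otherwise silently leave in the column.)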
+		 */
+		if (hashentry == NULL && rec)
+			continue;
+
+		/*
+		 * Prepare to convert the column value from text
+		 */
+		if (column_info->column_type != column_type)
+		{
+			getTypeInputInfo(column_type,
+							 &column_info->typiofunc,
+							 &column_info->typioparam);
+			fmgr_info_cxt(column_info->typiofunc, &column_info->proc,
+						  fcinfo->flinfo->fn_mcxt);
+			column_info->column_type = column_type;
+		}
+		if (hashentry == NULL || hashentry->isnull)
+		{
+			/*
+			 * need InputFunctionCall to happen even for nulls, so that domain
+			 * checks are done
+			 */
+			values[i] = InputFunctionCall(&column_info->proc, NULL,
+										  column_info->typioparam,
+										  tupdesc->attrs[i]->atttypmod);
+			nulls[i] = true;
+		}
+		else
+		{
+			value = hashentry->val;
+
+			values[i] = InputFunctionCall(&column_info->proc, value,
+										  column_info->typioparam,
+										  tupdesc->attrs[i]->atttypmod);
+			nulls[i] = false;
+		}
+	}
+
+	rettuple = heap_form_tuple(tupdesc, values, nulls);
+
+	ReleaseTupleDesc(tupdesc);
+
+	PG_RETURN_DATUM(HeapTupleGetDatum(rettuple));
+}
+
+/*
+ * get_json_object_as_hash
+ *
+ * decompose a json object into a hash table.
+ *
+ * Currently doesn't allow anything but a flat object. Should this
+ * change?
+ *
+ * funcname argument allows caller to pass in its name for use in
+ * error messages.
+ */
+static HTAB *
+get_json_object_as_hash(text *json, char *funcname, bool use_json_as_text)
+{
+	HASHCTL		ctl;
+	HTAB	   *tab;
+	JHashState	state;
+	JsonLexContext *lex = makeJsonLexContext(json, true);
+	JsonSemAction sem;
+
+	memset(&ctl, 0, sizeof(ctl));
+	ctl.keysize = NAMEDATALEN;
+	ctl.entrysize = sizeof(jsonHashEntry);
+	ctl.hcxt = CurrentMemoryContext;
+	tab = hash_create("json object hashtable",
+					  100,
+					  &ctl,
+					  HASH_ELEM | HASH_CONTEXT);
+
+	state = palloc0(sizeof(jhashState));
+	sem = palloc0(sizeof(jsonSemAction));
+
+	state->function_name = funcname;
+	state->hash = tab;
+	state->lex = lex;
+	state->use_json_as_text = use_json_as_text;
+
+	sem->semstate = (void *) state;
+	sem->array_start = hash_array_start;
+	sem->scalar = hash_scalar;
+	sem->object_field_start = hash_object_field_start;
+	sem->object_field_end = hash_object_field_end;
+
+	pg_parse_json(lex, sem);
+
+	return tab;
+}
+
+static void
+hash_object_field_start(void *state, char *fname, bool isnull)
+{
+	JHashState	_state = (JHashState) state;
+
+	if (_state->lex->lex_level > 1)
+		return;
+
+	if (_state->lex->token_type == JSON_TOKEN_ARRAY_START ||
+		_state->lex->token_type == JSON_TOKEN_OBJECT_START)
+	{
+		if (!_state->use_json_as_text)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("cannot call %s on a nested object",
+							_state->function_name)));
+		_state->save_json_start = _state->lex->token_start;
+	}
+	else
+	{
+		/* must be a scalar */
+		_state->save_json_start = NULL;
+	}
+}
+
+static void
+hash_object_field_end(void *state, char *fname, bool isnull)
+{
+	JHashState	_state = (JHashState) state;
+	JsonHashEntry hashentry;
+	bool		found;
+	char		name[NAMEDATALEN];
+
+	/*
+	 * ignore field names >= NAMEDATALEN - they can't match a record field;
+	 * also ignore nested fields.
+	 */
+	if (_state->lex->lex_level > 2 || strlen(fname) >= NAMEDATALEN)
+		return;
+
+	memset(name, 0, NAMEDATALEN);
+	strncpy(name, fname, NAMEDATALEN);
+
+	hashentry = hash_search(_state->hash, name, HASH_ENTER, &found);
+
+	/*
+	 * found being true indicates a duplicate. We don't do anything about
+	 * that; a later field with the same name overrides the earlier field.
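+	 * (hash_search with HASH_ENTER returns the existing entry when the key
+	 * is already present, so simply overwriting its value below gives
+	 * last-one-wins behavior.)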
+ */ + + hashentry->isnull = isnull; + if (_state->save_json_start != NULL) + { + int len = _state->lex->prev_token_terminator - _state->save_json_start; + char *val = palloc((len + 1) * sizeof(char)); + + memcpy(val, _state->save_json_start, len); + val[len] = '\0'; + hashentry->val = val; + } + else + { + /* must have had a scalar instead */ + hashentry->val = _state->saved_scalar; + } +} + +static void +hash_array_start(void *state) +{ + JHashState _state = (JHashState) state; + + if (_state->lex->lex_level == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call %s on an array", _state->function_name))); +} + +static void +hash_scalar(void *state, char *token, JsonTokenType tokentype) +{ + JHashState _state = (JHashState) state; + + if (_state->lex->lex_level == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call %s on a scalar", _state->function_name))); + + if (_state->lex->lex_level == 1) + _state->saved_scalar = token; +} + + +/* + * SQL function json_populate_recordset + * + * set fields in a set of records from the argument json, + * which must be an array of objects. + * + * similar to json_populate_record, but the tuple-building code + * is pushed down into the semantic action handlers so it's done + * per object in the array. + */ +Datum +json_populate_recordset(PG_FUNCTION_ARGS) +{ + Oid argtype = get_fn_expr_argtype(fcinfo->flinfo, 0); + text *json = PG_GETARG_TEXT_P(1); + bool use_json_as_text = PG_GETARG_BOOL(2); + ReturnSetInfo *rsi; + MemoryContext old_cxt; + Oid tupType; + int32 tupTypmod; + HeapTupleHeader rec; + TupleDesc tupdesc; + RecordIOData *my_extra; + int ncolumns; + JsonLexContext *lex; + JsonSemAction sem; + PopulateRecordsetState state; + + if (!type_is_rowtype(argtype)) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("first argument must be a rowtype"))); + + rsi = (ReturnSetInfo *) fcinfo->resultinfo; + + if (!rsi || !IsA(rsi, ReturnSetInfo) || + (rsi->allowedModes & SFRM_Materialize) == 0 || + rsi->expectedDesc == NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that " + "cannot accept a set"))); + + + rsi->returnMode = SFRM_Materialize; + + /* + * get the tupdesc from the result set info - it must be a record type + * because we already checked that arg1 is a record type. + */ + (void) get_call_result_type(fcinfo, NULL, &tupdesc); + + state = palloc0(sizeof(populateRecordsetState)); + sem = palloc0(sizeof(jsonSemAction)); + + + /* make these in a sufficiently long-lived memory context */ + old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory); + + state->ret_tdesc = CreateTupleDescCopy(tupdesc); + BlessTupleDesc(state->ret_tdesc); + state->tuple_store = + tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random, + false, work_mem); + + MemoryContextSwitchTo(old_cxt); + + /* if the json is null send back an empty set */ + if (PG_ARGISNULL(1)) + PG_RETURN_NULL(); + + if (PG_ARGISNULL(0)) + rec = NULL; + else + rec = PG_GETARG_HEAPTUPLEHEADER(0); + + tupType = tupdesc->tdtypeid; + tupTypmod = tupdesc->tdtypmod; + ncolumns = tupdesc->natts; + + lex = makeJsonLexContext(json, true); + + /* + * We arrange to look up the needed I/O info just once per series of + * calls, assuming the record type doesn't change underneath us. 
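+	 * (This is the same fn_extra caching scheme used in json_populate_record
+	 * above.)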
+ */ + my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL || + my_extra->ncolumns != ncolumns) + { + fcinfo->flinfo->fn_extra = + MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + sizeof(RecordIOData) - sizeof(ColumnIOData) + + ncolumns * sizeof(ColumnIOData)); + my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra; + my_extra->record_type = InvalidOid; + my_extra->record_typmod = 0; + } + + if (my_extra->record_type != tupType || + my_extra->record_typmod != tupTypmod) + { + MemSet(my_extra, 0, + sizeof(RecordIOData) - sizeof(ColumnIOData) + + ncolumns * sizeof(ColumnIOData)); + my_extra->record_type = tupType; + my_extra->record_typmod = tupTypmod; + my_extra->ncolumns = ncolumns; + } + + sem->semstate = (void *) state; + sem->array_start = populate_recordset_array_start; + sem->array_element_start = populate_recordset_array_element_start; + sem->scalar = populate_recordset_scalar; + sem->object_field_start = populate_recordset_object_field_start; + sem->object_field_end = populate_recordset_object_field_end; + sem->object_start = populate_recordset_object_start; + sem->object_end = populate_recordset_object_end; + + state->lex = lex; + + state->my_extra = my_extra; + state->rec = rec; + state->use_json_as_text = use_json_as_text; + state->fn_mcxt = fcinfo->flinfo->fn_mcxt; + + pg_parse_json(lex, sem); + + rsi->setResult = state->tuple_store; + rsi->setDesc = state->ret_tdesc; + + PG_RETURN_NULL(); + +} + +static void +populate_recordset_object_start(void *state) +{ + PopulateRecordsetState _state = (PopulateRecordsetState) state; + int lex_level = _state->lex->lex_level; + HASHCTL ctl; + + if (lex_level == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call json_populate_recordset on an object"))); + else if (lex_level > 1 && !_state->use_json_as_text) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call json_populate_recordset with nested objects"))); + + /* set up a new hash for this entry */ + memset(&ctl, 0, sizeof(ctl)); + ctl.keysize = NAMEDATALEN; + ctl.entrysize = sizeof(jsonHashEntry); + ctl.hcxt = CurrentMemoryContext; + _state->json_hash = hash_create("json object hashtable", + 100, + &ctl, + HASH_ELEM | HASH_CONTEXT); +} + +static void +populate_recordset_object_end(void *state) +{ + PopulateRecordsetState _state = (PopulateRecordsetState) state; + HTAB *json_hash = _state->json_hash; + Datum *values; + bool *nulls; + char fname[NAMEDATALEN]; + int i; + RecordIOData *my_extra = _state->my_extra; + int ncolumns = my_extra->ncolumns; + TupleDesc tupdesc = _state->ret_tdesc; + JsonHashEntry hashentry; + HeapTupleHeader rec = _state->rec; + HeapTuple rettuple; + + if (_state->lex->lex_level > 1) + return; + + values = (Datum *) palloc(ncolumns * sizeof(Datum)); + nulls = (bool *) palloc(ncolumns * sizeof(bool)); + + if (_state->rec) + { + HeapTupleData tuple; + + /* Build a temporary HeapTuple control structure */ + tuple.t_len = HeapTupleHeaderGetDatumLength(_state->rec); + ItemPointerSetInvalid(&(tuple.t_self)); + tuple.t_tableOid = InvalidOid; + tuple.t_data = _state->rec; + + /* Break down the tuple into fields */ + heap_deform_tuple(&tuple, tupdesc, values, nulls); + } + else + { + for (i = 0; i < ncolumns; ++i) + { + values[i] = (Datum) 0; + nulls[i] = true; + } + } + + for (i = 0; i < ncolumns; ++i) + { + ColumnIOData *column_info = &my_extra->columns[i]; + Oid column_type = tupdesc->attrs[i]->atttypid; + char *value; + + /* Ignore dropped columns in datatype */ + if 
(tupdesc->attrs[i]->attisdropped)
+		{
+			nulls[i] = true;
+			continue;
+		}
+
+		memset(fname, 0, NAMEDATALEN);
+		strncpy(fname, NameStr(tupdesc->attrs[i]->attname), NAMEDATALEN);
+		hashentry = hash_search(json_hash, fname, HASH_FIND, NULL);
+
+		/*
+		 * we can't just skip here if the key wasn't found since we might have
+		 * a domain to deal with. If we were passed in a non-null record
+		 * datum, we assume that the existing values are valid (if they're
+		 * not, then it's not our fault), but if we were passed in a null,
+		 * then every field which we don't populate needs to be run through
+		 * the input function just in case it's a domain type.
+		 */
+		if (hashentry == NULL && rec)
+			continue;
+
+		/*
+		 * Prepare to convert the column value from text
+		 */
+		if (column_info->column_type != column_type)
+		{
+			getTypeInputInfo(column_type,
+							 &column_info->typiofunc,
+							 &column_info->typioparam);
+			fmgr_info_cxt(column_info->typiofunc, &column_info->proc,
+						  _state->fn_mcxt);
+			column_info->column_type = column_type;
+		}
+		if (hashentry == NULL || hashentry->isnull)
+		{
+			/*
+			 * need InputFunctionCall to happen even for nulls, so that domain
+			 * checks are done
+			 */
+			values[i] = InputFunctionCall(&column_info->proc, NULL,
+										  column_info->typioparam,
+										  tupdesc->attrs[i]->atttypmod);
+			nulls[i] = true;
+		}
+		else
+		{
+			value = hashentry->val;
+
+			values[i] = InputFunctionCall(&column_info->proc, value,
+										  column_info->typioparam,
+										  tupdesc->attrs[i]->atttypmod);
+			nulls[i] = false;
+		}
+	}
+
+	rettuple = heap_form_tuple(tupdesc, values, nulls);
+
+	tuplestore_puttuple(_state->tuple_store, rettuple);
+
+	hash_destroy(json_hash);
+}
+
+static void
+populate_recordset_array_element_start(void *state, bool isnull)
+{
+	PopulateRecordsetState _state = (PopulateRecordsetState) state;
+
+	if (_state->lex->lex_level == 1 &&
+		_state->lex->token_type != JSON_TOKEN_OBJECT_START)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("must call json_populate_recordset on an array of objects")));
+}
+
+static void
+populate_recordset_array_start(void *state)
+{
+	PopulateRecordsetState _state = (PopulateRecordsetState) state;
+
+	if (_state->lex->lex_level != 0 && !_state->use_json_as_text)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("cannot call json_populate_recordset with nested arrays")));
+}
+
+static void
+populate_recordset_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+	PopulateRecordsetState _state = (PopulateRecordsetState) state;
+
+	if (_state->lex->lex_level == 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("cannot call json_populate_recordset on a scalar")));
+
+	if (_state->lex->lex_level == 2)
+		_state->saved_scalar = token;
+}
+
+static void
+populate_recordset_object_field_start(void *state, char *fname, bool isnull)
+{
+	PopulateRecordsetState _state = (PopulateRecordsetState) state;
+
+	if (_state->lex->lex_level > 2)
+		return;
+
+	if (_state->lex->token_type == JSON_TOKEN_ARRAY_START ||
+		_state->lex->token_type == JSON_TOKEN_OBJECT_START)
+	{
+		if (!_state->use_json_as_text)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("cannot call json_populate_recordset on a nested object")));
+		_state->save_json_start = _state->lex->token_start;
+	}
+	else
+	{
+		_state->save_json_start = NULL;
+	}
+}
+
+static void
+populate_recordset_object_field_end(void *state, char *fname, bool isnull)
+{
+	PopulateRecordsetState _state = (PopulateRecordsetState) state;
+	JsonHashEntry hashentry;
+	bool		found;
+	char		name[NAMEDATALEN];
+
+	/*
+	 * ignore field names >= NAMEDATALEN - they can't match a record field;
+	 * also ignore nested fields.
+	 */
+	if (_state->lex->lex_level > 2 || strlen(fname) >= NAMEDATALEN)
+		return;
+
+	memset(name, 0, NAMEDATALEN);
+	strncpy(name, fname, NAMEDATALEN);
+
+	hashentry = hash_search(_state->json_hash, name, HASH_ENTER, &found);
+
+	/*
+	 * found being true indicates a duplicate. We don't do anything about
+	 * that; a later field with the same name overrides the earlier field.
+	 */
+
+	hashentry->isnull = isnull;
+	if (_state->save_json_start != NULL)
+	{
+		int			len = _state->lex->prev_token_terminator - _state->save_json_start;
+		char	   *val = palloc((len + 1) * sizeof(char));
+
+		memcpy(val, _state->save_json_start, len);
+		val[len] = '\0';
+		hashentry->val = val;
+	}
+	else
+	{
+		/* must have had a scalar instead */
+		hashentry->val = _state->saved_scalar;
+	}
+}
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 0eb8eefb8f..aa8b715a65 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	201303271
+#define CATALOG_VERSION_NO	201303291
 
 #endif
diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h
index d79d76ee24..d2e1fef985 100644
--- a/src/include/catalog/pg_operator.h
+++ b/src/include/catalog/pg_operator.h
@@ -1724,6 +1724,19 @@ DATA(insert OID = 3899 ( "-" PGNSP PGUID b f f 3831 3831 3831 0 0 range_minu
 DESCR("range difference");
 DATA(insert OID = 3900 ( "*" PGNSP PGUID b f f 3831 3831 3831 3900 0 range_intersect - - ));
 DESCR("range intersection");
+DATA(insert OID = 3962 ( "->" PGNSP PGUID b f f 114 25 114 0 0 json_object_field - - ));
+DESCR("get json object field");
+DATA(insert OID = 3963 ( "->>" PGNSP PGUID b f f 114 25 25 0 0 json_object_field_text - - ));
+DESCR("get json object field as text");
+DATA(insert OID = 3964 ( "->" PGNSP PGUID b f f 114 23 114 0 0 json_array_element - - ));
+DESCR("get json array element");
+DATA(insert OID = 3965 ( "->>" PGNSP PGUID b f f 114 23 25 0 0 json_array_element_text - - ));
+DESCR("get json array element as text");
+DATA(insert OID = 3966 ( "#>" PGNSP PGUID b f f 114 1009 114 0 0 json_extract_path_op - - ));
+DESCR("get value from json with path elements");
+DATA(insert OID = 3967 ( "#>>" PGNSP PGUID b f f 114 1009 25 0 0 json_extract_path_text_op - - ));
+DESCR("get value from json as text with path elements");
+
 /*
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 151987ec1d..4d5cd9fdb6 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -4118,6 +4118,37 @@ DESCR("aggregate input into json");
 DATA(insert OID = 3176 ( to_json PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 114 "2283" _null_ _null_ _null_ _null_ to_json _null_ _null_ _null_ ));
 DESCR("map input to json");
+DATA(insert OID = 3947 ( json_object_field PGNSP PGUID 12 1 0 0 0 f f f f t f s 2 0 114 "114 25" _null_ _null_ "{from_json, field_name}" _null_ json_object_field _null_ _null_ _null_ ));
+DESCR("get json object field");
+DATA(insert OID = 3948 ( json_object_field_text PGNSP PGUID 12 1 0 0 0 f f f f t f s 2 0 25 "114 25" _null_ _null_ "{from_json, field_name}" _null_ json_object_field_text _null_ _null_ _null_ ));
+DESCR("get json object field as text");
+DATA(insert OID = 3949 ( json_array_element PGNSP PGUID 12 1 0 0 0 f f f f t f s 2 0 114 "114 23" _null_ _null_ "{from_json, element_index}" _null_ json_array_element _null_ _null_ _null_ ));
+DESCR("get json array element"); +DATA(insert OID = 3950 ( json_array_element_text PGNSP PGUID 12 1 0 0 0 f f f f t f s 2 0 25 "114 23" _null_ _null_ "{from_json, element_index}" _null_ json_array_element_text _null_ _null_ _null_ )); +DESCR("get json array element as text"); +DATA(insert OID = 3951 ( json_extract_path PGNSP PGUID 12 1 0 25 0 f f f f t f s 2 0 114 "114 1009" "{114,1009}" "{i,v}" "{from_json,path_elems}" _null_ json_extract_path _null_ _null_ _null_ )); +DESCR("get value from json with path elements"); +DATA(insert OID = 3952 ( json_extract_path_op PGNSP PGUID 12 1 0 0 0 f f f f t f s 2 0 114 "114 1009" _null_ _null_ "{from_json,path_elems}" _null_ json_extract_path _null_ _null_ _null_ )); +DESCR("get value from json with path elements"); +DATA(insert OID = 3953 ( json_extract_path_text PGNSP PGUID 12 1 0 25 0 f f f f t f s 2 0 25 "114 1009" "{114,1009}" "{i,v}" "{from_json,path_elems}" _null_ json_extract_path_text _null_ _null_ _null_ )); +DESCR("get value from json as text with path elements"); +DATA(insert OID = 3954 ( json_extract_path_text_op PGNSP PGUID 12 1 0 0 0 f f f f t f s 2 0 25 "114 1009" _null_ _null_ "{from_json,path_elems}" _null_ json_extract_path_text _null_ _null_ _null_ )); +DESCR("get value from json as text with path elements"); +DATA(insert OID = 3955 ( json_array_elements PGNSP PGUID 12 1 100 0 0 f f f f t t s 1 0 114 "114" "{114,114}" "{i,o}" "{from_json,value}" _null_ json_array_elements _null_ _null_ _null_ )); +DESCR("key value pairs of a json object"); +DATA(insert OID = 3956 ( json_array_length PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 23 "114" _null_ _null_ _null_ _null_ json_array_length _null_ _null_ _null_ )); +DESCR("length of json array"); +DATA(insert OID = 3957 ( json_object_keys PGNSP PGUID 12 1 100 0 0 f f f f t t s 1 0 25 "114" _null_ _null_ _null_ _null_ json_object_keys _null_ _null_ _null_ )); +DESCR("get json object keys"); +DATA(insert OID = 3958 ( json_each PGNSP PGUID 12 1 100 0 0 f f f f t t s 1 0 2249 "114" "{114,25,114}" "{i,o,o}" "{from_json,key,value}" _null_ json_each _null_ _null_ _null_ )); +DESCR("key value pairs of a json object"); +DATA(insert OID = 3959 ( json_each_text PGNSP PGUID 12 1 100 0 0 f f f f t t s 1 0 2249 "114" "{114,25,25}" "{i,o,o}" "{from_json,key,value}" _null_ json_each_text _null_ _null_ _null_ )); +DESCR("key value pairs of a json object"); +DATA(insert OID = 3960 ( json_populate_record PGNSP PGUID 12 1 0 0 0 f f f f f f s 3 0 2283 "2283 114 16" _null_ _null_ _null_ _null_ json_populate_record _null_ _null_ _null_ )); +DESCR("get record fields from a json object"); +DATA(insert OID = 3961 ( json_populate_recordset PGNSP PGUID 12 1 100 0 0 f f f f f t s 3 0 2283 "2283 114 16" _null_ _null_ _null_ _null_ json_populate_recordset _null_ _null_ _null_ )); +DESCR("get set of records with fields from a json array of objects"); + /* uuid */ DATA(insert OID = 2952 ( uuid_in PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2950 "2275" _null_ _null_ _null_ _null_ uuid_in _null_ _null_ _null_ )); DESCR("I/O"); diff --git a/src/include/utils/json.h b/src/include/utils/json.h index caaa769449..4845f5f618 100644 --- a/src/include/utils/json.h +++ b/src/include/utils/json.h @@ -17,6 +17,7 @@ #include "fmgr.h" #include "lib/stringinfo.h" +/* functions in json.c */ extern Datum json_in(PG_FUNCTION_ARGS); extern Datum json_out(PG_FUNCTION_ARGS); extern Datum json_recv(PG_FUNCTION_ARGS); @@ -32,4 +33,19 @@ extern Datum json_agg_finalfn(PG_FUNCTION_ARGS); extern void escape_json(StringInfo buf, const char *str); +/* 
+extern Datum json_object_field(PG_FUNCTION_ARGS);
+extern Datum json_object_field_text(PG_FUNCTION_ARGS);
+extern Datum json_array_element(PG_FUNCTION_ARGS);
+extern Datum json_array_element_text(PG_FUNCTION_ARGS);
+extern Datum json_extract_path(PG_FUNCTION_ARGS);
+extern Datum json_extract_path_text(PG_FUNCTION_ARGS);
+extern Datum json_object_keys(PG_FUNCTION_ARGS);
+extern Datum json_array_length(PG_FUNCTION_ARGS);
+extern Datum json_each(PG_FUNCTION_ARGS);
+extern Datum json_each_text(PG_FUNCTION_ARGS);
+extern Datum json_array_elements(PG_FUNCTION_ARGS);
+extern Datum json_populate_record(PG_FUNCTION_ARGS);
+extern Datum json_populate_recordset(PG_FUNCTION_ARGS);
+
 #endif   /* JSON_H */
diff --git a/src/include/utils/jsonapi.h b/src/include/utils/jsonapi.h
new file mode 100644
index 0000000000..f96e9538cd
--- /dev/null
+++ b/src/include/utils/jsonapi.h
@@ -0,0 +1,110 @@
+/*-------------------------------------------------------------------------
+ *
+ * jsonapi.h
+ *	  Declarations for JSON API support.
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/jsonapi.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef JSONAPI_H
+#define JSONAPI_H
+
+#include "lib/stringinfo.h"
+
+typedef enum
+{
+	JSON_TOKEN_INVALID,
+	JSON_TOKEN_STRING,
+	JSON_TOKEN_NUMBER,
+	JSON_TOKEN_OBJECT_START,
+	JSON_TOKEN_OBJECT_END,
+	JSON_TOKEN_ARRAY_START,
+	JSON_TOKEN_ARRAY_END,
+	JSON_TOKEN_COMMA,
+	JSON_TOKEN_COLON,
+	JSON_TOKEN_TRUE,
+	JSON_TOKEN_FALSE,
+	JSON_TOKEN_NULL,
+	JSON_TOKEN_END				/* no trailing comma, to stay C89-clean */
+} JsonTokenType;
+
+
+/*
+ * All the fields in this structure should be treated as read-only.
+ *
+ * If strval is not null, then it should contain the de-escaped value
+ * of the lexeme if it's a string. Otherwise most of these field names
+ * should be self-explanatory.
+ *
+ * line_number and line_start are principally for use by the parser's
+ * error reporting routines.
+ * token_terminator and prev_token_terminator point to the character
+ * AFTER the end of the token, i.e. where there would be a nul byte
+ * if we were using nul-terminated strings.
+ */
+typedef struct JsonLexContext
+{
+	char	   *input;
+	int			input_length;
+	char	   *token_start;
+	char	   *token_terminator;
+	char	   *prev_token_terminator;
+	JsonTokenType token_type;
+	int			lex_level;
+	int			line_number;
+	char	   *line_start;
+	StringInfo	strval;
+} JsonLexContext;
+
+typedef void (*json_struct_action) (void *state);
+typedef void (*json_ofield_action) (void *state, char *fname, bool isnull);
+typedef void (*json_aelem_action) (void *state, bool isnull);
+typedef void (*json_scalar_action) (void *state, char *token, JsonTokenType tokentype);
+
+
+/*
+ * Semantic Action structure for use in parsing json.
+ * Any of these actions can be NULL, in which case nothing is done at that
+ * point. Likewise, semstate can be NULL. Using an all-NULL structure amounts
+ * to doing a pure parse with no side-effects, and is therefore exactly
+ * what the json input routines do.
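+ *
+ * As an illustration only (a hypothetical caller, not part of this API),
+ * counting the elements of a top-level array needs just one action,
+ * much as json_array_length does:
+ *
+ *	static void
+ *	count_element(void *state, bool isnull)
+ *	{
+ *		CountState	cstate = (CountState) state;	// hypothetical state type
+ *
+ *		if (cstate->lex->lex_level == 1)
+ *			cstate->count++;
+ *	}
+ *
+ *	...
+ *	sem = palloc0(sizeof(jsonSemAction));
+ *	sem->semstate = (void *) cstate;
+ *	sem->array_element_start = count_element;
+ *	pg_parse_json(lex, sem);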
+ */
+typedef struct jsonSemAction
+{
+	void	   *semstate;
+	json_struct_action object_start;
+	json_struct_action object_end;
+	json_struct_action array_start;
+	json_struct_action array_end;
+	json_ofield_action object_field_start;
+	json_ofield_action object_field_end;
+	json_aelem_action array_element_start;
+	json_aelem_action array_element_end;
+	json_scalar_action scalar;
+} jsonSemAction, *JsonSemAction;
+
+/*
+ * pg_parse_json will parse the string in the lex, calling the
+ * action functions in sem at the appropriate points. It is
+ * up to them to keep what state they need in semstate. If they
+ * need access to the state of the lexer, then its pointer
+ * should be passed to them as a member of whatever semstate
+ * points to. If the action pointers are NULL the parser
+ * does nothing and just continues.
+ */
+extern void pg_parse_json(JsonLexContext *lex, JsonSemAction sem);
+
+/*
+ * constructor for JsonLexContext, with or without strval element.
+ * If supplied, the strval element will contain a de-escaped version of
+ * the lexeme. However, doing this imposes a performance penalty, so
+ * it should be avoided if the de-escaped lexeme is not required.
+ */
+extern JsonLexContext *makeJsonLexContext(text *json, bool need_escapes);
+
+#endif   /* JSONAPI_H */
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out
index 7ae18f1cb8..01f0679c87 100644
--- a/src/test/regress/expected/json.out
+++ b/src/test/regress/expected/json.out
@@ -457,3 +457,466 @@ FROM (SELECT '{"a":1,"b": [2,3,4,"d","e","f"],"c":{"p":1,"q":2}}'::json AS "json
 {"jsonfield":{"a":1,"b": [2,3,4,"d","e","f"],"c":{"p":1,"q":2}}}
 (1 row)
 
+-- json extraction functions
+CREATE TEMP TABLE test_json (
+       json_type text,
+       test_json json
+);
+INSERT INTO test_json VALUES
+('scalar','"a scalar"'),
+('array','["zero", "one","two",null,"four","five"]'),
+('object','{"field1":"val1","field2":"val2","field3":null}');
+SELECT test_json -> 'x'
+FROM test_json
+WHERE json_type = 'scalar';
+ERROR: cannot extract element from a scalar
+SELECT test_json -> 'x'
+FROM test_json
+WHERE json_type = 'array';
+ERROR: cannot extract field from a non-object
+SELECT test_json -> 'x'
+FROM test_json
+WHERE json_type = 'object';
+ ?column?
+----------
+
+(1 row)
+
+SELECT test_json->'field2'
+FROM test_json
+WHERE json_type = 'object';
+ ?column?
+----------
+ "val2"
+(1 row)
+
+SELECT test_json->>'field2'
+FROM test_json
+WHERE json_type = 'object';
+ ?column?
+----------
+ val2
+(1 row)
+
+SELECT test_json -> 2
+FROM test_json
+WHERE json_type = 'scalar';
+ERROR: cannot extract element from a scalar
+SELECT test_json -> 2
+FROM test_json
+WHERE json_type = 'array';
+ ?column?
+----------
+ "two"
+(1 row)
+
+SELECT test_json -> 2
+FROM test_json
+WHERE json_type = 'object';
+ERROR: cannot extract array element from a non-array
+SELECT test_json->>2
+FROM test_json
+WHERE json_type = 'array';
+ ?column?
+---------- + two +(1 row) + +SELECT json_object_keys(test_json) +FROM test_json +WHERE json_type = 'scalar'; +ERROR: cannot call json_object_keys on a scalar +SELECT json_object_keys(test_json) +FROM test_json +WHERE json_type = 'array'; +ERROR: cannot call json_object_keys on an array +SELECT json_object_keys(test_json) +FROM test_json +WHERE json_type = 'object'; + json_object_keys +------------------ + field1 + field2 + field3 +(3 rows) + +-- nulls +select (test_json->'field3') is null as expect_false +from test_json +where json_type = 'object'; + expect_false +-------------- + f +(1 row) + +select (test_json->>'field3') is null as expect_true +from test_json +where json_type = 'object'; + expect_true +------------- + t +(1 row) + +select (test_json->3) is null as expect_false +from test_json +where json_type = 'array'; + expect_false +-------------- + f +(1 row) + +select (test_json->>3) is null as expect_true +from test_json +where json_type = 'array'; + expect_true +------------- + t +(1 row) + +-- array length +SELECT json_array_length('[1,2,3,{"f1":1,"f2":[5,6]},4]'); + json_array_length +------------------- + 5 +(1 row) + +SELECT json_array_length('[]'); + json_array_length +------------------- + 0 +(1 row) + +SELECT json_array_length('{"f1":1,"f2":[5,6]}'); +ERROR: cannot get array length of a non-array +SELECT json_array_length('4'); +ERROR: cannot get array length of a scalar +-- each +select json_each('{"f1":[1,2,3],"f2":{"f3":1},"f4":null}'); + json_each +------------------- + (f1,"[1,2,3]") + (f2,"{""f3"":1}") + (f4,null) +(3 rows) + +select * from json_each('{"f1":[1,2,3],"f2":{"f3":1},"f4":null,"f5":99,"f6":"stringy"}') q; + key | value +-----+----------- + f1 | [1,2,3] + f2 | {"f3":1} + f4 | null + f5 | 99 + f6 | "stringy" +(5 rows) + +select json_each_text('{"f1":[1,2,3],"f2":{"f3":1},"f4":null,"f5":"null"}'); + json_each_text +------------------- + (f1,"[1,2,3]") + (f2,"{""f3"":1}") + (f4,) + (f5,null) +(4 rows) + +select * from json_each_text('{"f1":[1,2,3],"f2":{"f3":1},"f4":null,"f5":99,"f6":"stringy"}') q; + key | value +-----+---------- + f1 | [1,2,3] + f2 | {"f3":1} + f4 | + f5 | 99 + f6 | stringy +(5 rows) + +-- extract_path, extract_path_as_text +select json_extract_path('{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}','f4','f6'); + json_extract_path +------------------- + "stringy" +(1 row) + +select json_extract_path('{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}','f2'); + json_extract_path +------------------- + {"f3":1} +(1 row) + +select json_extract_path('{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}','f2',0::text); + json_extract_path +------------------- + "f3" +(1 row) + +select json_extract_path('{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}','f2',1::text); + json_extract_path +------------------- + 1 +(1 row) + +select json_extract_path_text('{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}','f4','f6'); + json_extract_path_text +------------------------ + stringy +(1 row) + +select json_extract_path_text('{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}','f2'); + json_extract_path_text +------------------------ + {"f3":1} +(1 row) + +select json_extract_path_text('{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}','f2',0::text); + json_extract_path_text +------------------------ + f3 +(1 row) + +select json_extract_path_text('{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}','f2',1::text); + json_extract_path_text +------------------------ + 1 +(1 row) + +-- extract_path nulls +select json_extract_path('{"f2":{"f3":1},"f4":{"f5":null,"f6":"stringy"}}','f4','f5') 
is null as expect_false; + expect_false +-------------- + f +(1 row) + +select json_extract_path_text('{"f2":{"f3":1},"f4":{"f5":null,"f6":"stringy"}}','f4','f5') is null as expect_true; + expect_true +------------- + t +(1 row) + +select json_extract_path('{"f2":{"f3":1},"f4":[0,1,2,null]}','f4','3') is null as expect_false; + expect_false +-------------- + f +(1 row) + +select json_extract_path_text('{"f2":{"f3":1},"f4":[0,1,2,null]}','f4','3') is null as expect_true; + expect_true +------------- + t +(1 row) + +-- extract_path operators +select '{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}'::json#>array['f4','f6']; + ?column? +----------- + "stringy" +(1 row) + +select '{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}'::json#>array['f2']; + ?column? +---------- + {"f3":1} +(1 row) + +select '{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}'::json#>array['f2','0']; + ?column? +---------- + "f3" +(1 row) + +select '{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}'::json#>array['f2','1']; + ?column? +---------- + 1 +(1 row) + +select '{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}'::json#>>array['f4','f6']; + ?column? +---------- + stringy +(1 row) + +select '{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}'::json#>>array['f2']; + ?column? +---------- + {"f3":1} +(1 row) + +select '{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}'::json#>>array['f2','0']; + ?column? +---------- + f3 +(1 row) + +select '{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}'::json#>>array['f2','1']; + ?column? +---------- + 1 +(1 row) + +-- same using array literals +select '{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}'::json#>'{f4,f6}'; + ?column? +----------- + "stringy" +(1 row) + +select '{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}'::json#>'{f2}'; + ?column? +---------- + {"f3":1} +(1 row) + +select '{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}'::json#>'{f2,0}'; + ?column? +---------- + "f3" +(1 row) + +select '{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}'::json#>'{f2,1}'; + ?column? +---------- + 1 +(1 row) + +select '{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}'::json#>>'{f4,f6}'; + ?column? +---------- + stringy +(1 row) + +select '{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}'::json#>>'{f2}'; + ?column? +---------- + {"f3":1} +(1 row) + +select '{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}'::json#>>'{f2,0}'; + ?column? +---------- + f3 +(1 row) + +select '{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}'::json#>>'{f2,1}'; + ?column? 
+---------- + 1 +(1 row) + +-- array_elements +select json_array_elements('[1,true,[1,[2,3]],null,{"f1":1,"f2":[7,8,9]},false]'); + json_array_elements +----------------------- + 1 + true + [1,[2,3]] + null + {"f1":1,"f2":[7,8,9]} + false +(6 rows) + +select * from json_array_elements('[1,true,[1,[2,3]],null,{"f1":1,"f2":[7,8,9]},false]') q; + value +----------------------- + 1 + true + [1,[2,3]] + null + {"f1":1,"f2":[7,8,9]} + false +(6 rows) + +-- populate_record +create type jpop as (a text, b int, c timestamp); +select * from json_populate_record(null::jpop,'{"a":"blurfl","x":43.2}') q; + a | b | c +--------+---+--- + blurfl | | +(1 row) + +select * from json_populate_record(row('x',3,'2012-12-31 15:30:56')::jpop,'{"a":"blurfl","x":43.2}') q; + a | b | c +--------+---+-------------------------- + blurfl | 3 | Mon Dec 31 15:30:56 2012 +(1 row) + +select * from json_populate_record(null::jpop,'{"a":"blurfl","x":43.2}', true) q; + a | b | c +--------+---+--- + blurfl | | +(1 row) + +select * from json_populate_record(row('x',3,'2012-12-31 15:30:56')::jpop,'{"a":"blurfl","x":43.2}', true) q; + a | b | c +--------+---+-------------------------- + blurfl | 3 | Mon Dec 31 15:30:56 2012 +(1 row) + +select * from json_populate_record(null::jpop,'{"a":[100,200,false],"x":43.2}', true) q; + a | b | c +-----------------+---+--- + [100,200,false] | | +(1 row) + +select * from json_populate_record(row('x',3,'2012-12-31 15:30:56')::jpop,'{"a":[100,200,false],"x":43.2}', true) q; + a | b | c +-----------------+---+-------------------------- + [100,200,false] | 3 | Mon Dec 31 15:30:56 2012 +(1 row) + +select * from json_populate_record(row('x',3,'2012-12-31 15:30:56')::jpop,'{"c":[100,200,false],"x":43.2}', true) q; +ERROR: invalid input syntax for type timestamp: "[100,200,false]" +-- populate_recordset +select * from json_populate_recordset(null::jpop,'[{"a":"blurfl","x":43.2},{"b":3,"c":"2012-01-20 10:42:53"}]',false) q; + a | b | c +--------+---+-------------------------- + blurfl | | + | 3 | Fri Jan 20 10:42:53 2012 +(2 rows) + +select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":"blurfl","x":43.2},{"b":3,"c":"2012-01-20 10:42:53"}]',false) q; + a | b | c +--------+----+-------------------------- + blurfl | 99 | + def | 3 | Fri Jan 20 10:42:53 2012 +(2 rows) + +select * from json_populate_recordset(null::jpop,'[{"a":"blurfl","x":43.2},{"b":3,"c":"2012-01-20 10:42:53"}]',true) q; + a | b | c +--------+---+-------------------------- + blurfl | | + | 3 | Fri Jan 20 10:42:53 2012 +(2 rows) + +select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":"blurfl","x":43.2},{"b":3,"c":"2012-01-20 10:42:53"}]',true) q; + a | b | c +--------+----+-------------------------- + blurfl | 99 | + def | 3 | Fri Jan 20 10:42:53 2012 +(2 rows) + +select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":[100,200,300],"x":43.2},{"a":{"z":true},"b":3,"c":"2012-01-20 10:42:53"}]',true) q; + a | b | c +---------------+----+-------------------------- + [100,200,300] | 99 | + {"z":true} | 3 | Fri Jan 20 10:42:53 2012 +(2 rows) + +select * from json_populate_recordset(row('def',99,null)::jpop,'[{"c":[100,200,300],"x":43.2},{"a":{"z":true},"b":3,"c":"2012-01-20 10:42:53"}]',true) q; +ERROR: invalid input syntax for type timestamp: "[100,200,300]" +-- using the default use_json_as_text argument +select * from json_populate_recordset(null::jpop,'[{"a":"blurfl","x":43.2},{"b":3,"c":"2012-01-20 10:42:53"}]') q; + a | b | c +--------+---+-------------------------- + blurfl | | + | 3 | Fri 
Jan 20 10:42:53 2012 +(2 rows) + +select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":"blurfl","x":43.2},{"b":3,"c":"2012-01-20 10:42:53"}]') q; + a | b | c +--------+----+-------------------------- + blurfl | 99 | + def | 3 | Fri Jan 20 10:42:53 2012 +(2 rows) + +select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":[100,200,300],"x":43.2},{"a":{"z":true},"b":3,"c":"2012-01-20 10:42:53"}]') q; +ERROR: cannot call json_populate_recordset on a nested object +select * from json_populate_recordset(row('def',99,null)::jpop,'[{"c":[100,200,300],"x":43.2},{"a":{"z":true},"b":3,"c":"2012-01-20 10:42:53"}]') q; +ERROR: cannot call json_populate_recordset on a nested object diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql index 5583d65307..04b22fe297 100644 --- a/src/test/regress/sql/json.sql +++ b/src/test/regress/sql/json.sql @@ -125,3 +125,174 @@ FROM (SELECT '-Infinity'::float8 AS "float8field") q; -- json input SELECT row_to_json(q) FROM (SELECT '{"a":1,"b": [2,3,4,"d","e","f"],"c":{"p":1,"q":2}}'::json AS "jsonfield") q; + + +-- json extraction functions + +CREATE TEMP TABLE test_json ( + json_type text, + test_json json +); + +INSERT INTO test_json VALUES +('scalar','"a scalar"'), +('array','["zero", "one","two",null,"four","five"]'), +('object','{"field1":"val1","field2":"val2","field3":null}'); + +SELECT test_json -> 'x' +FROM test_json +WHERE json_type = 'scalar'; + +SELECT test_json -> 'x' +FROM test_json +WHERE json_type = 'array'; + +SELECT test_json -> 'x' +FROM test_json +WHERE json_type = 'object'; + +SELECT test_json->'field2' +FROM test_json +WHERE json_type = 'object'; + +SELECT test_json->>'field2' +FROM test_json +WHERE json_type = 'object'; + +SELECT test_json -> 2 +FROM test_json +WHERE json_type = 'scalar'; + +SELECT test_json -> 2 +FROM test_json +WHERE json_type = 'array'; + +SELECT test_json -> 2 +FROM test_json +WHERE json_type = 'object'; + +SELECT test_json->>2 +FROM test_json +WHERE json_type = 'array'; + +SELECT json_object_keys(test_json) +FROM test_json +WHERE json_type = 'scalar'; + +SELECT json_object_keys(test_json) +FROM test_json +WHERE json_type = 'array'; + +SELECT json_object_keys(test_json) +FROM test_json +WHERE json_type = 'object'; + +-- nulls + +select (test_json->'field3') is null as expect_false +from test_json +where json_type = 'object'; + +select (test_json->>'field3') is null as expect_true +from test_json +where json_type = 'object'; + +select (test_json->3) is null as expect_false +from test_json +where json_type = 'array'; + +select (test_json->>3) is null as expect_true +from test_json +where json_type = 'array'; + + +-- array length + +SELECT json_array_length('[1,2,3,{"f1":1,"f2":[5,6]},4]'); + +SELECT json_array_length('[]'); + +SELECT json_array_length('{"f1":1,"f2":[5,6]}'); + +SELECT json_array_length('4'); + +-- each + +select json_each('{"f1":[1,2,3],"f2":{"f3":1},"f4":null}'); +select * from json_each('{"f1":[1,2,3],"f2":{"f3":1},"f4":null,"f5":99,"f6":"stringy"}') q; + +select json_each_text('{"f1":[1,2,3],"f2":{"f3":1},"f4":null,"f5":"null"}'); +select * from json_each_text('{"f1":[1,2,3],"f2":{"f3":1},"f4":null,"f5":99,"f6":"stringy"}') q; + +-- extract_path, extract_path_as_text + +select json_extract_path('{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}','f4','f6'); +select json_extract_path('{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}','f2'); +select json_extract_path('{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}','f2',0::text); +select 
json_extract_path('{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}','f2',1::text); +select json_extract_path_text('{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}','f4','f6'); +select json_extract_path_text('{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}','f2'); +select json_extract_path_text('{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}','f2',0::text); +select json_extract_path_text('{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}','f2',1::text); + +-- extract_path nulls + +select json_extract_path('{"f2":{"f3":1},"f4":{"f5":null,"f6":"stringy"}}','f4','f5') is null as expect_false; +select json_extract_path_text('{"f2":{"f3":1},"f4":{"f5":null,"f6":"stringy"}}','f4','f5') is null as expect_true; +select json_extract_path('{"f2":{"f3":1},"f4":[0,1,2,null]}','f4','3') is null as expect_false; +select json_extract_path_text('{"f2":{"f3":1},"f4":[0,1,2,null]}','f4','3') is null as expect_true; + +-- extract_path operators + +select '{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}'::json#>array['f4','f6']; +select '{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}'::json#>array['f2']; +select '{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}'::json#>array['f2','0']; +select '{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}'::json#>array['f2','1']; +select '{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}'::json#>>array['f4','f6']; +select '{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}'::json#>>array['f2']; +select '{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}'::json#>>array['f2','0']; +select '{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}'::json#>>array['f2','1']; + +-- same using array literals +select '{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}'::json#>'{f4,f6}'; +select '{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}'::json#>'{f2}'; +select '{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}'::json#>'{f2,0}'; +select '{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}'::json#>'{f2,1}'; +select '{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}'::json#>>'{f4,f6}'; +select '{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}'::json#>>'{f2}'; +select '{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}'::json#>>'{f2,0}'; +select '{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}'::json#>>'{f2,1}'; + +-- array_elements + +select json_array_elements('[1,true,[1,[2,3]],null,{"f1":1,"f2":[7,8,9]},false]'); +select * from json_array_elements('[1,true,[1,[2,3]],null,{"f1":1,"f2":[7,8,9]},false]') q; + +-- populate_record +create type jpop as (a text, b int, c timestamp); + +select * from json_populate_record(null::jpop,'{"a":"blurfl","x":43.2}') q; +select * from json_populate_record(row('x',3,'2012-12-31 15:30:56')::jpop,'{"a":"blurfl","x":43.2}') q; + +select * from json_populate_record(null::jpop,'{"a":"blurfl","x":43.2}', true) q; +select * from json_populate_record(row('x',3,'2012-12-31 15:30:56')::jpop,'{"a":"blurfl","x":43.2}', true) q; + +select * from json_populate_record(null::jpop,'{"a":[100,200,false],"x":43.2}', true) q; +select * from json_populate_record(row('x',3,'2012-12-31 15:30:56')::jpop,'{"a":[100,200,false],"x":43.2}', true) q; +select * from json_populate_record(row('x',3,'2012-12-31 15:30:56')::jpop,'{"c":[100,200,false],"x":43.2}', true) q; + +-- populate_recordset + +select * from json_populate_recordset(null::jpop,'[{"a":"blurfl","x":43.2},{"b":3,"c":"2012-01-20 10:42:53"}]',false) q; +select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":"blurfl","x":43.2},{"b":3,"c":"2012-01-20 10:42:53"}]',false) q; +select * from json_populate_recordset(null::jpop,'[{"a":"blurfl","x":43.2},{"b":3,"c":"2012-01-20 
10:42:53"}]',true) q; +select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":"blurfl","x":43.2},{"b":3,"c":"2012-01-20 10:42:53"}]',true) q; +select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":[100,200,300],"x":43.2},{"a":{"z":true},"b":3,"c":"2012-01-20 10:42:53"}]',true) q; +select * from json_populate_recordset(row('def',99,null)::jpop,'[{"c":[100,200,300],"x":43.2},{"a":{"z":true},"b":3,"c":"2012-01-20 10:42:53"}]',true) q; + +-- using the default use_json_as_text argument + +select * from json_populate_recordset(null::jpop,'[{"a":"blurfl","x":43.2},{"b":3,"c":"2012-01-20 10:42:53"}]') q; +select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":"blurfl","x":43.2},{"b":3,"c":"2012-01-20 10:42:53"}]') q; +select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":[100,200,300],"x":43.2},{"a":{"z":true},"b":3,"c":"2012-01-20 10:42:53"}]') q; +select * from json_populate_recordset(row('def',99,null)::jpop,'[{"c":[100,200,300],"x":43.2},{"a":{"z":true},"b":3,"c":"2012-01-20 10:42:53"}]') q;