mirror of
https://github.com/oatpp/oatpp.git
synced 2025-03-25 18:20:24 +08:00
444 lines
12 KiB
C++
444 lines
12 KiB
C++
/***************************************************************************
|
|
*
|
|
* Project _____ __ ____ _ _
|
|
* ( _ ) /__\ (_ _)_| |_ _| |_
|
|
* )(_)( /(__)\ )( (_ _)(_ _)
|
|
* (_____)(__)(__)(__) |_| |_|
|
|
*
|
|
*
|
|
* Copyright 2018-present, Leonid Stryzhevskyi, <lganzzzo@gmail.com>
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*
|
|
***************************************************************************/
|
|
|
|
#include "Utils.hpp"
|
|
|
|
#include "oatpp/encoding/Unicode.hpp"
|
|
#include "oatpp/encoding/Hex.hpp"
|
|
|
|
namespace oatpp { namespace parser { namespace json{
|
|
|
|
const char* const Utils::ERROR_INVALID_ESCAPED_CHAR = "ERROR_INVALID_ESCAPED_CHAR";
|
|
const char* const Utils::ERROR_INVALID_SURROGATE_PAIR = "ERROR_INVALID_SURROGATE_PAIR";
|
|
const char* const Utils::ERROR_PARSER_QUOTE_EXPECTED = "'\"' - EXPECTED";
|
|
|
|
v_int32 Utils::calcEscapedStringSize(p_char8 data, v_int32 size, v_int32& safeSize) {
|
|
v_int32 result = 0;
|
|
v_int32 i = 0;
|
|
safeSize = size;
|
|
while (i < size) {
|
|
v_char8 a = data[i];
|
|
if(a < 32) {
|
|
i ++;
|
|
if(a == '\b' || a == '\f' || a == '\n' || a == '\r' || a == '\t'){
|
|
result += 2; // '\n'
|
|
} else {
|
|
result += 6; // '\uFFFF' - 6 chars
|
|
}
|
|
} else if(a < 128){
|
|
i ++;
|
|
if(a == '\"' || a == '\\' || a == '/'){
|
|
result += 2; // '\/'
|
|
} else {
|
|
result ++;
|
|
}
|
|
} else {
|
|
v_int32 charSize = oatpp::encoding::Unicode::getUtf8CharSequenceLength(a);
|
|
if(charSize != 0) {
|
|
if(i + charSize > size) {
|
|
safeSize = i;
|
|
}
|
|
i += charSize;
|
|
if(charSize < 4) {
|
|
result += 6; // '\uFFFF' - 6 chars
|
|
} else if(charSize == 4) {
|
|
result += 12; // '\uFFFF\uFFFF' - 12 chars surrogate pair
|
|
} else {
|
|
result += 11; // '\u+FFFFFFFF' - 11 chars NOT JSON standard case
|
|
}
|
|
} else {
|
|
// invalid char
|
|
i ++;
|
|
result ++;
|
|
}
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
v_int32 Utils::calcUnescapedStringSize(p_char8 data, v_int32 size, const char* & error, v_int32& errorPosition) {
|
|
error = nullptr;
|
|
v_int32 result = 0;
|
|
v_int32 i = 0;
|
|
|
|
while (i < size) {
|
|
v_char8 a = data[i];
|
|
if(a == '\\'){
|
|
|
|
if(i + 1 == size){
|
|
error = ERROR_INVALID_ESCAPED_CHAR;
|
|
errorPosition = i;
|
|
return 0;
|
|
}
|
|
|
|
v_char8 b = data[i + 1];
|
|
|
|
if(b == '"' || b == '\\' || b == '/' || b == 'b' || b == 'f' || b == 'n' || b == 'r' || b == 't'){
|
|
result += 1;
|
|
i += 2;
|
|
} else if(b == 'u'){
|
|
|
|
if(i + 6 > size){
|
|
error = ERROR_INVALID_ESCAPED_CHAR;
|
|
errorPosition = i;
|
|
return 0;
|
|
}
|
|
|
|
if(data[i + 2] == '+') { // not JSON standard case
|
|
if(i + 11 > size){
|
|
error = ERROR_INVALID_ESCAPED_CHAR;
|
|
errorPosition = i;
|
|
return 0;
|
|
}
|
|
v_word32 code;
|
|
error = encoding::Hex::readWord32(&data[i + 3], code);
|
|
if(error != nullptr){
|
|
errorPosition = i + 3;
|
|
return 0;
|
|
}
|
|
i += 11;
|
|
result += encoding::Unicode::getUtf8CharSequenceLengthForCode(code);
|
|
} else {
|
|
v_word16 code;
|
|
error = encoding::Hex::readWord16(&data[i + 2], code);
|
|
if(error != nullptr){
|
|
errorPosition = i + 2;
|
|
return 0;
|
|
}
|
|
|
|
if(code >= 0xD800 && code <= 0xDBFF){
|
|
if(i + 12 > size){
|
|
error = ERROR_INVALID_SURROGATE_PAIR;
|
|
errorPosition = i;
|
|
return 0;
|
|
}
|
|
v_word16 low;
|
|
error = encoding::Hex::readWord16(&data[i + 8], low);
|
|
if(error != nullptr){
|
|
errorPosition = i + 8;
|
|
return 0;
|
|
}
|
|
|
|
if(low >= 0xDC00 && low <= 0xDFFF){
|
|
v_word32 bigCode = encoding::Unicode::utf16SurrogatePairToCode(code, low);
|
|
i += 12;
|
|
result += encoding::Unicode::getUtf8CharSequenceLengthForCode(bigCode);
|
|
} else {
|
|
error = ERROR_INVALID_SURROGATE_PAIR;
|
|
errorPosition = i;
|
|
return 0;
|
|
}
|
|
|
|
} else {
|
|
i += 6;
|
|
result += encoding::Unicode::getUtf8CharSequenceLengthForCode(code);
|
|
}
|
|
}
|
|
|
|
} else {
|
|
error = ERROR_INVALID_ESCAPED_CHAR;
|
|
errorPosition = i;
|
|
return 0;
|
|
}
|
|
|
|
} else {
|
|
i ++;
|
|
result ++;
|
|
}
|
|
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
v_int32 Utils::escapeUtf8Char(p_char8 sequence, p_char8 buffer){
|
|
v_int32 length;
|
|
v_int32 code = oatpp::encoding::Unicode::encodeUtf8Char(sequence, length);
|
|
if(code < 0x00010000) {
|
|
buffer[0] = '\\';
|
|
buffer[1] = 'u';
|
|
oatpp::encoding::Hex::writeWord16(code, &buffer[2]);
|
|
return 6;
|
|
} else if(code < 0x00200000) {
|
|
v_int16 high;
|
|
v_int16 low;
|
|
oatpp::encoding::Unicode::codeToUtf16SurrogatePair(code, high, low);
|
|
buffer[0] = '\\';
|
|
buffer[1] = 'u';
|
|
oatpp::encoding::Hex::writeWord16(high, &buffer[2]);
|
|
buffer[6] = '\\';
|
|
buffer[7] = 'u';
|
|
oatpp::encoding::Hex::writeWord16(low, &buffer[8]);
|
|
return 12;
|
|
} else {
|
|
buffer[0] = '\\';
|
|
buffer[1] = 'u';
|
|
buffer[2] = '+';
|
|
oatpp::encoding::Hex::writeWord32(code, &buffer[2]);
|
|
return 11;
|
|
}
|
|
}
|
|
|
|
oatpp::String Utils::escapeString(p_char8 data, v_int32 size) {
|
|
v_int32 safeSize;
|
|
v_int32 escapedSize = calcEscapedStringSize(data, size, safeSize);
|
|
if(escapedSize == size) {
|
|
return String(data, size, true);
|
|
}
|
|
auto result = String(escapedSize);
|
|
v_int32 i = 0;
|
|
p_char8 resultData = result->getData();
|
|
v_int32 pos = 0;
|
|
|
|
while (i < safeSize) {
|
|
v_char8 a = data[i];
|
|
if(a < 32) {
|
|
if(a == '\b'){
|
|
resultData[pos] = '\\'; resultData[pos + 1] = 'b'; pos += 2;
|
|
} else if(a == '\f'){
|
|
resultData[pos] = '\\'; resultData[pos + 1] = 'f'; pos += 2;
|
|
} else if(a == '\n'){
|
|
resultData[pos] = '\\'; resultData[pos + 1] = 'n'; pos += 2;
|
|
} else if(a == '\r'){
|
|
resultData[pos] = '\\'; resultData[pos + 1] = 'r'; pos += 2;
|
|
} else if(a == '\t'){
|
|
resultData[pos] = '\\'; resultData[pos + 1] = 't'; pos += 2;
|
|
} else {
|
|
resultData[pos] = '\\';
|
|
resultData[pos + 1] = 'u';
|
|
oatpp::encoding::Hex::writeWord16(a, &resultData[pos + 2]);
|
|
pos += 6;
|
|
}
|
|
i ++;
|
|
} else if(a < 128){
|
|
if(a == '\"'){
|
|
resultData[pos] = '\\'; resultData[pos + 1] = '"'; pos += 2;
|
|
} else if(a == '\\'){
|
|
resultData[pos] = '\\'; resultData[pos + 1] = '\\'; pos += 2;
|
|
} else if(a == '/'){
|
|
resultData[pos] = '\\'; resultData[pos + 1] = '/'; pos += 2;
|
|
} else {
|
|
resultData[pos] = data[i];
|
|
pos ++;
|
|
}
|
|
i ++;
|
|
} else {
|
|
v_int32 charSize = oatpp::encoding::Unicode::getUtf8CharSequenceLength(a);
|
|
if(charSize != 0) {
|
|
pos += escapeUtf8Char(&data[i], &resultData[pos]);
|
|
i += charSize;
|
|
} else {
|
|
// invalid char
|
|
resultData[pos] = data[i];
|
|
i ++;
|
|
pos ++;
|
|
}
|
|
}
|
|
}
|
|
|
|
if(size > safeSize){
|
|
for(v_int32 i = pos; i < result->getSize(); i ++){
|
|
resultData[i] = '?';
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
void Utils::unescapeStringToBuffer(p_char8 data, v_int32 size, p_char8 resultData){
|
|
|
|
v_int32 i = 0;
|
|
v_int32 pos = 0;
|
|
|
|
while (i < size) {
|
|
v_char8 a = data[i];
|
|
|
|
if(a == '\\'){
|
|
v_char8 b = data[i + 1];
|
|
if(b != 'u'){
|
|
switch (b) {
|
|
case '"': resultData[pos] = '"'; pos ++; break;
|
|
case '\\': resultData[pos] = '\\'; pos ++; break;
|
|
case '/': resultData[pos] = '/'; pos ++; break;
|
|
case 'b': resultData[pos] = '\b'; pos ++; break;
|
|
case 'f': resultData[pos] = '\f'; pos ++; break;
|
|
case 'n': resultData[pos] = '\n'; pos ++; break;
|
|
case 'r': resultData[pos] = '\r'; pos ++; break;
|
|
case 't': resultData[pos] = '\t'; pos ++; break;
|
|
}
|
|
i += 2;
|
|
} else {
|
|
if(data[i + 2] == '+'){ // Not JSON standard case
|
|
v_word32 code;
|
|
encoding::Hex::readWord32(&data[i + 3], code);
|
|
i += 11;
|
|
pos += encoding::Unicode::decodeUtf8Char(code, &resultData[pos]);
|
|
} else {
|
|
|
|
v_word16 code;
|
|
encoding::Hex::readWord16(&data[i + 2], code);
|
|
|
|
if(code >= 0xD800 && code <= 0xDBFF){
|
|
v_word16 low;
|
|
encoding::Hex::readWord16(&data[i + 8], low);
|
|
v_word32 bigCode = encoding::Unicode::utf16SurrogatePairToCode(code, low);
|
|
pos += encoding::Unicode::decodeUtf8Char(bigCode, &resultData[pos]);
|
|
i += 12;
|
|
} else {
|
|
pos += encoding::Unicode::decodeUtf8Char(code, &resultData[pos]);
|
|
i += 6;
|
|
}
|
|
|
|
}
|
|
}
|
|
} else {
|
|
resultData[pos] = a;
|
|
pos ++;
|
|
i++;
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
oatpp::String Utils::unescapeString(p_char8 data, v_int32 size,
|
|
const char* & error, v_int32& errorPosition) {
|
|
|
|
v_int32 unescapedSize = calcUnescapedStringSize(data, size, error, errorPosition);
|
|
if(error != nullptr){
|
|
return nullptr;
|
|
}
|
|
auto result = String(unescapedSize);
|
|
if(unescapedSize == size) {
|
|
std::memcpy(result->getData(), data, size);
|
|
} else {
|
|
unescapeStringToBuffer(data, size, result->getData());
|
|
}
|
|
return result;
|
|
|
|
}
|
|
|
|
std::string Utils::unescapeStringToStdString(p_char8 data, v_int32 size,
|
|
const char* & error, v_int32& errorPosition){
|
|
v_int32 unescapedSize = calcUnescapedStringSize(data, size, error, errorPosition);
|
|
if(error != nullptr){
|
|
return "";
|
|
}
|
|
std::string result;
|
|
result.resize(unescapedSize);
|
|
if(unescapedSize == size) {
|
|
std::memcpy((p_char8) result.data(), data, size);
|
|
} else {
|
|
unescapeStringToBuffer(data, size, (p_char8) result.data());
|
|
}
|
|
return result;
|
|
}
|
|
|
|
p_char8 Utils::preparseString(ParsingCaret& caret, v_int32& size){
|
|
|
|
if(caret.canContinueAtChar('"', 1)){
|
|
|
|
const p_char8 data = caret.getData();
|
|
v_int32 pos = caret.getPosition();
|
|
v_int32 pos0 = pos;
|
|
v_int32 length = caret.getSize();
|
|
|
|
while (pos < length) {
|
|
v_char8 a = data[pos];
|
|
if(a == '"'){
|
|
size = pos - pos0;
|
|
return &data[pos0];
|
|
} else if(a == '\\') {
|
|
pos += 2;
|
|
} else {
|
|
pos ++;
|
|
}
|
|
}
|
|
caret.setPosition(caret.getSize());
|
|
caret.setError(ERROR_PARSER_QUOTE_EXPECTED);
|
|
} else {
|
|
caret.setError(ERROR_PARSER_QUOTE_EXPECTED);
|
|
}
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
oatpp::String Utils::parseString(ParsingCaret& caret) {
|
|
|
|
v_int32 size;
|
|
p_char8 data = preparseString(caret, size);
|
|
|
|
if(data != nullptr) {
|
|
|
|
v_int32 pos = caret.getPosition();
|
|
|
|
const char* error;
|
|
v_int32 errorPosition;
|
|
auto result = unescapeString(data, size, error, errorPosition);
|
|
if(error != nullptr){
|
|
caret.setError(error);
|
|
caret.setPosition(pos + errorPosition);
|
|
} else {
|
|
caret.setPosition(pos + size + 1);
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
std::string Utils::parseStringToStdString(ParsingCaret& caret){
|
|
|
|
v_int32 size;
|
|
p_char8 data = preparseString(caret, size);
|
|
|
|
if(data != nullptr) {
|
|
|
|
v_int32 pos = caret.getPosition();
|
|
|
|
const char* error;
|
|
v_int32 errorPosition;
|
|
const std::string& result = unescapeStringToStdString(data, size, error, errorPosition);
|
|
if(error != nullptr){
|
|
caret.setError(error);
|
|
caret.setPosition(pos + errorPosition);
|
|
} else {
|
|
caret.setPosition(pos + size + 1);
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
return "";
|
|
|
|
}
|
|
|
|
|
|
}}}
|