2
0
mirror of https://github.com/oatpp/oatpp.git synced 2025-03-25 18:20:24 +08:00
2018-06-26 23:17:48 +03:00

444 lines
12 KiB
C++

/***************************************************************************
*
* Project _____ __ ____ _ _
* ( _ ) /__\ (_ _)_| |_ _| |_
* )(_)( /(__)\ )( (_ _)(_ _)
* (_____)(__)(__)(__) |_| |_|
*
*
* Copyright 2018-present, Leonid Stryzhevskyi, <lganzzzo@gmail.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
***************************************************************************/
#include "Utils.hpp"
#include "oatpp/encoding/Unicode.hpp"
#include "oatpp/encoding/Hex.hpp"
namespace oatpp { namespace parser { namespace json{
// Reported by calcUnescapedStringSize() when a backslash escape is truncated or uses an unknown escape letter.
const char* const Utils::ERROR_INVALID_ESCAPED_CHAR = "ERROR_INVALID_ESCAPED_CHAR";
// Reported by calcUnescapedStringSize() when a \uXXXX high surrogate is not followed by a usable low surrogate.
const char* const Utils::ERROR_INVALID_SURROGATE_PAIR = "ERROR_INVALID_SURROGATE_PAIR";
// Set on the parsing caret by preparseString() when the opening or the closing '"' is missing.
const char* const Utils::ERROR_PARSER_QUOTE_EXPECTED = "'\"' - EXPECTED";
/**
 * Compute how many bytes the JSON-escaped form of a buffer will occupy.
 *
 * @param data - bytes to be escaped.
 * @param size - number of bytes in data.
 * @param safeSize - out: set to size, or to the offset of a trailing multi-byte
 * sequence whose announced length runs past the end of the buffer.
 * @return - size in bytes of the escaped representation.
 */
v_int32 Utils::calcEscapedStringSize(p_char8 data, v_int32 size, v_int32& safeSize) {

  safeSize = size;

  v_int32 escapedLength = 0;
  v_int32 offset = 0;

  while (offset < size) {

    v_char8 ch = data[offset];

    // Control characters: either a two-char short escape or a six-char \uXXXX.
    if (ch < 32) {
      switch (ch) {
        case '\b':
        case '\f':
        case '\n':
        case '\r':
        case '\t':
          escapedLength += 2; // '\n'
          break;
        default:
          escapedLength += 6; // '\uFFFF' - 6 chars
          break;
      }
      ++offset;
      continue;
    }

    // Remaining single-byte characters: quote, backslash and slash get a backslash prefix.
    if (ch < 128) {
      const bool needsBackslash = (ch == '\"' || ch == '\\' || ch == '/');
      escapedLength += needsBackslash ? 2 : 1; // '\/' or the char itself
      ++offset;
      continue;
    }

    // Lead byte of a multi-byte sequence.
    v_int32 sequenceLength = oatpp::encoding::Unicode::getUtf8CharSequenceLength(ch);

    if (sequenceLength == 0) {
      // invalid char - counted as a single byte
      ++offset;
      ++escapedLength;
      continue;
    }

    if (offset + sequenceLength > size) {
      // The sequence is cut off by the end of the buffer; remember where it starts.
      safeSize = offset;
    }

    offset += sequenceLength;

    if (sequenceLength < 4) {
      escapedLength += 6;  // '\uFFFF' - 6 chars
    } else if (sequenceLength == 4) {
      escapedLength += 12; // '\uFFFF\uFFFF' - 12 chars surrogate pair
    } else {
      escapedLength += 11; // '\u+FFFFFFFF' - 11 chars NOT JSON standard case
    }

  }

  return escapedLength;

}
/**
 * Validate a JSON-escaped buffer and compute the size of its unescaped form.
 *
 * Recognized escapes: \" \\ \/ \b \f \n \r \t, \uXXXX (including
 * \uXXXX\uXXXX surrogate pairs) and the non-standard \u+XXXXXXXX form.
 *
 * @param data - escaped bytes (without the surrounding quotes).
 * @param size - number of bytes in data.
 * @param error - out: nullptr on success, otherwise an error message.
 * @param errorPosition - out: offset in data at which the error was detected.
 * Written only when error is set.
 * @return - size in bytes of the unescaped data, or 0 on error.
 */
v_int32 Utils::calcUnescapedStringSize(p_char8 data, v_int32 size, const char* & error, v_int32& errorPosition) {

  error = nullptr;
  v_int32 result = 0;
  v_int32 i = 0;

  while (i < size) {

    v_char8 a = data[i];

    if(a == '\\'){

      // A backslash must be followed by at least one more character.
      if(i + 1 == size){
        error = ERROR_INVALID_ESCAPED_CHAR;
        errorPosition = i;
        return 0;
      }

      v_char8 b = data[i + 1];

      if(b == '"' || b == '\\' || b == '/' || b == 'b' || b == 'f' || b == 'n' || b == 'r' || b == 't'){
        // Two-char escape collapses into a single byte.
        result += 1;
        i += 2;
      } else if(b == 'u'){

        // Shortest unicode escape is '\uXXXX' - 6 chars.
        if(i + 6 > size){
          error = ERROR_INVALID_ESCAPED_CHAR;
          errorPosition = i;
          return 0;
        }

        if(data[i + 2] == '+') { // not JSON standard case

          // '\u+XXXXXXXX' - 11 chars.
          if(i + 11 > size){
            error = ERROR_INVALID_ESCAPED_CHAR;
            errorPosition = i;
            return 0;
          }

          v_word32 code;
          error = encoding::Hex::readWord32(&data[i + 3], code);
          if(error != nullptr){
            errorPosition = i + 3;
            return 0;
          }

          i += 11;
          result += encoding::Unicode::getUtf8CharSequenceLengthForCode(code);

        } else {

          v_word16 code;
          error = encoding::Hex::readWord16(&data[i + 2], code);
          if(error != nullptr){
            errorPosition = i + 2;
            return 0;
          }

          if(code >= 0xD800 && code <= 0xDBFF){

            // High surrogate: it must be immediately followed by a second
            // '\uXXXX' escape. Verify both the length and the '\u' prefix,
            // otherwise arbitrary bytes at i+6 / i+7 would be silently skipped.
            if(i + 12 > size || data[i + 6] != '\\' || data[i + 7] != 'u'){
              error = ERROR_INVALID_SURROGATE_PAIR;
              errorPosition = i;
              return 0;
            }

            v_word16 low;
            error = encoding::Hex::readWord16(&data[i + 8], low);
            if(error != nullptr){
              errorPosition = i + 8;
              return 0;
            }

            if(low >= 0xDC00 && low <= 0xDFFF){
              v_word32 bigCode = encoding::Unicode::utf16SurrogatePairToCode(code, low);
              i += 12;
              result += encoding::Unicode::getUtf8CharSequenceLengthForCode(bigCode);
            } else {
              error = ERROR_INVALID_SURROGATE_PAIR;
              errorPosition = i;
              return 0;
            }

          } else {
            i += 6;
            result += encoding::Unicode::getUtf8CharSequenceLengthForCode(code);
          }

        }

      } else {
        // Unknown escape letter.
        error = ERROR_INVALID_ESCAPED_CHAR;
        errorPosition = i;
        return 0;
      }

    } else {
      // Plain byte - kept as is.
      i ++;
      result ++;
    }

  }

  return result;

}
/**
 * Write the escaped form of one multi-byte character to buffer.
 *
 * @param sequence - pointer to the first byte of the character sequence.
 * @param buffer - destination; must have room for up to 12 bytes.
 * @return - number of bytes written: 6 for '\uXXXX', 12 for a
 * '\uXXXX\uXXXX' surrogate pair, 11 for the non-standard '\u+XXXXXXXX' form.
 */
v_int32 Utils::escapeUtf8Char(p_char8 sequence, p_char8 buffer){

  v_int32 length;
  v_int32 code = oatpp::encoding::Unicode::encodeUtf8Char(sequence, length);

  if(code < 0x00010000) {
    // Fits into a single '\uXXXX' escape.
    buffer[0] = '\\';
    buffer[1] = 'u';
    oatpp::encoding::Hex::writeWord16(code, &buffer[2]);
    return 6;
  } else if(code < 0x00200000) {
    // Written as a UTF-16 surrogate pair.
    v_int16 high;
    v_int16 low;
    oatpp::encoding::Unicode::codeToUtf16SurrogatePair(code, high, low);
    buffer[0] = '\\';
    buffer[1] = 'u';
    oatpp::encoding::Hex::writeWord16(high, &buffer[2]);
    buffer[6] = '\\';
    buffer[7] = 'u';
    oatpp::encoding::Hex::writeWord16(low, &buffer[8]);
    return 12;
  } else {
    // NOT JSON standard case: '\u+XXXXXXXX'.
    buffer[0] = '\\';
    buffer[1] = 'u';
    buffer[2] = '+';
    // Hex digits start after the '+' marker (offset 3), which is where the
    // unescape routines read them from; writing at offset 2 would overwrite
    // the marker and leave the 11th byte unset.
    oatpp::encoding::Hex::writeWord32(code, &buffer[3]);
    return 11;
  }

}
/**
 * Produce the JSON-escaped form of a buffer.
 *
 * If nothing needs escaping (escaped size equals the input size) the input is
 * returned via String(data, size, true). If the input ends with a multi-byte
 * sequence that is cut off by the end of the buffer, the space reserved for it
 * is filled with '?'.
 *
 * @param data - bytes to escape.
 * @param size - number of bytes in data.
 * @return - escaped string.
 */
oatpp::String Utils::escapeString(p_char8 data, v_int32 size) {

  v_int32 safeSize;
  v_int32 escapedSize = calcEscapedStringSize(data, size, safeSize);

  if (escapedSize == size) {
    return String(data, size, true);
  }

  auto result = String(escapedSize);
  p_char8 out = result->getData();

  v_int32 written = 0;
  v_int32 offset = 0;

  while (offset < safeSize) {

    v_char8 ch = data[offset];

    if (ch < 32) {

      // Pick the letter of the short escape, if this control char has one.
      v_char8 shortForm = 0;
      switch (ch) {
        case '\b': shortForm = 'b'; break;
        case '\f': shortForm = 'f'; break;
        case '\n': shortForm = 'n'; break;
        case '\r': shortForm = 'r'; break;
        case '\t': shortForm = 't'; break;
        default: break;
      }

      out[written] = '\\';
      if (shortForm != 0) {
        out[written + 1] = shortForm;
        written += 2;
      } else {
        out[written + 1] = 'u';
        oatpp::encoding::Hex::writeWord16(ch, &out[written + 2]);
        written += 6;
      }

      ++offset;
      continue;

    }

    if (ch < 128) {

      if (ch == '\"' || ch == '\\' || ch == '/') {
        // Backslash followed by the character itself.
        out[written] = '\\';
        out[written + 1] = ch;
        written += 2;
      } else {
        out[written] = ch;
        ++written;
      }

      ++offset;
      continue;

    }

    v_int32 sequenceLength = oatpp::encoding::Unicode::getUtf8CharSequenceLength(ch);

    if (sequenceLength == 0) {
      // invalid char - copied through unchanged
      out[written] = ch;
      ++offset;
      ++written;
      continue;
    }

    written += escapeUtf8Char(&data[offset], &out[written]);
    offset += sequenceLength;

  }

  // A truncated trailing sequence was skipped above; pad its reserved space.
  if (size > safeSize) {
    for (v_int32 k = written; k < result->getSize(); ++k) {
      out[k] = '?';
    }
  }

  return result;

}
/**
 * Unescape data into resultData without any validation or bounds checks.
 *
 * The loop reads ahead of the current position (up to 11 bytes for an escape)
 * and ignores the results of the hex readers, so callers in this file run
 * calcUnescapedStringSize() first to validate the input and size the buffer.
 *
 * @param data - escaped bytes.
 * @param size - number of bytes in data.
 * @param resultData - destination buffer.
 */
void Utils::unescapeStringToBuffer(p_char8 data, v_int32 size, p_char8 resultData){

  v_int32 readPos = 0;
  v_int32 writePos = 0;

  while (readPos < size) {

    v_char8 current = data[readPos];

    // Ordinary byte - copy through.
    if (current != '\\') {
      resultData[writePos] = current;
      ++writePos;
      ++readPos;
      continue;
    }

    v_char8 marker = data[readPos + 1];

    // Two-char escapes. An unrecognized letter writes nothing but is still skipped.
    if (marker != 'u') {
      switch (marker) {
        case '"':
        case '\\':
        case '/':
          resultData[writePos] = marker;
          ++writePos;
          break;
        case 'b': resultData[writePos] = '\b'; ++writePos; break;
        case 'f': resultData[writePos] = '\f'; ++writePos; break;
        case 'n': resultData[writePos] = '\n'; ++writePos; break;
        case 'r': resultData[writePos] = '\r'; ++writePos; break;
        case 't': resultData[writePos] = '\t'; ++writePos; break;
      }
      readPos += 2;
      continue;
    }

    // '\u+XXXXXXXX' - Not JSON standard case
    if (data[readPos + 2] == '+') {
      v_word32 wideCode;
      encoding::Hex::readWord32(&data[readPos + 3], wideCode);
      writePos += encoding::Unicode::decodeUtf8Char(wideCode, &resultData[writePos]);
      readPos += 11;
      continue;
    }

    v_word16 unit;
    encoding::Hex::readWord16(&data[readPos + 2], unit);

    // Single '\uXXXX' that is not a high surrogate.
    if (unit < 0xD800 || unit > 0xDBFF) {
      writePos += encoding::Unicode::decodeUtf8Char(unit, &resultData[writePos]);
      readPos += 6;
      continue;
    }

    // High surrogate: combine with the low surrogate read from the next escape.
    v_word16 lowUnit;
    encoding::Hex::readWord16(&data[readPos + 8], lowUnit);
    v_word32 combined = encoding::Unicode::utf16SurrogatePairToCode(unit, lowUnit);
    writePos += encoding::Unicode::decodeUtf8Char(combined, &resultData[writePos]);
    readPos += 12;

  }

}
/**
 * Validate and unescape a JSON-escaped buffer into a new oatpp::String.
 *
 * @param data - escaped bytes.
 * @param size - number of bytes in data.
 * @param error - out: nullptr on success, otherwise an error message.
 * @param errorPosition - out: offset of the error in data (set on error).
 * @return - unescaped string, or nullptr on error.
 */
oatpp::String Utils::unescapeString(p_char8 data, v_int32 size,
                                    const char* & error, v_int32& errorPosition) {

  const v_int32 targetSize = calcUnescapedStringSize(data, size, error, errorPosition);
  if (error != nullptr) {
    return nullptr;
  }

  auto result = String(targetSize);

  if (targetSize != size) {
    unescapeStringToBuffer(data, size, result->getData());
  } else {
    // Same size means there was nothing to unescape - plain copy.
    std::memcpy(result->getData(), data, size);
  }

  return result;

}
/**
 * Validate and unescape a JSON-escaped buffer into a std::string.
 *
 * @param data - escaped bytes.
 * @param size - number of bytes in data.
 * @param error - out: nullptr on success, otherwise an error message.
 * @param errorPosition - out: offset of the error in data (set on error).
 * @return - unescaped string, or an empty string on error.
 */
std::string Utils::unescapeStringToStdString(p_char8 data, v_int32 size,
                                             const char* & error, v_int32& errorPosition){

  v_int32 unescapedSize = calcUnescapedStringSize(data, size, error, errorPosition);
  if(error != nullptr){
    return "";
  }

  std::string result;
  result.resize(unescapedSize);

  // Write through &result[0]: std::string::data() is const before C++17, so
  // casting its result and writing through it is not permitted there.
  p_char8 destination = reinterpret_cast<p_char8>(&result[0]);

  if(unescapedSize == size) {
    // Same size means there was nothing to unescape - plain copy.
    std::memcpy(destination, data, size);
  } else {
    unescapeStringToBuffer(data, size, destination);
  }

  return result;

}
/**
 * Locate the body of a quoted string at the caret without unescaping it.
 *
 * Scans from the caret position to the first '"' that is not preceded by a
 * backslash escape. On success the caret position is left where the scan
 * started. On failure ERROR_PARSER_QUOTE_EXPECTED is set on the caret.
 *
 * @param caret - parsing caret; caret.canContinueAtChar('"', 1) must succeed
 * (presumably this consumes the opening quote - confirm against ParsingCaret).
 * @param size - out: length of the string body, set only on success.
 * @return - pointer to the first byte of the string body, or nullptr on error.
 */
p_char8 Utils::preparseString(ParsingCaret& caret, v_int32& size){

  if (!caret.canContinueAtChar('"', 1)) {
    caret.setError(ERROR_PARSER_QUOTE_EXPECTED);
    return nullptr;
  }

  const p_char8 buffer = caret.getData();
  const v_int32 begin = caret.getPosition();
  const v_int32 end = caret.getSize();

  for (v_int32 cursor = begin; cursor < end; ) {

    v_char8 ch = buffer[cursor];

    if (ch == '"') {
      size = cursor - begin;
      return &buffer[begin];
    }

    // A backslash escapes the following char, so step over both.
    cursor += (ch == '\\') ? 2 : 1;

  }

  // Ran off the end of the data without finding the closing quote.
  caret.setPosition(caret.getSize());
  caret.setError(ERROR_PARSER_QUOTE_EXPECTED);
  return nullptr;

}
/**
 * Parse a quoted JSON string at the caret into an oatpp::String.
 *
 * On success the caret is moved past the closing quote. On an unescape error
 * the error is set on the caret and the caret is moved to the error position.
 *
 * @param caret - parsing caret.
 * @return - unescaped string, or nullptr on error.
 */
oatpp::String Utils::parseString(ParsingCaret& caret) {

  v_int32 rawSize;
  p_char8 raw = preparseString(caret, rawSize);

  if (raw == nullptr) {
    return nullptr;
  }

  const v_int32 start = caret.getPosition();

  const char* error;
  v_int32 errorPosition;
  auto value = unescapeString(raw, rawSize, error, errorPosition);

  if (error == nullptr) {
    // Skip the string body and the closing quote.
    caret.setPosition(start + rawSize + 1);
  } else {
    caret.setError(error);
    caret.setPosition(start + errorPosition);
  }

  return value;

}
/**
 * Parse a quoted JSON string at the caret into a std::string.
 *
 * On success the caret is moved past the closing quote. On an unescape error
 * the error is set on the caret and the caret is moved to the error position.
 *
 * @param caret - parsing caret.
 * @return - unescaped string, or an empty string on error.
 */
std::string Utils::parseStringToStdString(ParsingCaret& caret){

  v_int32 size;
  p_char8 data = preparseString(caret, size);

  if(data != nullptr) {

    v_int32 pos = caret.getPosition();

    const char* error;
    v_int32 errorPosition;
    // Held by value (not const&) so the return below can move / elide
    // instead of copying the whole string.
    std::string result = unescapeStringToStdString(data, size, error, errorPosition);

    if(error != nullptr){
      caret.setError(error);
      caret.setPosition(pos + errorPosition);
    } else {
      // Skip the string body and the closing quote.
      caret.setPosition(pos + size + 1);
    }

    return result;

  }

  return "";

}
}}}