mirror of
https://github.com/oatpp/oatpp.git
synced 2025-04-06 18:40:24 +08:00
162 lines
4.9 KiB
C++
162 lines
4.9 KiB
C++
/***************************************************************************
|
|
*
|
|
* Project _____ __ ____ _ _
|
|
* ( _ ) /__\ (_ _)_| |_ _| |_
|
|
* )(_)( /(__)\ )( (_ _)(_ _)
|
|
* (_____)(__)(__)(__) |_| |_|
|
|
*
|
|
*
|
|
* Copyright 2018-present, Leonid Stryzhevskyi, <lganzzzo@gmail.com>
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*
|
|
***************************************************************************/
|
|
|
|
#include "Unicode.hpp"
|
|
|
|
#include "./Hex.hpp"
|
|
|
|
#include <arpa/inet.h>
|
|
|
|
namespace oatpp { namespace encoding {
|
|
|
|
/**
 * Deduce the byte length of a UTF-8 sequence from its first byte.
 * NOTE(review): the `< 128` comparison presumes v_char8 is an unsigned 8-bit type - confirm.
 * @param firstByte - first byte of the sequence.
 * @return - 1 for values below 128; 2..6 depending on which bit below the two
 * top bits is the first one cleared; 0 if the two top bits are not both set,
 * or if bits 7..1 are all set.
 */
v_int32 Unicode::getUtf8CharSequenceLength(v_char8 firstByte) {

  if(firstByte < 128) {
    return 1;
  }

  // A multi-byte lead must have both top bits (128 and 64) set.
  if((firstByte & 192) != 192) {
    return 0;
  }

  // Scan bits 32, 16, 8, 4, 2: the first cleared one terminates the length prefix.
  v_int32 sequenceLength = 2;
  for(v_int32 mask = 32; mask > 1; mask >>= 1) {
    if((firstByte & mask) == 0) {
      return sequenceLength;
    }
    sequenceLength++;
  }

  // Bits 7..1 are all set - no length is defined for such a byte.
  return 0;

}
|
|
|
|
/**
 * Number of bytes needed to store the given code as a UTF-8 sequence
 * (using the extended scheme that allows up to 6 bytes).
 * @param code - code value.
 * @return - sequence length in bytes, from 1 to 6.
 */
v_int32 Unicode::getUtf8CharSequenceLengthForCode(v_word32 code){

  // Each threshold is the first code that no longer fits into the given length.
  if(code < 0x00000080) return 1;
  if(code < 0x00000800) return 2;
  if(code < 0x00010000) return 3;
  if(code < 0x00200000) return 4;
  if(code < 0x04000000) return 5;

  return 6;

}
|
|
|
|
/**
 * Read one UTF-8 sequence and return the code it represents.
 * (Despite the name, this function converts UTF-8 bytes into a code value.)
 *
 * The sequence is read strictly byte-by-byte, so the result does not depend
 * on host byte order or on the alignment of `sequence`.
 *
 * No validation is performed: the two top bits of the first byte are not
 * checked, continuation bytes are not checked for the 10xxxxxx pattern, and
 * bytes sequence[1] .. sequence[length - 1] are read unconditionally - the
 * caller has to guarantee they are accessible.
 * NOTE(review): the `<= 127` comparison presumes v_char8 is an unsigned 8-bit type - confirm.
 *
 * @param sequence - pointer to the first byte of the sequence.
 * @param length - out-parameter; set to the number of bytes consumed (1..6).
 * Left untouched when -1 is returned.
 * @return - decoded code, or -1 if bits 7..1 of the first byte are all set.
 */
v_int32 Unicode::encodeUtf8Char(p_char8 sequence, v_int32& length){

  const v_char8 lead = sequence[0];

  if(lead <= 127){
    // Single-byte character - the byte is the code.
    length = 1;
    return lead;
  }

  // The first cleared bit below the two top bits selects the sequence length;
  // the remaining low bits of the lead byte are the highest bits of the code.
  v_int32 code;
  if((lead | 32) != lead){
    length = 2;
    code = (31 & lead) << 6;
  } else if((lead | 16) != lead){
    length = 3;
    code = (15 & lead) << 12;
  } else if((lead | 8) != lead){
    length = 4;
    code = (7 & lead) << 18;
  } else if((lead | 4) != lead){
    length = 5;
    code = (3 & lead) << 24;
  } else if((lead | 2) != lead){
    length = 6;
    code = (1 & lead) << 30;
  } else {
    return -1;
  }

  // Each following byte contributes its 6 low bits; the last byte of the
  // sequence holds the least significant bits, so walk from the end.
  v_int32 shift = 0;
  for(v_int32 i = length - 1; i > 0; i--){
    code |= (sequence[i] & 63) << shift;
    shift += 6;
  }

  return code;

}
|
|
|
|
/**
 * Write the given code to `buffer` as a UTF-8 sequence.
 * (Despite the name, this function converts a code value into UTF-8 bytes.)
 *
 * Uses the extended scheme with sequences of up to 6 bytes. Bytes are stored
 * one at a time, so `buffer` needs no particular alignment and the output
 * does not depend on host byte order. Every byte after the first one is
 * written as 10xxxxxx, including the last byte of a 6-byte sequence.
 *
 * @param code - code to write. Values below 0x80 (negative values included)
 * are stored as a single byte truncated to the buffer's element type.
 * @param buffer - destination; the caller must provide room for the returned
 * number of bytes (at most 6).
 * @return - number of bytes written (1..6).
 */
v_int32 Unicode::decodeUtf8Char(v_int32 code, p_char8 buffer) {

  v_int32 length;     // total number of bytes in the sequence
  v_int32 leadPrefix; // length marker placed in the top bits of the first byte
  v_int32 leadMask;   // payload bits available in the first byte

  if(code < 0x00000080){
    buffer[0] = code;
    return 1;
  } else if(code < 0x00000800){
    length = 2; leadPrefix = 192; leadMask = 31;
  } else if(code < 0x00010000){
    length = 3; leadPrefix = 224; leadMask = 15;
  } else if(code < 0x00200000){
    length = 4; leadPrefix = 240; leadMask = 7;
  } else if(code < 0x04000000){
    length = 5; leadPrefix = 248; leadMask = 3;
  } else {
    length = 6; leadPrefix = 252; leadMask = 1;
  }

  // Fill continuation bytes from the end: the last byte carries the lowest
  // 6 bits of the code, each previous one the next 6 bits.
  v_int32 remaining = code;
  for(v_int32 i = length - 1; i > 0; i--){
    buffer[i] = (remaining & 63) | 128;
    remaining >>= 6;
  }

  // Whatever is left belongs to the lead byte, below the length marker.
  buffer[0] = (remaining & leadMask) | leadPrefix;

  return length;

}
|
|
|
|
/**
 * Split a code into a UTF-16 surrogate pair.
 * 0x010000 is subtracted from the code; bits 19..10 of the result are added
 * to 0xD800 and bits 9..0 are added to 0xDC00. No range check is done on `code`.
 * @param code - code to split.
 * @param high - out-parameter receiving 0xD800 + upper 10 bits (stored in a v_int16).
 * @param low - out-parameter receiving 0xDC00 + lower 10 bits (stored in a v_int16).
 */
void Unicode::codeToUtf16SurrogatePair(v_int32 code, v_int16& high, v_int16& low){

  const v_int32 offset = code - 0x010000;

  const v_int32 upperTenBits = (offset >> 10) & 0x3FF;
  const v_int32 lowerTenBits = offset & 0x3FF;

  high = 0xD800 + upperTenBits;
  low = 0xDC00 + lowerTenBits;

}
|
|
|
|
/**
 * Combine a UTF-16 surrogate pair into a single code.
 * Only the 10 low bits of (high - 0xD800) and (low - 0xDC00) are used;
 * the inputs are not checked for being in the surrogate ranges.
 * @param high - first (high) unit of the pair.
 * @param low - second (low) unit of the pair.
 * @return - ((high part << 10) | low part) + 0x010000.
 */
v_int32 Unicode::utf16SurrogatePairToCode(v_int16 high, v_int16 low){

  const v_int32 upperTenBits = (high - 0xD800) & 0x3FF;
  const v_int32 lowerTenBits = (low - 0xDC00) & 0x3FF;

  return ((upperTenBits << 10) | lowerTenBits) + 0x010000;

}
|
|
|
|
}}
|