2020-08-11 17:10:23 +08:00
|
|
|
// © 2016 and later: Unicode, Inc. and others.
|
|
|
|
// License & terms of use: http://www.unicode.org/copyright.html
|
|
|
|
/*
|
|
|
|
*******************************************************************************
|
|
|
|
* Copyright (C) 2010-2011, International Business Machines
|
|
|
|
* Corporation and others. All Rights Reserved.
|
|
|
|
*******************************************************************************
|
|
|
|
* file name: ucharstrieiterator.h
|
|
|
|
* encoding: UTF-8
|
|
|
|
* tab size: 8 (not used)
|
|
|
|
* indentation:4
|
|
|
|
*
|
|
|
|
* created on: 2010nov15
|
|
|
|
* created by: Markus W. Scherer
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "unicode/utypes.h"
|
|
|
|
#include "unicode/ucharstrie.h"
|
|
|
|
#include "unicode/unistr.h"
|
|
|
|
#include "uvectr32.h"
|
|
|
|
|
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
|
|
|
|
UCharsTrie::Iterator::Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength,
|
|
|
|
UErrorCode &errorCode)
|
|
|
|
: uchars_(trieUChars),
|
|
|
|
pos_(uchars_), initialPos_(uchars_),
|
|
|
|
remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
|
2022-10-28 14:11:55 +08:00
|
|
|
skipValue_(false),
|
2023-05-23 08:05:01 +08:00
|
|
|
maxLength_(maxStringLength), value_(0), stack_(nullptr) {
|
2020-08-11 17:10:23 +08:00
|
|
|
if(U_FAILURE(errorCode)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
// stack_ is a pointer so that it's easy to turn ucharstrie.h into
|
|
|
|
// a public API header for which we would want it to depend only on
|
|
|
|
// other public headers.
|
|
|
|
// Unlike UCharsTrie itself, its Iterator performs memory allocations anyway
|
|
|
|
// via the UnicodeString and UVector32 implementations, so this additional
|
|
|
|
// cost is minimal.
|
|
|
|
stack_=new UVector32(errorCode);
|
2023-05-23 08:05:01 +08:00
|
|
|
if(stack_==nullptr) {
|
2020-08-11 17:10:23 +08:00
|
|
|
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
UCharsTrie::Iterator::Iterator(const UCharsTrie &trie, int32_t maxStringLength,
|
|
|
|
UErrorCode &errorCode)
|
|
|
|
: uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_),
|
|
|
|
remainingMatchLength_(trie.remainingMatchLength_),
|
|
|
|
initialRemainingMatchLength_(trie.remainingMatchLength_),
|
2022-10-28 14:11:55 +08:00
|
|
|
skipValue_(false),
|
2023-05-23 08:05:01 +08:00
|
|
|
maxLength_(maxStringLength), value_(0), stack_(nullptr) {
|
2020-08-11 17:10:23 +08:00
|
|
|
if(U_FAILURE(errorCode)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
stack_=new UVector32(errorCode);
|
|
|
|
if(U_FAILURE(errorCode)) {
|
|
|
|
return;
|
|
|
|
}
|
2023-05-23 08:05:01 +08:00
|
|
|
if(stack_==nullptr) {
|
2020-08-11 17:10:23 +08:00
|
|
|
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
|
|
|
|
if(length>=0) {
|
|
|
|
// Pending linear-match node, append remaining UChars to str_.
|
|
|
|
++length;
|
|
|
|
if(maxLength_>0 && length>maxLength_) {
|
|
|
|
length=maxLength_; // This will leave remainingMatchLength>=0 as a signal.
|
|
|
|
}
|
|
|
|
str_.append(pos_, length);
|
|
|
|
pos_+=length;
|
|
|
|
remainingMatchLength_-=length;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
UCharsTrie::Iterator::~Iterator() {
|
|
|
|
delete stack_;
|
|
|
|
}
|
|
|
|
|
|
|
|
UCharsTrie::Iterator &
|
|
|
|
UCharsTrie::Iterator::reset() {
|
|
|
|
pos_=initialPos_;
|
|
|
|
remainingMatchLength_=initialRemainingMatchLength_;
|
2022-10-28 14:11:55 +08:00
|
|
|
skipValue_=false;
|
2020-08-11 17:10:23 +08:00
|
|
|
int32_t length=remainingMatchLength_+1; // Remaining match length.
|
|
|
|
if(maxLength_>0 && length>maxLength_) {
|
|
|
|
length=maxLength_;
|
|
|
|
}
|
|
|
|
str_.truncate(length);
|
|
|
|
pos_+=length;
|
|
|
|
remainingMatchLength_-=length;
|
|
|
|
stack_->setSize(0);
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
UBool
|
2023-05-23 08:05:01 +08:00
|
|
|
UCharsTrie::Iterator::hasNext() const { return pos_!=nullptr || !stack_->isEmpty(); }
|
2020-08-11 17:10:23 +08:00
|
|
|
|
|
|
|
UBool
|
|
|
|
UCharsTrie::Iterator::next(UErrorCode &errorCode) {
|
|
|
|
if(U_FAILURE(errorCode)) {
|
2022-10-28 14:11:55 +08:00
|
|
|
return false;
|
2020-08-11 17:10:23 +08:00
|
|
|
}
|
2023-05-23 08:05:01 +08:00
|
|
|
const char16_t *pos=pos_;
|
|
|
|
if(pos==nullptr) {
|
2020-08-11 17:10:23 +08:00
|
|
|
if(stack_->isEmpty()) {
|
2022-10-28 14:11:55 +08:00
|
|
|
return false;
|
2020-08-11 17:10:23 +08:00
|
|
|
}
|
|
|
|
// Pop the state off the stack and continue with the next outbound edge of
|
|
|
|
// the branch node.
|
|
|
|
int32_t stackSize=stack_->size();
|
|
|
|
int32_t length=stack_->elementAti(stackSize-1);
|
|
|
|
pos=uchars_+stack_->elementAti(stackSize-2);
|
|
|
|
stack_->setSize(stackSize-2);
|
|
|
|
str_.truncate(length&0xffff);
|
|
|
|
length=(int32_t)((uint32_t)length>>16);
|
|
|
|
if(length>1) {
|
|
|
|
pos=branchNext(pos, length, errorCode);
|
2023-05-23 08:05:01 +08:00
|
|
|
if(pos==nullptr) {
|
2022-10-28 14:11:55 +08:00
|
|
|
return true; // Reached a final value.
|
2020-08-11 17:10:23 +08:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
str_.append(*pos++);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if(remainingMatchLength_>=0) {
|
|
|
|
// We only get here if we started in a pending linear-match node
|
|
|
|
// with more than maxLength remaining units.
|
|
|
|
return truncateAndStop();
|
|
|
|
}
|
|
|
|
for(;;) {
|
|
|
|
int32_t node=*pos++;
|
|
|
|
if(node>=kMinValueLead) {
|
|
|
|
if(skipValue_) {
|
|
|
|
pos=skipNodeValue(pos, node);
|
|
|
|
node&=kNodeTypeMask;
|
2022-10-28 14:11:55 +08:00
|
|
|
skipValue_=false;
|
2020-08-11 17:10:23 +08:00
|
|
|
} else {
|
|
|
|
// Deliver value for the string so far.
|
|
|
|
UBool isFinal=(UBool)(node>>15);
|
|
|
|
if(isFinal) {
|
|
|
|
value_=readValue(pos, node&0x7fff);
|
|
|
|
} else {
|
|
|
|
value_=readNodeValue(pos, node);
|
|
|
|
}
|
|
|
|
if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) {
|
2023-05-23 08:05:01 +08:00
|
|
|
pos_=nullptr;
|
2020-08-11 17:10:23 +08:00
|
|
|
} else {
|
|
|
|
// We cannot skip the value right here because it shares its
|
|
|
|
// lead unit with a match node which we have to evaluate
|
|
|
|
// next time.
|
|
|
|
// Instead, keep pos_ on the node lead unit itself.
|
|
|
|
pos_=pos-1;
|
2022-10-28 14:11:55 +08:00
|
|
|
skipValue_=true;
|
2020-08-11 17:10:23 +08:00
|
|
|
}
|
2022-10-28 14:11:55 +08:00
|
|
|
return true;
|
2020-08-11 17:10:23 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if(maxLength_>0 && str_.length()==maxLength_) {
|
|
|
|
return truncateAndStop();
|
|
|
|
}
|
|
|
|
if(node<kMinLinearMatch) {
|
|
|
|
if(node==0) {
|
|
|
|
node=*pos++;
|
|
|
|
}
|
|
|
|
pos=branchNext(pos, node+1, errorCode);
|
2023-05-23 08:05:01 +08:00
|
|
|
if(pos==nullptr) {
|
2022-10-28 14:11:55 +08:00
|
|
|
return true; // Reached a final value.
|
2020-08-11 17:10:23 +08:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// Linear-match node, append length units to str_.
|
|
|
|
int32_t length=node-kMinLinearMatch+1;
|
|
|
|
if(maxLength_>0 && str_.length()+length>maxLength_) {
|
|
|
|
str_.append(pos, maxLength_-str_.length());
|
|
|
|
return truncateAndStop();
|
|
|
|
}
|
|
|
|
str_.append(pos, length);
|
|
|
|
pos+=length;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Branch node, needs to take the first outbound edge and push state for the rest.
|
2023-05-23 08:05:01 +08:00
|
|
|
const char16_t *
|
|
|
|
UCharsTrie::Iterator::branchNext(const char16_t *pos, int32_t length, UErrorCode &errorCode) {
|
2020-08-11 17:10:23 +08:00
|
|
|
while(length>kMaxBranchLinearSubNodeLength) {
|
|
|
|
++pos; // ignore the comparison unit
|
|
|
|
// Push state for the greater-or-equal edge.
|
|
|
|
stack_->addElement((int32_t)(skipDelta(pos)-uchars_), errorCode);
|
|
|
|
stack_->addElement(((length-(length>>1))<<16)|str_.length(), errorCode);
|
|
|
|
// Follow the less-than edge.
|
|
|
|
length>>=1;
|
|
|
|
pos=jumpByDelta(pos);
|
|
|
|
}
|
|
|
|
// List of key-value pairs where values are either final values or jump deltas.
|
|
|
|
// Read the first (key, value) pair.
|
2023-05-23 08:05:01 +08:00
|
|
|
char16_t trieUnit=*pos++;
|
2020-08-11 17:10:23 +08:00
|
|
|
int32_t node=*pos++;
|
|
|
|
UBool isFinal=(UBool)(node>>15);
|
|
|
|
int32_t value=readValue(pos, node&=0x7fff);
|
|
|
|
pos=skipValue(pos, node);
|
|
|
|
stack_->addElement((int32_t)(pos-uchars_), errorCode);
|
|
|
|
stack_->addElement(((length-1)<<16)|str_.length(), errorCode);
|
|
|
|
str_.append(trieUnit);
|
|
|
|
if(isFinal) {
|
2023-05-23 08:05:01 +08:00
|
|
|
pos_=nullptr;
|
2020-08-11 17:10:23 +08:00
|
|
|
value_=value;
|
2023-05-23 08:05:01 +08:00
|
|
|
return nullptr;
|
2020-08-11 17:10:23 +08:00
|
|
|
} else {
|
|
|
|
return pos+value;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
U_NAMESPACE_END
|