better multipart stateful parser

This commit is contained in:
lganzzzo 2019-07-12 19:10:06 +03:00
parent d657d4f736
commit 427612adfd
3 changed files with 120 additions and 22 deletions

View File

@ -31,6 +31,20 @@
namespace oatpp { namespace web { namespace mime { namespace multipart {
/**
 * Constructor.
 * The parser starts in STATE_BOUNDARY, expecting the first boundary ("--" + boundary).
 * Every following boundary is matched against "\r\n--" + boundary.
 * @param boundary - value of multipart boundary.
 * @param listener - listener receiving parsed part headers and part data.
 * May be empty; listener callbacks are skipped in that case.
 */
StatefulParser::StatefulParser(const oatpp::String& boundary, const std::shared_ptr<Listener>& listener)
  : m_state(STATE_BOUNDARY)
  , m_currPartIndex(0)
  , m_currBoundaryCharIndex(0)
  , m_checkForBoundary(true)
  , m_finishingBoundary(false)
  , m_readingBody(false)
  , m_headerSectionEndAccumulator(0)
  , m_firstBoundarySample("--" + boundary)
  , m_nextBoundarySample("\r\n--" + boundary)
  , m_maxPartHeadersSize(4096) // keep in sync with the documented default of 4096 bytes
  , m_listener(listener)
{}
void StatefulParser::onPartHeaders(const Headers& partHeaders) {
m_currPartIndex ++;
@ -59,12 +73,8 @@ void StatefulParser::onPartHeaders(const Headers& partHeaders) {
m_currPartName = nameLabel.toString();
OATPP_LOGD("Part", "name='%s'", m_currPartName->getData());
for(auto& pair : partHeaders) {
auto key = pair.first.toString();
auto value = pair.second.toString();
OATPP_LOGD("header", "key='%s', value='%s'", key->getData(), value->getData());
if(m_listener) {
m_listener->onPartHeaders(m_currPartName, partHeaders);
}
} else {
@ -83,8 +93,9 @@ void StatefulParser::onPartHeaders(const Headers& partHeaders) {
/**
 * Forward a chunk of the current part's body to the listener, if one is set.
 * The parser also calls this with data == nullptr and size == 0 once a part is
 * complete, so nothing here may read from `data` - it is passed through as is.
 * @param data - pointer to the chunk bytes (may be nullptr when size == 0).
 * @param size - chunk size in bytes.
 */
void StatefulParser::onPartData(p_char8 data, v_int32 size) {
  if(m_listener) {
    m_listener->onPartData(m_currPartName, data, size);
  }
}
@ -116,6 +127,9 @@ v_int32 StatefulParser::parseNext_Boundary(p_char8 data, v_int32 size) {
m_state = STATE_AFTER_BOUNDARY;
m_currBoundaryCharIndex = 0;
m_readingBody = false;
if(m_currPartIndex > 0) {
onPartData(nullptr, 0);
}
}
return caret.getPosition();
@ -181,6 +195,10 @@ v_int32 StatefulParser::parseNext_Headers(p_char8 data, v_int32 size) {
if(m_headerSectionEndAccumulator == HEADERS_SECTION_END) {
if(m_headersBuffer.getSize() + i > m_maxPartHeadersSize) {
throw std::runtime_error("[oatpp::web::mime::multipart::StatefulParser::parseNext_Headers()]: Error. Too large heades.");
}
m_headersBuffer.write(data, i);
auto headersText = m_headersBuffer.toString();
@ -203,6 +221,10 @@ v_int32 StatefulParser::parseNext_Headers(p_char8 data, v_int32 size) {
}
if(m_headersBuffer.getSize() + size > m_maxPartHeadersSize) {
throw std::runtime_error("[oatpp::web::mime::multipart::StatefulParser::parseNext_Headers()]: Error. Too large heades.");
}
m_headersBuffer.write(data, size);
return size;
@ -265,4 +287,8 @@ v_int32 StatefulParser::parseNext(p_char8 data, v_int32 size) {
}
/**
 * Check if parser done parsing data.
 * @return - `true` once the parser has reached STATE_DONE, `false` otherwise.
 */
bool StatefulParser::finished() {
  const bool reachedFinalState = (STATE_DONE == m_state);
  return reachedFinalState;
}
}}}}

View File

@ -33,6 +33,10 @@
namespace oatpp { namespace web { namespace mime { namespace multipart {
/**
* Stateful parser of multipart-data stream.
* Parser designed to work with stream-like data in order to store minimum data in the memory.
*/
class StatefulParser {
private:
static constexpr v_int32 STATE_BOUNDARY = 0;
@ -42,7 +46,7 @@ private:
static constexpr v_int32 STATE_DONE = 4;
private:
static constexpr v_int32 HEADERS_SECTION_END = ('\r' << 24) | ('\n' << 16) | ('\r' << 8) | ('\n');
public:
private:
/**
* Typedef for headers map. Headers map key is case-insensitive.
* `std::unordered_map` of &id:oatpp::data::share::StringKeyLabelCI_FAST; and &id:oatpp::data::share::StringKeyLabel;.
@ -50,10 +54,36 @@ public:
typedef std::unordered_map<oatpp::data::share::StringKeyLabelCI_FAST, oatpp::data::share::StringKeyLabel> Headers;
public:
/**
 * Listener for parsed items.
 */
class Listener {
public:
  /**
   * Convenience typedef for headers map. Headers map key is case-insensitive.
   * `std::unordered_map` of &id:oatpp::data::share::StringKeyLabelCI_FAST; and &id:oatpp::data::share::StringKeyLabel;.
   */
  typedef std::unordered_map<oatpp::data::share::StringKeyLabelCI_FAST, oatpp::data::share::StringKeyLabel> Headers;
public:

  /**
   * Virtual destructor.
   * Listeners are implemented by subclassing and are held through a pointer
   * to this interface, so destruction through the base type must be well-defined.
   */
  virtual ~Listener() = default;

  /**
   * Called on new part found in the stream.
   * Always called before `onPartData` events.
   * @param name - name of the part.
   * @param partHeaders - complete set of part headers.
   */
  virtual void onPartHeaders(const oatpp::String& name, const Headers& partHeaders) = 0;

  /**
   * Called on each new chunk of bytes parsed from the part body.
   * When all data of the part is read, `onPartData` is called again with size == 0 to
   * indicate the end of the part.
   * @param name - name of the part.
   * @param data - pointer to data.
   * @param size - size of the data in bytes.
   */
  virtual void onPartData(const oatpp::String& name, p_char8 data, oatpp::data::v_io_size size) = 0;

};
private:
@ -76,6 +106,14 @@ private:
*/
oatpp::data::stream::ChunkedBuffer m_headersBuffer;
/*
* Max length of all headers per one part.
* Default value = 4096 bytes.
*/
v_int32 m_maxPartHeadersSize;
std::shared_ptr<Listener> m_listener;
private:
void onPartHeaders(const Headers& partHeaders);
@ -90,20 +128,27 @@ private:
public:
/**
 * Constructor without a listener.
 * Delegates to the two-argument constructor with an empty listener so that
 * every member (including the part headers size limit) is initialized in
 * exactly one place.
 * @param boundary - value of multipart boundary.
 */
StatefulParser(const oatpp::String& boundary)
  : StatefulParser(boundary, nullptr)
{}
/**
* Constructor.
* @param boundary - value of multipart boundary.
* @param listener - &l:StatefulParser::Listener;.
*/
StatefulParser(const oatpp::String& boundary, const std::shared_ptr<Listener>& listener);
/**
* Parse next chunk of bytes.
* @param data - pointer to data.
* @param size - data size.
* @return - exact number of parsed bytes. <br>
* returned value may be less than size given.
*/
v_int32 parseNext(p_char8 data, v_int32 size);
/**
* Check if parser done parsing data.
* @return - `true` or `false`.
*/
bool finished();
};

View File

@ -47,6 +47,33 @@ namespace {
"--12345--\r\n"
;
/**
 * Test listener: logs headers of every part and logs the complete body of a
 * part once the end of that part is signalled.
 */
class Listener : public oatpp::web::mime::multipart::StatefulParser::Listener {
private:
  // Collects body chunks of the part currently being received.
  oatpp::data::stream::ChunkedBuffer m_buffer;
public:

  /**
   * Log the part name followed by one line per header.
   */
  void onPartHeaders(const oatpp::String& name, const Headers& partHeaders) override {
    OATPP_LOGD("aaa", "part='%s' headers:", name->getData());
    for(auto it = partHeaders.begin(); it != partHeaders.end(); ++it) {
      auto headerName = it->first.toString();
      auto headerValue = it->second.toString();
      OATPP_LOGD("Header", "name='%s', value='%s'", headerName->getData(), headerValue->getData());
    }
  }

  /**
   * Buffer non-empty chunks; on a chunk with size <= 0 dump the buffered body
   * to the log and reset the buffer.
   */
  void onPartData(const oatpp::String& name, p_char8 data, oatpp::data::v_io_size size) override {

    if(size > 0) {
      m_buffer.write(data, size);
      return;
    }

    // End of part: flush everything collected so far.
    auto partBody = m_buffer.toString();
    m_buffer.clear();
    OATPP_LOGD("aaa", "part='%s', data='%s'", name->getData(), partBody->getData());
    OATPP_LOGW("aaa", "part end.");

  }

};
}
void StatefulParserTest::onRun() {
@ -54,7 +81,7 @@ void StatefulParserTest::onRun() {
oatpp::String text = TEST_DATA_1;
{
oatpp::web::mime::multipart::StatefulParser parser("12345");
oatpp::web::mime::multipart::StatefulParser parser("12345", std::make_shared<Listener>());
for (v_int32 i = 0; i < text->getSize(); i++) {
parser.parseNext(&text->getData()[i], 1);
@ -64,7 +91,7 @@ void StatefulParserTest::onRun() {
OATPP_LOGI(TAG, "Test2.................................................");
{
oatpp::web::mime::multipart::StatefulParser parser("12345");
oatpp::web::mime::multipart::StatefulParser parser("12345", std::make_shared<Listener>());
parser.parseNext(text->getData(), text->getSize());
}