From d657d4f73644d1fb79cfd1f32a9878d7e7972da0 Mon Sep 17 00:00:00 2001 From: lganzzzo Date: Thu, 11 Jul 2019 00:36:52 +0300 Subject: [PATCH] multipart data. streaming parser POC. WIP --- src/CMakeLists.txt | 2 + src/oatpp/web/client/ApiClient.hpp | 6 +- .../web/mime/multipart/StatefulParser.cpp | 268 ++++++++++++++++++ .../web/mime/multipart/StatefulParser.hpp | 112 ++++++++ .../http/incoming/RequestHeadersReader.cpp | 5 +- test/CMakeLists.txt | 2 + test/oatpp/AllTestsMain.cpp | 11 +- .../web/mime/multipart/StatefulParserTest.cpp | 74 +++++ .../web/mime/multipart/StatefulParserTest.hpp | 42 +++ 9 files changed, 515 insertions(+), 7 deletions(-) create mode 100644 src/oatpp/web/mime/multipart/StatefulParser.cpp create mode 100644 src/oatpp/web/mime/multipart/StatefulParser.hpp create mode 100644 test/oatpp/web/mime/multipart/StatefulParserTest.cpp create mode 100644 test/oatpp/web/mime/multipart/StatefulParserTest.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a1931d97..5e2bd250 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -141,6 +141,8 @@ add_library(oatpp oatpp/web/client/HttpRequestExecutor.hpp oatpp/web/client/RequestExecutor.cpp oatpp/web/client/RequestExecutor.hpp + oatpp/web/mime/multipart/StatefulParser.cpp + oatpp/web/mime/multipart/StatefulParser.hpp oatpp/web/protocol/CommunicationError.cpp oatpp/web/protocol/CommunicationError.hpp oatpp/web/protocol/http/Http.cpp diff --git a/src/oatpp/web/client/ApiClient.hpp b/src/oatpp/web/client/ApiClient.hpp index 0551f07c..6d1350d6 100644 --- a/src/oatpp/web/client/ApiClient.hpp +++ b/src/oatpp/web/client/ApiClient.hpp @@ -22,8 +22,8 @@ * ***************************************************************************/ -#ifndef glanzzzo_web_client_Client_hpp -#define glanzzzo_web_client_Client_hpp +#ifndef oatpp_web_client_Client_hpp +#define oatpp_web_client_Client_hpp #include "./RequestExecutor.hpp" @@ -214,4 +214,4 @@ public: }}} -#endif /* glanzzzo_web_client_Client_hpp */ +#endif /* 
oatpp_web_client_Client_hpp */ diff --git a/src/oatpp/web/mime/multipart/StatefulParser.cpp b/src/oatpp/web/mime/multipart/StatefulParser.cpp new file mode 100644 index 00000000..5995be1a --- /dev/null +++ b/src/oatpp/web/mime/multipart/StatefulParser.cpp @@ -0,0 +1,268 @@ +/*************************************************************************** + * + * Project _____ __ ____ _ _ + * ( _ ) /__\ (_ _)_| |_ _| |_ + * )(_)( /(__)\ )( (_ _)(_ _) + * (_____)(__)(__)(__) |_| |_| + * + * + * Copyright 2018-present, Leonid Stryzhevskyi + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *
+ ***************************************************************************/
+
+#include "StatefulParser.hpp"
+
+#include "oatpp/web/protocol/http/Http.hpp"
+
+#include "oatpp/core/parser/Caret.hpp"
+
+
+namespace oatpp { namespace web { namespace mime { namespace multipart {
+
+/**
+ * Handle the parsed header section of a part.
+ * Advances the part counter, then stores the value of the `name=` parameter of
+ * the `Content-Disposition` header in m_currPartName and logs the part name
+ * together with every header of the part.
+ * @param partHeaders - headers of the part which is about to be read.
+ * @throws std::runtime_error - if `Content-Disposition` is absent, has no
+ * `name=` parameter, or the name can't be parsed.
+ */
+void StatefulParser::onPartHeaders(const Headers& partHeaders) {
+
+  /* Counted before validation - parseNext_Boundary() uses the counter to tell the first boundary from the rest. */
+  m_currPartIndex ++;
+
+  auto it = partHeaders.find("Content-Disposition");
+  if(it == partHeaders.end()) {
+    throw std::runtime_error("[oatpp::web::mime::multipart::StatefulParser::onPartHeaders()]: Error. Missing 'Content-Disposition' header.");
+  }
+
+  parser::Caret caret(it->second.toString());
+
+  /*
+   * NOTE(review): the search is for the raw text `name=`, so presumably it also matches
+   * inside `filename=` when that parameter precedes `name=` - confirm against Caret::findText().
+   */
+  if(!caret.findText((p_char8)"name=", 5)) {
+    throw std::runtime_error("[oatpp::web::mime::multipart::StatefulParser::onPartHeaders()]: Error. Part name is missing.");
+  }
+  caret.inc(5);
+
+  parser::Caret::Label nameLabel(nullptr);
+
+  if(caret.isAtChar('"')) {
+    nameLabel = caret.parseStringEnclosed('"', '"', '\\');
+  } else if(caret.isAtChar('\'')) {
+    nameLabel = caret.parseStringEnclosed('\'', '\'', '\\');
+  } else {
+    /* Unquoted value - runs up to the next whitespace character. */
+    nameLabel = caret.putLabel();
+    caret.findCharFromSet(" \t\n\r\f");
+    nameLabel.end();
+  }
+
+  if(!nameLabel) {
+    throw std::runtime_error("[oatpp::web::mime::multipart::StatefulParser::onPartHeaders()]: Error. Can't parse part name.");
+  }
+
+  m_currPartName = nameLabel.toString();
+
+  OATPP_LOGD("Part", "name='%s'", m_currPartName->getData());
+
+  for(auto& pair : partHeaders) {
+    auto key = pair.first.toString();
+    auto value = pair.second.toString();
+    OATPP_LOGD("header", "key='%s', value='%s'", key->getData(), value->getData());
+  }
+
+}
+
+/**
+ * Handle a piece of the body of the current part. For now the data is only logged.
+ * @param data - pointer to body bytes.
+ * @param size - number of bytes at `data`.
+ */
+void StatefulParser::onPartData(p_char8 data, v_int32 size) {
+
+  oatpp::String text((const char*)data, size, true);
+  OATPP_LOGD("data", "part='%s', data='%s'", m_currPartName->getData(), text->getData());
+
+}
+
+/**
+ * Match incoming bytes against the boundary sample. The match may span several calls -
+ * m_currBoundaryCharIndex keeps the number of sample bytes matched so far.
+ * @param data - pointer to unparsed bytes.
+ * @param size - number of bytes at `data`.
+ * @return - number of bytes consumed. `0` if the bytes turned out to be part body
+ * and have to be re-read in STATE_DATA.
+ * @throws std::runtime_error - on mismatch while not reading a part body.
+ */
+v_int32 StatefulParser::parseNext_Boundary(p_char8 data, v_int32 size) {
+
+  /* Before the first part the boundary is "--boundary", afterwards it is "\r\n--boundary". */
+  const oatpp::String& sample = (m_currPartIndex == 0) ? m_firstBoundarySample : m_nextBoundarySample;
+  p_char8 sampleData = sample->getData();
+  v_int32 sampleSize = sample->getSize();
+
+  /* Compare only what is available: the rest of the sample or the whole chunk, whichever is shorter. */
+  v_int32 checkSize = sampleSize - m_currBoundaryCharIndex;
+  if(checkSize > size) {
+    checkSize = size;
+  }
+
+  parser::Caret caret(data, size);
+
+  if(caret.isAtText(&sampleData[m_currBoundaryCharIndex], checkSize, true)) {
+
+    m_currBoundaryCharIndex += caret.getPosition();
+
+    if(m_currBoundaryCharIndex == sampleSize) {
+      m_state = STATE_AFTER_BOUNDARY;
+      m_currBoundaryCharIndex = 0;
+      m_readingBody = false;
+    }
+
+    return caret.getPosition();
+
+  } else if(m_readingBody) {
+
+    /* Not a boundary. Sample bytes matched in previous calls were body bytes - hand them over. */
+    if(m_currBoundaryCharIndex > 0) {
+      onPartData(sampleData, m_currBoundaryCharIndex);
+    }
+
+    /*
+     * If nothing was matched before this call, data[0] is the very '\r' which brought us here
+     * and parseNext_Data() has to step over it. Otherwise that '\r' was consumed by an earlier
+     * call (and has just been flushed above), so `data` is fresh and its first '\r' is a
+     * legitimate boundary candidate which must not be skipped.
+     */
+    m_checkForBoundary = (m_currBoundaryCharIndex > 0);
+
+    m_state = STATE_DATA;
+    m_currBoundaryCharIndex = 0;
+
+    return 0;
+
+  }
+
+  throw std::runtime_error("[oatpp::web::mime::multipart::StatefulParser::parseNext_Boundary()]: Error. Invalid state.");
+
+}
+
+/**
+ * Read the two bytes following a boundary: "\r\n" - a part follows, "--" - end of multipart data.
+ * The two bytes may arrive in separate calls; m_currBoundaryCharIndex is `1` once the first
+ * of them has been consumed.
+ * @param data - pointer to unparsed bytes.
+ * @param size - number of bytes at `data`.
+ * @return - number of bytes consumed by this call.
+ * @throws std::runtime_error - if the bytes are neither "\r\n" nor "--".
+ */
+v_int32 StatefulParser::parseNext_AfterBoundary(p_char8 data, v_int32 size) {
+
+  if(m_currBoundaryCharIndex == 0) {
+
+    if(data[0] == '-') {
+      m_finishingBoundary = true;
+    } else if(data[0] != '\r') {
+      throw std::runtime_error("[oatpp::web::mime::multipart::StatefulParser::parseNext_AfterBoundary()]: Error. Invalid char.");
+    }
+
+  }
+
+  if(size > 1 || m_currBoundaryCharIndex == 1) {
+
+    /*
+     * Both values depend on m_currBoundaryCharIndex, so take them BEFORE the index is reset.
+     * Resetting first made this function always report 2 consumed bytes, which swallowed
+     * one byte of the next section whenever the two bytes were split between calls.
+     */
+    const v_int32 consumed = 2 - m_currBoundaryCharIndex;
+    const auto secondChar = data[1 - m_currBoundaryCharIndex];
+
+    if (m_finishingBoundary && secondChar == '-') {
+      m_state = STATE_DONE;
+      m_currBoundaryCharIndex = 0;
+      return consumed;
+    } else if (!m_finishingBoundary && secondChar == '\n') {
+      m_state = STATE_HEADERS;
+      m_currBoundaryCharIndex = 0;
+      m_headerSectionEndAccumulator = 0;
+      return consumed;
+    } else {
+      throw std::runtime_error("[oatpp::web::mime::multipart::StatefulParser::parseNext_AfterBoundary()]: Error. Invalid trailing char.");
+    }
+
+  }
+
+  /* Only the first byte was available - remember that and wait for the second one. */
+  m_currBoundaryCharIndex = 1;
+  return 1;
+
+}
+
+/**
+ * Accumulate the header section of a part until HEADERS_SECTION_END is met, then parse
+ * the accumulated text and report it through onPartHeaders().
+ * @param data - pointer to unparsed bytes.
+ * @param size - number of bytes at `data`.
+ * @return - number of bytes consumed.
+ */
+v_int32 StatefulParser::parseNext_Headers(p_char8 data, v_int32 size) {
+
+  for(v_int32 i = 0; i < size; i ++) {
+
+    /* The accumulator always holds the last four bytes seen, including bytes of previous calls. */
+    m_headerSectionEndAccumulator <<= 8;
+    m_headerSectionEndAccumulator |= data[i];
+
+    if(m_headerSectionEndAccumulator == HEADERS_SECTION_END) {
+
+      /* Bytes before data[i]; data[i] itself is the last byte of the section end marker. */
+      m_headersBuffer.write(data, i);
+
+      auto headersText = m_headersBuffer.toString();
+      m_headersBuffer.clear();
+
+      protocol::http::Status status;
+      parser::Caret caret(headersText);
+      Headers headers;
+
+      protocol::http::Parser::parseHeaders(headers, headersText.getPtr(), caret, status);
+
+      onPartHeaders(headers);
+
+      m_state = STATE_DATA;
+      m_checkForBoundary = true;
+
+      return i + 1;
+
+    }
+
+  }
+
+  /* Section end not reached yet - keep everything for the next call. */
+  m_headersBuffer.write(data, size);
+
+  return size;
+}
+
+/**
+ * Read the body of a part up to the next '\r', which is where a boundary may start.
+ * @param data - pointer to unparsed bytes.
+ * @param size - number of bytes at `data`.
+ * @return - number of bytes consumed (all bytes preceding the boundary candidate,
+ * or the whole chunk if there is no candidate).
+ */
+v_int32 StatefulParser::parseNext_Data(p_char8 data, v_int32 size) {
+
+  parser::Caret caret(data, size);
+
+  bool rFound = caret.findChar('\r');
+  if(rFound && !m_checkForBoundary) {
+    /* This '\r' has already been checked by parseNext_Boundary() and rejected - look for the next one. */
+    caret.inc();
+    rFound = caret.findChar('\r');
+  }
+
+  m_checkForBoundary = true;
+
+  if(rFound) {
+    if(caret.getPosition() > 0) {
+      onPartData(data, caret.getPosition());
+    }
+    m_state = STATE_BOUNDARY;
+    m_readingBody = true;
+    return caret.getPosition();
+  } else {
+    onPartData(data, size);
+  }
+
+  return size;
+}
+
+/**
+ * Parse the next chunk of multipart data. May be called repeatedly with consecutive
+ * chunks of any size; parsing state is kept between calls.
+ * @param data - pointer to the chunk.
+ * @param size - size of the chunk in bytes.
+ * @return - position reached within the chunk. Parsing stops early once STATE_DONE is reached.
+ * @throws std::runtime_error - on malformed data or invalid parser state.
+ */
+v_int32 StatefulParser::parseNext(p_char8 data, v_int32 size) {
+
+  v_int32 pos = 0;
+
+  while(pos < size) {
+
+    switch (m_state) {
+      case STATE_BOUNDARY:
+        pos += parseNext_Boundary(&data[pos], size - pos);
+        break;
+      case STATE_AFTER_BOUNDARY:
+        pos += parseNext_AfterBoundary(&data[pos], size - pos);
+        break;
+      case STATE_HEADERS:
+        pos += parseNext_Headers(&data[pos], size - pos);
+        break;
+      case STATE_DATA:
+        pos += parseNext_Data(&data[pos], size - pos);
+        break;
+      case STATE_DONE:
+        return pos;
+      default:
+        throw std::runtime_error("[oatpp::web::mime::multipart::StatefulParser::parseNext()]: Error. Invalid state.");
+    }
+
+  }
+
+  return pos;
+
+}
+
+}}}}
\ No newline at end of file
diff --git a/src/oatpp/web/mime/multipart/StatefulParser.hpp b/src/oatpp/web/mime/multipart/StatefulParser.hpp
new file mode 100644
index 00000000..9e2e958b
--- /dev/null
+++ b/src/oatpp/web/mime/multipart/StatefulParser.hpp
@@ -0,0 +1,112 @@
+/***************************************************************************
+ *
+ * Project _____ __ ____ _ _
+ * ( _ ) /__\ (_ _)_| |_ _| |_
+ * )(_)( /(__)\ )( (_ _)(_ _)
+ * (_____)(__)(__)(__) |_| |_|
+ *
+ *
+ * Copyright 2018-present, Leonid Stryzhevskyi
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ ***************************************************************************/
+
+#ifndef oatpp_web_mime_multipart_StatefulParser_hpp
+#define oatpp_web_mime_multipart_StatefulParser_hpp
+
+#include "oatpp/core/data/stream/ChunkedBuffer.hpp"
+#include "oatpp/core/data/share/MemoryLabel.hpp"
+#include "oatpp/core/Types.hpp"
+
+#include <unordered_map>
+
+namespace oatpp { namespace web { namespace mime { namespace multipart {
+
+/**
+ * Stateful parser of multipart data stream.
+ * Data is fed chunk by chunk through &l:StatefulParser::parseNext ();.
+ * Chunks may be of any size - parsing state is kept between the calls.
+ */
+class StatefulParser {
+private:
+  /* Parser states. See the corresponding parseNext_*() methods. */
+  static constexpr v_int32 STATE_BOUNDARY = 0;
+  static constexpr v_int32 STATE_AFTER_BOUNDARY = 1;
+  static constexpr v_int32 STATE_HEADERS = 2;
+  static constexpr v_int32 STATE_DATA = 3;
+  static constexpr v_int32 STATE_DONE = 4;
+private:
+  /* "\r\n\r\n" packed into 32 bits - the marker which ends the header section of a part. */
+  static constexpr v_int32 HEADERS_SECTION_END = ('\r' << 24) | ('\n' << 16) | ('\r' << 8) | ('\n');
+public:
+  /**
+   * Typedef for headers map. Headers map key is case-insensitive.
+   * `std::unordered_map` of &id:oatpp::data::share::StringKeyLabelCI_FAST; and &id:oatpp::data::share::StringKeyLabel;.
+   */
+  typedef std::unordered_map<oatpp::data::share::StringKeyLabelCI_FAST, oatpp::data::share::StringKeyLabel> Headers;
+public:
+
+  /**
+   * Listener for parsing events.
+   */
+  class Listener {
+  public:
+    /**
+     * Virtual destructor - Listener is a polymorphic interface.
+     */
+    virtual ~Listener() = default;
+
+    /**
+     * Called for a part once its headers are parsed.
+     * @param name - name of the part.
+     * @param partHeaders - headers of the part.
+     */
+    virtual void onPartHeaders(const oatpp::String& name, const Headers& partHeaders) = 0;
+
+    /**
+     * Called for a piece of part body.
+     * @param name - name of the part.
+     * @param data - pointer to data.
+     * @param size - size of the data in bytes.
+     */
+    virtual void onPartData(const oatpp::String& name, p_char8 data, oatpp::data::v_io_size size) = 0;
+  };
+
+private:
+
+  v_int32 m_state;                  // One of STATE_* constants.
+  v_int32 m_currPartIndex;          // Number of parts whose headers have been read so far.
+  v_int32 m_currBoundaryCharIndex;  // Progress of a match which spans several parseNext() calls.
+  bool m_checkForBoundary;          // If false, parseNext_Data() steps over the first '\r' it finds.
+  bool m_finishingBoundary;         // Set when '-' follows a boundary.
+  bool m_readingBody;               // True while a boundary candidate is being checked inside a part body.
+
+  /* Last four bytes of the header section seen so far. Compared with HEADERS_SECTION_END. */
+  v_word32 m_headerSectionEndAccumulator;
+
+  oatpp::String m_firstBoundarySample;  // "--" + boundary
+  oatpp::String m_nextBoundarySample;   // "\r\n--" + boundary
+  oatpp::String m_currPartName;
+
+  /*
+   * Headers of the part are stored in the buffer and are parsed as one chunk.
+   */
+  oatpp::data::stream::ChunkedBuffer m_headersBuffer;
+
+private:
+
+  void onPartHeaders(const Headers& partHeaders);
+  void onPartData(p_char8 data, v_int32 size);
+
+private:
+
+  /* One method per parser state. Each returns the number of bytes it has consumed. */
+  v_int32 parseNext_Boundary(p_char8 data, v_int32 size);
+  v_int32 parseNext_AfterBoundary(p_char8 data, v_int32 size);
+  v_int32 parseNext_Headers(p_char8 data, v_int32 size);
+  v_int32 parseNext_Data(p_char8 data, v_int32 size);
+
+public:
+
+  /**
+   * Constructor.
+   * @param boundary - value of multipart boundary, without the leading "--".
+   */
+  StatefulParser(const oatpp::String& boundary)
+    : m_state(STATE_BOUNDARY)
+    , m_currPartIndex(0)
+    , m_currBoundaryCharIndex(0)
+    , m_checkForBoundary(true)
+    , m_finishingBoundary(false)
+    , m_readingBody(false)
+    , m_headerSectionEndAccumulator(0)
+    , m_firstBoundarySample("--" + boundary)
+    , m_nextBoundarySample("\r\n--" + boundary)
+  {}
+
+  /**
+   * Parse next chunk of data.
+   * @param data - pointer to the chunk.
+   * @param size - size of the chunk in bytes.
+   * @return - position reached within the chunk.
+   */
+  v_int32 parseNext(p_char8 data, v_int32 size);
+
+
+};
+
+}}}}
+
+#endif // oatpp_web_mime_multipart_StatefulParser_hpp
diff --git a/src/oatpp/web/protocol/http/incoming/RequestHeadersReader.cpp b/src/oatpp/web/protocol/http/incoming/RequestHeadersReader.cpp
index 540e8d69..11c2b285 100644
--- a/src/oatpp/web/protocol/http/incoming/RequestHeadersReader.cpp
+++ b/src/oatpp/web/protocol/http/incoming/RequestHeadersReader.cpp
@@ -31,8 +31,7 @@ namespace oatpp { namespace web { namespace protocol { namespace http { namespac
 data::v_io_size RequestHeadersReader::readHeadersSection(const std::shared_ptr& connection, oatpp::data::stream::OutputStream* bufferStream, Result& result) {
-
- v_word32 sectionEnd = ('\r' << 24) | ('\n' << 16) | ('\r' << 8) | ('\n');
+
 v_word32 accumulator = 0; v_int32 progress = 0; data::v_io_size res;
@@ -53,7 +52,7 @@ data::v_io_size RequestHeadersReader::readHeadersSection(const std::shared_ptr
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ ***************************************************************************/
+
+#include "StatefulParserTest.hpp"
+
+#include "oatpp/web/mime/multipart/StatefulParser.hpp"
+
+namespace oatpp { namespace test { namespace web { namespace mime { namespace multipart {
+
+namespace {
+
+  /*
+   * Three-part multipart body with boundary "12345".
+   * Body of part2 contains "\r\n--1234" - a false start of the boundary.
+   */
+  static const char* TEST_DATA_1 =
+    "--12345\r\n"
+    "Content-Disposition: form-data; name=\"part1\"\r\n"
+    "\r\n"
+    "part1-value\r\n"
+    "--12345\r\n"
+    "Content-Disposition: form-data; name=\"part2\" filename=\"filename.txt\"\r\n"
+    "\r\n"
+    "--part2-file-content-line1\r\n"
+    "--1234part2-file-content-line2\r\n"
+    "--12345\r\n"
+    "Content-Disposition: form-data; name=\"part3\" filename=\"filename.jpg\"\r\n"
+    "\r\n"
+    "part3-file-binary-data\r\n"
+    "--12345--\r\n"
+    ;
+
+}
+
+/*
+ * Feed the same payload to two fresh parsers: first one byte per call, then as a single chunk.
+ */
+void StatefulParserTest::onRun() {
+
+  oatpp::String payload = TEST_DATA_1;
+
+  {
+    /* Pass 1 - one byte per parseNext() call. */
+    oatpp::web::mime::multipart::StatefulParser byteWiseParser("12345");
+
+    v_int32 offset = 0;
+    while (offset < payload->getSize()) {
+      byteWiseParser.parseNext(&payload->getData()[offset], 1);
+      offset++;
+    }
+  }
+
+  OATPP_LOGI(TAG, "Test2.................................................");
+
+  {
+    /* Pass 2 - the whole payload in one parseNext() call. */
+    oatpp::web::mime::multipart::StatefulParser wholeChunkParser("12345");
+    wholeChunkParser.parseNext(payload->getData(), payload->getSize());
+  }
+
+
+}
+
+}}}}}
diff --git a/test/oatpp/web/mime/multipart/StatefulParserTest.hpp b/test/oatpp/web/mime/multipart/StatefulParserTest.hpp
new file mode 100644
index 00000000..7d2468b1
--- /dev/null
+++ b/test/oatpp/web/mime/multipart/StatefulParserTest.hpp
@@ -0,0 +1,42 @@
+/***************************************************************************
+ *
+ * Project _____ __ ____ _ _
+ * ( _ ) /__\ (_ _)_| |_ _| |_
+ * )(_)( /(__)\ )( (_ _)(_ _)
+ * (_____)(__)(__)(__) |_| |_|
+ *
+ *
+ * Copyright 2018-present, Leonid Stryzhevskyi
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ ***************************************************************************/
+
+#ifndef oatpp_test_web_mime_multipart_StatefulParserTest_hpp
+#define oatpp_test_web_mime_multipart_StatefulParserTest_hpp
+
+#include "oatpp-test/UnitTest.hpp"
+
+namespace oatpp { namespace test { namespace web { namespace mime { namespace multipart {
+
+/**
+ * Test for the stateful multipart parser.
+ */
+class StatefulParserTest : public UnitTest {
+public:
+
+  StatefulParserTest()
+    : UnitTest("TEST[web::mime::multipart::StatefulParserTest]")
+  {}
+
+  void onRun() override;
+
+};
+
+}}}}}
+
+#endif /* oatpp_test_web_mime_multipart_StatefulParserTest_hpp */