From fd33822a6e80022bbde181210c6b138928f7c351 Mon Sep 17 00:00:00 2001 From: Tom Vercauteren Date: Sat, 17 Feb 2024 11:11:48 +0100 Subject: [PATCH 1/2] ported unit tests from https://github.com/matheusgomes28/base64pp - Validation of encoded strings is made more stringent, i.e. throw an error rather than try to decode non-compliant input --- CMakeLists.txt | 26 ++++ include/base64.hpp | 15 +- test/base64_tests.cpp | 330 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 369 insertions(+), 2 deletions(-) create mode 100644 test/base64_tests.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index bfb94aa..8a8e7e1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -29,3 +29,29 @@ target_include_directories(roundtrip_test PRIVATE include) enable_testing() add_test(NAME roundtrip_test COMMAND roundtrip_test) + +# Add some more tests +include(FetchContent) +FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG 750d67d809700ae8fca6d610f7b41b71aa161808 + SYSTEM +) +# For Windows: Prevent overriding the parent project's compiler/linker settings +set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) +FetchContent_MakeAvailable(googletest) + +set_target_properties(gtest PROPERTIES CXX_CLANG_TIDY "") +set_target_properties(gtest_main PROPERTIES CXX_CLANG_TIDY "") +set_target_properties(gmock PROPERTIES CXX_CLANG_TIDY "") +set_target_properties(gmock_main PROPERTIES CXX_CLANG_TIDY "") + +add_executable(base64_tests test/base64_tests.cpp) +target_include_directories(base64_tests PRIVATE include) + +target_link_libraries(base64_tests PRIVATE GTest::gtest GTest::gtest_main) + +add_test(NAME base64_tests COMMAND base64_tests) + + diff --git a/include/base64.hpp b/include/base64.hpp index 6d5d92b..c1a8c86 100644 --- a/include/base64.hpp +++ b/include/base64.hpp @@ -70,7 +70,17 @@ inline OutputBuffer decode_into(std::string_view data) { size_t counter = 0; uint32_t bit_stream = 0; OutputBuffer decoded; - 
decoded.reserve(std::size(data)); + const size_t encoded_size = std::size(data); + if ((encoded_size % 4) != 0) { + throw std::runtime_error{ + "Invalid base64 encoded data - Size not divisible by 4"}; + } + const size_t numlasteqs = std::count(data.rbegin(), data.rbegin() + 4, '='); + if (numlasteqs > 2) { + throw std::runtime_error{ + "Invalid base64 encoded data - Found more than 2 padding signs"}; + } + decoded.reserve(encoded_size); for (std::string_view::value_type c : data) { auto const num_val = base64_chars.find(c); if (num_val != std::string::npos) { @@ -87,7 +97,8 @@ inline OutputBuffer decode_into(std::string_view data) { bit_stream = 0; } } else if (c != '=') { - throw std::runtime_error{"Invalid base64 encoded data"}; + throw std::runtime_error{ + "Invalid base64 encoded data - Found invalid character"}; } counter++; } diff --git a/test/base64_tests.cpp b/test/base64_tests.cpp new file mode 100644 index 0000000..4c931b4 --- /dev/null +++ b/test/base64_tests.cpp @@ -0,0 +1,330 @@ +// Test suite ported from https://github.com/matheusgomes28/base64pp +#include + +#include +#include +#include + +#include "../include/base64.hpp" + +// NOLINTNEXTLINE +TEST(Base64Encode, EncodesEmpty) { + std::string const expected{}; + std::string const actual{base64::to_base64({})}; + ASSERT_EQ(expected, actual); +} + +// NOLINTNEXTLINE +TEST(Base64Encode, EncodesThreeBytesZeros) { + std::array const input{0x00, 0x00, 0x00}; + auto const expected{"AAAA"}; + auto const actual{base64::encode_into(begin(input), end(input))}; + ASSERT_EQ(expected, actual); +} + +// NOLINTNEXTLINE +TEST(Base64Encode, EncodesThreeBytesRandom) { + std::array const input{0xFE, 0xE9, 0x72}; + auto const expected{"/uly"}; + auto const actual{base64::encode_into(begin(input), end(input))}; + ASSERT_EQ(expected, actual); +} + +// NOLINTNEXTLINE +TEST(Base64Encode, EncodesTwoBytes) { + std::array const input{0x00, 0x00}; + auto const expected{"AAA="}; + auto const 
actual{base64::encode_into(begin(input), end(input))}; + ASSERT_EQ(expected, actual); +} + +// NOLINTNEXTLINE +TEST(Base64Encode, EncodesOneByte) { + std::array const input{0x00}; + auto const expected{"AA=="}; + auto const actual{base64::encode_into(begin(input), end(input))}; + ASSERT_EQ(expected, actual); +} + +// NOLINTNEXTLINE +TEST(Base64Encode, EncodesFourBytes) { + std::array const input{0x74, 0x68, 0x65, 0x20}; + auto const expected{"dGhlIA=="}; + auto const actual{base64::encode_into(begin(input), end(input))}; + ASSERT_EQ(expected, actual); +} + +// NOLINTNEXTLINE +TEST(Base64Encode, EncodesFiveBytes) { + std::array const input{0x20, 0x62, 0x72, 0x6f, 0x77}; + auto const expected{"IGJyb3c="}; + auto const actual{base64::encode_into(begin(input), end(input))}; + ASSERT_EQ(actual, expected); +} + +// NOLINTNEXTLINE +TEST(Base64Encode, EncodesSixBytes) { + std::array const input{0x20, 0x6a, 0x75, 0x6d, 0x70, 0x73}; + auto const expected{"IGp1bXBz"}; + auto const actual{base64::encode_into(begin(input), end(input))}; + ASSERT_EQ(actual, expected); +} + +// NOLINTNEXTLINE +TEST(Base64Encode, EncodesBrownFox) { + std::array const input{ + 0x74, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63, 0x6b, 0x20, 0x62, + 0x72, 0x6f, 0x77, 0x6e, 0x20, 0x66, 0x6f, 0x78, 0x20, 0x6a, 0x75, + 0x6d, 0x70, 0x73, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x74, 0x68, + 0x65, 0x20, 0x6c, 0x61, 0x7a, 0x79, 0x20, 0x64, 0x6f, 0x67}; + + auto const expected{ + "dGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIHRoZSBsYXp5IGRvZw=="}; + auto const actual{base64::encode_into(begin(input), end(input))}; + ASSERT_EQ(actual, expected); +} + +// NOLINTNEXTLINE +TEST(Base64Encode, EncodesBrownFastFoxNullInMiddle) { + std::array const input{ + 0x74, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63, 0x6b, 0x21, 0x20, 0x62, + 0x72, 0x6f, 0x77, 0x6e, 0x20, 0x66, 0x6f, 0x78, 0x20, 0x6a, 0x75, 0x6d, + 0x70, 0x73, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x74, 0x68, 0x65, 0x00, + 0x20, 0x6c, 0x61, 0x7a, 0x79, 0x20, 0x64, 0x6f, 
0x67}; + + auto const expected{ + "dGhlIHF1aWNrISBicm93biBmb3gganVtcHMgb3ZlciB0aGUAIGxhenkgZG9n"}; + auto const actual{base64::encode_into(begin(input), end(input))}; + ASSERT_EQ(actual, expected); +} + +// NOLINTNEXTLINE +TEST(Base64Decode, FailDecodeOneString) { + std::string const input{"1"}; + ASSERT_THROW(base64::from_base64(input), std::runtime_error); +} + +// NOLINTNEXTLINE +TEST(Base64Decode, FailDecodeOneStringPadded) { + std::string const input{"1==="}; + ASSERT_THROW(base64::from_base64(input), std::runtime_error); +} + +// NOLINTNEXTLINE +TEST(Base64Decode, FailDecodeOneCharRemaining) { + std::string const input{"something"}; + ASSERT_THROW(base64::from_base64(input), std::runtime_error); +} + +// NOLINTNEXTLINE +TEST(Base64Decode, FailDecodeNonSize4Bigger) { + std::string const input{"SomethingEntirelyDifferent"}; + ASSERT_THROW(base64::from_base64(input), std::runtime_error); + // For the record - expected decoding if relaxed checks + // std::vector const expected{0x4A, 0x89, 0x9E, 0xB6, 0x18, + // 0xA7, 0x80, 0x49, 0xED, 0x8A, 0xB7, 0xA5, + // 0xC8, 0x38, 0x9F, 0x7D, 0xEA, 0xDE, 0x9E}; +} + +// NOLINTNEXTLINE +TEST(Base64Decode, FailDecodeNonBase64Short) { + std::string const input{"a aa"}; + ASSERT_THROW(base64::from_base64(input), std::runtime_error); +} + +// NOLINTNEXTLINE +TEST(Base64Decode, FailDecodeNonBase64Longer) { + std::string const input{"aaa`aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}; + ASSERT_THROW(base64::from_base64(input), std::runtime_error); +} + +// NOLINTNEXTLINE +TEST(Base64Decode, DecodesMissingTwoPads0) { + std::string const input{"12"}; + ASSERT_THROW(base64::from_base64(input), std::runtime_error); + // For the record - expected decoding if relaxed checks + // std::vector const expected{0xD7}; +} + +// NOLINTNEXTLINE +TEST(Base64Decode, DecodesMissingTwoPads1) { + std::string const input = "AA"; + ASSERT_THROW(base64::from_base64(input), std::runtime_error); + // For the record - expected decoding if relaxed checks + // 
std::vector const expected{0x00}; +} + +// NOLINTNEXTLINE +TEST(Base64Decode, DecodesMissingOnePad0) { + std::string const input = "AAA"; + ASSERT_THROW(base64::from_base64(input), std::runtime_error); + // For the record - expected decoding if relaxed checks + // std::vector const expected{0x00, 0x00}; +} + +// NOLINTNEXTLINE +TEST(Base64Decode, DecodesMissingOnePad1) { + std::string const input{"12a"}; + ASSERT_THROW(base64::from_base64(input), std::runtime_error); + // For the record - expected decoding if relaxed checks + // std::vector const expected{0xD7, 0x66}; +} + +// NOLINTNEXTLINE +TEST(Base64Decode, DecodesMissingIssueExample) { + std::string const input = "eyJuYW1lIjoiSm9obiBEb2UifQ"; + ASSERT_THROW(base64::from_base64(input), std::runtime_error); + // For the record - expected decoding if relaxed checks + // std::string const expected_str = R"({"name":"John Doe"})"; + // See https://github.com/matheusgomes28/base64pp/issues/84 +} + +// NOLINTNEXTLINE +TEST(Base64Decode, DecodesEmptyString) { + std::string const input{}; + std::string expected{}; + auto const actual{base64::from_base64("")}; + + ASSERT_EQ(expected, actual); +} + +// NOLINTNEXTLINE +TEST(Base64Decode, DecodesZeroArray) { + std::string const input{"AAAA"}; + std::vector const expected{0x00, 0x00, 0x00}; + auto const actual{base64::from_base64(input)}; + + ASSERT_EQ(actual, std::string(expected.begin(), expected.end())); +} + +// NOLINTNEXTLINE +TEST(Base64Decode, DecodesZeroArrayTwice) { + std::string const input{"AAAAAAAA"}; + std::vector const expected{0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + auto const actual{base64::from_base64(input)}; + + ASSERT_EQ(actual, std::string(expected.begin(), expected.end())); +} + +// NOLINTNEXTLINE +TEST(Base64Decode, DecodesZeroArrayOneByte) { + std::string const input{"AA=="}; + std::vector const expected{0x00}; + auto const actual{base64::from_base64(input)}; + + ASSERT_EQ(actual, std::string(expected.begin(), expected.end())); +} + +// NOLINTNEXTLINE 
+TEST(Base64Decode, DecodesZeroArrayTwoBytes) { + std::string const input{"AAA="}; + std::vector const expected{0x00, 0x00}; + auto const actual{base64::from_base64(input)}; + + ASSERT_EQ(actual, std::string(expected.begin(), expected.end())); +} + +// NOLINTNEXTLINE +TEST(Base64Decode, DecodesQuickFox) { + std::string const input{ + "VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIHRoZSBsYXp5IGRvZw=="}; + std::vector const expected{ + 0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63, 0x6b, 0x20, 0x62, + 0x72, 0x6f, 0x77, 0x6e, 0x20, 0x66, 0x6f, 0x78, 0x20, 0x6a, 0x75, + 0x6d, 0x70, 0x73, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x74, 0x68, + 0x65, 0x20, 0x6c, 0x61, 0x7a, 0x79, 0x20, 0x64, 0x6f, 0x67}; + auto const actual{base64::from_base64(input)}; + ASSERT_EQ(actual, std::string(expected.begin(), expected.end())); +} + +// NOLINTNEXTLINE +TEST(Base64RoundTripTests, AllPossibleBytes) { + std::vector all_possible_bytes; + for (std::size_t i = 0; i <= 255; ++i) { + all_possible_bytes.push_back(static_cast(i)); + } + + auto const encode_string = base64::encode_into( + begin(all_possible_bytes), end(all_possible_bytes)); + auto const decoded_bytes = base64::from_base64(encode_string); + // ASSERT_TRUE(decoded_bytes); + ASSERT_EQ(std::string(all_possible_bytes.begin(), all_possible_bytes.end()), + decoded_bytes); +} + +// NOLINTNEXTLINE +TEST(Base64RoundTripTests, ExhaustiveTests) { + std::vector const base64_strings = { + "YW55IGNhcm5hbCBwbGVhcw==", + "bGVnYWwgcGFzcw==", + "dGVzdCBzdHJpbmc=", + "bGVnYWwgcHJvdmlkZXI=", + "ZW5vdWdoIHRoZSBzYW1lIG9mIHRoZSBwbGFjZQ==", + "YW5vdGhlciB0aGUgc3RyYWlnaHQ=", + "d2FzIG1lIGFkZHJlc3MgcHJvdmlkZXI=", + "YWJvdXQgdGhlIG1hc3RlciBvZiB0aGUgZGFtYWdl", + "ZW50aXJlIHRoYXQgYnJvdWdodCBvZiB0aGUgbW9uZXk=", + "bGVnYWwgc2VjdXJpdHk=", + "YmFzaWMgZ29vZCBvZiB0aGUgcGFkIHN0cmluZw==", + "ZGVsZXRlIHN0cmluZyBvZiB0aGUgc3RyYWlnaHQ=", + "YnJvdWdodCBvZiB0aGUgcGFkIGZvbGRlciBvZiB0aGUgZGFtYWdl", + "aW50ZXJmYWNlIHN0cmluZw==", + "Y29uc29sZS1tZS1jb21wYW55", + 
"aW5mb3JtYXRpb24tbWVkaWE=", + "c3RhdHVzLXNlY3VyZQ==", + "Y3JlYXRlLWNvbXBhbnktc3RyaW5n", + "b3JkZXItbGVhZGVy", + "Y2F0YWxvZy1wcm9maWxl", + "dGVzdC1jb25zdWx0aW5n", + "YnJvdWdodC1sZWFkZXI=", + "YXNzaWduLW1lY2hhbmlzbQ==", + "bGVnYWwtY29udGFpbmVy", + "ZW1haWwtY29udGFpbmVy", + "aW5zdGFuY2UtY29udGFpbmVy", + "dGVzdC1jb21wYW55LWFuZC1wcm9maWxl", + "YmFzZTY0LWJhc2U=", + "cGFzc3dvcmQ=", + "Zm9vYmFy", + "Y29vbC1iYXNl", + "YmFzZTY0LXNlY3VyZQ==", + "aW50ZXJ2YWw=", + "dGhlLW1hc3Rlci1vZi10aGUtZGFtYWdl", + "c2FtZS1wbGFjZS1vZi10aGUtZGFtYWdl", + "aGFzaC1zb21ldGhpbmc="}; + + for (auto const& b64_string : base64_strings) { + auto const decoded = base64::from_base64(b64_string); + // ASSERT_TRUE(decoded); + + auto const encoded_round_trip = + base64::encode_into(begin(decoded), end(decoded)); + ASSERT_EQ(encoded_round_trip, b64_string); + } +} + +// NOLINTNEXTLINE +TEST(Base64OverloadTests, EncodesString1) { + std::array, 11> const test_cases = { + {{"", ""}, + {"Hello, World!", "SGVsbG8sIFdvcmxkIQ=="}, + {"abcdefghijklmnopqrstuvwxyz0123456789\\`!\"£$%^&*()_+", + "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXowMTIzNDU2Nzg5XGAhIsKjJCVeJiooKV8r"}, + {"Base64 encoding", "QmFzZTY0IGVuY29kaW5n"}, + {"I love coding", "SSBsb3ZlIGNvZGluZw=="}, + {"C++23 is awesome", "QysrMjMgaXMgYXdlc29tZQ=="}, + {"This is a sample", "VGhpcyBpcyBhIHNhbXBsZQ=="}, + {"Base64 is useful", "QmFzZTY0IGlzIHVzZWZ1bA=="}, + {"Encode and decode", "RW5jb2RlIGFuZCBkZWNvZGU="}, + {"Data encryption", "RGF0YSBlbmNyeXB0aW9u"}, + {"Th3 Quickk Br0wn f0x", "VGgzIFF1aWNrayAgQnIwd24gZjB4"}}}; + + for (auto const& [input, expected] : test_cases) { + auto const actual = base64::to_base64(input); + ASSERT_EQ(actual, expected); + } +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} From e3d6ef3019a138feb23b9424997c7f2b84019df2 Mon Sep 17 00:00:00 2001 From: Tom Vercauteren Date: Sun, 18 Feb 2024 22:33:44 +0100 Subject: [PATCH 2/2] pointing to benchmark of various base64 implementations --- 
README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d2d78db..771c275 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,12 @@ int main() { ``` ## Notes -This library relies on C++17. +This library relies on C++17. -A different, unrelated C++20 library for base64 encoding/decoding can be found at https://github.com/matheusgomes28/base64pp +A benchmark of various C/C++ base64 implementations can be found at https://github.com/gaspardpetit/base64/ + +There are many implementations available and it may be worth looking at those. For example, a different, unrelated, C++20 library for base64 encoding/decoding can be found at https://github.com/matheusgomes28/base64pp + +There is also an implementation that works with older C++ versions available at https://github.com/ReneNyffenegger/cpp-base64 + +There are also some more generic libraries available such as https://github.com/azawadzki/base-n