Rewrite message parsing using string_view

It's a bit too early yet to require C++17 so the implementation from
BackportCpp (string_view-standalone) is used instead.

Fixes https://crbug.com/oss-fuzz/34413 - slow message parsing on huge
messages. In the real world, messages can't be that big, because CSocket
enforces a line length limit.

This can be considered a regression in 1.7.0, because before it, instead
of gathering params into a vector, the code was searching for the 1st word
in the string, then the 2nd word, then the 3rd word, starting from the
beginning each time. It was not very efficient, but the number of passes
over the string was limited.
This commit is contained in:
Alexey Sokolov
2021-05-21 08:57:09 +01:00
parent e0ffdddd47
commit fd71a69fab
7 changed files with 1479 additions and 21 deletions
-1
View File
@@ -284,7 +284,6 @@ if(append_git_version)
endif()
file(GLOB csocket_files LIST_DIRECTORIES FALSE
"${PROJECT_SOURCE_DIR}/third_party/Csocket/Csocket.*")
if(csocket_files STREQUAL "")
+1
View File
@@ -16,6 +16,7 @@ ZNC includes code from jQuery UI (http://jqueryui.com/), licensed under the MIT
ZNC includes code from Selectize (http://brianreavis.github.io/selectize.js/), licensed under the Apache License 2.0.
ZNC includes modified code from CMakeFindFrameworks.cmake by Kitware, Inc., licensed under BSD License.
ZNC includes modified code from TestLargeFiles.cmake, licensed under Boost Software License, Version 1.0.
ZNC includes code from BackportCpp (https://github.com/bitwizeshift/string_view-standalone), licensed under the MIT license.
ZNC is developed by these people:
+1 -1
View File
@@ -161,7 +161,7 @@ class CMessage {
};
CString ToString(unsigned int uFlags = IncludeAll) const;
void Parse(CString sMessage);
void Parse(const CString& sMessage);
// Implicit and explicit conversion to a subclass reference.
#ifndef SWIG
+1
View File
@@ -60,6 +60,7 @@ add_custom_target(version
add_dependencies(znclib copy_csocket_h copy_csocket_cc version)
set(znc_include_dirs
"$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/third_party/bpstd>"
"$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>"
"$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/include>"
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_FULL_INCLUDEDIR}>")
+42 -19
View File
@@ -16,6 +16,7 @@
#include <znc/Message.h>
#include <znc/Utils.h>
#include "bpstd/string_view.hpp"
CMessage::CMessage(const CString& sMessage) {
Parse(sMessage);
@@ -157,19 +158,43 @@ CString CMessage::ToString(unsigned int uFlags) const {
return sMessage;
}
void CMessage::Parse(CString sMessage) {
void CMessage::Parse(const CString& sMessage) {
const char* begin = sMessage.c_str();
const char* const end = begin + sMessage.size();
// Returns the next space-delimited word starting at `begin` as a view into
// sMessage's buffer (no copy is made), then advances `begin` past that word
// and past any run of spaces following it. Once `begin` has reached `end`,
// the returned view is empty and `begin` is left unchanged.
auto next_word = [&]() {
// Find the end of the first word
const char* p = begin;
while (p < end && *p != ' ') ++p;
bpstd::string_view result(begin, p - begin);
begin = p;
// Prepare for the following word
while (begin < end && *begin == ' ') ++begin;
return result;
};
// <tags>
m_mssTags.clear();
if (sMessage.StartsWith("@")) {
VCString vsTags;
sMessage.Token(0).TrimPrefix_n("@").Split(";", vsTags, false);
for (const CString& sTag : vsTags) {
CString sKey = sTag.Token(0, false, "=", true);
CString sValue = sTag.Token(1, true, "=", true);
if (begin < end && *begin == '@') {
bpstd::string_view svTags = next_word().substr(1);
std::vector<bpstd::string_view> vsTags;
// Split by ';'
while (true) {
auto delim = svTags.find_first_of(';');
if (delim == bpstd::string_view::npos) {
vsTags.push_back(svTags);
break;
}
vsTags.push_back(svTags.substr(0, delim));
svTags = svTags.substr(delim + 1);
}
// Save key and value
for (bpstd::string_view svTag : vsTags) {
auto delim = svTag.find_first_of('=');
CString sKey = std::string(delim == bpstd::string_view::npos ? svTag : svTag.substr(0, delim));
CString sValue = delim == bpstd::string_view::npos ? std::string() : std::string(svTag.substr(delim + 1));
m_mssTags[sKey] =
sValue.Escape(CString::EMSGTAG, CString::CString::EASCII);
}
sMessage = sMessage.Token(1, true);
}
// <message> ::= [':' <prefix> <SPACE> ] <command> <params> <crlf>
@@ -183,26 +208,24 @@ void CMessage::Parse(CString sMessage) {
// NUL or CR or LF>
// <prefix>
if (sMessage.TrimPrefix(":")) {
m_Nick.Parse(sMessage.Token(0));
sMessage = sMessage.Token(1, true);
if (begin < end && *begin == ':') {
m_Nick.Parse(std::string(next_word().substr(1)));
}
// <command>
m_sCommand = sMessage.Token(0);
sMessage = sMessage.Token(1, true);
m_sCommand = std::string(next_word());
// <params>
m_bColon = false;
m_vsParams.clear();
while (!sMessage.empty()) {
m_bColon = sMessage.TrimPrefix(":");
while (begin < end) {
m_bColon = *begin == ':';
if (m_bColon) {
m_vsParams.push_back(sMessage);
sMessage.clear();
++begin;
m_vsParams.push_back(std::string(begin, end - begin));
begin = end;
} else {
m_vsParams.push_back(sMessage.Token(0));
sMessage = sMessage.Token(1, true);
m_vsParams.push_back(std::string(next_word()));
}
}
+10
View File
@@ -22,6 +22,7 @@
using ::testing::IsEmpty;
using ::testing::ContainerEq;
using ::testing::ElementsAre;
using ::testing::SizeIs;
TEST(MessageTest, SetParam) {
CMessage msg;
@@ -609,3 +610,12 @@ TEST(MessageTest, ParseWithoutSourceAndTags) {
EXPECT_EQ(msg.GetCommand(), "COMMAND");
EXPECT_EQ(msg.GetParams(), VCString());
}
// Feeds the parser a line made of one million "a " words and checks that
// it yields 999999 params.
TEST(MessageTest, HugeParse) {
    constexpr int kWordCount = 1000000;
    CString sHugeLine;
    for (int iRemaining = kWordCount; iRemaining > 0; --iRemaining) {
        sHugeLine += "a ";
    }
    CMessage parsed(sHugeLine);
    EXPECT_THAT(parsed.GetParams(), SizeIs(999999));
}
File diff suppressed because it is too large Load Diff