diff options
-rw-r--r-- | src/lib/dns/master_lexer.cc | 53
-rw-r--r-- | src/lib/dns/master_lexer_state.h | 3
-rw-r--r-- | src/lib/dns/tests/master_lexer_state_unittest.cc | 87
3 files changed, 136 insertions, 7 deletions
diff --git a/src/lib/dns/master_lexer.cc b/src/lib/dns/master_lexer.cc index d128159fe4..fc33c96385 100644 --- a/src/lib/dns/master_lexer.cc +++ b/src/lib/dns/master_lexer.cc @@ -193,7 +193,6 @@ public: } }; -// Currently this is provided mostly as a place holder class String : public State { public: String() {} @@ -201,6 +200,13 @@ public: virtual const State* handle(MasterLexer& lexer) const; }; +class QString : public State { +public: + QString() {} + virtual ~QString() {} // see the base class for the destructor + virtual const State* handle(MasterLexer& lexer) const; +}; + // We use a common instance of a each state in a singleton-like way to save // construction overhead. They are not singletons in its strict sense as // we don't prohibit direct construction of these objects. But that doesn't @@ -208,6 +214,7 @@ public: // this file. const CRLF CRLF_STATE; const String STRING_STATE; +const QString QSTRING_STATE; } const State& @@ -217,6 +224,8 @@ State::getInstance(ID state_id) { return (CRLF_STATE); case String: return (STRING_STATE); + case QString: + return (QSTRING_STATE); } // This is a bug of the caller, and this method is only expected to be @@ -261,6 +270,9 @@ State::start(MasterLexer& lexer, MasterLexer::Options options) { if (paren_count == 0) { // check if we are in () (see above) return (&CRLF_STATE); } + } else if (c == '"' && (options & MasterLexer::QSTRING) != 0) { + lexerimpl.last_was_eol_ = false; + return (&QSTRING_STATE); } else if (c == '(') { lexerimpl.last_was_eol_ = false; ++paren_count; @@ -284,7 +296,6 @@ State::start(MasterLexer& lexer, MasterLexer::Options options) { const State* String::handle(MasterLexer& lexer) const { std::vector<char>& data = getLexerImpl(lexer)->data_; - MasterLexer::Token& token = getLexerImpl(lexer)->token_; data.clear(); bool escaped = false; @@ -298,14 +309,48 @@ String::handle(MasterLexer& lexer) const { (!escaped && (c == ' ' || c == '\t' || c == '(' || c == ')'))) { 
getLexerImpl(lexer)->source_->ungetChar(); - token = MasterLexer::Token(&data.at(0), data.size()); + getLexerImpl(lexer)->token_ = + MasterLexer::Token(&data.at(0), data.size()); return (NULL); } - escaped = (!escaped && (c == '\\')); + escaped = (c == '\\' && !escaped); data.push_back(c); } } +const State* +QString::handle(MasterLexer& lexer) const { + MasterLexer::Token& token = getLexerImpl(lexer)->token_; + std::vector<char>& data = getLexerImpl(lexer)->data_; + data.clear(); + + bool escaped = false; + while (true) { + const int c = getLexerImpl(lexer)->source_->getChar(); + if (c == InputSource::END_OF_STREAM) { + token = Token(Token::UNEXPECTED_END); + return (NULL); + } else if (c == '"') { + if (escaped) { + // found escaped '"'. overwrite the preceding backslash. + assert(!data.empty()); + escaped = false; + data.back() = '"'; + } else { + token = MasterLexer::Token(&data.at(0), data.size(), true); + return (NULL); + } + } else if (c == '\n' && !escaped) { + getLexerImpl(lexer)->source_->ungetChar(); + token = Token(Token::UNBALANCED_QUOTES); + return (NULL); + } else { + escaped = (c == '\\' && !escaped); + data.push_back(c); + } + } +} + } // namespace master_lexer_internal } // end of namespace dns diff --git a/src/lib/dns/master_lexer_state.h b/src/lib/dns/master_lexer_state.h index 1130f3334f..830e60a3bb 100644 --- a/src/lib/dns/master_lexer_state.h +++ b/src/lib/dns/master_lexer_state.h @@ -98,7 +98,8 @@ public: /// a way to get an instance of a specific state. enum ID { CRLF, ///< Just seen a carriage-return character - String ///< Handling a string token + String, ///< Handling a string token + QString ///< Handling a quoted string token }; /// \brief Returns a \c State instance of the given state. 
diff --git a/src/lib/dns/tests/master_lexer_state_unittest.cc b/src/lib/dns/tests/master_lexer_state_unittest.cc index 48aceacc3f..5c3be03009 100644 --- a/src/lib/dns/tests/master_lexer_state_unittest.cc +++ b/src/lib/dns/tests/master_lexer_state_unittest.cc @@ -32,6 +32,7 @@ protected: s_null(NULL), s_crlf(State::getInstance(State::CRLF)), s_string(State::getInstance(State::String)), + s_qstring(State::getInstance(State::QString)), options(MasterLexer::NONE), orig_options(options) {} @@ -42,6 +43,7 @@ protected: const State* const s_null; const State& s_crlf; const State& s_string; + const State& s_qstring; std::stringstream ss; MasterLexer::Options options, orig_options; }; @@ -254,9 +256,10 @@ TEST_F(MasterLexerStateTest, crlf) { } void -stringTokenCheck(const std::string& expected, const MasterLexer::Token& token) +stringTokenCheck(const std::string& expected, const MasterLexer::Token& token, + bool quoted = false) { - EXPECT_EQ(Token::STRING, token.getType()); + EXPECT_EQ(quoted ? Token::QSTRING : Token::STRING, token.getType()); EXPECT_EQ(expected, token.getString()); const std::string actual(token.getStringRegion().beg, token.getStringRegion().beg + @@ -350,4 +353,84 @@ TEST_F(MasterLexerStateTest, stringEscape) { stringTokenCheck("escaped\\\\", s_string.getToken(lexer)); } +TEST_F(MasterLexerStateTest, quotedString) { + ss << "\"ignore-quotes\"\n"; + ss << "\"quoted string\" "; + ss << "\"escape\\ in quote\" "; + ss << "\"escaped\\\"\" "; + ss << "\"escaped backslash\\\\\" "; + ss << "\"no;comment\""; + lexer.pushSource(ss); + + // by default, '"' doesn't have any special meaning and part of string + EXPECT_EQ(&s_string, State::start(lexer, common_options)); + EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see \n + stringTokenCheck("\"ignore-quotes\"", s_string.getToken(lexer)); + EXPECT_EQ(s_null, State::start(lexer, common_options)); // skip \n after it + EXPECT_TRUE(s_string.wasLastEOL(lexer)); + + // If QSTRING is specified in option, '"' 
is regarded as a beginning of + // a quoted string. + const MasterLexer::Options options = common_options | MasterLexer::QSTRING; + EXPECT_EQ(&s_qstring, State::start(lexer, options)); + EXPECT_FALSE(s_string.wasLastEOL(lexer)); // EOL is canceled due to '"' + EXPECT_EQ(s_null, s_qstring.handle(lexer)); + stringTokenCheck("quoted string", s_string.getToken(lexer), true); + + // escape character mostly doesn't have any effect in the qstring + // processing + EXPECT_EQ(&s_qstring, State::start(lexer, options)); + EXPECT_EQ(s_null, s_qstring.handle(lexer)); + stringTokenCheck("escape\\ in quote", s_string.getToken(lexer), true); + + // The only exception is the quotation mark itself. Note that the escape + // only works on the quotation mark immediately after it. + EXPECT_EQ(&s_qstring, State::start(lexer, options)); + EXPECT_EQ(s_null, s_qstring.handle(lexer)); + stringTokenCheck("escaped\"", s_string.getToken(lexer), true); + + // quoted '\' then '"'. Unlike the previous case '"' shouldn't be + // escaped. 
+ EXPECT_EQ(&s_qstring, State::start(lexer, options)); + EXPECT_EQ(s_null, s_qstring.handle(lexer)); + stringTokenCheck("escaped backslash\\\\", s_string.getToken(lexer), true); + + // ';' has no meaning in a quoted string (not indicating a comment) + EXPECT_EQ(&s_qstring, State::start(lexer, options)); + EXPECT_EQ(s_null, s_qstring.handle(lexer)); + stringTokenCheck("no;comment", s_string.getToken(lexer), true); +} + +TEST_F(MasterLexerStateTest, brokenQuotedString) { + ss << "\"unbalanced-quote\n"; + ss << "\"quoted\\\n\" "; + ss << "\"unclosed quote and EOF"; + lexer.pushSource(ss); + + // EOL is encountered without closing the quote + const MasterLexer::Options options = common_options | MasterLexer::QSTRING; + EXPECT_EQ(&s_qstring, State::start(lexer, options)); + EXPECT_EQ(s_null, s_qstring.handle(lexer)); + ASSERT_EQ(Token::ERROR, s_qstring.getToken(lexer).getType()); + EXPECT_EQ(Token::UNBALANCED_QUOTES, + s_qstring.getToken(lexer).getErrorCode()); + // We can resume after the error from the '\n' + EXPECT_EQ(s_null, State::start(lexer, options)); + EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType()); + + // \n is okay in a quoted string if escaped + EXPECT_EQ(&s_qstring, State::start(lexer, options)); + EXPECT_EQ(s_null, s_qstring.handle(lexer)); + stringTokenCheck("quoted\\\n", s_string.getToken(lexer), true); + + // EOF is encountered without closing the quote + EXPECT_EQ(&s_qstring, State::start(lexer, options)); + EXPECT_EQ(s_null, s_qstring.handle(lexer)); + ASSERT_EQ(Token::ERROR, s_qstring.getToken(lexer).getType()); + EXPECT_EQ(Token::UNEXPECTED_END, s_qstring.getToken(lexer).getErrorCode()); + // If we continue we'll simply see the EOF + EXPECT_EQ(s_null, State::start(lexer, options)); + EXPECT_EQ(Token::END_OF_FILE, s_crlf.getToken(lexer).getType()); +} + } |