summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/lib/dns/master_lexer.cc 53
-rw-r--r-- src/lib/dns/master_lexer_state.h 3
-rw-r--r-- src/lib/dns/tests/master_lexer_state_unittest.cc 87
3 files changed, 136 insertions, 7 deletions
diff --git a/src/lib/dns/master_lexer.cc b/src/lib/dns/master_lexer.cc
index d128159fe4..fc33c96385 100644
--- a/src/lib/dns/master_lexer.cc
+++ b/src/lib/dns/master_lexer.cc
@@ -193,7 +193,6 @@ public:
}
};
-// Currently this is provided mostly as a place holder
class String : public State {
public:
String() {}
@@ -201,6 +200,13 @@ public:
virtual const State* handle(MasterLexer& lexer) const;
};
+class QString : public State { // lexer state that reads a double-quoted string token
+public:
+ QString() {} // nothing to initialize in this class
+ virtual ~QString() {} // see the base class for the destructor
+ virtual const State* handle(MasterLexer& lexer) const; // reads the quoted string and stores the resulting token in the lexer
+};
+
// We use a common instance of a each state in a singleton-like way to save
// construction overhead. They are not singletons in its strict sense as
// we don't prohibit direct construction of these objects. But that doesn't
@@ -208,6 +214,7 @@ public:
// this file.
const CRLF CRLF_STATE;
const String STRING_STATE;
+const QString QSTRING_STATE;
}
const State&
@@ -217,6 +224,8 @@ State::getInstance(ID state_id) {
return (CRLF_STATE);
case String:
return (STRING_STATE);
+ case QString:
+ return (QSTRING_STATE);
}
// This is a bug of the caller, and this method is only expected to be
@@ -261,6 +270,9 @@ State::start(MasterLexer& lexer, MasterLexer::Options options) {
if (paren_count == 0) { // check if we are in () (see above)
return (&CRLF_STATE);
}
+ } else if (c == '"' && (options & MasterLexer::QSTRING) != 0) {
+ lexerimpl.last_was_eol_ = false;
+ return (&QSTRING_STATE);
} else if (c == '(') {
lexerimpl.last_was_eol_ = false;
++paren_count;
@@ -284,7 +296,6 @@ State::start(MasterLexer& lexer, MasterLexer::Options options) {
const State*
String::handle(MasterLexer& lexer) const {
std::vector<char>& data = getLexerImpl(lexer)->data_;
- MasterLexer::Token& token = getLexerImpl(lexer)->token_;
data.clear();
bool escaped = false;
@@ -298,14 +309,48 @@ String::handle(MasterLexer& lexer) const {
(!escaped &&
(c == ' ' || c == '\t' || c == '(' || c == ')'))) {
getLexerImpl(lexer)->source_->ungetChar();
- token = MasterLexer::Token(&data.at(0), data.size());
+ getLexerImpl(lexer)->token_ =
+ MasterLexer::Token(&data.at(0), data.size());
return (NULL);
}
- escaped = (!escaped && (c == '\\'));
+ escaped = (c == '\\' && !escaped);
data.push_back(c);
}
}
+// Read the body of a quoted string and store the resulting token in the
+// lexer.
+//
+// The opening '"' has already been consumed by State::start() when this
+// state is entered.  Characters are accumulated in the lexer's data buffer
+// until one of the following happens:
+// - an unescaped '"' is read: a quoted-string token is produced that covers
+//   the accumulated data (the surrounding quotes are not included);
+// - an unescaped '\n' is read: the newline is pushed back to the source and
+//   an UNBALANCED_QUOTES error token is produced;
+// - the end of the input is reached: an UNEXPECTED_END error token is
+//   produced.
+//
+// A backslash is stored like any other character.  Its only effect is on
+// the character immediately following it: an escaped '"' replaces the
+// backslash in the data instead of terminating the string, and an escaped
+// '\n' is stored (after the backslash) instead of being an error.
+//
+// Always returns NULL, i.e. there is no follow-up state.
+const State*
+QString::handle(MasterLexer& lexer) const {
+    MasterLexer::Token& token = getLexerImpl(lexer)->token_;
+    std::vector<char>& data = getLexerImpl(lexer)->data_;
+    data.clear();
+
+    bool escaped = false;
+    while (true) {
+        const int c = getLexerImpl(lexer)->source_->getChar();
+        if (c == InputSource::END_OF_STREAM) {
+            token = Token(Token::UNEXPECTED_END);
+            return (NULL);
+        } else if (c == '"') {
+            if (escaped) {
+                // found escaped '"'. overwrite the preceding backslash.
+                assert(!data.empty());
+                escaped = false;
+                data.back() = '"';
+            } else {
+                // Closing quote.  For an empty quoted string ("") the
+                // buffer is still empty here and data.at(0) would throw
+                // std::out_of_range.  Append a terminator that is not
+                // counted in the token length so that there is always a
+                // valid element to point to.
+                data.push_back('\0');
+                token = MasterLexer::Token(&data.at(0), data.size() - 1,
+                                           true);
+                return (NULL);
+            }
+        } else if (c == '\n' && !escaped) {
+            // Leave the newline in the source so that the caller can
+            // resume from it after the error.
+            getLexerImpl(lexer)->source_->ungetChar();
+            token = Token(Token::UNBALANCED_QUOTES);
+            return (NULL);
+        } else {
+            // A backslash starts an escape unless it is itself escaped.
+            escaped = (c == '\\' && !escaped);
+            data.push_back(c);
+        }
+    }
+}
+
} // namespace master_lexer_internal
} // end of namespace dns
diff --git a/src/lib/dns/master_lexer_state.h b/src/lib/dns/master_lexer_state.h
index 1130f3334f..830e60a3bb 100644
--- a/src/lib/dns/master_lexer_state.h
+++ b/src/lib/dns/master_lexer_state.h
@@ -98,7 +98,8 @@ public:
/// a way to get an instance of a specific state.
enum ID {
CRLF, ///< Just seen a carriage-return character
- String ///< Handling a string token
+ String, ///< Handling a string token
+ QString ///< Handling a quoted string token
};
/// \brief Returns a \c State instance of the given state.
diff --git a/src/lib/dns/tests/master_lexer_state_unittest.cc b/src/lib/dns/tests/master_lexer_state_unittest.cc
index 48aceacc3f..5c3be03009 100644
--- a/src/lib/dns/tests/master_lexer_state_unittest.cc
+++ b/src/lib/dns/tests/master_lexer_state_unittest.cc
@@ -32,6 +32,7 @@ protected:
s_null(NULL),
s_crlf(State::getInstance(State::CRLF)),
s_string(State::getInstance(State::String)),
+ s_qstring(State::getInstance(State::QString)),
options(MasterLexer::NONE),
orig_options(options)
{}
@@ -42,6 +43,7 @@ protected:
const State* const s_null;
const State& s_crlf;
const State& s_string;
+ const State& s_qstring;
std::stringstream ss;
MasterLexer::Options options, orig_options;
};
@@ -254,9 +256,10 @@ TEST_F(MasterLexerStateTest, crlf) {
}
void
-stringTokenCheck(const std::string& expected, const MasterLexer::Token& token)
+stringTokenCheck(const std::string& expected, const MasterLexer::Token& token,
+ bool quoted = false)
{
- EXPECT_EQ(Token::STRING, token.getType());
+ EXPECT_EQ(quoted ? Token::QSTRING : Token::STRING, token.getType());
EXPECT_EQ(expected, token.getString());
const std::string actual(token.getStringRegion().beg,
token.getStringRegion().beg +
@@ -350,4 +353,84 @@ TEST_F(MasterLexerStateTest, stringEscape) {
stringTokenCheck("escaped\\\\", s_string.getToken(lexer));
}
+TEST_F(MasterLexerStateTest, quotedString) { // well-formed quoted strings, with and without the QSTRING option
+ ss << "\"ignore-quotes\"\n"; // read below without the QSTRING option
+ ss << "\"quoted string\" "; // quoted string containing a space
+ ss << "\"escape\\ in quote\" "; // backslash before an ordinary character
+ ss << "\"escaped\\\"\" "; // backslash-escaped double quote
+ ss << "\"escaped backslash\\\\\" "; // escaped backslash right before the closing quote
+ ss << "\"no;comment\""; // ';' inside the quotes
+ lexer.pushSource(ss); // the inputs above are consumed in order by the checks below
+
+ // by default, '"' has no special meaning and is just part of a string
+ EXPECT_EQ(&s_string, State::start(lexer, common_options));
+ EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see \n
+ stringTokenCheck("\"ignore-quotes\"", s_string.getToken(lexer));
+ EXPECT_EQ(s_null, State::start(lexer, common_options)); // skip \n after it
+ EXPECT_TRUE(s_string.wasLastEOL(lexer));
+
+ // If QSTRING is specified in option, '"' is regarded as a beginning of
+ // a quoted string.
+ const MasterLexer::Options options = common_options | MasterLexer::QSTRING; // local; hides the fixture member of the same name
+ EXPECT_EQ(&s_qstring, State::start(lexer, options));
+ EXPECT_FALSE(s_string.wasLastEOL(lexer)); // EOL is canceled due to '"'
+ EXPECT_EQ(s_null, s_qstring.handle(lexer));
+ stringTokenCheck("quoted string", s_string.getToken(lexer), true); // the surrounding quotes are not part of the token
+
+ // escape character mostly doesn't have any effect in the qstring
+ // processing
+ EXPECT_EQ(&s_qstring, State::start(lexer, options));
+ EXPECT_EQ(s_null, s_qstring.handle(lexer));
+ stringTokenCheck("escape\\ in quote", s_string.getToken(lexer), true); // the backslash is kept as is
+
+ // The only exception is the quotation mark itself. Note that the escape
+ // only works on the quotation mark immediately after it.
+ EXPECT_EQ(&s_qstring, State::start(lexer, options));
+ EXPECT_EQ(s_null, s_qstring.handle(lexer));
+ stringTokenCheck("escaped\"", s_string.getToken(lexer), true); // the backslash is replaced by the quote
+
+ // quoted '\' then '"'. Unlike the previous case '"' shouldn't be
+ // escaped.
+ EXPECT_EQ(&s_qstring, State::start(lexer, options));
+ EXPECT_EQ(s_null, s_qstring.handle(lexer));
+ stringTokenCheck("escaped backslash\\\\", s_string.getToken(lexer), true); // both backslashes are kept
+
+ // ';' has no meaning in a quoted string (not indicating a comment)
+ EXPECT_EQ(&s_qstring, State::start(lexer, options));
+ EXPECT_EQ(s_null, s_qstring.handle(lexer));
+ stringTokenCheck("no;comment", s_string.getToken(lexer), true);
+}
+
+TEST_F(MasterLexerStateTest, brokenQuotedString) { // quoted strings that hit a newline or the end of input before the closing quote
+ ss << "\"unbalanced-quote\n"; // newline before the closing quote
+ ss << "\"quoted\\\n\" "; // backslash-escaped newline inside the quotes
+ ss << "\"unclosed quote and EOF"; // input ends before the closing quote
+ lexer.pushSource(ss); // the inputs above are consumed in order by the checks below
+
+ // EOL is encountered without closing the quote
+ const MasterLexer::Options options = common_options | MasterLexer::QSTRING; // local; hides the fixture member of the same name
+ EXPECT_EQ(&s_qstring, State::start(lexer, options));
+ EXPECT_EQ(s_null, s_qstring.handle(lexer));
+ ASSERT_EQ(Token::ERROR, s_qstring.getToken(lexer).getType()); // ASSERT: the error code below is only checked for an error token
+ EXPECT_EQ(Token::UNBALANCED_QUOTES,
+ s_qstring.getToken(lexer).getErrorCode());
+ // We can resume after the error from the '\n'
+ EXPECT_EQ(s_null, State::start(lexer, options));
+ EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+
+ // \n is okay in a quoted string if escaped
+ EXPECT_EQ(&s_qstring, State::start(lexer, options));
+ EXPECT_EQ(s_null, s_qstring.handle(lexer));
+ stringTokenCheck("quoted\\\n", s_string.getToken(lexer), true); // both the backslash and the newline are kept
+
+ // EOF is encountered without closing the quote
+ EXPECT_EQ(&s_qstring, State::start(lexer, options));
+ EXPECT_EQ(s_null, s_qstring.handle(lexer));
+ ASSERT_EQ(Token::ERROR, s_qstring.getToken(lexer).getType()); // ASSERT: the error code below is only checked for an error token
+ EXPECT_EQ(Token::UNEXPECTED_END, s_qstring.getToken(lexer).getErrorCode());
+ // If we continue we'll simply see the EOF
+ EXPECT_EQ(s_null, State::start(lexer, options));
+ EXPECT_EQ(Token::END_OF_FILE, s_crlf.getToken(lexer).getType());
+}
+
}