summary refs log tree commit diff stats
path: root/src/lib/dns
diff options
context:
space:
mode:
author JINMEI Tatuya <jinmei@isc.org> 2012-11-13 22:12:20 +0100
committer JINMEI Tatuya <jinmei@isc.org> 2012-11-13 22:12:20 +0100
commit 9a11ef62ae36d9d891e87ba792fa249ae82f0736 (patch)
tree 5add7ee45614d0a9962e5a14e7654245bf6f31dd /src/lib/dns
parent [master] Merge branch 'trac2463' (diff)
parent [2372] clarified that INITIAL_WS is recognized only after an EOL. (diff)
download kea-9a11ef62ae36d9d891e87ba792fa249ae82f0736.tar.xz
kea-9a11ef62ae36d9d891e87ba792fa249ae82f0736.zip
[master] Merge branch 'trac2372'
Diffstat (limited to 'src/lib/dns')
-rw-r--r-- src/lib/dns/Makefile.am 1
-rw-r--r-- src/lib/dns/master_lexer.cc 170
-rw-r--r-- src/lib/dns/master_lexer.h 29
-rw-r--r-- src/lib/dns/master_lexer_state.h 138
-rw-r--r-- src/lib/dns/tests/Makefile.am 1
-rw-r--r-- src/lib/dns/tests/master_lexer_state_unittest.cc 256
6 files changed, 592 insertions, 3 deletions
diff --git a/src/lib/dns/Makefile.am b/src/lib/dns/Makefile.am
index e81ef76a8d..14b74f7d27 100644
--- a/src/lib/dns/Makefile.am
+++ b/src/lib/dns/Makefile.am
@@ -97,6 +97,7 @@ libb10_dns___la_SOURCES += master_lexer_inputsource.h master_lexer_inputsource.c
libb10_dns___la_SOURCES += labelsequence.h labelsequence.cc
libb10_dns___la_SOURCES += masterload.h masterload.cc
libb10_dns___la_SOURCES += master_lexer.h master_lexer.cc
+libb10_dns___la_SOURCES += master_lexer_state.h
libb10_dns___la_SOURCES += message.h message.cc
libb10_dns___la_SOURCES += messagerenderer.h messagerenderer.cc
libb10_dns___la_SOURCES += name.h name.cc
diff --git a/src/lib/dns/master_lexer.cc b/src/lib/dns/master_lexer.cc
index c9c5528995..992a051048 100644
--- a/src/lib/dns/master_lexer.cc
+++ b/src/lib/dns/master_lexer.cc
@@ -16,6 +16,7 @@
#include <dns/master_lexer.h>
#include <dns/master_lexer_inputsource.h>
+#include <dns/master_lexer_state.h>
#include <boost/shared_ptr.hpp>
@@ -32,10 +33,34 @@ typedef boost::shared_ptr<master_lexer_internal::InputSource> InputSourcePtr;
using namespace master_lexer_internal;
struct MasterLexer::MasterLexerImpl {
- MasterLexerImpl() : token_(Token::NOT_STARTED) {}
+ MasterLexerImpl() : source_(NULL), token_(Token::NOT_STARTED),
+ paren_count_(0), last_was_eol_(false)
+ {}
+
+ // A helper method, commonly used by the state classes, that skips a
+ // comment (if any) up to the end of the line or the end of the input.
+ // If c is ';' it consumes characters and returns the terminating '\n'
+ // or END_OF_STREAM; otherwise it simply returns c unchanged.
+ int skipComment(int c) {
+ if (c == ';') { // ';' starts a comment
+ while (true) {
+ c = source_->getChar();
+ if (c == '\n' || c == InputSource::END_OF_STREAM) {
+ return (c); // hand the "end-of" character back to the caller
+ }
+ }
+ }
+ return (c);
+ }
std::vector<InputSourcePtr> sources_;
- Token token_;
+ InputSource* source_; // current source (NULL if sources_ is empty)
+ Token token_; // currently recognized token (set by a state)
+
+ // These are used by the state classes and are kept here only as storage;
+ // the main lexer class itself does not need these members.
+ size_t paren_count_; // nesting count of open parentheses
+ bool last_was_eol_; // whether the lexer just passed an end-of-line
};
MasterLexer::MasterLexer() : impl_(new MasterLexerImpl) {
@@ -60,12 +85,14 @@ MasterLexer::pushSource(const char* filename, std::string* error) {
return (false);
}
+ impl_->source_ = impl_->sources_.back().get();
return (true);
}
void
MasterLexer::pushSource(std::istream& input) {
impl_->sources_.push_back(InputSourcePtr(new InputSource(input)));
+ impl_->source_ = impl_->sources_.back().get(); // the pushed source becomes the current one
}
void
@@ -75,6 +102,8 @@ MasterLexer::popSource() {
"MasterLexer::popSource on an empty source");
}
impl_->sources_.pop_back();
+ impl_->source_ = impl_->sources_.empty() ? NULL :
+ impl_->sources_.back().get();
}
std::string
@@ -115,5 +144,142 @@ MasterLexer::Token::getErrorText() const {
return (error_text[val_.error_code_]);
}
+namespace master_lexer_internal {
+// Below we implement state classes for state transitions of MasterLexer.
+// Note that these need to be defined here so that they can refer to
+// the details of MasterLexerImpl.
+
+typedef MasterLexer::Token Token; // convenience shortcut
+
+bool
+State::wasLastEOL(const MasterLexer& lexer) const {
+ return (lexer.impl_->last_was_eol_); // read-only peek at the lexer's EOL flag
+}
+
+const MasterLexer::Token&
+State::getToken(const MasterLexer& lexer) const {
+ return (lexer.impl_->token_); // reference to the token stored in the lexer
+}
+
+size_t
+State::getParenCount(const MasterLexer& lexer) const {
+ return (lexer.impl_->paren_count_); // current nesting count of parentheses
+}
+
+namespace {
+class CRLF : public State {
+public:
+ CRLF() {}
+ virtual const State* handle(MasterLexer& lexer) const {
+ // We've just seen '\r'. If this is part of a sequence of '\r\n',
+ // we combine them as a single END-OF-LINE. Otherwise we treat the
+ // single '\r' as an EOL and continue tokenization from the character
+ // immediately after '\r'. One tricky case is when there's a comment
+ // between '\r' and '\n'. This implementation combines these
+ // characters and treats them as a single EOL (the behavior derived
+ // from BIND 9). Technically this may not be correct, but in practice
+ // the caller wouldn't distinguish this case from the case where there
+ // are two EOLs, so we simplify the process.
+ const int c = getLexerImpl(lexer)->skipComment(
+ getLexerImpl(lexer)->source_->getChar());
+ if (c != '\n') { // not a '\r\n' pair: push the last character back
+ getLexerImpl(lexer)->source_->ungetChar();
+ }
+ getLexerImpl(lexer)->token_ = Token(Token::END_OF_LINE);
+ getLexerImpl(lexer)->last_was_eol_ = true;
+ return (NULL); // the session is complete; no further transition
+ }
+};
+
+// Currently provided mostly as a placeholder; handle() just returns NULL.
+class String : public State {
+public:
+ String() {}
+ virtual const State* handle(MasterLexer& /*lexer*/) const {
+ return (NULL); // the lexer is not touched at all for now
+ }
+};
+
+// We use a common instance of each state in a singleton-like way to save
+// construction overhead. They are not singletons in the strict sense, as
+// we don't prohibit direct construction of these objects. But that doesn't
+// matter much anyway, because the definitions are completely hidden within
+// this file.
+const CRLF CRLF_STATE;
+const String STRING_STATE;
+}
+
+const State&
+State::getInstance(ID state_id) {
+ switch (state_id) { // map each public ID to its shared state object
+ case CRLF:
+ return (CRLF_STATE);
+ case String:
+ return (STRING_STATE);
+ }
+
+ // Reaching this point means an unknown ID was passed, which is a bug of
+ // the caller. This method is only expected to be used by tests, so we
+ // just forcefully make it fail by asserting the condition.
+ assert(false);
+ return (STRING_STATE); // a dummy return, to silence some compilers.
+}
+
+const State*
+State::start(MasterLexer& lexer, MasterLexer::Options options) {
+ // Define some shortcuts. NOTE(review): source_ is dereferenced below without a NULL check; presumably a source has been pushed -- confirm.
+ MasterLexer::MasterLexerImpl& lexerimpl = *lexer.impl_;
+ size_t& paren_count = lexerimpl.paren_count_;
+
+ while (true) {
+ const int c = lexerimpl.skipComment(lexerimpl.source_->getChar());
+ if (c == InputSource::END_OF_STREAM) {
+ lexerimpl.last_was_eol_ = false;
+ if (paren_count != 0) { // input ended inside an open '('
+ lexerimpl.token_ = Token(Token::UNBALANCED_PAREN);
+ paren_count = 0; // reset to 0; this helps in lenient mode.
+ return (NULL);
+ }
+ lexerimpl.token_ = Token(Token::END_OF_FILE);
+ return (NULL);
+ } else if (c == ' ' || c == '\t') {
+ // If requested, and we are right after an EOL and not in (),
+ // recognize the initial space; otherwise the space is skipped.
+ if (lexerimpl.last_was_eol_ && paren_count == 0 &&
+ (options & MasterLexer::INITIAL_WS) != 0) {
+ lexerimpl.last_was_eol_ = false;
+ lexerimpl.token_ = Token(Token::INITIAL_WS);
+ return (NULL);
+ }
+ } else if (c == '\n') {
+ lexerimpl.last_was_eol_ = true;
+ if (paren_count == 0) { // we don't recognize EOL if we are in ()
+ lexerimpl.token_ = Token(Token::END_OF_LINE);
+ return (NULL);
+ }
+ } else if (c == '\r') { // NOTE(review): unlike '\n', last_was_eol_ is left untouched here when inside ()
+ if (paren_count == 0) { // check if we are in () (see above)
+ return (&CRLF_STATE);
+ }
+ } else if (c == '(') {
+ lexerimpl.last_was_eol_ = false;
+ ++paren_count;
+ } else if (c == ')') {
+ lexerimpl.last_was_eol_ = false;
+ if (paren_count == 0) { // ')' without a matching '('
+ lexerimpl.token_ = Token(Token::UNBALANCED_PAREN);
+ return (NULL);
+ }
+ --paren_count;
+ } else {
+ // Note: in #2373 we should probably ungetChar().
+ lexerimpl.last_was_eol_ = false;
+ return (&STRING_STATE);
+ }
+ // no code should be here; we just continue the loop.
+ }
+}
+
+} // namespace master_lexer_internal
+
} // end of namespace dns
} // end of namespace isc
diff --git a/src/lib/dns/master_lexer.h b/src/lib/dns/master_lexer.h
index da6bb5dee2..854d602e03 100644
--- a/src/lib/dns/master_lexer.h
+++ b/src/lib/dns/master_lexer.h
@@ -24,6 +24,9 @@
namespace isc {
namespace dns {
+namespace master_lexer_internal {
+class State;
+}
/// \brief Tokenizer for parsing DNS master files.
///
@@ -64,9 +67,22 @@ namespace dns {
/// this class does not throw for an error that would be reported as an
/// exception in other classes.
class MasterLexer {
+ friend class master_lexer_internal::State;
public:
class Token; // we define it separately for better readability
+ /// \brief Options for getNextToken.
+ ///
+ /// A compound option, indicating multiple options are set, can be
+ /// specified using the bitwise OR operator (operator|()).
+ enum Options {
+ NONE = 0, ///< No option
+ INITIAL_WS = 1, ///< recognize begin-of-line spaces after an
+ ///< end-of-line
+ QSTRING = 2, ///< recognize quoted string
+ NUMBER = 4 ///< recognize numeric text as integer
+ };
+
/// \brief The constructor.
///
/// \throw std::bad_alloc Internal resource allocation fails (rare case).
@@ -167,6 +183,16 @@ private:
MasterLexerImpl* impl_;
};
+/// \brief Operator to combine \c MasterLexer options
+///
+/// This is a trivial shortcut so that compound options can be specified
+/// in an intuitive way; it returns the bitwise OR of the two option values.
+inline MasterLexer::Options
+operator|(MasterLexer::Options o1, MasterLexer::Options o2) {
+ return (static_cast<MasterLexer::Options>(
+ static_cast<unsigned>(o1) | static_cast<unsigned>(o2)));
+}
+
/// \brief Tokens for \c MasterLexer
///
/// This is a simple value-class encapsulating a type of a lexer token and
@@ -192,7 +218,8 @@ public:
enum Type {
END_OF_LINE, ///< End of line detected (if asked for detecting it)
END_OF_FILE, ///< End of file detected (if asked for detecting it)
- INITIAL_WS, ///< White spaces at the beginning of a line
+ INITIAL_WS, ///< White spaces at the beginning of a line after an
+ ///< end of line
NOVALUE_TYPE_MAX = INITIAL_WS, ///< Max integer corresponding to
/// no-value (type only) types.
/// Mainly for internal use.
diff --git a/src/lib/dns/master_lexer_state.h b/src/lib/dns/master_lexer_state.h
new file mode 100644
index 0000000000..86957c5e3f
--- /dev/null
+++ b/src/lib/dns/master_lexer_state.h
@@ -0,0 +1,138 @@
+// Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC")
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+// AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+// OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+// PERFORMANCE OF THIS SOFTWARE.
+
+#ifndef MASTER_LEXER_STATE_H
+#define MASTER_LEXER_STATE_H 1
+
+#include <dns/master_lexer.h>
+
+namespace isc {
+namespace dns {
+
+namespace master_lexer_internal {
+
+/// \brief Tokenization state for \c MasterLexer.
+///
+/// This is a base class of classes that represent various states of a single
+/// tokenization session of \c MasterLexer, i.e., the states used for a
+/// single call to \c MasterLexer::getNextToken().
+///
+/// It follows the convention of the state design pattern: each derived class
+/// corresponds to a specific state, and the state transition takes place
+/// through the virtual method named \c handle(). The \c handle() method
+/// takes the main \c MasterLexer object that holds all necessary internal
+/// context, and updates it as necessary; each \c State derived class is
+/// completely stateless.
+///
+/// The initial transition takes place in a static method of the base class,
+/// \c start(). This is mainly for implementation convenience; we need to
+/// pass options given to \c MasterLexer::getNextToken() for the initial
+/// state, so it makes more sense to separate the interface for the transition
+/// from the initial state.
+///
+/// When an object of a specific state class completes the session, it
+/// normally sets the identified token in the lexer, and returns NULL;
+/// if more transition is necessary, it returns a pointer to the next state
+/// object.
+///
+/// As is usual in the state design pattern, the \c State class is made
+/// a friend class of \c MasterLexer and can refer to its internal details.
+/// This is intentional; essentially it's a part of \c MasterLexer and
+/// is defined as a separate class only for implementation clarity and better
+/// testability. It's defined in a publicly visible header, but that's only
+/// for testing purposes. No normal application or even no other classes of
+/// this library are expected to use this class.
+class State {
+public:
+ /// \brief Virtual destructor.
+ ///
+ /// This class is a polymorphic base (it declares the pure virtual
+ /// \c handle()), so it gets a virtual destructor. State objects are not
+ /// expected to be deleted through a base class pointer, so in practice
+ /// this mainly keeps derived classes safe to destroy and silences
+ /// compilers that warn about a non-virtual destructor.
+ virtual ~State() {}
+
+ /// \brief Begin state transitions to get the next token.
+ ///
+ /// This is the first method that \c MasterLexer needs to call for a
+ /// tokenization session. The lexer passes a reference to itself
+ /// and options given in \c getNextToken().
+ ///
+ /// \throw InputSource::ReadError Unexpected I/O error
+ /// \throw std::bad_alloc Internal resource allocation failure
+ ///
+ /// \param lexer The lexer object that holds the main context.
+ /// \param options The options passed to getNextToken().
+ /// \return A pointer to the next state object or NULL if the transition
+ /// is completed.
+ static const State* start(MasterLexer& lexer,
+ MasterLexer::Options options);
+
+ /// \brief Handle the process of one specific state.
+ ///
+ /// This method is expected to be called on the object returned by
+ /// start(), and then called repeatedly on each returned object until
+ /// NULL is returned. The call chain will form the complete state
+ /// transition.
+ ///
+ /// \throw InputSource::ReadError Unexpected I/O error
+ /// \throw std::bad_alloc Internal resource allocation failure
+ ///
+ /// \param lexer The lexer object that holds the main context.
+ /// \return A pointer to the next state object or NULL if the transition
+ /// is completed.
+ virtual const State* handle(MasterLexer& lexer) const = 0;
+
+ /// \brief Types of states.
+ ///
+ /// Specific states are basically hidden within the implementation,
+ /// but we'd like to allow tests to examine them, so we provide
+ /// a way to get an instance of a specific state.
+ enum ID {
+ CRLF, ///< Just seen a carriage-return character
+ String ///< Handling a string token
+ };
+
+ /// \brief Returns a \c State instance of the given state.
+ ///
+ /// This is provided only for testing purposes so tests can check
+ /// the behavior of each state separately. \c MasterLexer shouldn't
+ /// need this method.
+ ///
+ /// \param state_id The ID of the state whose instance is requested.
+ /// \return A reference to the state object for \c state_id.
+ static const State& getInstance(ID state_id);
+
+ /// \name Read-only accessors for testing purposes.
+ ///
+ /// These allow tests to inspect some selected portion of the internal
+ /// states of \c MasterLexer. These shouldn't be used except for testing
+ /// purposes.
+ ///@{
+ bool wasLastEOL(const MasterLexer& lexer) const;
+ const MasterLexer::Token& getToken(const MasterLexer& lexer) const;
+ size_t getParenCount(const MasterLexer& lexer) const;
+ ///@}
+
+protected:
+ /// \brief An accessor to the internal implementation class of
+ /// \c MasterLexer.
+ ///
+ /// This is provided for specific derived classes as they are not direct
+ /// friends of \c MasterLexer.
+ ///
+ /// \param lexer The lexer object that holds the main context.
+ /// \return A pointer to the implementation class object of the given
+ /// lexer. This is never NULL.
+ MasterLexer::MasterLexerImpl* getLexerImpl(MasterLexer& lexer) const {
+ return (lexer.impl_);
+ }
+};
+
+} // namespace master_lexer_internal
+} // namespace dns
+} // namespace isc
+#endif // MASTER_LEXER_STATE_H
+
+// Local Variables:
+// mode: c++
+// End:
diff --git a/src/lib/dns/tests/Makefile.am b/src/lib/dns/tests/Makefile.am
index d5adc21e8f..33867da0c2 100644
--- a/src/lib/dns/tests/Makefile.am
+++ b/src/lib/dns/tests/Makefile.am
@@ -27,6 +27,7 @@ run_unittests_SOURCES += labelsequence_unittest.cc
run_unittests_SOURCES += messagerenderer_unittest.cc
run_unittests_SOURCES += master_lexer_token_unittest.cc
run_unittests_SOURCES += master_lexer_unittest.cc
+run_unittests_SOURCES += master_lexer_state_unittest.cc
run_unittests_SOURCES += name_unittest.cc
run_unittests_SOURCES += nsec3hash_unittest.cc
run_unittests_SOURCES += rrclass_unittest.cc rrtype_unittest.cc
diff --git a/src/lib/dns/tests/master_lexer_state_unittest.cc b/src/lib/dns/tests/master_lexer_state_unittest.cc
new file mode 100644
index 0000000000..bcee7fd1ec
--- /dev/null
+++ b/src/lib/dns/tests/master_lexer_state_unittest.cc
@@ -0,0 +1,256 @@
+// Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC")
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+// AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+// OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+// PERFORMANCE OF THIS SOFTWARE.
+
+#include <dns/master_lexer.h>
+#include <dns/master_lexer_inputsource.h>
+#include <dns/master_lexer_state.h>
+
+#include <gtest/gtest.h>
+
+#include <sstream>
+
+using namespace isc::dns;
+using namespace master_lexer_internal;
+
+namespace {
+typedef MasterLexer::Token Token; // shortcut
+
+class MasterLexerStateTest : public ::testing::Test {
+protected:
+ MasterLexerStateTest() : common_options(MasterLexer::INITIAL_WS),
+ s_null(NULL),
+ s_crlf(State::getInstance(State::CRLF)),
+ s_string(State::getInstance(State::String)),
+ options(MasterLexer::NONE),
+ orig_options(options)
+ {}
+
+ // INITIAL_WS is used as the common set of options in most tests.
+ const MasterLexer::Options common_options;
+ MasterLexer lexer;
+ const State* const s_null; // what start()/handle() return when a session is done
+ const State& s_crlf; // the shared CRLF state; also used to call the accessors
+ const State& s_string; // the shared String state
+ std::stringstream ss; // in-memory input pushed to the lexer by each test
+ MasterLexer::Options options, orig_options; // NOTE(review): not referenced by the tests visible here
+};
+
+// Common check for the end-of-file condition.
+// The token must be END_OF_FILE, and the "last was EOL" flag must be off.
+// The passed state can be any valid one; state objects are stateless and
+// only provide the interface for inspection.
+void
+eofCheck(const State& state, MasterLexer& lexer) {
+ EXPECT_EQ(Token::END_OF_FILE, state.getToken(lexer).getType());
+ EXPECT_FALSE(state.wasLastEOL(lexer));
+}
+
+TEST_F(MasterLexerStateTest, startAndEnd) {
+ // A simple case: the input is empty, so start() immediately reaches the
+ // end of the input and the session is done.
+ lexer.pushSource(ss);
+ EXPECT_EQ(s_null, State::start(lexer, common_options));
+ eofCheck(s_crlf, lexer);
+}
+
+TEST_F(MasterLexerStateTest, startToEOL) {
+ ss << "\n";
+ lexer.pushSource(ss);
+
+ EXPECT_EQ(s_null, State::start(lexer, common_options)); // consume '\n'
+ EXPECT_TRUE(s_crlf.wasLastEOL(lexer));
+ EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+
+ // The next lexer session will reach EOF. The same EOF check should pass.
+ EXPECT_EQ(s_null, State::start(lexer, common_options));
+ eofCheck(s_crlf, lexer);
+}
+
+TEST_F(MasterLexerStateTest, space) {
+ // two lines of white space ending with '\n' (see below), then a space after EOL
+ ss << " \t\n\t\n ";
+ lexer.pushSource(ss);
+
+ // By default space characters and tabs will be ignored. We check this
+ // twice; at the second iteration, it's a white space at the beginning
+ // of line, but since we don't specify the INITIAL_WS option, it's treated
+ // as a normal space and ignored.
+ for (size_t i = 0; i < 2; ++i) {
+ EXPECT_EQ(s_null, State::start(lexer, MasterLexer::NONE));
+ EXPECT_TRUE(s_crlf.wasLastEOL(lexer));
+ EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+ }
+
+ // Now we specify the INITIAL_WS option. It will be recognized and the
+ // corresponding token will be returned.
+ EXPECT_EQ(s_null, State::start(lexer, MasterLexer::INITIAL_WS));
+ EXPECT_FALSE(s_crlf.wasLastEOL(lexer));
+ EXPECT_EQ(Token::INITIAL_WS, s_crlf.getToken(lexer).getType());
+}
+
+TEST_F(MasterLexerStateTest, parentheses) {
+ ss << "\n(\na\n )\n "; // 1st \n is to check if 'was EOL' is set to false
+ lexer.pushSource(ss);
+
+ EXPECT_EQ(s_null, State::start(lexer, common_options)); // handle \n
+
+ // Now handle '('. It skips \n and recognizes 'a' as a string
+ EXPECT_EQ(0, s_crlf.getParenCount(lexer)); // check pre condition
+ EXPECT_EQ(&s_string, State::start(lexer, common_options));
+ EXPECT_EQ(1, s_crlf.getParenCount(lexer)); // check post condition
+ EXPECT_FALSE(s_crlf.wasLastEOL(lexer));
+
+ // skip 'a' (note: until #2373 it's actually skipped as part of the '('
+ // handling)
+ s_string.handle(lexer);
+
+ // Then handle ')'. '\n' before ')' isn't recognized because
+ // it's canceled due to the '('. Likewise, the space after the '\n'
+ // shouldn't be recognized but should be just ignored.
+ EXPECT_EQ(s_null, State::start(lexer, common_options));
+ EXPECT_EQ(0, s_crlf.getParenCount(lexer));
+
+ // Now the temporarily disabled options are restored: both EOL and the
+ // initial WS are recognized.
+ EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+ EXPECT_EQ(s_null, State::start(lexer, common_options));
+ EXPECT_EQ(Token::INITIAL_WS, s_crlf.getToken(lexer).getType());
+}
+
+TEST_F(MasterLexerStateTest, nestedParentheses) {
+ // This is an unusual, but allowed (in this implementation) case.
+ ss << "(a(b)\n c)\n ";
+ lexer.pushSource(ss);
+
+ EXPECT_EQ(&s_string, State::start(lexer, common_options)); // consume '('
+ s_string.handle(lexer); // consume 'a'
+ EXPECT_EQ(&s_string, State::start(lexer, common_options)); // consume '('
+ s_string.handle(lexer); // consume 'b'
+ EXPECT_EQ(2, s_crlf.getParenCount(lexer)); // now the count is 2
+
+ // Close the innermost parentheses. The count will be decreased, but the
+ // options shouldn't be restored yet, so the intermediate EOL or initial
+ // WS won't be recognized.
+ EXPECT_EQ(&s_string, State::start(lexer, common_options)); // consume ')'
+ s_string.handle(lexer); // consume 'c'
+ EXPECT_EQ(1, s_crlf.getParenCount(lexer));
+
+ // Close the outermost parentheses. The count will be back to 0, and the
+ // original options are restored.
+ EXPECT_EQ(s_null, State::start(lexer, common_options));
+
+ // Now the temporarily disabled options are restored: both EOL and the
+ // initial WS are recognized.
+ EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+ EXPECT_EQ(s_null, State::start(lexer, common_options));
+ EXPECT_EQ(Token::INITIAL_WS, s_crlf.getToken(lexer).getType());
+}
+
+TEST_F(MasterLexerStateTest, unbalancedParentheses) {
+ // Only a closing paren is provided. We prepend a \n to check that the
+ // "last EOL" flag is correctly canceled after detecting the error.
+ ss << "\n)";
+ ss << "(a";
+ lexer.pushSource(ss);
+
+ EXPECT_EQ(s_null, State::start(lexer, common_options)); // consume '\n'
+ EXPECT_TRUE(s_crlf.wasLastEOL(lexer)); // this \n was remembered
+
+ // Now check ')'. The result should be an error and the count shouldn't
+ // be changed. "last EOL" should be canceled.
+ EXPECT_EQ(0, s_crlf.getParenCount(lexer));
+ EXPECT_EQ(s_null, State::start(lexer, common_options));
+ EXPECT_EQ(0, s_crlf.getParenCount(lexer));
+ ASSERT_EQ(Token::ERROR, s_crlf.getToken(lexer).getType());
+ EXPECT_EQ(Token::UNBALANCED_PAREN, s_crlf.getToken(lexer).getErrorCode());
+ EXPECT_FALSE(s_crlf.wasLastEOL(lexer));
+
+ // Reach EOF with a dangling open parenthesis.
+ EXPECT_EQ(&s_string, State::start(lexer, common_options)); // consume '('
+ s_string.handle(lexer); // consume 'a'
+ EXPECT_EQ(1, s_crlf.getParenCount(lexer));
+ EXPECT_EQ(s_null, State::start(lexer, common_options)); // reach EOF
+ ASSERT_EQ(Token::ERROR, s_crlf.getToken(lexer).getType());
+ EXPECT_EQ(Token::UNBALANCED_PAREN, s_crlf.getToken(lexer).getErrorCode());
+ EXPECT_EQ(0, s_crlf.getParenCount(lexer)); // should be reset to 0
+}
+
+TEST_F(MasterLexerStateTest, startToComment) {
+ // Begin with 'start', skip the space, then encounter a comment. Skip
+ // the rest of the line, and recognize the newline. Note that the
+ // second ';' is simply ignored as part of the comment.
+ ss << " ;a;\n";
+ ss << ";a;"; // Likewise, but the comment ends with EOF.
+ lexer.pushSource(ss);
+
+ // Comment ending with EOL
+ EXPECT_EQ(s_null, State::start(lexer, common_options));
+ EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+
+ // Comment ending with EOF
+ EXPECT_EQ(s_null, State::start(lexer, common_options));
+ EXPECT_EQ(Token::END_OF_FILE, s_crlf.getToken(lexer).getType());
+}
+
+TEST_F(MasterLexerStateTest, commentAfterParen) {
+ // A comment after an opening parenthesis. The code exercised by the
+ // other tests should also ensure that it works correctly, but we
+ // check it explicitly.
+ ss << "( ;this is a comment\na)\n";
+ lexer.pushSource(ss);
+
+ // consume '(', skip the comment, consume 'a', then consume ')'
+ EXPECT_EQ(&s_string, State::start(lexer, common_options));
+ s_string.handle(lexer);
+ EXPECT_EQ(s_null, State::start(lexer, common_options));
+ EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+}
+
+TEST_F(MasterLexerStateTest, crlf) {
+ ss << "\r\n"; // case 1
+ ss << "\r "; // case 2
+ ss << "\r;comment\na"; // case 3
+ ss << "\r"; // case 4
+ lexer.pushSource(ss);
+
+ // 1. A sequence of \r, \n is recognized as a single 'end-of-line'
+ EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
+ EXPECT_EQ(s_null, s_crlf.handle(lexer)); // recognize '\n'
+ EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+ EXPECT_TRUE(s_crlf.wasLastEOL(lexer));
+
+ // 2. A single '\r' (not followed by \n) is recognized as a single
+ // 'end-of-line'. Then there will be an "initial WS".
+ EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
+ // see ' ', "unget" it
+ EXPECT_EQ(s_null, s_crlf.handle(lexer));
+ EXPECT_EQ(s_null, State::start(lexer, common_options)); // recognize ' '
+ EXPECT_EQ(Token::INITIAL_WS, s_crlf.getToken(lexer).getType());
+
+ // 3. A comment between \r and \n
+ EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
+ // skip the comment, recognize '\n'
+ EXPECT_EQ(s_null, s_crlf.handle(lexer));
+ EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+ EXPECT_EQ(&s_string, State::start(lexer, common_options)); // move past 'a'
+
+ // 4. \r then EOF
+ EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
+ // see EOF, then "unget" it
+ EXPECT_EQ(s_null, s_crlf.handle(lexer));
+ EXPECT_EQ(s_null, State::start(lexer, common_options)); // recognize EOF
+ EXPECT_EQ(Token::END_OF_FILE, s_crlf.getToken(lexer).getType());
+}
+
+}