summaryrefslogtreecommitdiffstats
path: root/src/lib/dns/master_lexer.h
blob: 33c0567b089bd22ea244c354ec2ea43d12abd879 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
// Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
// AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
// OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
// PERFORMANCE OF THIS SOFTWARE.

#ifndef MASTER_LEXER_H
#define MASTER_LEXER_H 1

#include <dns/exceptions.h>

#include <istream>
#include <string>

#include <stdint.h>

#include <boost/noncopyable.hpp>

namespace isc {
namespace dns {
// Forward declaration only: State is named here so that MasterLexer (below
// in this header) can declare it a friend without needing its definition.
namespace master_lexer_internal {
class State;
}

/// \brief Tokens for \c MasterLexer
///
/// This is a simple value-class encapsulating a type of a lexer token and
/// (if it has a value) its value.  Essentially, the class provides
/// constructors corresponding to different types of tokens, and corresponding
/// getter methods.  The type and value are fixed at the time of construction
/// and will never be modified throughout the lifetime of the object.
/// The getter methods are still provided to maximize the safety; an
/// application cannot refer to a value that is invalid for the type of token.
///
/// This class is intentionally implemented as copyable and assignable
/// (using the default version of copy constructor and assignment operator),
/// but it's mainly for internal implementation convenience.  Applications will
/// simply refer to Token object as a reference via the \c MasterLexer class.
class MasterToken {
public:
    /// \brief Enumeration for token types
    ///
    /// The enumerators up to and including \c NOVALUE_TYPE_MAX carry no
    /// value; the ones after it (\c STRING, \c QSTRING, \c NUMBER, \c ERROR)
    /// have an associated value retrievable through the matching getter.
    ///
    /// \note At the time of initial implementation, all numeric tokens
    /// that would be extracted from \c MasterLexer should be represented
    /// as an unsigned 32-bit integer.  If we see the need for larger integers
    /// or negative numbers, we can then extend the token types.
    enum Type {
        END_OF_LINE, ///< End of line detected
        END_OF_FILE, ///< End of file detected
        INITIAL_WS,  ///< White spaces at the beginning of a line after an
                     ///< end of line or at the beginning of file (if asked
                     ///< for detecting it)
        NOVALUE_TYPE_MAX = INITIAL_WS, ///< Max integer corresponding to
                                       ///< no-value (type only) types.
                                       ///< Mainly for internal use.
        STRING, ///< A single string
        QSTRING, ///< A single string quoted by double-quotes (").
        NUMBER,  ///< A decimal number (unsigned 32-bit)
        ERROR    ///< Error detected in getting a token
    };

    /// \brief Enumeration for lexer error codes
    enum ErrorCode {
        NOT_STARTED, ///< The lexer is just initialized and has no token
        UNBALANCED_PAREN,       ///< Unbalanced parentheses detected
        UNEXPECTED_END, ///< The lexer reaches the end of line or file
                        ///< unexpectedly
        UNBALANCED_QUOTES,      ///< Unbalanced quotations detected
        NO_TOKEN_PRODUCED, ///< No token was produced. This means programmer
                           ///< error and should never get out of the lexer.
        NUMBER_OUT_OF_RANGE, ///< Number was out of range
        BAD_NUMBER,    ///< Number is expected but not recognized
        UNEXPECTED_QUOTES, ///< Unexpected quotes character detected
        MAX_ERROR_CODE ///< Max integer corresponding to valid error codes.
                       ///< (excluding this one). Mainly for internal use.
    };

    /// \brief A simple representation of a range of a string.
    ///
    /// This is a straightforward pair of the start pointer of a string
    /// and its length.  The \c STRING and \c QSTRING types of tokens
    /// will be primarily represented in this form.
    ///
    /// Any character can be stored in the valid range of the region.
    /// In particular, there can be a nul character (\0) in the middle of
    /// the region.  So the usual string manipulation API may not work
    /// as expected.
    ///
    /// The \c MasterLexer implementation ensures that there are at least
    /// len + 1 bytes of valid memory region starting from beg, and that
    /// beg[len] is \0.  This means the application can use the bytes as a
    /// validly nul-terminated C string if there is no intermediate nul
    /// character.  Note also that due to this property beg is always non
    /// NULL; for an empty string len will be set to 0 and beg[0] is \0.
    struct StringRegion {
        const char* beg;        ///< The start address of the string
        size_t len;             ///< The length of the string in bytes
    };

    /// \brief Constructor for non-value type of token.
    ///
    /// No value is stored for this kind of token; only the type is set.
    ///
    /// \throw InvalidParameter A value type token is specified.
    /// \param type The type of the token.  It must indicate a non-value
    /// type (not larger than \c NOVALUE_TYPE_MAX).
    explicit MasterToken(Type type) : type_(type) {
        if (type > NOVALUE_TYPE_MAX) {
            isc_throw(InvalidParameter, "Token per-type constructor "
                      "called with invalid type: " << type);
        }
    }

    /// \brief Constructor for string and quoted-string types of token.
    ///
    /// The optional \c quoted parameter specifies whether it's a quoted or
    /// non quoted string.
    ///
    /// The string is specified as a pair of a pointer to the start address
    /// and its length.  Any character can be contained in any position of
    /// the valid range (see \c StringRegion).
    ///
    /// When it's a quoted string, the quotation marks must be excluded
    /// from the specified range.
    ///
    /// The characters are not copied: the token only records the given
    /// pointer and length, so it is usable only while that memory stays
    /// valid.
    ///
    /// \param str_beg The start address of the string
    /// \param str_len The size of the string in bytes
    /// \param quoted true if it's a quoted string; false otherwise.
    MasterToken(const char* str_beg, size_t str_len, bool quoted = false) :
        type_(quoted ? QSTRING : STRING)
    {
        val_.str_region_.beg = str_beg;
        val_.str_region_.len = str_len;
    }

    /// \brief Constructor for number type of token.
    ///
    /// \param number An unsigned 32-bit integer corresponding to the token
    /// value.
    explicit MasterToken(uint32_t number) : type_(NUMBER) {
        val_.number_ = number;
    }

    /// \brief Constructor for error type of token.
    ///
    /// \throw InvalidParameter Invalid error code value is specified
    /// (that is, a value not smaller than \c MAX_ERROR_CODE).
    /// \param error_code A pre-defined constant of \c ErrorCode.
    explicit MasterToken(ErrorCode error_code) : type_(ERROR) {
        if (!(error_code < MAX_ERROR_CODE)) {
            isc_throw(InvalidParameter, "Invalid master lexer error code: "
                      << error_code);
        }
        val_.error_code_ = error_code;
    }

    /// \brief Return the token type.
    ///
    /// \throw none
    /// \return The type given (or implied) at construction.
    Type getType() const { return (type_); }

    /// \brief Return the value of a string-variant token.
    ///
    /// \throw InvalidOperation Called on a non string-variant types of token.
    /// \return A reference to \c StringRegion corresponding to the string
    ///         token value.
    const StringRegion& getStringRegion() const {
        if (type_ != STRING && type_ != QSTRING) {
            isc_throw(InvalidOperation,
                      "Token::getStringRegion() for non string-variant type");
        }
        return (val_.str_region_);
    }

    /// \brief Return the value of a string-variant token as a string object.
    ///
    /// Note that the underlying string may contain a nul (\0) character
    /// in the middle.  The returned string object will contain all characters
    /// of the valid range of the underlying string.  So some string
    /// operations such as c_str() may not work as expected.
    ///
    /// \throw InvalidOperation Called on a non string-variant types of token.
    /// \throw std::bad_alloc Resource allocation failure in constructing the
    ///                       string object.
    /// \return A std::string object corresponding to the string token value.
    std::string getString() const {
        // Delegate to the fill-in overload so the type check and the copy
        // logic live in one place.
        std::string ret;
        getString(ret);
        return (ret);
    }

    /// \brief Fill in a string with the value of a string-variant token.
    ///
    /// This is similar to the other version of \c getString(), but
    /// the caller is supposed to pass a placeholder string object.
    /// This will be more efficient if the caller uses the same
    /// \c MasterLexer repeatedly and needs to get string token in the
    /// form of a string object many times as this version could reuse
    /// the existing internal storage of the passed string.
    ///
    /// Any existing content of the passed string will be removed.
    ///
    /// \throw InvalidOperation Called on a non string-variant types of token.
    /// \throw std::bad_alloc Resource allocation failure in constructing the
    ///                       string object.
    ///
    /// \param ret A string object to be filled with the token string.
    void getString(std::string& ret) const {
        if (type_ != STRING && type_ != QSTRING) {
            isc_throw(InvalidOperation,
                      "Token::getString() for non string-variant type");
        }
        // Copy exactly len bytes via the iterator-range form so that any
        // embedded nul characters are preserved.
        ret.assign(val_.str_region_.beg,
                   val_.str_region_.beg + val_.str_region_.len);
    }

    /// \brief Return the value of a number type token.
    ///
    /// \throw InvalidOperation Called on a non number type of token.
    /// \return The integer corresponding to the number token value.
    uint32_t getNumber() const {
        if (type_ != NUMBER) {
            isc_throw(InvalidOperation,
                      "Token::getNumber() for non number type");
        }
        return (val_.number_);
    }

    /// \brief Return the error code of a error type token.
    ///
    /// \throw InvalidOperation Called on a non error type of token.
    /// \return The error code of the token.
    ErrorCode getErrorCode() const {
        if (type_ != ERROR) {
            isc_throw(InvalidOperation,
                      "Token::getErrorCode() for non error type");
        }
        return (val_.error_code_);
    }

    /// \brief Return a textual description of the error of a error type token.
    ///
    /// The returned string would be useful to produce a log message when
    /// a zone file parser encounters an error.
    ///
    /// \throw InvalidOperation Called on a non error type of token.
    /// \throw std::bad_alloc Resource allocation failure in constructing the
    ///                       string object.
    /// \return A string object that describes the meaning of the error.
    std::string getErrorText() const;

private:
    Type type_;    // this is not const so the class can be assignable

    // We use a union to represent different types of token values via the
    // unified Token class.  The class integrity should ensure valid operation
    // on the union; getter methods should only refer to the member set at
    // the construction.  Each getter checks type_ first, so only the member
    // written by the matching constructor is ever read.
    union {
        StringRegion str_region_;   // set for STRING and QSTRING tokens
        uint32_t number_;           // set for NUMBER tokens
        ErrorCode error_code_;      // set for ERROR tokens
    } val_;
};

/// \brief Tokenizer for parsing DNS master files.
///
/// The \c MasterLexer class provides tokenize interfaces for parsing DNS
/// master files.  It understands some special rules of master files as
/// defined in RFC 1035, such as comments, character escaping, and multi-line
/// data, and provides the user application with the actual data in a
/// more convenient form such as a std::string object.
///
/// In order to support the $INCLUDE notation, this class is designed to be
/// able to operate on multiple files or input streams in the nested way.
/// The \c pushSource() and \c popSource() methods correspond to the push
/// and pop operations.
///
/// While this class is public, it is less likely to be used by normal
/// applications; it's mainly expected to be used within this library,
/// specifically by the \c MasterLoader class and \c Rdata implementation
/// classes.
///
/// \note The error handling policy of this class is slightly different from
/// that of other classes of this library.  We generally throw an exception
/// for an invalid input, whether it's more likely to be a program error or
/// a "user error", which means an invalid input that comes from outside of
/// the library.  But, this class returns an error code for some certain
/// types of user errors instead of throwing an exception.  Such cases include
/// a syntax error identified by the lexer or a misspelled file name that
/// causes a system error at the time of open.  This is based on the assumption
/// that the main user of this class is a parser of master files, where
/// we want to give an option to ignore some non fatal errors and continue
/// the parsing.  This will be useful if it just performs overall error
/// checks on a master file.  When the (immediate) caller needs to do explicit
/// error handling, exceptions are not that useful a tool for error reporting
/// because we cannot separate the normal and error cases anyway, which would
/// be one major advantage when we use exceptions.  And, exceptions are
/// generally more expensive, either when it happens or just by being able
/// to handle with \c try and \c catch (depending on the underlying
/// implementation of the exception handling).  For these reasons, some
/// methods of this class do not throw for an error that would be reported
/// as an exception in other classes.
class MasterLexer : public boost::noncopyable {
    // The internal State class is granted access to the private members of
    // this class.
    friend class master_lexer_internal::State;
public:
    /// \brief Exception thrown when we fail to read from the input
    /// stream or file.
    class ReadError : public Unexpected {
    public:
        /// \param file Source file name where the exception is raised.
        /// \param line Line number in \c file where it is raised.
        /// \param what Textual description of the failure.
        ReadError(const char* file, size_t line, const char* what) :
            Unexpected(file, line, what)
        {}
    };

    /// \brief Exception thrown from a wrapper version of
    /// \c MasterLexer::getNextToken() for non fatal errors.
    ///
    /// See the method description for more details.
    ///
    /// The \c token_ member variable (read-only) is set to a \c MasterToken
    /// object of type ERROR indicating the reason for the error.
    class LexerError : public isc::dns::Exception {
    public:
        /// The exception's description text is taken from
        /// \c error_token.getErrorText(), and a copy of the token is kept
        /// in \c token_.
        ///
        /// \param file Source file name where the exception is raised.
        /// \param line Line number in \c file where it is raised.
        /// \param error_token The token describing the error.
        LexerError(const char* file, size_t line, MasterToken error_token) :
            isc::dns::Exception(file, line, error_token.getErrorText().c_str()),
            token_(error_token)
        {}
        const MasterToken token_;
    };

    /// \brief Special value for input source size meaning "unknown".
    ///
    /// This constant value will be used as a return value of
    /// \c getTotalSourceSize() when the size of one of the pushed sources
    /// is unknown.  Note that this value itself is a valid integer in the
    /// range of the type, so there's still a small possibility of
    /// ambiguity.  In practice, however, the value should be sufficiently
    /// large to eliminate the possibility.
    static const size_t SOURCE_SIZE_UNKNOWN;

    /// \brief Options for getNextToken.
    ///
    /// A compound option, indicating multiple options are set, can be
    /// specified using the bitwise OR operator (operator|()).  Each
    /// enumerator other than \c NONE occupies a distinct bit.
    enum Options {
        NONE = 0,               ///< No option
        INITIAL_WS = 1, ///< recognize begin-of-line spaces after an
                        ///< end-of-line
        QSTRING = 2,    ///< recognize quoted string
        NUMBER = 4   ///< recognize numeric text as integer
    };

    /// \brief The constructor.
    ///
    /// \throw std::bad_alloc Internal resource allocation fails (rare case).
    MasterLexer();

    /// \brief The destructor.
    ///
    /// It internally closes any remaining input sources.
    ~MasterLexer();

    /// \brief Open a file and make it the current input source of MasterLexer.
    ///
    /// The opened file can be explicitly closed by the \c popSource() method;
    /// if \c popSource() is not called within the lifetime of the
    /// \c MasterLexer, it will be closed in the destructor.
    ///
    /// In the case of possible system errors in opening the file (most likely
    /// because of specifying a non-existent or unreadable file), it returns
    /// false, and if the optional \c error parameter is non NULL, it will be
    /// set to a description of the error (any existing content of the string
    /// will be discarded).  If opening the file succeeds, the given
    /// \c error parameter will be intact.
    ///
    /// Note that this method has two styles of error reporting: one by
    /// returning \c false (and setting \c error optionally) and the other
    /// by throwing an exception.  See the note for the class description
    /// about the distinction.
    ///
    /// \throw InvalidParameter filename is NULL
    /// \param filename A non NULL string specifying a master file
    /// \param error If non null, a placeholder to set error description in
    /// case of failure.
    ///
    /// \return true if pushing the file succeeds; false otherwise.
    bool pushSource(const char* filename, std::string* error = NULL);

    /// \brief Make the given stream the current input source of MasterLexer.
    ///
    /// The caller still holds the ownership of the passed stream; it's the
    /// caller's responsibility to keep it valid as long as it's used in
    /// \c MasterLexer or to release any resource for the stream after that.
    /// The caller can explicitly tell \c MasterLexer to stop using the
    /// stream by calling the \c popSource() method.
    ///
    /// The data in \c input must be complete at the time of this call.
    /// The behavior of the lexer is undefined if the caller builds or adds
    /// data in \c input after pushing it.
    ///
    /// Except for rare case system errors such as memory allocation failure,
    /// this method is generally expected to be exception free.  However,
    /// it can still throw if it encounters an unexpected failure when it
    /// tries to identify the "size" of the input source (see
    /// \c getTotalSourceSize()).  It's an unexpected result unless the
    /// caller intentionally passes a broken stream; otherwise it would mean
    /// some system-dependent unexpected behavior or possibly an internal bug.
    /// In these cases it throws an \c Unexpected exception.  Note that
    /// this version of the method doesn't return a boolean unlike the
    /// other version that takes a file name; since this failure is really
    /// unexpected and can be critical, it doesn't make sense to give the
    /// caller an option to continue (other than by explicitly catching the
    /// exception).
    ///
    /// \throw Unexpected An unexpected failure happens in initialization.
    ///
    /// \param input An input stream object that produces textual
    /// representation of DNS RRs.
    void pushSource(std::istream& input);

    /// \brief Stop using the most recently opened input source (file or
    /// stream).
    ///
    /// If it's a file, the previously opened file will be closed internally.
    /// If it's a stream, \c MasterLexer will simply stop using
    /// the stream; the caller can assume it will be never used in
    /// \c MasterLexer thereafter.
    ///
    /// This method must not be called when there is no source pushed for
    /// \c MasterLexer.  This method is otherwise exception free.
    ///
    /// \throw isc::InvalidOperation Called with no pushed source.
    void popSource();

    /// \brief Get number of sources inside the lexer.
    ///
    /// This method never throws.
    size_t getSourceCount() const;

    /// \brief Return the name of the current input source.
    ///
    /// If it's a file, it will be the C string given at the corresponding
    /// \c pushSource() call, that is, its filename.  If it's a stream, it will
    /// be formatted as \c "stream-%p" where \c %p is hex representation
    /// of the address of the stream object.
    ///
    /// If there is no opened source at the time of the call, this method
    /// returns an empty string.
    ///
    /// \throw std::bad_alloc Resource allocation failed for string
    /// construction (rare case)
    ///
    /// \return A string representation of the current source (see the
    /// description)
    std::string getSourceName() const;

    /// \brief Return the input source line number.
    ///
    /// If there is an opened source, the return value will be a non-0
    /// integer indicating the line number of the current source where
    /// the \c MasterLexer is currently working.  The expected usage of
    /// this value is to print a helpful error message when parsing fails
    /// by specifically identifying the position of the error.
    ///
    /// If there is no opened source at the time of the call, this method
    /// returns 0.
    ///
    /// \throw None
    ///
    /// \return The current line number of the source (see the description)
    size_t getSourceLine() const;

    /// \brief Return the total size of pushed sources.
    ///
    /// This method returns the sum of the size of sources that have been
    /// pushed to the lexer by the time of the call.  It would give the
    /// caller some hint about the amount of data the lexer is working on.
    ///
    /// The size of a normal file is equal to the file size at the time
    /// the source is pushed.  The size of other type of input stream is
    /// the size of the data available in the stream at the time the
    /// source is pushed.
    ///
    /// In some special cases, it's possible that the size of the file or
    /// stream is unknown.  It happens, for example, if the standard input
    /// is associated with a pipe from the output of another process and it's
    /// specified as an input source.  If the size of some of the pushed
    /// source is unknown, this method returns SOURCE_SIZE_UNKNOWN.
    ///
    /// The total size won't change when a source is popped.  So the return
    /// values of this method will monotonically increase or be
    /// \c SOURCE_SIZE_UNKNOWN; once it returns \c SOURCE_SIZE_UNKNOWN,
    /// any subsequent call will also result in that value, by the above
    /// definition.
    ///
    /// Before pushing any source, it returns 0.
    ///
    /// \throw None
    size_t getTotalSourceSize() const;

    /// \brief Return the position of lexer in the pushed sources so far.
    ///
    /// This method returns the position in terms of the number of recognized
    /// characters from all sources that have been pushed by the time of the
    /// call.  Conceptually, the position in a single source is the offset
    /// from the beginning of the file or stream to the current "read cursor"
    /// of the lexer.  The return value of this method is the sum of the
    /// positions in all the pushed sources.  If any of the sources has
    /// already been popped, the position of the source at the time of the
    /// pop operation will be used for the calculation.
    ///
    /// If the lexer reaches the end for each of all the pushed sources,
    /// the return value should be equal to that of \c getTotalSourceSize().
    /// It's generally expected that a source is popped when the lexer
    /// reaches the end of the source.  So, when the application of this
    /// class parses all contents of all sources, possibly with multiple
    /// pushes and pops, the return value of this method and
    /// \c getTotalSourceSize() should be identical (unless the latter
    /// returns SOURCE_SIZE_UNKNOWN).  But this is not necessarily
    /// guaranteed as the application can pop a source in the middle of
    /// parsing it.
    ///
    /// Before pushing any source, it returns 0.
    ///
    /// The return values of this method and \c getTotalSourceSize() would
    /// give the caller an idea of the progress of the lexer at the time of
    /// the call.  Note, however, that since it's not predictable whether
    /// more sources will be pushed after the call, the progress determined
    /// this way may not make much sense; it can only give an informational
    /// hint of the progress.
    ///
    /// Note that the conceptual "read cursor" would move backward after a
    /// call to \c ungetToken(), in which case this method will return a
    /// smaller value.  That is, unlike \c getTotalSourceSize(), return
    /// values of this method may not always monotonically increase.
    ///
    /// \throw None
    size_t getPosition() const;

    /// \brief Parse and return another token from the input.
    ///
    /// It reads a bit of the last opened source and produces another token
    /// found in it.
    ///
    /// This method does not provide the strong exception guarantee. Generally,
    /// if it throws, the object should not be used any more and should be
    /// discarded. It was decided all the exceptions thrown from here are
    /// serious enough that aborting the loading process is the only reasonable
    /// recovery anyway, so the strong exception guarantee is not needed.
    ///
    /// \param options The options can be used to modify the tokenization.
    ///     The method can be made reporting things which are usually ignored
    ///     by this parameter. Multiple options can be passed at once by
    ///     bitwise or (e.g. option1 | option2). See description of available
    ///     options.
    /// \return Next token found in the input. Note that the token refers to
    ///     some internal data in the lexer. It is valid only until
    ///     getNextToken or ungetToken is called. Also, the token becomes
    ///     invalid when the lexer is destroyed.
    /// \throw isc::InvalidOperation in case the source is not available. This
    ///     may mean the pushSource() has not been called yet, or that the
    ///     current source has been read past the end.
    /// \throw ReadError in case there's problem reading from the underlying
    ///     source (e.g. I/O error in the file on the disk).
    /// \throw std::bad_alloc in case allocation of some internal resources
    ///     or the token fail.
    const MasterToken& getNextToken(Options options = NONE);

    /// \brief Parse the input for the expected type of token.
    ///
    /// This method is a wrapper of the other version, customized for the case
    /// where a particular type of token is expected as the next one.
    /// More specifically, it's intended to be used to get tokens for RDATA
    /// fields.  Since most RDATA types are of fixed format, the token type is
    /// often predictable and the method interface can be simplified.
    ///
    /// This method basically works as follows: it gets the type of the
    /// expected token, calls the other version of \c getNextToken(Options),
    /// and returns the token if it's of the expected type (due to the usage
    /// assumption this should be normally the case).  There are some non
    /// trivial details though:
    ///
    /// - If the expected type is MasterToken::QSTRING, both quoted and
    ///   unquoted strings are recognized and returned.
    /// - A string with quotation marks is not recognized as a
    ///   MasterToken::STRING. You have to get it as a
    ///   MasterToken::QSTRING.
    /// - If the optional \c eol_ok parameter is \c true (very rare case),
    ///   MasterToken::END_OF_LINE and MasterToken::END_OF_FILE are recognized
    ///   and returned if they are found instead of the expected type of
    ///   token.
    /// - If the next token is not of the expected type (including the case
    ///   a number is expected but it's out of range), ungetToken() is
    ///   internally called so the caller can re-read that token.
    /// - If other types or errors (such as unbalanced parentheses) are
    ///   detected, the erroneous part isn't "ungotten"; the caller can
    ///   continue parsing after that part.
    ///
    /// In some very rare cases where the RDATA has an optional trailing field,
    /// the \c eol_ok parameter would be set to \c true.  This way the caller
    /// can handle both cases (the field does or does not exist) by a single
    /// call to this method.  In all other cases \c eol_ok should be set to
    /// \c false, and that is the default and can be omitted.
    ///
    /// Unlike the other version of \c getNextToken(Options), this method
    /// throws an exception of type \c LexerError for non fatal errors such as
    /// broken syntax or encountering an unexpected type of token.  This way
    /// the caller can write RDATA parser code without bothering to handle
    /// errors for each field.  For example, pseudo parser code for MX RDATA
    /// would look like this:
    /// \code
    ///    const uint32_t pref =
    ///        lexer.getNextToken(MasterToken::NUMBER).getNumber();
    ///    // check if pref is the uint16_t range; no other check is needed.
    ///    const Name mx(lexer.getNextToken(MasterToken::STRING).getString());
    /// \endcode
    ///
    /// In the case where \c LexerError exception is thrown, it's expected
    /// to be handled comprehensively for the parser of the RDATA or at a
    /// higher layer.  The \c token_ member variable of the corresponding
    /// \c LexerError exception object stores a token of type
    /// \c MasterToken::ERROR that indicates the reason for the error.
    ///
    /// Due to the specific intended usage of this method, only a subset
    /// of \c MasterToken::Type values are acceptable for the \c expect
    /// parameter: \c MasterToken::STRING, \c MasterToken::QSTRING, and
    /// \c MasterToken::NUMBER.  Specifying other values will result in
    /// an \c InvalidParameter exception.
    ///
    /// \throw InvalidParameter The expected token type is not allowed for
    /// this method.
    /// \throw LexerError The lexer finds non fatal error or it finds an
    /// unexpected type of token.
    /// \throw other Anything the other version of getNextToken() can throw.
    ///
    /// \param expect Expected type of token.  Must be either STRING, QSTRING,
    /// or NUMBER.
    /// \param eol_ok \c true iff END_OF_LINE or END_OF_FILE is acceptable.
    /// \return The expected type of token.
    const MasterToken& getNextToken(MasterToken::Type expect,
                                    bool eol_ok = false);

    /// \brief Return the last token back to the lexer.
    ///
    /// The method undoes the last call to getNextToken(). If you call the
    /// getNextToken() again with the same options, it'll return the same
    /// token. If the options are different, it may return a different token,
    /// but it acts as if the previous getNextToken() was never called.
    ///
    /// It is possible to return only one token back in time (you can't call
    /// ungetToken() twice in a row without calling getNextToken() in between
    /// successfully).
    ///
    /// It does not work after change of source (by pushSource or popSource).
    ///
    /// \throw isc::InvalidOperation If called a second time in a row or if
    ///     getNextToken() was not called since the last change of the source.
    void ungetToken();

private:
    // The implementation type is only declared here; its definition is not
    // visible in this header, and the class holds just a pointer to it.
    struct MasterLexerImpl;
    MasterLexerImpl* impl_;
};

/// \brief Combine two \c MasterLexer options into one compound option.
///
/// This convenience operator lets callers write <code>o1 | o2</code>
/// directly on \c MasterLexer::Options values.  Both operands are converted
/// to \c unsigned, OR-ed bit by bit, and the result is converted back to
/// the enumeration type.
///
/// \param o1 The first option (or compound option).
/// \param o2 The second option (or compound option).
/// \return An \c Options value with the bits of both operands set.
inline MasterLexer::Options
operator|(MasterLexer::Options o1, MasterLexer::Options o2) {
    const unsigned lhs_bits = static_cast<unsigned>(o1);
    const unsigned rhs_bits = static_cast<unsigned>(o2);
    return (static_cast<MasterLexer::Options>(lhs_bits | rhs_bits));
}

} // namespace dns
} // namespace isc
#endif  // MASTER_LEXER_H

// Local Variables:
// mode: c++
// End: