// Source path: root/src/lib/http/http_message_parser_base.h
// Blob: 01f9831e2db1b6c061091dc2527aa076b092b1ca
// Copyright (C) 2017-2020 Internet Systems Consortium, Inc. ("ISC")
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef HTTP_MESSAGE_PARSER_BASE_H
#define HTTP_MESSAGE_PARSER_BASE_H

#include <exceptions/exceptions.h>
#include <http/http_message.h>
#include <util/state_model.h>
#include <functional>
#include <string>

namespace isc {
namespace http {

/// @brief Exception thrown when an error occurs while parsing an HTTP
/// message.
///
/// The most common errors are due to receiving malformed requests.
class HttpParseError : public Exception {
public:
    /// @brief Constructor.
    ///
    /// All arguments are passed unchanged to the @c isc::Exception
    /// constructor.
    ///
    /// @param file Source file name forwarded to the base exception.
    /// @param line Source line number forwarded to the base exception.
    /// @param what Textual description of the error.
    HttpParseError(const char* file, size_t line, const char* what) :
        isc::Exception(file, line, what) { }
};

/// @brief Base class for the HTTP message parsers.
///
/// This is a base class for @c HttpRequestParser and @c HttpResponseParser
/// classes. It provides common states, events and functionality for processing
/// the received HTTP messages.
///
/// This class must not be used directly. Instead, an instance of the
/// derived class should be used.
///
/// HTTP uses TCP as a transport which is asynchronous in nature, i.e. the
/// HTTP message is received in chunks and multiple TCP connections can be
/// established at the same time. Multiplexing between these connections
/// requires providing a separate state machine per connection to "remember"
/// the state of each transaction when the parser is waiting for asynchronous
/// data to be delivered. While the parser is waiting for the data, it can
/// parse requests received over other connections. This class provides means
/// for parsing partial data received over the specific connection and
/// interrupting data parsing to switch to a different context.
///
/// The @ref HttpMessageParserBase::poll method runs the parser's state
/// machine as long as there are unparsed data in the parser's internal
/// buffer. This method returns control to the caller when the parser
/// runs out of data in this buffer. The caller must feed the buffer by calling
/// @ref HttpMessageParserBase::postBuffer and then run
/// @ref HttpMessageParserBase::poll again.
///
/// If the caller provides more data than indicated by the "Content-Length"
/// header, the parser returns from @c poll() after parsing the data which
/// constitute the HTTP message and does not parse the extraneous data. The
/// caller should test the @ref HttpMessageParserBase::needData and
/// @ref HttpMessageParserBase::httpParseOk to determine whether parsing has
/// completed.
///
/// The @ref util::StateModel::runModel must not be used to run the parser
/// state machine, thus it is made a private method.
class HttpMessageParserBase : public util::StateModel {
public:

    /// @name States supported by the HttpMessageParserBase.
    ///
    //@{

    /// @brief Parsing successfully completed.
    static const int HTTP_PARSE_OK_ST = SM_DERIVED_STATE_MIN + 1000;

    /// @brief Parsing failed.
    static const int HTTP_PARSE_FAILED_ST = SM_DERIVED_STATE_MIN + 1001;

    //@}

    /// @name Events used during HTTP message parsing.
    ///
    //@{

    /// @brief Chunk of data successfully read and parsed.
    static const int DATA_READ_OK_EVT = SM_DERIVED_EVENT_MIN + 1;

    /// @brief Unable to proceed with parsing until new data is provided.
    static const int NEED_MORE_DATA_EVT = SM_DERIVED_EVENT_MIN + 2;

    /// @brief New data provided and parsing should continue.
    static const int MORE_DATA_PROVIDED_EVT = SM_DERIVED_EVENT_MIN + 3;

    /// @brief Parsing HTTP message successful.
    static const int HTTP_PARSE_OK_EVT = SM_DERIVED_EVENT_MIN + 1000;

    /// @brief Parsing HTTP message failed.
    static const int HTTP_PARSE_FAILED_EVT = SM_DERIVED_EVENT_MIN + 1001;

    //@}

    /// @brief Constructor.
    ///
    /// The constructor is explicit so that an @c HttpMessage can never be
    /// silently converted into a parser object.
    ///
    /// @param message Reference to the HTTP request or response message.
    explicit HttpMessageParserBase(HttpMessage& message);

    /// @brief Run the parser as long as the amount of data is sufficient.
    ///
    /// The data to be parsed should be provided by calling
    /// @ref HttpMessageParserBase::postBuffer. When the parser reaches the end
    /// of the data buffer the @ref HttpMessageParserBase::poll sets the next
    /// event to @ref NEED_MORE_DATA_EVT and returns. The caller should then invoke
    /// @ref HttpMessageParserBase::postBuffer again to provide more data to the
    /// parser, and call @ref HttpMessageParserBase::poll to continue parsing.
    ///
    /// This method also returns when parsing completes or fails. The last
    /// event can be examined to check whether parsing was successful or not.
    void poll();

    /// @brief Returns true if the parser needs more data to continue.
    ///
    /// @return true if the next event is NEED_MORE_DATA_EVT.
    bool needData() const;

    /// @brief Returns true if the message has been parsed successfully.
    ///
    /// @return true if parsing has completed successfully, false otherwise.
    bool httpParseOk() const;

    /// @brief Returns error message.
    ///
    /// @return Copy of the stored error message.
    std::string getErrorMessage() const {
        return (error_message_);
    }

    /// @brief Provides more input data to the parser.
    ///
    /// This method must be called prior to calling @ref HttpMessageParserBase::poll
    /// to deliver data to be parsed. HTTP messages are received over TCP and
    /// multiple reads may be necessary to retrieve the entire message. There is
    /// no need to accumulate the entire message to start parsing it. A chunk
    /// of data can be provided to the parser using this method and parsed right
    /// away using @ref HttpMessageParserBase::poll.
    ///
    /// @param buf A pointer to the buffer holding the data.
    /// @param buf_size Size of the data within the buffer.
    void postBuffer(const void* buf, const size_t buf_size);

    /// @brief Returns parser's input buffer as string.
    ///
    /// @param limit Maximum length of the buffer to be output. If the limit is 0,
    /// the length of the output is unlimited.
    /// @return Textual representation of the input buffer.
    std::string getBufferAsString(const size_t limit = 0) const;

    /// @brief Formats provided HTTP message for logging.
    ///
    /// This method is useful in cases when there is a need to log an HTTP message
    /// (as text). If the @c limit is specified the message output is limited to
    /// this size. If the @c limit is set to 0 (default), the whole message is
    /// output. The @c getBufferAsString method calls this method internally.
    ///
    /// @param message HTTP message to be logged.
    /// @param limit Maximum length of the buffer to be output. If the limit is 0,
    /// the length of the output is unlimited.
    /// @return HTTP message formatted for logging.
    static std::string logFormatHttpMessage(const std::string& message,
                                            const size_t limit = 0);

private:

    /// @brief Make @ref runModel private to make sure that the caller uses
    /// @ref poll method instead.
    using StateModel::runModel;

protected:

    /// @brief Define events used by the parser.
    virtual void defineEvents();

    /// @brief Verifies events used by the parser.
    virtual void verifyEvents();

    /// @brief Defines states of the parser.
    virtual void defineStates();

    /// @brief Generic parser handler which reads a single byte of data and
    /// parses it using specified callback function.
    ///
    /// This generic handler is used in most of the parser states to parse a
    /// single byte of input data. If there is no more data it simply returns.
    /// Otherwise, if the next event is DATA_READ_OK_EVT or
    /// MORE_DATA_PROVIDED_EVT, it calls the provided callback function to
    /// parse the new byte of data. For all other events it throws an exception.
    ///
    /// @param handler_name Name of the handler function which called this
    /// method.
    /// @param after_read_logic Callback function to parse the byte of data.
    /// This callback function implements state specific logic.
    ///
    /// @throw HttpParseError when invalid event occurred.
    void stateWithReadHandler(const std::string& handler_name,
                              std::function<void(const char c)>
                              after_read_logic);

    /// @brief Generic parser handler which reads multiple bytes of data and
    /// parses it using specified callback function.
    ///
    /// This handler is mostly used for parsing body of the HTTP message,
    /// where we don't validate the content read. Reading multiple bytes
    /// is the most efficient. If there is no more data it simply returns.
    /// Otherwise, if the next event is DATA_READ_OK_EVT or
    /// MORE_DATA_PROVIDED_EVT, it calls the provided callback function to
    /// parse the new bytes of data.
    ///
    /// @param handler_name Name of the handler function which called this
    /// method.
    /// @param after_read_logic Callback function to parse multiple bytes of
    /// data. This callback function implements state specific logic.
    ///
    /// @throw HttpParseError when invalid event occurred.
    void stateWithMultiReadHandler(const std::string& handler_name,
                                   std::function<void(const std::string&)>
                                   after_read_logic);

    /// @brief Transition parser to failure state.
    ///
    /// This method transitions the parser to @ref HTTP_PARSE_FAILED_ST and
    /// sets next event to HTTP_PARSE_FAILED_EVT.
    ///
    /// @param error_msg Error message explaining the failure.
    void parseFailure(const std::string& error_msg);

    /// @brief A method called when parsing fails.
    ///
    /// @param explanation Error message explaining the reason for parsing
    /// failure.
    virtual void onModelFailure(const std::string& explanation);

    /// @brief Retrieves next bytes of data from the buffer.
    ///
    /// During normal operation, when there is no more data in the buffer,
    /// the parser sets NEED_MORE_DATA_EVT as next event to signal the need for
    /// calling @ref HttpMessageParserBase::postBuffer.
    ///
    /// @param [out] bytes Reference to the variable where read data should be stored.
    /// @param limit Maximum number of bytes to be read.
    ///
    /// @throw HttpParseError If current event is already set to
    /// NEED_MORE_DATA_EVT or MORE_DATA_PROVIDED_EVT. In the former case, it
    /// indicates that the caller failed to provide new data using
    /// @ref HttpMessageParserBase::postBuffer. The latter case is highly unlikely
    /// as it indicates that no new data were provided but the state of the
    /// parser was changed from NEED_MORE_DATA_EVT or the data were provided
    /// but the data buffer is empty. In both cases, it is an internal server
    /// error.
    void getNextFromBuffer(std::string& bytes, const size_t limit = 1);

    /// @brief This method is called when invalid event occurred in a particular
    /// parser state.
    ///
    /// This method simply throws @ref HttpParseError informing about invalid
    /// event occurring for the particular parser state. The error message
    /// includes the name of the handler in which the exception has been
    /// thrown. It also includes the event which caused the exception.
    ///
    /// @param handler_name Name of the handler in which the exception is
    /// thrown.
    /// @param event An event which caused the exception.
    ///
    /// @throw HttpParseError always.
    void invalidEventError(const std::string& handler_name,
                           const unsigned int event);

    /// @brief Handler for HTTP_PARSE_OK_ST and HTTP_PARSE_FAILED_ST.
    ///
    /// If parsing is successful, it calls @ref HttpRequest::create to validate
    /// the HTTP request. In both cases it transitions the parser to the END_ST.
    void parseEndedHandler();

    /// @brief Tries to read next bytes from buffer.
    ///
    /// @param [out] next A reference to the variable where read data should be
    /// stored.
    /// @param limit Maximum number of characters to be read.
    ///
    /// @return true if data was successfully read, false otherwise.
    bool popNextFromBuffer(std::string& next, const size_t limit = 1);

    /// @brief Checks if specified value is a character.
    ///
    /// @param c Value to be checked.
    ///
    /// @return true, if specified value is a character.
    bool isChar(const char c) const;

    /// @brief Checks if specified value is a control value.
    ///
    /// @param c Value to be checked.
    ///
    /// @return true, if specified value is a control value.
    bool isCtl(const char c) const;

    /// @brief Checks if specified value is a special character.
    ///
    /// @param c Value to be checked.
    ///
    /// @return true, if specified value is a special character.
    bool isSpecial(const char c) const;

    /// @brief Reference to the parsed HTTP message.
    HttpMessage& message_;

    /// @brief Internal buffer from which parser reads data.
    std::string buffer_;

    /// @brief Position of the next character to read from the buffer.
    size_t buffer_pos_;

    /// @brief Error message set by @ref onModelFailure.
    std::string error_message_;
};

} // end of namespace isc::http
} // end of namespace isc

#endif