00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #ifndef __PION_HTTP_PARSER_HEADER__
00011 #define __PION_HTTP_PARSER_HEADER__
00012
00013 #include <string>
00014 #include <boost/noncopyable.hpp>
00015 #include <boost/function/function2.hpp>
00016 #include <boost/logic/tribool.hpp>
00017 #include <boost/system/error_code.hpp>
00018 #include <boost/thread/once.hpp>
00019 #include <pion/config.hpp>
00020 #include <pion/logger.hpp>
00021 #include <pion/http/message.hpp>
00022
00023 #ifndef BOOST_SYSTEM_NOEXCEPT
00024 #define BOOST_SYSTEM_NOEXCEPT BOOST_NOEXCEPT
00025 #endif
00026
00027
00028 namespace pion {
00029 namespace http {
00030
00031
00032
00033 class request;
00034 class response;
00035
00039 class PION_API parser :
00040 private boost::noncopyable
00041 {
00042
00043 public:
00044
00046 static const std::size_t DEFAULT_CONTENT_MAX;
00047
00049 typedef boost::function2<void, const char *, std::size_t> payload_handler_t;
00050
00052 enum error_value_t {
00053 ERROR_METHOD_CHAR = 1,
00054 ERROR_METHOD_SIZE,
00055 ERROR_URI_CHAR,
00056 ERROR_URI_SIZE,
00057 ERROR_QUERY_CHAR,
00058 ERROR_QUERY_SIZE,
00059 ERROR_VERSION_EMPTY,
00060 ERROR_VERSION_CHAR,
00061 ERROR_STATUS_EMPTY,
00062 ERROR_STATUS_CHAR,
00063 ERROR_HEADER_CHAR,
00064 ERROR_HEADER_NAME_SIZE,
00065 ERROR_HEADER_VALUE_SIZE,
00066 ERROR_INVALID_CONTENT_LENGTH,
00067 ERROR_CHUNK_CHAR,
00068 ERROR_MISSING_CHUNK_DATA,
00069 ERROR_MISSING_HEADER_DATA,
00070 ERROR_MISSING_TOO_MUCH_CONTENT,
00071 };
00072
00074 class error_category_t
00075 : public boost::system::error_category
00076 {
00077 public:
00078 const char *name() const BOOST_SYSTEM_NOEXCEPT { return "parser"; }
00079 std::string message(int ev) const {
00080 switch (ev) {
00081 case ERROR_METHOD_CHAR:
00082 return "invalid method character";
00083 case ERROR_METHOD_SIZE:
00084 return "method exceeds maximum size";
00085 case ERROR_URI_CHAR:
00086 return "invalid URI character";
00087 case ERROR_URI_SIZE:
00088 return "method exceeds maximum size";
00089 case ERROR_QUERY_CHAR:
00090 return "invalid query string character";
00091 case ERROR_QUERY_SIZE:
00092 return "query string exceeds maximum size";
00093 case ERROR_VERSION_EMPTY:
00094 return "HTTP version undefined";
00095 case ERROR_VERSION_CHAR:
00096 return "invalid version character";
00097 case ERROR_STATUS_EMPTY:
00098 return "HTTP status undefined";
00099 case ERROR_STATUS_CHAR:
00100 return "invalid status character";
00101 case ERROR_HEADER_CHAR:
00102 return "invalid header character";
00103 case ERROR_HEADER_NAME_SIZE:
00104 return "header name exceeds maximum size";
00105 case ERROR_HEADER_VALUE_SIZE:
00106 return "header value exceeds maximum size";
00107 case ERROR_INVALID_CONTENT_LENGTH:
00108 return "invalid Content-Length header";
00109 case ERROR_CHUNK_CHAR:
00110 return "invalid chunk character";
00111 case ERROR_MISSING_HEADER_DATA:
00112 return "missing header data";
00113 case ERROR_MISSING_CHUNK_DATA:
00114 return "missing chunk data";
00115 case ERROR_MISSING_TOO_MUCH_CONTENT:
00116 return "missing too much content";
00117 }
00118 return "parser error";
00119 }
00120 };
00121
00129 parser(const bool is_request, std::size_t max_content_length = DEFAULT_CONTENT_MAX)
00130 : m_logger(PION_GET_LOGGER("pion.http.parser")), m_is_request(is_request),
00131 m_read_ptr(NULL), m_read_end_ptr(NULL), m_message_parse_state(PARSE_START),
00132 m_headers_parse_state(is_request ? PARSE_METHOD_START : PARSE_HTTP_VERSION_H),
00133 m_chunked_content_parse_state(PARSE_CHUNK_SIZE_START), m_status_code(0),
00134 m_bytes_content_remaining(0), m_bytes_content_read(0),
00135 m_bytes_last_read(0), m_bytes_total_read(0),
00136 m_max_content_length(max_content_length),
00137 m_parse_headers_only(false), m_save_raw_headers(false)
00138 {}
00139
00141 virtual ~parser() {}
00142
00154 boost::tribool parse(http::message& http_msg, boost::system::error_code& ec);
00155
00168 boost::tribool parse_missing_data(http::message& http_msg, std::size_t len,
00169 boost::system::error_code& ec);
00170
00176 void finish(http::message& http_msg) const;
00177
00184 inline void set_read_buffer(const char *ptr, size_t len) {
00185 m_read_ptr = ptr;
00186 m_read_end_ptr = ptr + len;
00187 }
00188
00195 inline void load_read_pos(const char *&read_ptr, const char *&read_end_ptr) const {
00196 read_ptr = m_read_ptr;
00197 read_end_ptr = m_read_end_ptr;
00198 }
00199
00208 inline bool check_premature_eof(http::message& http_msg) {
00209 if (m_message_parse_state != PARSE_CONTENT_NO_LENGTH)
00210 return true;
00211 m_message_parse_state = PARSE_END;
00212 http_msg.concatenate_chunks();
00213 finish(http_msg);
00214 return false;
00215 }
00216
00222 inline void parse_headers_only(bool b = true) { m_parse_headers_only = b; }
00223
00229 inline void skip_header_parsing(http::message& http_msg) {
00230 boost::system::error_code ec;
00231 finish_header_parsing(http_msg, ec);
00232 }
00233
00235 inline void reset(void) {
00236 m_message_parse_state = PARSE_START;
00237 m_headers_parse_state = (m_is_request ? PARSE_METHOD_START : PARSE_HTTP_VERSION_H);
00238 m_chunked_content_parse_state = PARSE_CHUNK_SIZE_START;
00239 m_status_code = 0;
00240 m_status_message.erase();
00241 m_method.erase();
00242 m_resource.erase();
00243 m_query_string.erase();
00244 m_raw_headers.erase();
00245 m_bytes_content_read = m_bytes_last_read = m_bytes_total_read = 0;
00246 }
00247
00249 inline bool eof(void) const { return m_read_ptr == NULL || m_read_ptr >= m_read_end_ptr; }
00250
00252 inline std::size_t bytes_available(void) const { return (eof() ? 0 : (std::size_t)(m_read_end_ptr - m_read_ptr)); }
00253
00255 inline std::size_t gcount(void) const { return m_bytes_last_read; }
00256
00258 inline std::size_t get_total_bytes_read(void) const { return m_bytes_total_read; }
00259
00261 inline std::size_t get_content_bytes_read(void) const { return m_bytes_content_read; }
00262
00264 inline std::size_t get_max_content_length(void) const { return m_max_content_length; }
00265
00267 inline const std::string& get_raw_headers(void) const { return m_raw_headers; }
00268
00270 inline bool get_save_raw_headers(void) const { return m_save_raw_headers; }
00271
00273 inline bool get_parse_headers_only(void) { return m_parse_headers_only; }
00274
00276 inline bool is_parsing_request(void) const { return m_is_request; }
00277
00279 inline bool is_parsing_response(void) const { return ! m_is_request; }
00280
00282 inline void set_payload_handler(payload_handler_t& h) { m_payload_handler = h; }
00283
00285 inline void set_max_content_length(std::size_t n) { m_max_content_length = n; }
00286
00288 inline void reset_max_content_length(void) { m_max_content_length = DEFAULT_CONTENT_MAX; }
00289
00291 inline void set_save_raw_headers(bool b) { m_save_raw_headers = b; }
00292
00294 inline void set_logger(logger log_ptr) { m_logger = log_ptr; }
00295
00297 inline logger get_logger(void) { return m_logger; }
00298
00299
00312 static bool parse_uri(const std::string& uri, std::string& proto,
00313 std::string& host, boost::uint16_t& port, std::string& path,
00314 std::string& query);
00315
00326 static bool parse_url_encoded(ihash_multimap& dict,
00327 const char *ptr, const std::size_t len);
00328
00340 static bool parse_multipart_form_data(ihash_multimap& dict,
00341 const std::string& content_type,
00342 const char *ptr, const std::size_t len);
00343
00355 static bool parse_cookie_header(ihash_multimap& dict,
00356 const char *ptr, const std::size_t len,
00357 bool set_cookie_header);
00358
00369 static inline bool parse_cookie_header(ihash_multimap& dict,
00370 const std::string& cookie_header, bool set_cookie_header)
00371 {
00372 return parse_cookie_header(dict, cookie_header.c_str(), cookie_header.size(), set_cookie_header);
00373 }
00374
00384 static inline bool parse_url_encoded(ihash_multimap& dict,
00385 const std::string& query)
00386 {
00387 return parse_url_encoded(dict, query.c_str(), query.size());
00388 }
00389
00400 static inline bool parse_multipart_form_data(ihash_multimap& dict,
00401 const std::string& content_type,
00402 const std::string& form_data)
00403 {
00404 return parse_multipart_form_data(dict, content_type, form_data.c_str(), form_data.size());
00405 }
00406
00419 boost::tribool finish_header_parsing(http::message& http_msg,
00420 boost::system::error_code& ec);
00421
00431 static bool parse_forwarded_for(const std::string& header, std::string& public_ip);
00432
00434 static inline error_category_t& get_error_category(void) {
00435 boost::call_once(parser::create_error_category, m_instance_flag);
00436 return *m_error_category_ptr;
00437 }
00438
00439
00440 protected:
00441
00443 virtual void finished_parsing_headers(const boost::system::error_code& ec) {}
00444
00457 boost::tribool parse_headers(http::message& http_msg, boost::system::error_code& ec);
00458
00464 void update_message_with_header_data(http::message& http_msg) const;
00465
00477 boost::tribool parse_chunks(http::message::chunk_cache_t& chunk_buffers,
00478 boost::system::error_code& ec);
00479
00491 boost::tribool consume_content(http::message& http_msg,
00492 boost::system::error_code& ec);
00493
00501 std::size_t consume_content_as_next_chunk(http::message::chunk_cache_t& chunk_buffers);
00502
00508 static void compute_msg_status(http::message& http_msg, bool msg_parsed_ok);
00509
00516 static inline void set_error(boost::system::error_code& ec, error_value_t ev) {
00517 ec = boost::system::error_code(static_cast<int>(ev), get_error_category());
00518 }
00519
00521 static void create_error_category(void);
00522
00523
00524
00525 inline static bool is_char(int c);
00526 inline static bool is_control(int c);
00527 inline static bool is_special(int c);
00528 inline static bool is_digit(int c);
00529 inline static bool is_hex_digit(int c);
00530 inline static bool is_cookie_attribute(const std::string& name, bool set_cookie_header);
00531
00532
00534 static const boost::uint32_t STATUS_MESSAGE_MAX;
00535
00537 static const boost::uint32_t METHOD_MAX;
00538
00540 static const boost::uint32_t RESOURCE_MAX;
00541
00543 static const boost::uint32_t QUERY_STRING_MAX;
00544
00546 static const boost::uint32_t HEADER_NAME_MAX;
00547
00549 static const boost::uint32_t HEADER_VALUE_MAX;
00550
00552 static const boost::uint32_t QUERY_NAME_MAX;
00553
00555 static const boost::uint32_t QUERY_VALUE_MAX;
00556
00558 static const boost::uint32_t COOKIE_NAME_MAX;
00559
00561 static const boost::uint32_t COOKIE_VALUE_MAX;
00562
00563
00565 mutable logger m_logger;
00566
00568 const bool m_is_request;
00569
00571 const char * m_read_ptr;
00572
00574 const char * m_read_end_ptr;
00575
00576
00577 private:
00578
00580 enum message_parse_state_t {
00581 PARSE_START, PARSE_HEADERS, PARSE_FOOTERS, PARSE_CONTENT,
00582 PARSE_CONTENT_NO_LENGTH, PARSE_CHUNKS, PARSE_END
00583 };
00584
00587 enum header_parse_state_t {
00588 PARSE_METHOD_START, PARSE_METHOD, PARSE_URI_STEM, PARSE_URI_QUERY,
00589 PARSE_HTTP_VERSION_H, PARSE_HTTP_VERSION_T_1, PARSE_HTTP_VERSION_T_2,
00590 PARSE_HTTP_VERSION_P, PARSE_HTTP_VERSION_SLASH,
00591 PARSE_HTTP_VERSION_MAJOR_START, PARSE_HTTP_VERSION_MAJOR,
00592 PARSE_HTTP_VERSION_MINOR_START, PARSE_HTTP_VERSION_MINOR,
00593 PARSE_STATUS_CODE_START, PARSE_STATUS_CODE, PARSE_STATUS_MESSAGE,
00594 PARSE_EXPECTING_NEWLINE, PARSE_EXPECTING_CR,
00595 PARSE_HEADER_WHITESPACE, PARSE_HEADER_START, PARSE_HEADER_NAME,
00596 PARSE_SPACE_BEFORE_HEADER_VALUE, PARSE_HEADER_VALUE,
00597 PARSE_EXPECTING_FINAL_NEWLINE, PARSE_EXPECTING_FINAL_CR
00598 };
00599
00602 enum chunk_parse_state_t {
00603 PARSE_CHUNK_SIZE_START, PARSE_CHUNK_SIZE,
00604 PARSE_EXPECTING_IGNORED_TEXT_AFTER_CHUNK_SIZE,
00605 PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE,
00606 PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE, PARSE_CHUNK,
00607 PARSE_EXPECTING_CR_AFTER_CHUNK, PARSE_EXPECTING_LF_AFTER_CHUNK,
00608 PARSE_EXPECTING_FINAL_CR_OR_FOOTERS_AFTER_LAST_CHUNK,
00609 PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK
00610 };
00611
00612
00614 message_parse_state_t m_message_parse_state;
00615
00617 header_parse_state_t m_headers_parse_state;
00618
00620 chunk_parse_state_t m_chunked_content_parse_state;
00621
00623 payload_handler_t m_payload_handler;
00624
00626 boost::uint16_t m_status_code;
00627
00629 std::string m_status_message;
00630
00632 std::string m_method;
00633
00635 std::string m_resource;
00636
00638 std::string m_query_string;
00639
00641 std::string m_raw_headers;
00642
00644 std::string m_header_name;
00645
00647 std::string m_header_value;
00648
00650 std::string m_chunk_size_str;
00651
00653 std::size_t m_size_of_current_chunk;
00654
00656 std::size_t m_bytes_read_in_current_chunk;
00657
00659 std::size_t m_bytes_content_remaining;
00660
00662 std::size_t m_bytes_content_read;
00663
00665 std::size_t m_bytes_last_read;
00666
00668 std::size_t m_bytes_total_read;
00669
00671 std::size_t m_max_content_length;
00672
00674 bool m_parse_headers_only;
00675
00677 bool m_save_raw_headers;
00678
00680 static error_category_t * m_error_category_ptr;
00681
00683 static boost::once_flag m_instance_flag;
00684 };
00685
00686
00687
00688
00689 inline bool parser::is_char(int c)
00690 {
00691 return(c >= 0 && c <= 127);
00692 }
00693
00694 inline bool parser::is_control(int c)
00695 {
00696 return( (c >= 0 && c <= 31) || c == 127);
00697 }
00698
00699 inline bool parser::is_special(int c)
00700 {
00701 switch (c) {
00702 case '(': case ')': case '<': case '>': case '@':
00703 case ',': case ';': case ':': case '\\': case '"':
00704 case '/': case '[': case ']': case '?': case '=':
00705 case '{': case '}': case ' ': case '\t':
00706 return true;
00707 default:
00708 return false;
00709 }
00710 }
00711
00712 inline bool parser::is_digit(int c)
00713 {
00714 return(c >= '0' && c <= '9');
00715 }
00716
00717 inline bool parser::is_hex_digit(int c)
00718 {
00719 return((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
00720 }
00721
00722 inline bool parser::is_cookie_attribute(const std::string& name, bool set_cookie_header)
00723 {
00724 return (name.empty() || name[0] == '$' || (set_cookie_header &&
00725 (name=="Comment" || name=="Domain" || name=="Max-Age" || name=="Path" || name=="Secure" || name=="Version" || name=="Expires")
00726 ) );
00727 }
00728
00729 }
00730 }
00731
00732 #endif