00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include <cstdlib>
00011 #include <cstring>
00012 #include <boost/regex.hpp>
00013 #include <boost/assert.hpp>
00014 #include <boost/logic/tribool.hpp>
00015 #include <boost/algorithm/string.hpp>
00016 #include <pion/algorithm.hpp>
00017 #include <pion/http/parser.hpp>
00018 #include <pion/http/request.hpp>
00019 #include <pion/http/response.hpp>
00020 #include <pion/http/message.hpp>
00021
00022
00023 namespace pion {
00024 namespace http {
00025
00026
00027
00028
00029 const boost::uint32_t parser::STATUS_MESSAGE_MAX = 1024;
00030 const boost::uint32_t parser::METHOD_MAX = 1024;
00031 const boost::uint32_t parser::RESOURCE_MAX = 256 * 1024;
00032 const boost::uint32_t parser::QUERY_STRING_MAX = 1024 * 1024;
00033 const boost::uint32_t parser::HEADER_NAME_MAX = 1024;
00034 const boost::uint32_t parser::HEADER_VALUE_MAX = 1024 * 1024;
00035 const boost::uint32_t parser::QUERY_NAME_MAX = 1024;
00036 const boost::uint32_t parser::QUERY_VALUE_MAX = 1024 * 1024;
00037 const boost::uint32_t parser::COOKIE_NAME_MAX = 1024;
00038 const boost::uint32_t parser::COOKIE_VALUE_MAX = 1024 * 1024;
00039 const std::size_t parser::DEFAULT_CONTENT_MAX = 1024 * 1024;
00040 parser::error_category_t * parser::m_error_category_ptr = NULL;
00041 boost::once_flag parser::m_instance_flag = BOOST_ONCE_INIT;
00042
00043
00044
00045
00046 boost::tribool parser::parse(http::message& http_msg,
00047 boost::system::error_code& ec)
00048 {
00049 BOOST_ASSERT(! eof() );
00050
00051 boost::tribool rc = boost::indeterminate;
00052 std::size_t total_bytes_parsed = 0;
00053
00054 if(http_msg.has_missing_packets()) {
00055 http_msg.set_data_after_missing_packet(true);
00056 }
00057
00058 do {
00059 switch (m_message_parse_state) {
00060
00061 case PARSE_START:
00062 m_message_parse_state = PARSE_HEADERS;
00063
00064
00065
00066 case PARSE_HEADERS:
00067 case PARSE_FOOTERS:
00068 rc = parse_headers(http_msg, ec);
00069 total_bytes_parsed += m_bytes_last_read;
00070
00071 if (rc == true && m_message_parse_state == PARSE_HEADERS) {
00072
00073
00074 rc = finish_header_parsing(http_msg, ec);
00075 }
00076 break;
00077
00078
00079 case PARSE_CHUNKS:
00080 rc = parse_chunks(http_msg.get_chunk_cache(), ec);
00081 total_bytes_parsed += m_bytes_last_read;
00082
00083 if (rc == true && !m_payload_handler) {
00084 http_msg.concatenate_chunks();
00085
00086
00087 rc = ((m_message_parse_state == PARSE_FOOTERS) ?
00088 boost::indeterminate : (boost::tribool)true);
00089 }
00090 break;
00091
00092
00093 case PARSE_CONTENT:
00094 rc = consume_content(http_msg, ec);
00095 total_bytes_parsed += m_bytes_last_read;
00096 break;
00097
00098
00099 case PARSE_CONTENT_NO_LENGTH:
00100 consume_content_as_next_chunk(http_msg.get_chunk_cache());
00101 total_bytes_parsed += m_bytes_last_read;
00102 break;
00103
00104
00105 case PARSE_END:
00106 rc = true;
00107 break;
00108 }
00109 } while ( boost::indeterminate(rc) && ! eof() );
00110
00111
00112 if (rc == true) {
00113 m_message_parse_state = PARSE_END;
00114 finish(http_msg);
00115 } else if(rc == false) {
00116 compute_msg_status(http_msg, false);
00117 }
00118
00119
00120 m_bytes_last_read = total_bytes_parsed;
00121
00122 return rc;
00123 }
00124
00125 boost::tribool parser::parse_missing_data(http::message& http_msg,
00126 std::size_t len, boost::system::error_code& ec)
00127 {
00128 static const char MISSING_DATA_CHAR = 'X';
00129 boost::tribool rc = boost::indeterminate;
00130
00131 http_msg.set_missing_packets(true);
00132
00133 switch (m_message_parse_state) {
00134
00135
00136 case PARSE_START:
00137 case PARSE_HEADERS:
00138 case PARSE_FOOTERS:
00139 set_error(ec, ERROR_MISSING_HEADER_DATA);
00140 rc = false;
00141 break;
00142
00143
00144 case PARSE_CHUNKS:
00145
00146 if (m_chunked_content_parse_state == PARSE_CHUNK
00147 && m_bytes_read_in_current_chunk < m_size_of_current_chunk
00148 && (m_size_of_current_chunk - m_bytes_read_in_current_chunk) >= len)
00149 {
00150
00151 if (m_payload_handler) {
00152 for (std::size_t n = 0; n < len; ++n)
00153 m_payload_handler(&MISSING_DATA_CHAR, 1);
00154 } else {
00155 for (std::size_t n = 0; n < len && http_msg.get_chunk_cache().size() < m_max_content_length; ++n)
00156 http_msg.get_chunk_cache().push_back(MISSING_DATA_CHAR);
00157 }
00158
00159 m_bytes_read_in_current_chunk += len;
00160 m_bytes_last_read = len;
00161 m_bytes_total_read += len;
00162 m_bytes_content_read += len;
00163
00164 if (m_bytes_read_in_current_chunk == m_size_of_current_chunk) {
00165 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK;
00166 }
00167 } else {
00168
00169 set_error(ec, ERROR_MISSING_CHUNK_DATA);
00170 rc = false;
00171 }
00172 break;
00173
00174
00175 case PARSE_CONTENT:
00176
00177 if (m_bytes_content_remaining == 0) {
00178
00179 rc = true;
00180 } else if (m_bytes_content_remaining < len) {
00181
00182 set_error(ec, ERROR_MISSING_TOO_MUCH_CONTENT);
00183 rc = false;
00184 } else {
00185
00186
00187 if (m_payload_handler) {
00188 for (std::size_t n = 0; n < len; ++n)
00189 m_payload_handler(&MISSING_DATA_CHAR, 1);
00190 } else if ( (m_bytes_content_read+len) <= m_max_content_length) {
00191
00192 for (std::size_t n = 0; n < len; ++n)
00193 http_msg.get_content()[m_bytes_content_read++] = MISSING_DATA_CHAR;
00194 } else {
00195 m_bytes_content_read += len;
00196 }
00197
00198 m_bytes_content_remaining -= len;
00199 m_bytes_total_read += len;
00200 m_bytes_last_read = len;
00201
00202 if (m_bytes_content_remaining == 0)
00203 rc = true;
00204 }
00205 break;
00206
00207
00208 case PARSE_CONTENT_NO_LENGTH:
00209
00210 if (m_payload_handler) {
00211 for (std::size_t n = 0; n < len; ++n)
00212 m_payload_handler(&MISSING_DATA_CHAR, 1);
00213 } else {
00214 for (std::size_t n = 0; n < len && http_msg.get_chunk_cache().size() < m_max_content_length; ++n)
00215 http_msg.get_chunk_cache().push_back(MISSING_DATA_CHAR);
00216 }
00217 m_bytes_last_read = len;
00218 m_bytes_total_read += len;
00219 m_bytes_content_read += len;
00220 break;
00221
00222
00223 case PARSE_END:
00224 rc = true;
00225 break;
00226 }
00227
00228
00229 if (rc == true) {
00230 m_message_parse_state = PARSE_END;
00231 finish(http_msg);
00232 } else if(rc == false) {
00233 compute_msg_status(http_msg, false);
00234 }
00235
00236 return rc;
00237 }
00238
00239 boost::tribool parser::parse_headers(http::message& http_msg,
00240 boost::system::error_code& ec)
00241 {
00242
00243
00244
00245
00246
00247
00248
00249 const char *read_start_ptr = m_read_ptr;
00250 m_bytes_last_read = 0;
00251 while (m_read_ptr < m_read_end_ptr) {
00252
00253 if (m_save_raw_headers)
00254 m_raw_headers += *m_read_ptr;
00255
00256 switch (m_headers_parse_state) {
00257 case PARSE_METHOD_START:
00258
00259 if (*m_read_ptr != ' ' && *m_read_ptr!='\r' && *m_read_ptr!='\n') {
00260 if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00261 set_error(ec, ERROR_METHOD_CHAR);
00262 return false;
00263 }
00264 m_headers_parse_state = PARSE_METHOD;
00265 m_method.erase();
00266 m_method.push_back(*m_read_ptr);
00267 }
00268 break;
00269
00270 case PARSE_METHOD:
00271
00272 if (*m_read_ptr == ' ') {
00273 m_resource.erase();
00274 m_headers_parse_state = PARSE_URI_STEM;
00275 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00276 set_error(ec, ERROR_METHOD_CHAR);
00277 return false;
00278 } else if (m_method.size() >= METHOD_MAX) {
00279 set_error(ec, ERROR_METHOD_SIZE);
00280 return false;
00281 } else {
00282 m_method.push_back(*m_read_ptr);
00283 }
00284 break;
00285
00286 case PARSE_URI_STEM:
00287
00288 if (*m_read_ptr == ' ') {
00289 m_headers_parse_state = PARSE_HTTP_VERSION_H;
00290 } else if (*m_read_ptr == '?') {
00291 m_query_string.erase();
00292 m_headers_parse_state = PARSE_URI_QUERY;
00293 } else if (*m_read_ptr == '\r') {
00294 http_msg.set_version_major(0);
00295 http_msg.set_version_minor(0);
00296 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00297 } else if (*m_read_ptr == '\n') {
00298 http_msg.set_version_major(0);
00299 http_msg.set_version_minor(0);
00300 m_headers_parse_state = PARSE_EXPECTING_CR;
00301 } else if (is_control(*m_read_ptr)) {
00302 set_error(ec, ERROR_URI_CHAR);
00303 return false;
00304 } else if (m_resource.size() >= RESOURCE_MAX) {
00305 set_error(ec, ERROR_URI_SIZE);
00306 return false;
00307 } else {
00308 m_resource.push_back(*m_read_ptr);
00309 }
00310 break;
00311
00312 case PARSE_URI_QUERY:
00313
00314 if (*m_read_ptr == ' ') {
00315 m_headers_parse_state = PARSE_HTTP_VERSION_H;
00316 } else if (*m_read_ptr == '\r') {
00317 http_msg.set_version_major(0);
00318 http_msg.set_version_minor(0);
00319 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00320 } else if (*m_read_ptr == '\n') {
00321 http_msg.set_version_major(0);
00322 http_msg.set_version_minor(0);
00323 m_headers_parse_state = PARSE_EXPECTING_CR;
00324 } else if (is_control(*m_read_ptr)) {
00325 set_error(ec, ERROR_QUERY_CHAR);
00326 return false;
00327 } else if (m_query_string.size() >= QUERY_STRING_MAX) {
00328 set_error(ec, ERROR_QUERY_SIZE);
00329 return false;
00330 } else {
00331 m_query_string.push_back(*m_read_ptr);
00332 }
00333 break;
00334
00335 case PARSE_HTTP_VERSION_H:
00336
00337 if (*m_read_ptr == '\r') {
00338
00339 if (! m_is_request) {
00340 set_error(ec, ERROR_VERSION_EMPTY);
00341 return false;
00342 }
00343 http_msg.set_version_major(0);
00344 http_msg.set_version_minor(0);
00345 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00346 } else if (*m_read_ptr == '\n') {
00347
00348 if (! m_is_request) {
00349 set_error(ec, ERROR_VERSION_EMPTY);
00350 return false;
00351 }
00352 http_msg.set_version_major(0);
00353 http_msg.set_version_minor(0);
00354 m_headers_parse_state = PARSE_EXPECTING_CR;
00355 } else if (*m_read_ptr != 'H') {
00356 set_error(ec, ERROR_VERSION_CHAR);
00357 return false;
00358 }
00359 m_headers_parse_state = PARSE_HTTP_VERSION_T_1;
00360 break;
00361
00362 case PARSE_HTTP_VERSION_T_1:
00363
00364 if (*m_read_ptr != 'T') {
00365 set_error(ec, ERROR_VERSION_CHAR);
00366 return false;
00367 }
00368 m_headers_parse_state = PARSE_HTTP_VERSION_T_2;
00369 break;
00370
00371 case PARSE_HTTP_VERSION_T_2:
00372
00373 if (*m_read_ptr != 'T') {
00374 set_error(ec, ERROR_VERSION_CHAR);
00375 return false;
00376 }
00377 m_headers_parse_state = PARSE_HTTP_VERSION_P;
00378 break;
00379
00380 case PARSE_HTTP_VERSION_P:
00381
00382 if (*m_read_ptr != 'P') {
00383 set_error(ec, ERROR_VERSION_CHAR);
00384 return false;
00385 }
00386 m_headers_parse_state = PARSE_HTTP_VERSION_SLASH;
00387 break;
00388
00389 case PARSE_HTTP_VERSION_SLASH:
00390
00391 if (*m_read_ptr != '/') {
00392 set_error(ec, ERROR_VERSION_CHAR);
00393 return false;
00394 }
00395 m_headers_parse_state = PARSE_HTTP_VERSION_MAJOR_START;
00396 break;
00397
00398 case PARSE_HTTP_VERSION_MAJOR_START:
00399
00400 if (!is_digit(*m_read_ptr)) {
00401 set_error(ec, ERROR_VERSION_CHAR);
00402 return false;
00403 }
00404 http_msg.set_version_major(*m_read_ptr - '0');
00405 m_headers_parse_state = PARSE_HTTP_VERSION_MAJOR;
00406 break;
00407
00408 case PARSE_HTTP_VERSION_MAJOR:
00409
00410 if (*m_read_ptr == '.') {
00411 m_headers_parse_state = PARSE_HTTP_VERSION_MINOR_START;
00412 } else if (is_digit(*m_read_ptr)) {
00413 http_msg.set_version_major( (http_msg.get_version_major() * 10)
00414 + (*m_read_ptr - '0') );
00415 } else {
00416 set_error(ec, ERROR_VERSION_CHAR);
00417 return false;
00418 }
00419 break;
00420
00421 case PARSE_HTTP_VERSION_MINOR_START:
00422
00423 if (!is_digit(*m_read_ptr)) {
00424 set_error(ec, ERROR_VERSION_CHAR);
00425 return false;
00426 }
00427 http_msg.set_version_minor(*m_read_ptr - '0');
00428 m_headers_parse_state = PARSE_HTTP_VERSION_MINOR;
00429 break;
00430
00431 case PARSE_HTTP_VERSION_MINOR:
00432
00433 if (*m_read_ptr == ' ') {
00434
00435 if (! m_is_request) {
00436 m_headers_parse_state = PARSE_STATUS_CODE_START;
00437 }
00438 } else if (*m_read_ptr == '\r') {
00439
00440 if (! m_is_request) {
00441 set_error(ec, ERROR_STATUS_EMPTY);
00442 return false;
00443 }
00444 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00445 } else if (*m_read_ptr == '\n') {
00446
00447 if (! m_is_request) {
00448 set_error(ec, ERROR_STATUS_EMPTY);
00449 return false;
00450 }
00451 m_headers_parse_state = PARSE_EXPECTING_CR;
00452 } else if (is_digit(*m_read_ptr)) {
00453 http_msg.set_version_minor( (http_msg.get_version_minor() * 10)
00454 + (*m_read_ptr - '0') );
00455 } else {
00456 set_error(ec, ERROR_VERSION_CHAR);
00457 return false;
00458 }
00459 break;
00460
00461 case PARSE_STATUS_CODE_START:
00462
00463 if (!is_digit(*m_read_ptr)) {
00464 set_error(ec, ERROR_STATUS_CHAR);
00465 return false;
00466 }
00467 m_status_code = (*m_read_ptr - '0');
00468 m_headers_parse_state = PARSE_STATUS_CODE;
00469 break;
00470
00471 case PARSE_STATUS_CODE:
00472
00473 if (*m_read_ptr == ' ') {
00474 m_status_message.erase();
00475 m_headers_parse_state = PARSE_STATUS_MESSAGE;
00476 } else if (is_digit(*m_read_ptr)) {
00477 m_status_code = ( (m_status_code * 10) + (*m_read_ptr - '0') );
00478 } else if (*m_read_ptr == '\r') {
00479
00480 m_status_message.erase();
00481 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00482 } else if (*m_read_ptr == '\n') {
00483
00484 m_status_message.erase();
00485 m_headers_parse_state = PARSE_EXPECTING_CR;
00486 } else {
00487 set_error(ec, ERROR_STATUS_CHAR);
00488 return false;
00489 }
00490 break;
00491
00492 case PARSE_STATUS_MESSAGE:
00493
00494 if (*m_read_ptr == '\r') {
00495 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00496 } else if (*m_read_ptr == '\n') {
00497 m_headers_parse_state = PARSE_EXPECTING_CR;
00498 } else if (is_control(*m_read_ptr)) {
00499 set_error(ec, ERROR_STATUS_CHAR);
00500 return false;
00501 } else if (m_status_message.size() >= STATUS_MESSAGE_MAX) {
00502 set_error(ec, ERROR_STATUS_CHAR);
00503 return false;
00504 } else {
00505 m_status_message.push_back(*m_read_ptr);
00506 }
00507 break;
00508
00509 case PARSE_EXPECTING_NEWLINE:
00510
00511 if (*m_read_ptr == '\n') {
00512 m_headers_parse_state = PARSE_HEADER_START;
00513 } else if (*m_read_ptr == '\r') {
00514
00515
00516
00517 ++m_read_ptr;
00518 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00519 m_bytes_total_read += m_bytes_last_read;
00520 return true;
00521 } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00522 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00523 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00524 set_error(ec, ERROR_HEADER_CHAR);
00525 return false;
00526 } else {
00527
00528 m_header_name.erase();
00529 m_header_name.push_back(*m_read_ptr);
00530 m_headers_parse_state = PARSE_HEADER_NAME;
00531 }
00532 break;
00533
00534 case PARSE_EXPECTING_CR:
00535
00536 if (*m_read_ptr == '\r') {
00537 m_headers_parse_state = PARSE_HEADER_START;
00538 } else if (*m_read_ptr == '\n') {
00539
00540
00541
00542 ++m_read_ptr;
00543 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00544 m_bytes_total_read += m_bytes_last_read;
00545 return true;
00546 } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00547 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00548 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00549 set_error(ec, ERROR_HEADER_CHAR);
00550 return false;
00551 } else {
00552
00553 m_header_name.erase();
00554 m_header_name.push_back(*m_read_ptr);
00555 m_headers_parse_state = PARSE_HEADER_NAME;
00556 }
00557 break;
00558
00559 case PARSE_HEADER_WHITESPACE:
00560
00561 if (*m_read_ptr == '\r') {
00562 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00563 } else if (*m_read_ptr == '\n') {
00564 m_headers_parse_state = PARSE_EXPECTING_CR;
00565 } else if (*m_read_ptr != '\t' && *m_read_ptr != ' ') {
00566 if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr))
00567 set_error(ec, ERROR_HEADER_CHAR);
00568 return false;
00569
00570 m_header_name.erase();
00571 m_header_name.push_back(*m_read_ptr);
00572 m_headers_parse_state = PARSE_HEADER_NAME;
00573 }
00574 break;
00575
00576 case PARSE_HEADER_START:
00577
00578 if (*m_read_ptr == '\r') {
00579 m_headers_parse_state = PARSE_EXPECTING_FINAL_NEWLINE;
00580 } else if (*m_read_ptr == '\n') {
00581 m_headers_parse_state = PARSE_EXPECTING_FINAL_CR;
00582 } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00583 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00584 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00585 set_error(ec, ERROR_HEADER_CHAR);
00586 return false;
00587 } else {
00588
00589 m_header_name.erase();
00590 m_header_name.push_back(*m_read_ptr);
00591 m_headers_parse_state = PARSE_HEADER_NAME;
00592 }
00593 break;
00594
00595 case PARSE_HEADER_NAME:
00596
00597 if (*m_read_ptr == ':') {
00598 m_header_value.erase();
00599 m_headers_parse_state = PARSE_SPACE_BEFORE_HEADER_VALUE;
00600 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00601 set_error(ec, ERROR_HEADER_CHAR);
00602 return false;
00603 } else if (m_header_name.size() >= HEADER_NAME_MAX) {
00604 set_error(ec, ERROR_HEADER_NAME_SIZE);
00605 return false;
00606 } else {
00607
00608 m_header_name.push_back(*m_read_ptr);
00609 }
00610 break;
00611
00612 case PARSE_SPACE_BEFORE_HEADER_VALUE:
00613
00614 if (*m_read_ptr == ' ') {
00615 m_headers_parse_state = PARSE_HEADER_VALUE;
00616 } else if (*m_read_ptr == '\r') {
00617 http_msg.add_header(m_header_name, m_header_value);
00618 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00619 } else if (*m_read_ptr == '\n') {
00620 http_msg.add_header(m_header_name, m_header_value);
00621 m_headers_parse_state = PARSE_EXPECTING_CR;
00622 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00623 set_error(ec, ERROR_HEADER_CHAR);
00624 return false;
00625 } else {
00626
00627 m_header_value.push_back(*m_read_ptr);
00628 m_headers_parse_state = PARSE_HEADER_VALUE;
00629 }
00630 break;
00631
00632 case PARSE_HEADER_VALUE:
00633
00634 if (*m_read_ptr == '\r') {
00635 http_msg.add_header(m_header_name, m_header_value);
00636 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00637 } else if (*m_read_ptr == '\n') {
00638 http_msg.add_header(m_header_name, m_header_value);
00639 m_headers_parse_state = PARSE_EXPECTING_CR;
00640 } else if (*m_read_ptr != '\t' && is_control(*m_read_ptr)) {
00641
00642
00643
00644
00645
00646
00647 set_error(ec, ERROR_HEADER_CHAR);
00648 return false;
00649 } else if (m_header_value.size() >= HEADER_VALUE_MAX) {
00650 set_error(ec, ERROR_HEADER_VALUE_SIZE);
00651 return false;
00652 } else {
00653
00654 m_header_value.push_back(*m_read_ptr);
00655 }
00656 break;
00657
00658 case PARSE_EXPECTING_FINAL_NEWLINE:
00659 if (*m_read_ptr == '\n') ++m_read_ptr;
00660 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00661 m_bytes_total_read += m_bytes_last_read;
00662 return true;
00663
00664 case PARSE_EXPECTING_FINAL_CR:
00665 if (*m_read_ptr == '\r') ++m_read_ptr;
00666 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00667 m_bytes_total_read += m_bytes_last_read;
00668 return true;
00669 }
00670
00671 ++m_read_ptr;
00672 }
00673
00674 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00675 m_bytes_total_read += m_bytes_last_read;
00676 return boost::indeterminate;
00677 }
00678
00679 void parser::update_message_with_header_data(http::message& http_msg) const
00680 {
00681 if (is_parsing_request()) {
00682
00683
00684
00685 http::request& http_request(dynamic_cast<http::request&>(http_msg));
00686 http_request.set_method(m_method);
00687 http_request.set_resource(m_resource);
00688 http_request.set_query_string(m_query_string);
00689
00690
00691 if (! m_query_string.empty()) {
00692 if (! parse_url_encoded(http_request.get_queries(),
00693 m_query_string.c_str(),
00694 m_query_string.size()))
00695 PION_LOG_WARN(m_logger, "Request query string parsing failed (URI)");
00696 }
00697
00698
00699 std::pair<ihash_multimap::const_iterator, ihash_multimap::const_iterator>
00700 cookie_pair = http_request.get_headers().equal_range(http::types::HEADER_COOKIE);
00701 for (ihash_multimap::const_iterator cookie_iterator = cookie_pair.first;
00702 cookie_iterator != http_request.get_headers().end()
00703 && cookie_iterator != cookie_pair.second; ++cookie_iterator)
00704 {
00705 if (! parse_cookie_header(http_request.get_cookies(),
00706 cookie_iterator->second, false) )
00707 PION_LOG_WARN(m_logger, "Cookie header parsing failed");
00708 }
00709
00710 } else {
00711
00712
00713
00714 http::response& http_response(dynamic_cast<http::response&>(http_msg));
00715 http_response.set_status_code(m_status_code);
00716 http_response.set_status_message(m_status_message);
00717
00718
00719 std::pair<ihash_multimap::const_iterator, ihash_multimap::const_iterator>
00720 cookie_pair = http_response.get_headers().equal_range(http::types::HEADER_SET_COOKIE);
00721 for (ihash_multimap::const_iterator cookie_iterator = cookie_pair.first;
00722 cookie_iterator != http_response.get_headers().end()
00723 && cookie_iterator != cookie_pair.second; ++cookie_iterator)
00724 {
00725 if (! parse_cookie_header(http_response.get_cookies(),
00726 cookie_iterator->second, true) )
00727 PION_LOG_WARN(m_logger, "Set-Cookie header parsing failed");
00728 }
00729
00730 }
00731 }
00732
00733 boost::tribool parser::finish_header_parsing(http::message& http_msg,
00734 boost::system::error_code& ec)
00735 {
00736 boost::tribool rc = boost::indeterminate;
00737
00738 m_bytes_content_remaining = m_bytes_content_read = 0;
00739 http_msg.set_content_length(0);
00740 http_msg.update_transfer_encoding_using_header();
00741 update_message_with_header_data(http_msg);
00742
00743 if (http_msg.is_chunked()) {
00744
00745
00746 m_message_parse_state = PARSE_CHUNKS;
00747
00748
00749 if (m_parse_headers_only)
00750 rc = true;
00751
00752 } else if (http_msg.is_content_length_implied()) {
00753
00754
00755 m_message_parse_state = PARSE_END;
00756 rc = true;
00757
00758 } else {
00759
00760
00761 if (http_msg.has_header(http::types::HEADER_CONTENT_LENGTH)) {
00762
00763
00764 try {
00765 http_msg.update_content_length_using_header();
00766 } catch (...) {
00767 PION_LOG_ERROR(m_logger, "Unable to update content length");
00768 set_error(ec, ERROR_INVALID_CONTENT_LENGTH);
00769 return false;
00770 }
00771
00772
00773 if (http_msg.get_content_length() == 0) {
00774 m_message_parse_state = PARSE_END;
00775 rc = true;
00776 } else {
00777 m_message_parse_state = PARSE_CONTENT;
00778 m_bytes_content_remaining = http_msg.get_content_length();
00779
00780
00781 if (m_bytes_content_remaining > m_max_content_length)
00782 http_msg.set_content_length(m_max_content_length);
00783
00784 if (m_parse_headers_only) {
00785
00786 rc = true;
00787 } else {
00788
00789 http_msg.create_content_buffer();
00790 }
00791 }
00792
00793 } else {
00794
00795
00796
00797
00798 if (! m_is_request) {
00799
00800 http_msg.get_chunk_cache().clear();
00801
00802
00803 m_message_parse_state = PARSE_CONTENT_NO_LENGTH;
00804
00805
00806 if (m_parse_headers_only)
00807 rc = true;
00808 } else {
00809 m_message_parse_state = PARSE_END;
00810 rc = true;
00811 }
00812 }
00813 }
00814
00815 finished_parsing_headers(ec);
00816
00817 return rc;
00818 }
00819
00820 bool parser::parse_uri(const std::string& uri, std::string& proto,
00821 std::string& host, boost::uint16_t& port,
00822 std::string& path, std::string& query)
00823 {
00824 size_t proto_end = uri.find("://");
00825 size_t proto_len = 0;
00826
00827 if(proto_end != std::string::npos) {
00828 proto = uri.substr(0, proto_end);
00829 proto_len = proto_end + 3;
00830 } else {
00831 proto.clear();
00832 }
00833
00834
00835
00836 size_t server_port_end = uri.find('/', proto_len);
00837 if(server_port_end == std::string::npos) {
00838 return false;
00839 }
00840
00841
00842 std::string t;
00843 t = uri.substr(proto_len, server_port_end - proto_len);
00844 size_t port_pos = t.find(':', 0);
00845
00846
00847
00848 host = t.substr(0, port_pos);
00849 if(host.length() == 0) {
00850 return false;
00851 }
00852
00853
00854 if(port_pos != std::string::npos) {
00855 try {
00856 port = boost::lexical_cast<int>(t.substr(port_pos+1));
00857 } catch (boost::bad_lexical_cast &) {
00858 return false;
00859 }
00860 } else if (proto == "http" || proto == "HTTP") {
00861 port = 80;
00862 } else if (proto == "https" || proto == "HTTPS") {
00863 port = 443;
00864 } else {
00865 port = 0;
00866 }
00867
00868
00869 path = uri.substr(server_port_end);
00870
00871
00872 size_t query_pos = path.find('?', 0);
00873
00874 if(query_pos != std::string::npos) {
00875 query = path.substr(query_pos + 1, path.length() - query_pos - 1);
00876 path = path.substr(0, query_pos);
00877 } else {
00878 query.clear();
00879 }
00880
00881 return true;
00882 }
00883
00884 bool parser::parse_url_encoded(ihash_multimap& dict,
00885 const char *ptr, const size_t len)
00886 {
00887
00888 if (ptr == NULL || len == 0)
00889 return true;
00890
00891
00892 enum QueryParseState {
00893 QUERY_PARSE_NAME, QUERY_PARSE_VALUE
00894 } parse_state = QUERY_PARSE_NAME;
00895
00896
00897 const char * const end = ptr + len;
00898 std::string query_name;
00899 std::string query_value;
00900
00901
00902 while (ptr < end) {
00903 switch (parse_state) {
00904
00905 case QUERY_PARSE_NAME:
00906
00907 if (*ptr == '=') {
00908
00909 parse_state = QUERY_PARSE_VALUE;
00910 } else if (*ptr == '&') {
00911
00912 if (! query_name.empty()) {
00913
00914 dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) );
00915 query_name.erase();
00916 }
00917 } else if (*ptr == '\r' || *ptr == '\n' || *ptr == '\t') {
00918
00919 } else if (is_control(*ptr) || query_name.size() >= QUERY_NAME_MAX) {
00920
00921 return false;
00922 } else {
00923
00924 query_name.push_back(*ptr);
00925 }
00926 break;
00927
00928 case QUERY_PARSE_VALUE:
00929
00930 if (*ptr == '&') {
00931
00932 if (! query_name.empty()) {
00933 dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) );
00934 query_name.erase();
00935 }
00936 query_value.erase();
00937 parse_state = QUERY_PARSE_NAME;
00938 } else if (*ptr == ',') {
00939
00940 if (! query_name.empty())
00941 dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) );
00942 query_value.erase();
00943 } else if (*ptr == '\r' || *ptr == '\n' || *ptr == '\t') {
00944
00945 } else if (is_control(*ptr) || query_value.size() >= QUERY_VALUE_MAX) {
00946
00947 return false;
00948 } else {
00949
00950 query_value.push_back(*ptr);
00951 }
00952 break;
00953 }
00954
00955 ++ptr;
00956 }
00957
00958
00959 if (! query_name.empty())
00960 dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) );
00961
00962 return true;
00963 }
00964
00965 bool parser::parse_multipart_form_data(ihash_multimap& dict,
00966 const std::string& content_type,
00967 const char *ptr, const size_t len)
00968 {
00969
00970 if (ptr == NULL || len == 0)
00971 return true;
00972
00973
00974 std::size_t pos = content_type.find("boundary=");
00975 if (pos == std::string::npos)
00976 return false;
00977 const std::string boundary = std::string("--") + content_type.substr(pos+9);
00978
00979
00980 enum MultiPartParseState {
00981 MP_PARSE_START,
00982 MP_PARSE_HEADER_CR, MP_PARSE_HEADER_LF,
00983 MP_PARSE_HEADER_NAME, MP_PARSE_HEADER_SPACE, MP_PARSE_HEADER_VALUE,
00984 MP_PARSE_HEADER_LAST_LF, MP_PARSE_FIELD_DATA
00985 } parse_state = MP_PARSE_START;
00986
00987
00988 std::string header_name;
00989 std::string header_value;
00990 std::string field_name;
00991 std::string field_value;
00992 bool found_parameter = false;
00993 bool save_current_field = true;
00994 const char * const end_ptr = ptr + len;
00995
00996 ptr = std::search(ptr, end_ptr, boundary.begin(), boundary.end());
00997
00998 while (ptr != NULL && ptr < end_ptr) {
00999 switch (parse_state) {
01000 case MP_PARSE_START:
01001
01002 header_name.clear();
01003 header_value.clear();
01004 field_name.clear();
01005 field_value.clear();
01006 save_current_field = true;
01007 ptr += boundary.size() - 1;
01008 parse_state = MP_PARSE_HEADER_CR;
01009 break;
01010 case MP_PARSE_HEADER_CR:
01011
01012 if (*ptr == '\r') {
01013
01014 parse_state = MP_PARSE_HEADER_LF;
01015 } else if (*ptr == '\n') {
01016
01017 parse_state = MP_PARSE_HEADER_NAME;
01018 } else if (*ptr == '-' && ptr+1 < end_ptr && ptr[1] == '-') {
01019
01020 return true;
01021 } else return false;
01022 break;
01023 case MP_PARSE_HEADER_LF:
01024
01025 if (*ptr == '\n') {
01026
01027 parse_state = MP_PARSE_HEADER_NAME;
01028 } else return false;
01029 break;
01030 case MP_PARSE_HEADER_NAME:
01031
01032 if (*ptr == '\r' || *ptr == '\n') {
01033 if (header_name.empty()) {
01034
01035 parse_state = (*ptr == '\r' ? MP_PARSE_HEADER_LAST_LF : MP_PARSE_FIELD_DATA);
01036 } else {
01037
01038 parse_state = (*ptr == '\r' ? MP_PARSE_HEADER_LF : MP_PARSE_HEADER_NAME);
01039 }
01040 } else if (*ptr == ':') {
01041
01042 parse_state = MP_PARSE_HEADER_SPACE;
01043 } else {
01044
01045 header_name += *ptr;
01046 }
01047 break;
01048 case MP_PARSE_HEADER_SPACE:
01049
01050 if (*ptr == '\r') {
01051
01052 parse_state = MP_PARSE_HEADER_LF;
01053 } else if (*ptr == '\n') {
01054
01055 parse_state = MP_PARSE_HEADER_NAME;
01056 } else if (*ptr != ' ') {
01057
01058 header_value += *ptr;
01059 parse_state = MP_PARSE_HEADER_VALUE;
01060 }
01061
01062 break;
01063 case MP_PARSE_HEADER_VALUE:
01064
01065 if (*ptr == '\r' || *ptr == '\n') {
01066
01067 if (boost::algorithm::iequals(header_name, types::HEADER_CONTENT_TYPE)) {
01068
01069 save_current_field = boost::algorithm::iequals(header_value.substr(0, 5), "text/");
01070 } else if (boost::algorithm::iequals(header_name, types::HEADER_CONTENT_DISPOSITION)) {
01071
01072 std::size_t name_pos = header_value.find("name=\"");
01073 if (name_pos != std::string::npos) {
01074 for (name_pos += 6; name_pos < header_value.size() && header_value[name_pos] != '\"'; ++name_pos) {
01075 field_name += header_value[name_pos];
01076 }
01077 }
01078 }
01079
01080 header_name.clear();
01081 header_value.clear();
01082 parse_state = (*ptr == '\r' ? MP_PARSE_HEADER_LF : MP_PARSE_HEADER_NAME);
01083 } else {
01084
01085 header_value += *ptr;
01086 }
01087 break;
01088 case MP_PARSE_HEADER_LAST_LF:
01089
01090 if (*ptr == '\n') {
01091
01092 if (save_current_field && !field_name.empty()) {
01093
01094 parse_state = MP_PARSE_FIELD_DATA;
01095 } else {
01096
01097 parse_state = MP_PARSE_START;
01098 ptr = std::search(ptr, end_ptr, boundary.begin(), boundary.end());
01099 }
01100 } else return false;
01101 break;
01102 case MP_PARSE_FIELD_DATA:
01103
01104 const char *field_end_ptr = end_ptr;
01105 const char *next_ptr = std::search(ptr, end_ptr, boundary.begin(), boundary.end());
01106 if (next_ptr) {
01107
01108 const char *temp_ptr = next_ptr - 2;
01109 if (temp_ptr[0] == '\r' && temp_ptr[1] == '\n')
01110 field_end_ptr = temp_ptr;
01111 else field_end_ptr = next_ptr;
01112 }
01113 field_value.assign(ptr, field_end_ptr - ptr);
01114
01115 dict.insert( std::make_pair(field_name, field_value) );
01116 found_parameter = true;
01117
01118 parse_state = MP_PARSE_START;
01119 ptr = next_ptr;
01120 break;
01121 }
01122
01123 if (parse_state != MP_PARSE_START)
01124 ++ptr;
01125 }
01126
01127 return found_parameter;
01128 }
01129
01130 bool parser::parse_cookie_header(ihash_multimap& dict,
01131 const char *ptr, const size_t len,
01132 bool set_cookie_header)
01133 {
01134
01135
01136
01137
01138
01139
01140
01141 enum CookieParseState {
01142 COOKIE_PARSE_NAME, COOKIE_PARSE_VALUE, COOKIE_PARSE_IGNORE
01143 } parse_state = COOKIE_PARSE_NAME;
01144
01145
01146 const char * const end = ptr + len;
01147 std::string cookie_name;
01148 std::string cookie_value;
01149 char value_quote_character = '\0';
01150
01151
01152 while (ptr < end) {
01153 switch (parse_state) {
01154
01155 case COOKIE_PARSE_NAME:
01156
01157 if (*ptr == '=') {
01158
01159 value_quote_character = '\0';
01160 parse_state = COOKIE_PARSE_VALUE;
01161 } else if (*ptr == ';' || *ptr == ',') {
01162
01163
01164 if (! cookie_name.empty()) {
01165
01166 if (! is_cookie_attribute(cookie_name, set_cookie_header))
01167 dict.insert( std::make_pair(cookie_name, cookie_value) );
01168 cookie_name.erase();
01169 }
01170 } else if (*ptr != ' ') {
01171
01172 if (is_control(*ptr) || cookie_name.size() >= COOKIE_NAME_MAX)
01173 return false;
01174
01175 cookie_name.push_back(*ptr);
01176 }
01177 break;
01178
01179 case COOKIE_PARSE_VALUE:
01180
01181 if (value_quote_character == '\0') {
01182
01183 if (*ptr == ';' || *ptr == ',') {
01184
01185 if (! is_cookie_attribute(cookie_name, set_cookie_header))
01186 dict.insert( std::make_pair(cookie_name, cookie_value) );
01187 cookie_name.erase();
01188 cookie_value.erase();
01189 parse_state = COOKIE_PARSE_NAME;
01190 } else if (*ptr == '\'' || *ptr == '"') {
01191 if (cookie_value.empty()) {
01192
01193 value_quote_character = *ptr;
01194 } else if (cookie_value.size() >= COOKIE_VALUE_MAX) {
01195
01196 return false;
01197 } else {
01198
01199 cookie_value.push_back(*ptr);
01200 }
01201 } else if (*ptr != ' ' || !cookie_value.empty()) {
01202
01203 if (is_control(*ptr) || cookie_value.size() >= COOKIE_VALUE_MAX)
01204 return false;
01205
01206 cookie_value.push_back(*ptr);
01207 }
01208 } else {
01209
01210 if (*ptr == value_quote_character) {
01211
01212 if (! is_cookie_attribute(cookie_name, set_cookie_header))
01213 dict.insert( std::make_pair(cookie_name, cookie_value) );
01214 cookie_name.erase();
01215 cookie_value.erase();
01216 parse_state = COOKIE_PARSE_IGNORE;
01217 } else if (cookie_value.size() >= COOKIE_VALUE_MAX) {
01218
01219 return false;
01220 } else {
01221
01222 cookie_value.push_back(*ptr);
01223 }
01224 }
01225 break;
01226
01227 case COOKIE_PARSE_IGNORE:
01228
01229 if (*ptr == ';' || *ptr == ',')
01230 parse_state = COOKIE_PARSE_NAME;
01231 break;
01232 }
01233
01234 ++ptr;
01235 }
01236
01237
01238 if (! is_cookie_attribute(cookie_name, set_cookie_header))
01239 dict.insert( std::make_pair(cookie_name, cookie_value) );
01240
01241 return true;
01242 }
01243
01244 boost::tribool parser::parse_chunks(http::message::chunk_cache_t& chunks,
01245 boost::system::error_code& ec)
01246 {
01247
01248
01249
01250
01251
01252
01253
01254 const char *read_start_ptr = m_read_ptr;
01255 m_bytes_last_read = 0;
01256 while (m_read_ptr < m_read_end_ptr) {
01257
01258 switch (m_chunked_content_parse_state) {
01259 case PARSE_CHUNK_SIZE_START:
01260
01261 if (is_hex_digit(*m_read_ptr)) {
01262 m_chunk_size_str.erase();
01263 m_chunk_size_str.push_back(*m_read_ptr);
01264 m_chunked_content_parse_state = PARSE_CHUNK_SIZE;
01265 } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09' || *m_read_ptr == '\x0D' || *m_read_ptr == '\x0A') {
01266
01267
01268 break;
01269 } else {
01270 set_error(ec, ERROR_CHUNK_CHAR);
01271 return false;
01272 }
01273 break;
01274
01275 case PARSE_CHUNK_SIZE:
01276 if (is_hex_digit(*m_read_ptr)) {
01277 m_chunk_size_str.push_back(*m_read_ptr);
01278 } else if (*m_read_ptr == '\x0D') {
01279 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
01280 } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09') {
01281
01282
01283 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE;
01284 } else if (*m_read_ptr == ';') {
01285
01286
01287 m_chunked_content_parse_state = PARSE_EXPECTING_IGNORED_TEXT_AFTER_CHUNK_SIZE;
01288 } else {
01289 set_error(ec, ERROR_CHUNK_CHAR);
01290 return false;
01291 }
01292 break;
01293
01294 case PARSE_EXPECTING_IGNORED_TEXT_AFTER_CHUNK_SIZE:
01295 if (*m_read_ptr == '\x0D') {
01296 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
01297 }
01298 break;
01299
01300 case PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE:
01301 if (*m_read_ptr == '\x0D') {
01302 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
01303 } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09') {
01304
01305
01306 break;
01307 } else {
01308 set_error(ec, ERROR_CHUNK_CHAR);
01309 return false;
01310 }
01311 break;
01312
01313 case PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE:
01314
01315
01316 if (*m_read_ptr == '\x0A') {
01317 m_bytes_read_in_current_chunk = 0;
01318 m_size_of_current_chunk = strtol(m_chunk_size_str.c_str(), 0, 16);
01319 if (m_size_of_current_chunk == 0) {
01320 m_chunked_content_parse_state = PARSE_EXPECTING_FINAL_CR_OR_FOOTERS_AFTER_LAST_CHUNK;
01321 } else {
01322 m_chunked_content_parse_state = PARSE_CHUNK;
01323 }
01324 } else {
01325 set_error(ec, ERROR_CHUNK_CHAR);
01326 return false;
01327 }
01328 break;
01329
01330 case PARSE_CHUNK:
01331 if (m_bytes_read_in_current_chunk < m_size_of_current_chunk) {
01332 if (m_payload_handler) {
01333 const std::size_t bytes_avail = bytes_available();
01334 const std::size_t bytes_in_chunk = m_size_of_current_chunk - m_bytes_read_in_current_chunk;
01335 const std::size_t len = (bytes_in_chunk > bytes_avail) ? bytes_avail : bytes_in_chunk;
01336 m_payload_handler(m_read_ptr, len);
01337 m_bytes_read_in_current_chunk += len;
01338 if (len > 1) m_read_ptr += (len - 1);
01339 } else if (chunks.size() < m_max_content_length) {
01340 chunks.push_back(*m_read_ptr);
01341 m_bytes_read_in_current_chunk++;
01342 }
01343 }
01344 if (m_bytes_read_in_current_chunk == m_size_of_current_chunk) {
01345 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK;
01346 }
01347 break;
01348
01349 case PARSE_EXPECTING_CR_AFTER_CHUNK:
01350
01351 if (*m_read_ptr == '\x0D') {
01352 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK;
01353 } else {
01354 set_error(ec, ERROR_CHUNK_CHAR);
01355 return false;
01356 }
01357 break;
01358
01359 case PARSE_EXPECTING_LF_AFTER_CHUNK:
01360
01361 if (*m_read_ptr == '\x0A') {
01362 m_chunked_content_parse_state = PARSE_CHUNK_SIZE_START;
01363 } else {
01364 set_error(ec, ERROR_CHUNK_CHAR);
01365 return false;
01366 }
01367 break;
01368
01369 case PARSE_EXPECTING_FINAL_CR_OR_FOOTERS_AFTER_LAST_CHUNK:
01370
01371 if (*m_read_ptr == '\x0D') {
01372 m_chunked_content_parse_state = PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK;
01373 } else {
01374
01375
01376 m_message_parse_state = PARSE_FOOTERS;
01377 m_headers_parse_state = PARSE_HEADER_START;
01378 m_bytes_last_read = (m_read_ptr - read_start_ptr);
01379 m_bytes_total_read += m_bytes_last_read;
01380 m_bytes_content_read += m_bytes_last_read;
01381 PION_LOG_DEBUG(m_logger, "Parsed " << m_bytes_last_read << " chunked payload content bytes; chunked content complete.");
01382 return true;
01383 }
01384 break;
01385
01386 case PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK:
01387
01388 if (*m_read_ptr == '\x0A') {
01389 ++m_read_ptr;
01390 m_bytes_last_read = (m_read_ptr - read_start_ptr);
01391 m_bytes_total_read += m_bytes_last_read;
01392 m_bytes_content_read += m_bytes_last_read;
01393 PION_LOG_DEBUG(m_logger, "Parsed " << m_bytes_last_read << " chunked payload content bytes; chunked content complete.");
01394 return true;
01395 } else {
01396 set_error(ec, ERROR_CHUNK_CHAR);
01397 return false;
01398 }
01399 }
01400
01401 ++m_read_ptr;
01402 }
01403
01404 m_bytes_last_read = (m_read_ptr - read_start_ptr);
01405 m_bytes_total_read += m_bytes_last_read;
01406 m_bytes_content_read += m_bytes_last_read;
01407 return boost::indeterminate;
01408 }
01409
01410 boost::tribool parser::consume_content(http::message& http_msg,
01411 boost::system::error_code& ec)
01412 {
01413 size_t content_bytes_to_read;
01414 size_t content_bytes_available = bytes_available();
01415 boost::tribool rc = boost::indeterminate;
01416
01417 if (m_bytes_content_remaining == 0) {
01418
01419 return true;
01420 } else {
01421 if (content_bytes_available >= m_bytes_content_remaining) {
01422
01423 rc = true;
01424 content_bytes_to_read = m_bytes_content_remaining;
01425 } else {
01426
01427 content_bytes_to_read = content_bytes_available;
01428 }
01429 m_bytes_content_remaining -= content_bytes_to_read;
01430 }
01431
01432
01433 if (m_payload_handler) {
01434 m_payload_handler(m_read_ptr, content_bytes_to_read);
01435 } else if (m_bytes_content_read < m_max_content_length) {
01436 if (m_bytes_content_read + content_bytes_to_read > m_max_content_length) {
01437
01438
01439 memcpy(http_msg.get_content() + m_bytes_content_read, m_read_ptr,
01440 m_max_content_length - m_bytes_content_read);
01441 } else {
01442
01443 memcpy(http_msg.get_content() + m_bytes_content_read, m_read_ptr, content_bytes_to_read);
01444 }
01445 }
01446
01447 m_read_ptr += content_bytes_to_read;
01448 m_bytes_content_read += content_bytes_to_read;
01449 m_bytes_total_read += content_bytes_to_read;
01450 m_bytes_last_read = content_bytes_to_read;
01451
01452 return rc;
01453 }
01454
01455 std::size_t parser::consume_content_as_next_chunk(http::message::chunk_cache_t& chunks)
01456 {
01457 if (bytes_available() == 0) {
01458 m_bytes_last_read = 0;
01459 } else {
01460
01461 m_bytes_last_read = (m_read_end_ptr - m_read_ptr);
01462 if (m_payload_handler) {
01463 m_payload_handler(m_read_ptr, m_bytes_last_read);
01464 m_read_ptr += m_bytes_last_read;
01465 } else {
01466 while (m_read_ptr < m_read_end_ptr) {
01467 if (chunks.size() < m_max_content_length)
01468 chunks.push_back(*m_read_ptr);
01469 ++m_read_ptr;
01470 }
01471 }
01472 m_bytes_total_read += m_bytes_last_read;
01473 m_bytes_content_read += m_bytes_last_read;
01474 }
01475 return m_bytes_last_read;
01476 }
01477
01478 void parser::finish(http::message& http_msg) const
01479 {
01480 switch (m_message_parse_state) {
01481 case PARSE_START:
01482 http_msg.set_is_valid(false);
01483 http_msg.set_content_length(0);
01484 http_msg.create_content_buffer();
01485 return;
01486 case PARSE_END:
01487 http_msg.set_is_valid(true);
01488 break;
01489 case PARSE_HEADERS:
01490 case PARSE_FOOTERS:
01491 http_msg.set_is_valid(false);
01492 update_message_with_header_data(http_msg);
01493 http_msg.set_content_length(0);
01494 http_msg.create_content_buffer();
01495 break;
01496 case PARSE_CONTENT:
01497 http_msg.set_is_valid(false);
01498 if (get_content_bytes_read() < m_max_content_length)
01499 http_msg.set_content_length(get_content_bytes_read());
01500 break;
01501 case PARSE_CHUNKS:
01502 http_msg.set_is_valid(m_chunked_content_parse_state==PARSE_CHUNK_SIZE_START);
01503 if (!m_payload_handler)
01504 http_msg.concatenate_chunks();
01505 break;
01506 case PARSE_CONTENT_NO_LENGTH:
01507 http_msg.set_is_valid(true);
01508 if (!m_payload_handler)
01509 http_msg.concatenate_chunks();
01510 break;
01511 }
01512
01513 compute_msg_status(http_msg, http_msg.is_valid());
01514
01515 if (is_parsing_request() && !m_payload_handler && !m_parse_headers_only) {
01516
01517
01518
01519 http::request& http_request(dynamic_cast<http::request&>(http_msg));
01520 const std::string& content_type_header = http_request.get_header(http::types::HEADER_CONTENT_TYPE);
01521 if (content_type_header.compare(0, http::types::CONTENT_TYPE_URLENCODED.length(),
01522 http::types::CONTENT_TYPE_URLENCODED) == 0)
01523 {
01524 if (! parse_url_encoded(http_request.get_queries(),
01525 http_request.get_content(),
01526 http_request.get_content_length()))
01527 PION_LOG_WARN(m_logger, "Request form data parsing failed (POST urlencoded)");
01528 } else if (content_type_header.compare(0, http::types::CONTENT_TYPE_MULTIPART_FORM_DATA.length(),
01529 http::types::CONTENT_TYPE_MULTIPART_FORM_DATA) == 0)
01530 {
01531 if (! parse_multipart_form_data(http_request.get_queries(),
01532 content_type_header,
01533 http_request.get_content(),
01534 http_request.get_content_length()))
01535 PION_LOG_WARN(m_logger, "Request form data parsing failed (POST multipart)");
01536 }
01537 }
01538 }
01539
01540 void parser::compute_msg_status(http::message& http_msg, bool msg_parsed_ok )
01541 {
01542 http::message::data_status_t st = http::message::STATUS_NONE;
01543
01544 if(http_msg.has_missing_packets()) {
01545 st = http_msg.has_data_after_missing_packets() ?
01546 http::message::STATUS_PARTIAL : http::message::STATUS_TRUNCATED;
01547 } else {
01548 st = msg_parsed_ok ? http::message::STATUS_OK : http::message::STATUS_TRUNCATED;
01549 }
01550
01551 http_msg.set_status(st);
01552 }
01553
01554 void parser::create_error_category(void)
01555 {
01556 static error_category_t UNIQUE_ERROR_CATEGORY;
01557 m_error_category_ptr = &UNIQUE_ERROR_CATEGORY;
01558 }
01559
01560 bool parser::parse_forwarded_for(const std::string& header, std::string& public_ip)
01561 {
01562
01563 static const boost::regex IPV4_ADDR_RX("[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}");
01564
01570 static const boost::regex PRIVATE_NET_RX("(10\\.[0-9]{1,3}|127\\.[0-9]{1,3}|192\\.168|172\\.1[6-9]|172\\.2[0-9]|172\\.3[0-1])\\.[0-9]{1,3}\\.[0-9]{1,3}");
01571
01572
01573 if (header.empty())
01574 return false;
01575
01576
01577 boost::match_results<std::string::const_iterator> m;
01578 std::string::const_iterator start_it = header.begin();
01579
01580
01581 while (boost::regex_search(start_it, header.end(), m, IPV4_ADDR_RX)) {
01582
01583 std::string ip_str(m[0].first, m[0].second);
01584
01585 if (! boost::regex_match(ip_str, PRIVATE_NET_RX) ) {
01586
01587 public_ip = ip_str;
01588 return true;
01589 }
01590
01591 start_it = m[0].second;
01592 }
01593
01594
01595 return false;
01596 }
01597
01598 }
01599 }