Files
crossdesk/thirdparty/websocketpp/include/websocketpp/http/parser.hpp
2023-08-21 17:31:44 +08:00

630 lines
19 KiB
C++

/*
* Copyright (c) 2014, Peter Thorson. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the WebSocket++ Project nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL PETER THORSON BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef HTTP_PARSER_HPP
#define HTTP_PARSER_HPP
#include <algorithm>
#include <map>
#include <string>
#include <utility>
#include <websocketpp/utilities.hpp>
#include <websocketpp/http/constants.hpp>
namespace websocketpp {
namespace http {
namespace parser {
/// Parser state identifiers
/**
* NOTE(review): the enumerator names suggest the successive parts of an HTTP
* message head (method, resource, version, then headers). The code that
* consumes these values is not visible in this file - confirm before relying
* on that reading.
*/
namespace state {
enum value {
method,
resource,
version,
headers
};
}
/// Identifiers for how a message body is encoded
/**
* `unknown` is the value the `parser` constructor in this file assigns to its
* `m_body_encoding` member.
*
* NOTE(review): `plain` and `chunked` presumably distinguish a body sent as-is
* from one using chunked transfer coding; the code that sets them is not
* visible in this file - confirm.
*/
namespace body_encoding {
enum value {
unknown,
plain,
chunked
};
}
/// Container type used to store HTTP headers as string key/value pairs
/**
* Keys are ordered with `utility::ci_less` (presumably a case-insensitive
* comparison - confirm in websocketpp/utilities.hpp).
*/
typedef std::map<std::string, std::string, utility::ci_less > header_list;
/// Extract the token found at the start of a character range
/**
 * Scans forward from `begin` and stops at `end` or at the first character for
 * which `is_not_token_char` reports true, whichever comes first. The
 * characters scanned over form the token.
 *
 * @param begin An iterator to the beginning of the sequence
 * @param end An iterator to the end of the sequence
 * @return A pair holding the token (empty if the first character is not a
 * token character or the range is empty) and an iterator to the first
 * character that was not consumed
 */
template <typename InputIterator>
std::pair<std::string,InputIterator> extract_token(InputIterator begin,
    InputIterator end)
{
    std::pair<std::string,InputIterator> result;

    // Locate the end of the token first, then copy the characters before it.
    result.second = std::find_if(begin,end,&is_not_token_char);
    result.first.assign(begin,result.second);

    return result;
}
/// Read and return the next quoted string in the stream
/**
 * Read a double quoted string starting at `begin`. The surrounding quotes are
 * stripped and every quoted-pair (a backslash followed by any character) is
 * replaced by the character that follows the backslash, so `\"` yields `"`
 * and `\\` yields `\`. The unescaped value is returned along with an iterator
 * to the next character to read.
 *
 * The scan runs strictly left to right so that a backslash which is itself
 * escaped is never mistaken for the escape of the closing quote (for example
 * the input `"a\\"` is a complete quoted string whose value is `a\`).
 *
 * Only `++`, `*`, `==` and `!=` are used on the iterators; the range must be
 * re-readable because `begin` is handed back on failure.
 *
 * @param begin An iterator to the beginning of the sequence
 * @param end An iterator to the end of the sequence
 * @return A pair containing the string read and an iterator to the next
 * character in the stream. If the range is empty, does not start with a
 * double quote, or the quoted string is not terminated before `end`, the pair
 * holds an empty string and `begin`.
 */
template <typename InputIterator>
std::pair<std::string,InputIterator> extract_quoted_string(InputIterator begin,
    InputIterator end)
{
    std::string s;

    // Nothing to read, or not a quoted string at all.
    if (begin == end || *begin != '"') {
        return std::make_pair(s,begin);
    }

    InputIterator cursor = begin;
    ++cursor; // step over the opening quote

    while (cursor != end) {
        if (*cursor == '"') {
            // Unescaped quote: this terminates the string.
            ++cursor;
            return std::make_pair(s,cursor);
        }

        if (*cursor == '\\') {
            // quoted-pair: drop the backslash and keep the next character
            // verbatim, whatever it is.
            ++cursor;
            if (cursor == end) {
                // dangling backslash, the string can not be terminated
                break;
            }
        }

        s.push_back(*cursor);
        ++cursor;
    }

    // Ran out of input before finding the closing quote.
    return std::make_pair(std::string(),begin);
}
/// Skip a single unit of linear whitespace
/**
 * One unit is an optional line fold (CR, LF, then a character accepted by
 * `is_whitespace_char`) followed by any run of characters up to the first one
 * for which `is_not_whitespace_char` reports true. When the returned iterator
 * equals `begin`, nothing was skipped.
 *
 * The iterator type must support subtraction and `+`/`+=` with an integer.
 *
 * @param begin An iterator to the beginning of the sequence
 * @param end An iterator to the end of the sequence
 * @return An iterator to the first character after the skipped whitespace
 */
template <typename InputIterator>
InputIterator extract_lws(InputIterator begin, InputIterator end) {
    // A fold needs three characters: CR, LF and one whitespace character.
    bool const starts_with_fold =
        (end - begin > 2)
        && *begin == '\r'
        && *(begin + 1) == '\n'
        && is_whitespace_char(static_cast<unsigned char>(*(begin + 2)));

    InputIterator scan_from = begin;
    if (starts_with_fold) {
        scan_from += 3;
    }

    // Consume whatever plain whitespace follows.
    return std::find_if(scan_from,end,&is_not_whitespace_char);
}
/// Skip every consecutive unit of linear whitespace
/**
 * Applies `extract_lws` repeatedly until a pass either reaches `end` or makes
 * no progress. When the returned iterator equals `begin`, nothing was
 * skipped.
 *
 * @param begin An iterator to the beginning of the sequence
 * @param end An iterator to the end of the sequence
 * @return An iterator to the first character after the skipped whitespace
 */
template <typename InputIterator>
InputIterator extract_all_lws(InputIterator begin, InputIterator end) {
    InputIterator current = begin;

    for (;;) {
        InputIterator const next = extract_lws(current,end);

        // Stop once the input is exhausted or the last pass skipped nothing.
        if (next == end || next == current) {
            return next;
        }

        current = next;
    }
}
/// Extract HTTP attributes
/**
* An http attributes list is a semicolon delimited list of key value pairs in
* the format: *( ";" attribute "=" value ) where attribute is a token and value
* is a token or quoted string.
*
* Attributes extracted are appended to the supplied attributes list
* `attributes`.
*
* @param [in] begin An iterator to the beginning of the sequence
* @param [in] end An iterator to the end of the sequence
* @param [out] attributes A reference to the attributes list to append
* attribute/value pairs extracted to
* @return An iterator to the character after the last atribute read
*/
template <typename InputIterator>
InputIterator extract_attributes(InputIterator begin, InputIterator end,
attribute_list & attributes)
{
InputIterator cursor;
bool first = true;
if (begin == end) {
return begin;
}
cursor = begin;
std::pair<std::string,InputIterator> ret;
while (cursor != end) {
std::string name;
cursor = http::parser::extract_all_lws(cursor,end);
if (cursor == end) {
break;
}
if (first) {
// ignore this check for the very first pass
first = false;
} else {
if (*cursor == ';') {
// advance past the ';'
++cursor;
} else {
// non-semicolon in this position indicates end end of the
// attribute list, break and return.
break;
}
}
cursor = http::parser::extract_all_lws(cursor,end);
ret = http::parser::extract_token(cursor,end);
if (ret.first.empty()) {
// error: expected a token
return begin;
} else {
name = ret.first;
cursor = ret.second;
}
cursor = http::parser::extract_all_lws(cursor,end);
if (cursor == end || *cursor != '=') {
// if there is an equals sign, read the attribute value. Otherwise
// record a blank value and continue
attributes[name].clear();
continue;
}
// advance past the '='
++cursor;
cursor = http::parser::extract_all_lws(cursor,end);
if (cursor == end) {
// error: expected a token or quoted string
return begin;
}
ret = http::parser::extract_quoted_string(cursor,end);
if (ret.second != cursor) {
attributes[name] = ret.first;
cursor = ret.second;
continue;
}
ret = http::parser::extract_token(cursor,end);
if (ret.first.empty()) {
// error : expected token or quoted string
return begin;
} else {
attributes[name] = ret.first;
cursor = ret.second;
}
}
return cursor;
}
/// Parse a comma delimited HTTP parameter list
/**
 * Each parameter is a token optionally followed by `";"` and a semicolon
 * delimited attribute list (see `extract_attributes`). Parameters are
 * separated by commas and linear whitespace is allowed around every element.
 *
 * Every parameter read is appended to `parameters` as a (name, attributes)
 * pair. Pairs appended before a syntax error is detected are left in place.
 *
 * @param [in] begin An iterator to the beginning of the sequence
 * @param [in] end An iterator to the end of the sequence
 * @param [out] parameters A reference to the parameters list to append
 * parameter values extracted to
 * @return An iterator to the character after the last parameter read, or
 * `begin` if a syntax error was found (or the range was empty)
 */
template <typename InputIterator>
InputIterator extract_parameters(InputIterator begin, InputIterator end,
    parameter_list &parameters)
{
    typedef std::pair<std::string,InputIterator> extraction;

    // One loop pass consumes:
    //   LWS token LWS [ ";" attribute-list ] LWS [ "," ]
    InputIterator pos = begin;

    while (pos != end) {
        // skip any stray whitespace in front of the parameter
        pos = http::parser::extract_all_lws(pos,end);
        if (pos == end) {
            break;
        }

        extraction const name = http::parser::extract_token(pos,end);
        if (name.first.empty()) {
            // error: expected a token
            return begin;
        }

        attribute_list attributes;
        pos = http::parser::extract_all_lws(name.second,end);
        bool const input_exhausted = (pos == end);

        // If there is an attribute list, read it in
        if (!input_exhausted && *pos == ';') {
            ++pos;
            InputIterator const attr_end =
                http::parser::extract_attributes(pos,end,attributes);
            if (attr_end == pos) {
                // attribute extraction ended in syntax error
                return begin;
            }
            pos = attr_end;
        }

        parameters.push_back(std::make_pair(name.first,attributes));
        if (input_exhausted) {
            // the name was the last thing in the input
            break;
        }

        // a ',' announces another parameter, anything else ends the list
        pos = http::parser::extract_all_lws(pos,end);
        if (pos == end || *pos != ',') {
            break;
        }

        ++pos;
        if (pos == end) {
            // expected more bytes after a comma
            return begin;
        }
    }

    return pos;
}
/// Return a copy of a string with leading and trailing whitespace removed
/**
 * Leading whitespace is skipped with `extract_all_lws` on the forward range
 * and trailing whitespace with `extract_all_lws` on the reversed range.
 *
 * The two scans are independent and each one may consume a three character
 * line fold, so on some inputs (for example "\r\n \n\r", where both scans
 * claim the shared space) they overlap and the leading position ends up past
 * the trailing position. Building a string from such an inverted iterator
 * pair is undefined behavior, so that case yields an empty string.
 *
 * @param [in] input The string to strip.
 * @return The stripped string; empty if nothing but whitespace was found.
 */
inline std::string strip_lws(std::string const & input) {
    std::string::const_iterator first = extract_all_lws(input.begin(),input.end());
    if (first == input.end()) {
        return std::string();
    }

    std::string::const_reverse_iterator rlast = extract_all_lws(input.rbegin(),input.rend());
    if (rlast == input.rend()) {
        return std::string();
    }

    // base() is one past the last character kept by the reverse scan.
    std::string::const_iterator last = rlast.base();
    if (!(first < last)) {
        // the forward and reverse scans crossed: nothing is left to keep
        return std::string();
    }

    return std::string(first,last);
}
/// Base HTTP parser
/**
* Includes methods and data elements common to all types of HTTP messages such
* as headers, versions, bodies, etc.
*
* Only the trivial accessors are defined inline here; the remaining members
* are declared here and defined elsewhere (this header includes
* websocketpp/http/impl/parser.hpp at its end).
*/
class parser {
public:
/// Construct a parser in its initial state
/**
* The header byte counter and the count of body bytes needed start at zero,
* the body size limit starts at `max_body_size` and the body encoding starts
* as `body_encoding::unknown`.
*/
parser()
: m_header_bytes(0)
, m_body_bytes_needed(0)
, m_body_bytes_max(max_body_size)
, m_body_encoding(body_encoding::unknown) {}
/// Get the HTTP version string
/**
* @return The version string for this parser
*/
std::string const & get_version() const {
return m_version;
}
/// Set HTTP parser Version
/**
* Input should be in format: HTTP/x.y where x and y are positive integers.
* @todo Does this method need any validation?
*
* @param [in] version The value to set the HTTP version to.
*/
void set_version(std::string const & version);
/// Get the value of an HTTP header
/**
* @todo Make this method case insensitive.
* NOTE(review): headers are stored in a `header_list`, which is keyed with
* `utility::ci_less`; lookups are presumably already case insensitive -
* confirm and drop the todo if so.
*
* @param [in] key The name/key of the header to get.
* @return The value associated with the given HTTP header key.
*/
std::string const & get_header(std::string const & key) const;
/// Extract an HTTP parameter list from a parser header.
/**
* If the header requested doesn't exist or exists and is empty the
* parameter list is valid (but empty).
*
* @param [in] key The name/key of the HTTP header to use as input.
* @param [out] out The parameter list to store extracted parameters in.
* @return Whether or not the input was a valid parameter list.
*/
bool get_header_as_plist(std::string const & key, parameter_list & out)
const;
/// Return a list of all HTTP headers
/**
* Return a list of all HTTP headers
*
* @since 0.8.0
*
* @return A list of all HTTP headers
*/
header_list const & get_headers() const;
/// Append a value to an existing HTTP header
/**
* This method will set the value of the HTTP header `key` with the
* indicated value. If a header with the name `key` already exists, `val`
* will be appended to the existing value.
*
* @todo Make this method case insensitive.
* @todo Should there be any restrictions on which keys are allowed?
* @todo Exception free variant
*
* @see replace_header
*
* @param [in] key The name/key of the header to append to.
* @param [in] val The value to append.
*/
void append_header(std::string const & key, std::string const & val);
/// Set a value for an HTTP header, replacing an existing value
/**
* This method will set the value of the HTTP header `key` with the
* indicated value. If a header with the name `key` already exists, `val`
* will replace the existing value.
*
* @todo Make this method case insensitive.
* @todo Should there be any restrictions on which keys are allowed?
* @todo Exception free variant
*
* @see append_header
*
* @param [in] key The name/key of the header to set.
* @param [in] val The value to store, replacing any existing value.
*/
void replace_header(std::string const & key, std::string const & val);
/// Remove a header from the parser
/**
* Removes the header entirely from the parser. This is different than
* setting the value of the header to blank.
*
* @todo Make this method case insensitive.
*
* @param [in] key The name/key of the header to remove.
*/
void remove_header(std::string const & key);
/// Get HTTP body
/**
* Gets the body of the HTTP object
*
* @return The body of the HTTP message.
*/
std::string const & get_body() const {
return m_body;
}
/// Set body content
/**
* Set the body content of the HTTP response to the parameter string. Note
* set_body will also set the Content-Length HTTP header to the appropriate
* value. If you want the Content-Length header to be something else, do so
* via replace_header("Content-Length") after calling set_body()
*
* @param value String data to include as the body content.
*/
void set_body(std::string const & value);
/// Get body size limit
/**
* Retrieves the maximum number of bytes to parse & buffer before canceling
* a request.
*
* @since 0.5.0
*
* @return The maximum length of a message body.
*/
size_t get_max_body_size() const {
return m_body_bytes_max;
}
/// Set body size limit
/**
* Set the maximum number of bytes to parse and buffer before canceling a
* request.
*
* @since 0.5.0
*
* @param value The size to set the max body length to.
*/
void set_max_body_size(size_t value) {
m_body_bytes_max = value;
}
/// Extract an HTTP parameter list from a string.
/**
* @param [in] in The input string.
* @param [out] out The parameter list to store extracted parameters in.
* @return Whether or not the input was a valid parameter list.
*/
bool parse_parameter_list(std::string const & in, parameter_list & out)
const;
protected:
/// Process a header line
/**
* @todo Update this method to be exception free.
*
* @param [in] begin An iterator to the beginning of the sequence.
* @param [in] end An iterator to the end of the sequence.
*/
void process_header(std::string::iterator begin, std::string::iterator end);
/// Prepare the parser to begin parsing body data
/**
* Inspects headers to determine if the message has a body that needs to be
* read. If so, sets up the necessary state, otherwise returns false. If
* this method returns true and loading the message body is desired call
* `process_body` until it returns zero bytes or an error.
*
* Must not be called until after all headers have been processed.
*
* @since 0.5.0
*
* @return True if more bytes are needed to load the body, false otherwise.
*/
bool prepare_body();
/// Process body data
/**
* Parses body data.
*
* @since 0.5.0
*
* @param [in] buf Pointer to the body data to process.
* @param [in] len Length of the data at `buf` (presumably in bytes).
* @return The number of bytes processed
*/
size_t process_body(char const * buf, size_t len);
/// Check if the parser is done parsing the body
/**
* Behavior before a call to `prepare_body` is undefined.
*
* Implemented as a check that no more body bytes are needed
* (`m_body_bytes_needed` is zero).
*
* @since 0.5.0
*
* @return True if the message body has been completely loaded.
*/
bool body_ready() const {
return (m_body_bytes_needed == 0);
}
/// Generate and return the HTTP headers as a string
/**
* Each header will be followed by the \r\n sequence including the last one.
* A second \r\n sequence (blank header) is not appended by this method
*
* @return The HTTP headers as a string.
*/
std::string raw_headers() const;
/// HTTP version string, returned by get_version()
std::string m_version;
/// HTTP headers, stored as a header_list
header_list m_headers;
/// Header byte counter, zero on construction
/// NOTE(review): where it is updated is not visible in this file.
size_t m_header_bytes;
/// Message body, returned by get_body()
std::string m_body;
/// Body bytes still needed (presumably); body_ready() is true when zero
size_t m_body_bytes_needed;
/// Body size limit read and written by get/set_max_body_size()
size_t m_body_bytes_max;
/// Body encoding, body_encoding::unknown on construction
body_encoding::value m_body_encoding;
};
} // namespace parser
} // namespace http
} // namespace websocketpp
#include <websocketpp/http/impl/parser.hpp>
#endif // HTTP_PARSER_HPP