--------------------- PatchSet 8572 Date: 2006/08/31 04:52:25 Author: adri Branch: parserwork Tag: (none) Log: Flesh out the rest of the stuff required for HTTP/0.9 parsing Members: src/HttpMsg.c:1.8.8.5->1.8.8.6 src/client_side.c:1.143.2.12->1.143.2.13 Index: squid/src/HttpMsg.c =================================================================== RCS file: /cvsroot/squid-sf//squid/src/HttpMsg.c,v retrieving revision 1.8.8.5 retrieving revision 1.8.8.6 diff -u -r1.8.8.5 -r1.8.8.6 --- squid/src/HttpMsg.c 27 Aug 2006 02:16:37 -0000 1.8.8.5 +++ squid/src/HttpMsg.c 31 Aug 2006 04:52:25 -0000 1.8.8.6 @@ -1,6 +1,6 @@ /* - * $Id: HttpMsg.c,v 1.8.8.5 2006/08/27 02:16:37 adri Exp $ + * $Id: HttpMsg.c,v 1.8.8.6 2006/08/31 04:52:25 adri Exp $ * * DEBUG: section 74 HTTP Message * AUTHOR: Alex Rousskov @@ -70,6 +70,7 @@ int i = 0; int retcode = 0; int maj = -1, min = -1; + int last_whitespace = -1, line_end = -1; /* Find \r\n - end of URL+Version (and the request) */ for (i = 0; i < hmsg->size; i++) { @@ -77,6 +78,7 @@ break; } if (i < hmsg->size - 1 && hmsg->buf[i - 1] == '\r' && hmsg->buf[i] == '\n') { + i++; break; } } @@ -113,78 +115,104 @@ } hmsg->u_start = i; - /* Find whitespace; end of URL */ - for (; i < hmsg->req_end && (! isspace(hmsg->buf[i])); i++); - if (i >= hmsg->req_end) { - retcode = 0; - goto finish; - } - hmsg->u_end = i - 1; - - /* XXX yes, this doesn't support HTTP/0.9 requests just yet .. */ - - /* Find non-whitespace, version */ - for (; i < hmsg->req_end && (isspace(hmsg->buf[i])); i++); - if (i >= hmsg->req_end) { - retcode = 0; - goto finish; + /* Find \r\n or \n - thats the end of the line. Keep track of the last whitespace! */ + for (; i <= hmsg->req_end; i++) { + /* If \n - its end of line */ + if (hmsg->buf[i] == '\n') { + line_end = i; + break; + } + /* XXX could be off-by-one wrong! */ + if (hmsg->buf[i] == '\r' && (i + 1) <= hmsg->req_end && hmsg->buf[i+1] == '\n') { + line_end = i; + break; + } + /* If its a whitespace, note it as it'll delimit our version */ + if (hmsg->buf[i] == ' ' || hmsg->buf[i] == '\t') { + last_whitespace = i; + } } - hmsg->v_start = i; - - /* next five characters should be HTTP/ */ - if (i + 5 >= hmsg->req_end) { + debug(1, 1) ("foo: %d-%d, whitespace:%d\n", hmsg->u_start, line_end, last_whitespace); + if (i > hmsg->req_end) { retcode = 0; goto finish; } - if (strncasecmp(&hmsg->buf[i], "HTTP/", 5) != 0) { - retcode = -1; - goto finish; - } - i += 5; - /* next should be 1 or more digits */ - maj = 0; - for (; i < hmsg->req_end && (isdigit(hmsg->buf[i])); i++) { - maj = maj * 10; - maj = maj + (hmsg->buf[i]) - '0'; + /* At this point we don't need the 'i' value; so we'll recycle it for version parsing */ - } - if (i >= hmsg->req_end) { - retcode = 0; - goto finish; - } + /* + * At this point: line_end points to the first eol char (\r or \n); + * last_whitespace points to the last whitespace char in the URL. + * We know we have a full buffer here! + */ + if (last_whitespace == -1) { + maj = 0; min = 9; + hmsg->u_end = line_end - 1; + assert(hmsg->u_end >= hmsg->u_start); + } else { + /* Find the first non-whitespace after last_whitespace */ + /* XXX why <= vs < ? I do need to really re-audit all of this ..*/ + for (i = last_whitespace; i <= hmsg->req_end && isspace(hmsg->buf[i]); i++); + if (i > hmsg->req_end) { + retcode = 0; + goto finish; + } - /* next should be . */ - if (hmsg->buf[i] != '.') { - retcode = -1; - goto finish; - } - if (i + 1 >= hmsg->req_end) { - retcode = 0; - goto finish; - } + /* is it http/ ? if so, we try parsing. If not, the URL is the whole line; version is 0.9 */ + if (i + 5 >= hmsg->req_end || (strncasecmp(&hmsg->buf[i], "HTTP/", 5) != 0)) { + maj = 0; min = 9; + hmsg->u_end = line_end - 1; + assert(hmsg->u_end >= hmsg->u_start); + } else { + /* Ok, lets try parsing! Yes, this needs refactoring! */ + hmsg->v_start = i; + i += 5; + + /* next should be 1 or more digits */ + maj = 0; + for (; i < hmsg->req_end && (isdigit(hmsg->buf[i])); i++) { + maj = maj * 10; + maj = maj + (hmsg->buf[i]) - '0'; + } + if (i >= hmsg->req_end) { + retcode = 0; + goto finish; + } + + /* next should be .; we -have- to have this as we have a whole line.. */ + if (hmsg->buf[i] != '.') { + retcode = 0; + goto finish; + } + if (i + 1 >= hmsg->req_end) { + retcode = 0; + goto finish; + } - /* next should be one or more digits */ - i++; - min = 0; - for (; i < hmsg->req_end && (isdigit(hmsg->buf[i])); i++) { - min = min * 10; - min = min + (hmsg->buf[i]) - '0'; - + /* next should be one or more digits */ + i++; + min = 0; + for (; i < hmsg->req_end && (isdigit(hmsg->buf[i])); i++) { + min = min * 10; + min = min + (hmsg->buf[i]) - '0'; + } + + /* Find whitespace, end of version */ + hmsg->v_end = i; + hmsg->u_end = last_whitespace - 1; + } } - /* Find whitespace, end of version */ - hmsg->v_end = i; - - hmsg->v_maj = maj; - hmsg->v_min = min; - /* * Rightio - we have all the schtuff. Return true; we've got enough. */ retcode = 1; finish: + hmsg->v_maj = maj; + hmsg->v_min = min; + assert(maj != -1); + assert(min != -1); debug(1, 2) ("Parser: retval %d: from %d->%d: method %d->%d; url %d->%d; version %d->%d (%d/%d)\n", retcode, hmsg->req_start, hmsg->req_end, hmsg->m_start, hmsg->m_end, Index: squid/src/client_side.c =================================================================== RCS file: /cvsroot/squid-sf//squid/src/client_side.c,v retrieving revision 1.143.2.12 retrieving revision 1.143.2.13 diff -u -r1.143.2.12 -r1.143.2.13 --- squid/src/client_side.c 30 Aug 2006 07:47:42 -0000 1.143.2.12 +++ squid/src/client_side.c 31 Aug 2006 04:52:25 -0000 1.143.2.13 @@ -1,6 +1,6 @@ /* - * $Id: client_side.c,v 1.143.2.12 2006/08/30 07:47:42 adri Exp $ + * $Id: client_side.c,v 1.143.2.13 2006/08/31 04:52:25 adri Exp $ * * DEBUG: section 33 Client-side Routines * AUTHOR: Duane Wessels @@ -3597,12 +3597,19 @@ /* * Process headers after request line */ - /* for now, assume there's going to be headers. Worry about HTTP/0.9 later */ - assert(hmsg->headers_start > -1); - assert(hmsg->headers_end > -1); + /* XXX Are these sizes off by one? */ + if (hmsg->v_maj > 0) { + /* http/1.0 and above */ + assert(hmsg->headers_start > -1); + assert(hmsg->headers_end > -1); + header_sz = hmsg->headers_end - hmsg->headers_start; + prefix_sz = hmsg->headers_end - hmsg->req_start; + } else { + /* http/0.9 - no headers */ + header_sz = 0; + prefix_sz = hmsg->req_end - hmsg->req_start; + } - header_sz = hmsg->headers_end - hmsg->headers_start; - prefix_sz = hmsg->headers_end - hmsg->req_start; //debug(33, 3) ("parseHttpRequest: req_hdr = {%s}\n", req_hdr); //debug(33, 3) ("parseHttpRequest: end = {%s}\n", end);