--------------------- PatchSet 1185 Date: 2005/01/12 21:58:24 Author: nlewycky Branch: prefetching Tag: (none) Log: Initiali commit, applying patch in bug 1160. Members: configure.in:1.62->1.62.2.1 doc/debug-sections.txt:1.6->1.6.6.1 src/Debug.h:1.7->1.7.6.1 src/ESIInclude.cc:1.5->1.5.2.1 src/Makefile.am:1.57->1.57.2.1 src/cf.data.pre:1.65->1.65.2.1 src/cf_gen_defines:1.2->1.2.2.1 src/client_side_request.cc:1.33->1.33.2.1 src/client_side_request.h:1.17->1.17.6.1 src/http.cc:1.43->1.43.2.1 src/http.h:1.10->1.10.2.1 src/protos.h:1.48->1.48.2.1 src/squid.h:1.18->1.18.2.1 src/structs.h:1.65->1.65.2.1 src/url.cc:1.9->1.9.6.1 Index: squid3/configure.in =================================================================== RCS file: /cvsroot/squid-sf//squid3/configure.in,v retrieving revision 1.62 retrieving revision 1.62.2.1 diff -u -r1.62 -r1.62.2.1 --- squid3/configure.in 7 Jan 2005 03:13:19 -0000 1.62 +++ squid3/configure.in 12 Jan 2005 21:58:24 -0000 1.62.2.1 @@ -3,7 +3,7 @@ dnl dnl Duane Wessels, wessels@nlanr.net, February 1996 (autoconf v2.9) dnl -dnl $Id: configure.in,v 1.62 2005/01/07 03:13:19 squidadm Exp $ +dnl $Id: configure.in,v 1.62.2.1 2005/01/12 21:58:24 nlewycky Exp $ dnl dnl dnl @@ -13,7 +13,7 @@ AC_CONFIG_AUX_DIR(cfgaux) AM_INIT_AUTOMAKE(squid, 3.0-PRE3-CVS) AM_CONFIG_HEADER(include/autoconf.h) -AC_REVISION($Revision: 1.62 $)dnl +AC_REVISION($Revision: 1.62.2.1 $)dnl AC_PREFIX_DEFAULT(/usr/local/squid) AM_MAINTAINER_MODE @@ -670,6 +670,20 @@ fi ]) +AM_CONDITIONAL(USE_HTMLPREFETCH, false) +AC_ARG_ENABLE(html-analysis, + AC_HELP_STRING([--enable-html-analysis],[Enable HTML content analysis and prefetching. 
Requires libxml2.]), + ac_cv_use_htmlprefetch=$enableval, ac_cv_use_htmlprefetch=no) +AC_CACHE_CHECK(whether to enable HTML prefetching, ac_cv_use_htmlprefetch, + ac_cv_use_htmlprefetch=no) +if test "$ac_cv_use_htmlprefetch" = "yes" ; then + AC_DEFINE(USE_HTMLPREFETCH,1,[Compile the HTML analysis support]) + AM_CONDITIONAL(USE_HTMLPREFETCH, true) + XTRA_LIBS="$XTRA_LIBS -lxml2" +else + AC_DEFINE(USE_HTMLPREFETCH,0,[Compile the HTML analysis support]) +fi + AM_CONDITIONAL(USE_ESI, false) AC_ARG_ENABLE(esi, AC_HELP_STRING([--enable-esi],[Enable ESI for accelerators. Requires libexpat. Enabling ESI will cause squid to follow the Edge Acceleration Specification (www.esi.org). This causes squid to IGNORE client Cache-Control headers. DO NOT use this in a squid configured as a web proxy, ONLY use it in a squid configured for webserver acceleration.]), Index: squid3/doc/debug-sections.txt =================================================================== RCS file: /cvsroot/squid-sf//squid3/doc/debug-sections.txt,v retrieving revision 1.6 retrieving revision 1.6.6.1 diff -u -r1.6 -r1.6.6.1 --- squid3/doc/debug-sections.txt 23 Jul 2003 02:12:50 -0000 1.6 +++ squid3/doc/debug-sections.txt 12 Jan 2005 21:58:27 -0000 1.6.6.1 @@ -96,3 +96,4 @@ section 90 Store Client section 91 Http Surrogate-Control Header section 92 Store File System +section 93 HTML content analysis and fetcher Index: squid3/src/Debug.h =================================================================== RCS file: /cvsroot/squid-sf//squid3/src/Debug.h,v retrieving revision 1.7 retrieving revision 1.7.6.1 diff -u -r1.7 -r1.7.6.1 --- squid3/src/Debug.h 1 Sep 2003 02:12:45 -0000 1.7 +++ squid3/src/Debug.h 12 Jan 2005 21:58:27 -0000 1.7.6.1 @@ -1,6 +1,6 @@ /* - * $Id: Debug.h,v 1.7 2003/09/01 02:12:45 squidadm Exp $ + * $Id: Debug.h,v 1.7.6.1 2005/01/12 21:58:27 nlewycky Exp $ * * DEBUG: section 0 Debug Routines * AUTHOR: Harvest Derived @@ -36,6 +36,8 @@ #ifndef SQUID_DEBUG #define SQUID_DEBUG +#include 
"defines.h" + #include #undef assert #include @@ -55,15 +57,15 @@ class Debug { -public: + public: static int Levels[MAX_DEBUG_SECTIONS]; static int level; - static std::ostream &getDebugOut(); + static std::ostream & getDebugOut(); static void finishDebug(); static void parseOptions(char const *); -private: - static std::ostringstream *CurrentDebug; + private: + static std::ostringstream * CurrentDebug; }; /* Debug stream */ Index: squid3/src/ESIInclude.cc =================================================================== RCS file: /cvsroot/squid-sf//squid3/src/ESIInclude.cc,v retrieving revision 1.5 retrieving revision 1.5.2.1 diff -u -r1.5 -r1.5.2.1 --- squid3/src/ESIInclude.cc 22 Dec 2004 03:13:57 -0000 1.5 +++ squid3/src/ESIInclude.cc 12 Jan 2005 21:58:27 -0000 1.5.2.1 @@ -1,6 +1,6 @@ /* - * $Id: ESIInclude.cc,v 1.5 2004/12/22 03:13:57 squidadm Exp $ + * $Id: ESIInclude.cc,v 1.5.2.1 2005/01/12 21:58:27 nlewycky Exp $ * * DEBUG: section 86 ESI processing * AUTHOR: Robert Collins @@ -331,7 +331,7 @@ debug (86,5)("ESIIncludeStart: Starting subrequest with url '%s'\n", tempUrl); - if (clientBeginRequest(METHOD_GET, tempUrl, esiBufferRecipient, esiBufferDetach, stream.getRaw(), &tempheaders, stream->localbuffer->buf, HTTP_REQBUF_SZ)) { + if (clientBeginRequest(METHOD_GET, tempUrl, esiBufferRecipient, esiBufferDetach, stream.getRaw(), &tempheaders, stream->localbuffer->buf, HTTP_REQBUF_SZ, true)) { debug (86,0) ("starting new ESI subrequest failed\n"); } Index: squid3/src/Makefile.am =================================================================== RCS file: /cvsroot/squid-sf//squid3/src/Makefile.am,v retrieving revision 1.57 retrieving revision 1.57.2.1 diff -u -r1.57 -r1.57.2.1 --- squid3/src/Makefile.am 4 Jan 2005 03:13:37 -0000 1.57 +++ squid3/src/Makefile.am 12 Jan 2005 21:58:27 -0000 1.57.2.1 @@ -1,7 +1,7 @@ # # Makefile for the Squid Object Cache server # -# $Id: Makefile.am,v 1.57 2005/01/04 03:13:37 squidadm Exp $ +# $Id: Makefile.am,v 1.57.2.1 
2005/01/12 21:58:27 nlewycky Exp $ # # Uncomment and customize the following to suit your needs: # @@ -92,6 +92,15 @@ ESI_SOURCE = endif +HTMLPREFETCH_ALL_SOURCE = \ + HTMLPrefetchParser.cc \ + HTMLPrefetchParser.h +if USE_HTMLPREFETCH + HTMLPREFETCH_SOURCE = $(HTMLPREFETCH_ALL_SOURCE) +else + HTMLPREFETCH_SOURCE = +endif + if ENABLE_XPROF_STATS XPROF_STATS_SOURCE = ProfStats.cc else @@ -170,8 +179,10 @@ EXTRA_LIBRARIES = libAIO.a libBlocking.a libDiskDaemon.a libDiskThreads.a noinst_LIBRARIES = @DISK_LIBS@ -INCLUDES = -I. -I$(srcdir) -I$(top_builddir)/include -I$(top_srcdir)/include -I$(top_srcdir)/lib/libTrie/include -INCLUDES += @SQUID_CPPUNIT_INC@ +INCLUDES = -I. -I$(srcdir) -I$(top_builddir)/include -I$(top_srcdir)/include -I$(top_srcdir)/lib/libTrie/include @SQUID_CPPUNIT_INC@ +if USE_HTMLPREFETCH +INCLUDES += -I/usr/include/libxml2 +endif EXTRA_PROGRAMS = \ DiskIO/DiskDaemon/diskd \ @@ -252,6 +263,7 @@ htcp.h \ $(IDENT_ALL_SOURCE) \ $(ESI_ALL_SOURCE) \ + $(HTMLPREFETCH_ALL_SOURCE) \ ProfStats.cc \ leakfinder.cc \ snmp_core.cc \ @@ -423,6 +435,7 @@ HttpRequest.cc \ HttpRequest.h \ HttpVersion.h \ + $(HTMLPREFETCH_SOURCE) \ icmp.cc \ ICP.h \ icp_v2.cc \ Index: squid3/src/cf.data.pre =================================================================== RCS file: /cvsroot/squid-sf//squid3/src/cf.data.pre,v retrieving revision 1.65 retrieving revision 1.65.2.1 diff -u -r1.65 -r1.65.2.1 --- squid3/src/cf.data.pre 4 Jan 2005 03:13:38 -0000 1.65 +++ squid3/src/cf.data.pre 12 Jan 2005 21:58:27 -0000 1.65.2.1 @@ -1,6 +1,6 @@ # -# $Id: cf.data.pre,v 1.65 2005/01/04 03:13:38 squidadm Exp $ +# $Id: cf.data.pre,v 1.65.2.1 2005/01/12 21:58:27 nlewycky Exp $ # # # SQUID Web Proxy Cache http://www.squid-cache.org/ @@ -3887,6 +3887,16 @@ the same value since they both use port 2048. 
DOC_END +NAME: html_analysis +IFDEF: USE_HTMLPREFETCH +COMMENT: on|off +TYPE: onoff +LOC: Config.onoff.htmlPrefetch +DEFAULT: on +DOC_START + Analyze HTML documents received from the server and fetch all + embedded documents into the cache. The default is on. +DOC_END COMMENT_START DELAY POOL PARAMETERS (all require DELAY_POOLS compilation option) Index: squid3/src/cf_gen_defines =================================================================== RCS file: /cvsroot/squid-sf//squid3/src/cf_gen_defines,v retrieving revision 1.2 retrieving revision 1.2.2.1 diff -u -r1.2 -r1.2.2.1 --- squid3/src/cf_gen_defines 10 Dec 2004 03:13:53 -0000 1.2 +++ squid3/src/cf_gen_defines 12 Jan 2005 21:58:29 -0000 1.2.2.1 @@ -11,6 +11,7 @@ define["USE_DNSSERVERS"]="--disable-internal-dns" define["!USE_DNSSERVERS"]="--enable-internal-dns" define["USE_HTCP"]="--enable-htcp" + define["USE_HTMLPREFETCH"]="--enable-html-analysis" define["USE_ICMP"]="--enable-icmp" define["USE_IDENT"]="--enable-ident-lookups" define["USE_REFERER_LOG"]="--enable-referer-log" Index: squid3/src/client_side_request.cc =================================================================== RCS file: /cvsroot/squid-sf//squid3/src/client_side_request.cc,v retrieving revision 1.33 retrieving revision 1.33.2.1 diff -u -r1.33 -r1.33.2.1 --- squid3/src/client_side_request.cc 22 Dec 2004 03:13:58 -0000 1.33 +++ squid3/src/client_side_request.cc 12 Jan 2005 21:58:29 -0000 1.33.2.1 @@ -1,6 +1,6 @@ /* - * $Id: client_side_request.cc,v 1.33 2004/12/22 03:13:58 squidadm Exp $ + * $Id: client_side_request.cc,v 1.33.2.1 2005/01/12 21:58:29 nlewycky Exp $ * * DEBUG: section 85 Client-side Request Routines * AUTHOR: Robert Collins (Originally Duane Wessels in client_side.c) @@ -264,7 +264,7 @@ int /* returns nonzero on failure */ clientBeginRequest(method_t method, char const *url, CSCB * streamcallback, CSD * streamdetach, ClientStreamData streamdata, HttpHeader const *header, - char *tailbuf, size_t taillen) + char *tailbuf, size_t 
taillen, bool accel) { size_t url_sz; HttpVersion http_ver (1, 0); @@ -285,9 +285,8 @@ /* make it visible in the 'current acctive requests list' */ dlinkAdd(http, &http->active, &ClientActiveRequests); /* Set flags */ - /* internal requests only makes sense in an - * accelerator today. TODO: accept flags ? */ - http->flags.accel = 1; + /* TODO: accept flags */ + http->flags.accel = accel; /* allow size for url rewriting */ url_sz = strlen(url) + Config.appendDomainLen + 5; http->uri = (char *)xcalloc(url_sz, 1); @@ -299,7 +298,7 @@ } /* - * now update the headers in request with our supplied headers. urLParse + * now update the headers in request with our supplied headers. urlParse * should return a blank header set, but we use Update to be sure of * correctness. */ Index: squid3/src/client_side_request.h =================================================================== RCS file: /cvsroot/squid-sf//squid3/src/client_side_request.h,v retrieving revision 1.17 retrieving revision 1.17.6.1 diff -u -r1.17 -r1.17.6.1 --- squid3/src/client_side_request.h 2 Sep 2003 02:12:39 -0000 1.17 +++ squid3/src/client_side_request.h 12 Jan 2005 21:58:29 -0000 1.17.6.1 @@ -1,6 +1,6 @@ /* - * $Id: client_side_request.h,v 1.17 2003/09/02 02:12:39 squidadm Exp $ + * $Id: client_side_request.h,v 1.17.6.1 2005/01/12 21:58:29 nlewycky Exp $ * * * SQUID Web Proxy Cache http://www.squid-cache.org/ @@ -40,7 +40,7 @@ #include "AccessLogEntry.h" /* client_side_request.c - client side request related routines (pure logic) */ -extern int clientBeginRequest(method_t, char const *, CSCB *, CSD *, ClientStreamData, HttpHeader const *, char *, size_t); +extern int clientBeginRequest(method_t, char const *, CSCB *, CSD *, ClientStreamData, HttpHeader const *, char *, size_t, bool); class MemObject; Index: squid3/src/http.cc =================================================================== RCS file: /cvsroot/squid-sf//squid3/src/http.cc,v retrieving revision 1.43 retrieving revision 1.43.2.1 diff -u 
-r1.43 -r1.43.2.1 --- squid3/src/http.cc 7 Jan 2005 03:13:20 -0000 1.43 +++ squid3/src/http.cc 12 Jan 2005 21:58:29 -0000 1.43.2.1 @@ -1,6 +1,6 @@ /* - * $Id: http.cc,v 1.43 2005/01/07 03:13:20 squidadm Exp $ + * $Id: http.cc,v 1.43.2.1 2005/01/12 21:58:29 nlewycky Exp $ * * DEBUG: section 11 Hypertext Transfer Protocol (HTTP) * AUTHOR: Harvest Derived @@ -50,6 +50,13 @@ #if DELAY_POOLS #include "DelayPools.h" #endif +#if USE_HTMLPREFETCH +#include "HTMLPrefetchParser.h" +#endif + +#include + +using namespace std; CBDATA_TYPE(HttpStateData); @@ -68,13 +75,13 @@ static void httpMaybeRemovePublic(StoreEntry *, http_status); static void copyOneHeaderFromClientsideRequestToUpstreamRequest(const HttpHeaderEntry *e, String strConnection, HttpRequest * request, HttpRequest * orig_request, HttpHeader * hdr_out, int we_do_ranges, http_state_flags); -static int decideIfWeDoRanges (HttpRequest * orig_request); +static int decideIfWeDoRanges(HttpRequest * orig_request); static void httpStateFree(int fd, void *data) { - HttpStateData *httpState = static_cast(data); + HttpStateData *httpState = static_cast < HttpStateData * >(data); if (httpState == NULL) return; @@ -107,7 +114,7 @@ static void httpTimeout(int fd, void *data) { - HttpStateData *httpState = static_cast(data); + HttpStateData *httpState = static_cast < HttpStateData * >(data); StoreEntry *entry = httpState->entry; debug(11, 4) ("httpTimeout: FD %d: '%s'\n", fd, storeUrl(entry)); @@ -287,7 +294,7 @@ } void -HttpStateData::processSurrogateControl(HttpReply *reply) +HttpStateData::processSurrogateControl(HttpReply * reply) { #if ESI @@ -326,6 +333,8 @@ } #endif + + // TODO: prefetch from HDR_LINK if applicable. 
} int @@ -544,7 +553,7 @@ * Returns false if the variance cannot be stored */ const char * -httpMakeVaryMark(HttpRequest * request, HttpReply const * reply) +httpMakeVaryMark(HttpRequest * request, HttpReply const *reply) { String vary, hdr; const char *pos = NULL; @@ -557,7 +566,7 @@ vary = httpHeaderGetList(&reply->header, HDR_VARY); while (strListGetItem(&vary, ',', &item, &ilen, &pos)) { - char *name = (char *)xmalloc(ilen + 1); + char *name = (char *) xmalloc(ilen + 1); xstrncpy(name, item, ilen + 1); Tolower(name); @@ -590,7 +599,7 @@ vary = httpHeaderGetList(&reply->header, HDR_X_ACCELERATOR_VARY); while (strListGetItem(&vary, ',', &item, &ilen, &pos)) { - char *name = (char *)xmalloc(ilen + 1); + char *name = (char *) xmalloc(ilen + 1); xstrncpy(name, item, ilen + 1); Tolower(name); strListAdd(&vstr, name, ','); @@ -616,11 +625,11 @@ } void -HttpStateData::failReply(HttpReply *reply, http_status const & status) +HttpStateData::failReply(HttpReply * reply, http_status const &status) { reply->sline.version = HttpVersion(1, 0); reply->sline.status = status; - storeEntryReplaceObject (entry, reply); + storeEntryReplaceObject(entry, reply); if (eof == 1) { fwdComplete(fwd); @@ -662,7 +671,7 @@ debugs(11, 3, "httpProcessReplyHeader: Non-HTTP-compliant header: '" << reply_hdr.buf << "'"); reply_hdr_state += 2; memBufClean(&reply_hdr); - failReply (reply, HTTP_INVALID_HEADER); + failReply(reply, HTTP_INVALID_HEADER); return; } @@ -678,7 +687,7 @@ if (!memBufIsNull(&reply_hdr)) memBufClean(&reply_hdr); - failReply (reply, HTTP_HEADER_TOO_LARGE); + failReply(reply, HTTP_HEADER_TOO_LARGE); return; } @@ -717,7 +726,7 @@ return; } - processSurrogateControl (reply); + processSurrogateControl(reply); /* TODO: we need our own reply * in the httpState, as we probably don't want to replace * the storeEntry with interim headers */ @@ -827,7 +836,7 @@ /* If the reply wants to close the connection, it takes precedence */ if (httpHeaderHasConnDir(&reply->header, "close")) - 
return COMPLETE_NONPERSISTENT_MSG; + return COMPLETE_NONPERSISTENT_MSG; /* If we didn't send a keep-alive request header, then this * can not be a persistent connection. @@ -889,12 +898,12 @@ * error or connection closed. */ /* XXX this function is too long! */ static void -httpReadReply(int fd, char *buf, size_t len, comm_err_t flag, int xerrno,void *data) +httpReadReply(int fd, char *buf, size_t len, comm_err_t flag, int xerrno, void *data) { - HttpStateData *httpState = static_cast(data); - assert (fd == httpState->fd); + HttpStateData *httpState = static_cast (data); + assert(fd == httpState->fd); PROF_start(HttpStateData_readReply); - httpState->readReply (fd, buf, len, flag, xerrno, data); + httpState->readReply(fd, buf, len, flag, xerrno, data); PROF_stop(HttpStateData_readReply); } @@ -909,10 +918,10 @@ assert(buf == readBuf); /* Bail out early on COMM_ERR_CLOSING - close handlers will tidy up for us - */ + */ if (flag == COMM_ERR_CLOSING) { - debug (11,3)("http socket closing\n"); + debug(11, 3) ("http socket closing\n"); return; } @@ -929,7 +938,7 @@ #endif - debug(11, 5) ("httpReadReply: FD %d: len %d.\n", fd, (int)len); + debug(11, 5) ("httpReadReply: FD %d: len %d.\n", fd, (int) len); if (flag == COMM_OK && len > 0) { #if DELAY_POOLS @@ -1001,7 +1010,7 @@ */ /* doesn't return */ processReplyHeader(buf, len); - else if (entry->getReply()->sline.status == HTTP_INVALID_HEADER && HttpVersion(0,9) != entry->getReply()->sline.version) { + else if (entry->getReply()->sline.status == HTTP_INVALID_HEADER && HttpVersion(0, 9) != entry->getReply()->sline.version) { ErrorState *err; err = errorCon(ERR_INVALID_REQ, HTTP_BAD_GATEWAY); err->request = requestLink((HttpRequest *) request); @@ -1030,7 +1039,7 @@ http_status s = entry->getReply()->sline.status; HttpVersion httpver = entry->getReply()->sline.version; - if (s == HTTP_INVALID_HEADER && httpver != HttpVersion(0,9)) { + if (s == HTTP_INVALID_HEADER && httpver != HttpVersion(0, 9)) { ErrorState *err; 
storeEntryReset(entry); err = errorCon(ERR_INVALID_REQ, HTTP_BAD_GATEWAY); @@ -1055,6 +1064,17 @@ } } +#if USE_HTMLPREFETCH + if (Config.onoff.htmlPrefetch) { + String content_type = entry->getReply()->content_type; + + if (content_type.buf() && + (content_type.caseCmp("text/html") == 0 || + content_type.caseCmp("application/xhtml+xml") == 0)) + htmlpp->init(); + } +#endif + PROF_start(HttpStateData_processReplyData); processReplyData(buf, len); PROF_stop(HttpStateData_processReplyData); @@ -1075,22 +1095,31 @@ if (!flags.headers_pushed) { /* The first block needs us to skip the headers */ /* TODO: make this cleaner. WE should push the headers, NOT the parser */ - size_t end = headersEnd (buf, len); + size_t end = headersEnd(buf, len); /* IF len > end, we need to append data after the * out of band update to the store */ if (len > end) { - tempBuffer.data = (char *)buf+end; +#if USE_HTMLPREFETCH + htmlpp->parse(buf + end, len - end, + persistentConnStatus() == INCOMPLETE_MSG); +#endif + + tempBuffer.data = (char *) buf + end; tempBuffer.length = len - end; tempBuffer.offset = currentOffset; currentOffset += tempBuffer.length; - entry->write (tempBuffer); + entry->write(tempBuffer); } flags.headers_pushed = 1; } else { - tempBuffer.data = (char *)buf; +#if USE_HTMLPREFETCH + htmlpp->parse(buf, len, persistentConnStatus() == INCOMPLETE_MSG); +#endif + + tempBuffer.data = (char *) buf; tempBuffer.length = len; tempBuffer.offset = currentOffset; currentOffset += len; @@ -1143,7 +1172,7 @@ fwdUnregister(fd, fwd); fwdComplete(fwd); /* TODO: check that fd is still open here */ - comm_close (fd); + comm_close(fd); fd = -1; httpStateFree(fd, this); return; @@ -1226,15 +1255,15 @@ const HttpHeaderEntry *e; String strFwd; HttpHeaderPos pos = HttpHeaderInitPos; - assert (hdr_out->owner == hoRequest); + assert(hdr_out->owner == hoRequest); /* append our IMS header */ if (request->lastmod > -1 && request->method == METHOD_GET) httpHeaderPutTime(hdr_out, HDR_IF_MODIFIED_SINCE, 
request->lastmod); - bool we_do_ranges = decideIfWeDoRanges (orig_request); + bool we_do_ranges = decideIfWeDoRanges(orig_request); - String strConnection (httpHeaderGetList(hdr_in, HDR_CONNECTION)); + String strConnection(httpHeaderGetList(hdr_in, HDR_CONNECTION)); while ((e = httpHeaderGetEntry(hdr_in, &pos))) copyOneHeaderFromClientsideRequestToUpstreamRequest(e, strConnection, request, orig_request, hdr_out, we_do_ranges, flags); @@ -1347,7 +1376,7 @@ /* No credentials to forward.. (should have been done above if available) */ } else if (strcmp(orig_request->peer_login, "PROXYPASS") == 0) { /* Special mode, convert proxy authentication to WWW authentication - * (also applies to authentication provided by external acl) + * (also applies to authentication provided by external acl) */ const char *auth = httpHeaderGetStr(hdr_in, HDR_PROXY_AUTHORIZATION); @@ -1431,7 +1460,7 @@ void -copyOneHeaderFromClientsideRequestToUpstreamRequest(const HttpHeaderEntry *e, String strConnection, HttpRequest * request, HttpRequest * orig_request, HttpHeader * hdr_out, int we_do_ranges, http_state_flags flags) +copyOneHeaderFromClientsideRequestToUpstreamRequest(const HttpHeaderEntry * e, String strConnection, HttpRequest * request, HttpRequest * orig_request, HttpHeader * hdr_out, int we_do_ranges, http_state_flags flags) { debug(11, 5) ("httpBuildRequestHeader: %s: %s\n", e->name.buf(), e->value.buf()); @@ -1558,7 +1587,7 @@ } int -decideIfWeDoRanges (HttpRequest * orig_request) +decideIfWeDoRanges(HttpRequest * orig_request) { int result = 1; /* decide if we want to do Ranges ourselves @@ -1577,7 +1606,7 @@ result = 0; debug(11, 8) ("decideIfWeDoRanges: range specs: %p, cachable: %d; we_do_ranges: %d\n", - orig_request->range, orig_request->flags.cachable, result); + orig_request->range, orig_request->flags.cachable, result); return result; } @@ -1596,8 +1625,8 @@ HttpVersion httpver(1, 0); memBufPrintf(mb, "%s %s HTTP/%d.%d\r\n", RequestMethodStr[request->method], - 
request->urlpath.size() ? request->urlpath.buf() : "/", - httpver.major,httpver.minor); + request->urlpath.size()? request->urlpath.buf() : "/", + httpver.major, httpver.minor); /* build and pack headers */ { HttpHeader hdr(hoRequest); @@ -1694,8 +1723,13 @@ httpState->entry = fwd->entry; httpState->fd = fd; +#if USE_HTMLPREFETCH + auto_ptr htmlpp(new HTMLPrefetchParser(httpState)); + httpState->htmlpp = htmlpp; +#endif + if (fwd->servers) - httpState->_peer = fwd->servers->_peer; /* might be NULL */ + httpState->_peer = fwd->servers->_peer; /* might be NULL */ if (httpState->_peer) { const char *url; @@ -1762,7 +1796,7 @@ static void httpSendRequestEntityDone(int fd, void *data) { - HttpStateData *httpState = static_cast(data); + HttpStateData *httpState = static_cast < HttpStateData * >(data); ACLChecklist ch; debug(11, 5) ("httpSendRequestEntityDone: FD %d\n", fd); ch.request = requestLink(httpState->request); @@ -1803,7 +1837,7 @@ static void httpSendRequestEntity(int fd, char *bufnotused, size_t size, comm_err_t errflag, void *data) { - HttpStateData *httpState = static_cast(data); + HttpStateData *httpState = static_cast < HttpStateData * >(data); StoreEntry *entry = httpState->entry; ErrorState *err; debug(11, 5) ("httpSendRequestEntity: FD %d: size %d: errflag %d.\n", @@ -1832,7 +1866,7 @@ return; } - clientReadBody(httpState->orig_request, (char *)memAllocate(MEM_8K_BUF), 8192, httpRequestBodyHandler, httpState); + clientReadBody(httpState->orig_request, (char *) memAllocate(MEM_8K_BUF), 8192, httpRequestBodyHandler, httpState); } void Index: squid3/src/http.h =================================================================== RCS file: /cvsroot/squid-sf//squid3/src/http.h,v retrieving revision 1.10 retrieving revision 1.10.2.1 diff -u -r1.10 -r1.10.2.1 --- squid3/src/http.h 22 Dec 2004 03:13:58 -0000 1.10 +++ squid3/src/http.h 12 Jan 2005 21:58:30 -0000 1.10.2.1 @@ -1,6 +1,6 @@ /* - * $Id: http.h,v 1.10 2004/12/22 03:13:58 squidadm Exp $ + * $Id: http.h,v 
1.10.2.1 2005/01/12 21:58:30 nlewycky Exp $ * * * SQUID Web Proxy Cache http://www.squid-cache.org/ @@ -34,13 +34,19 @@ #ifndef SQUID_HTTP_H #define SQUID_HTTP_H +#include + #include "StoreIOBuffer.h" #include "comm.h" +#if USE_HTMLPREFETCH +class HTMLPrefetchParser; +#endif + class HttpStateData { -public: + public: static CWCB SendComplete; /* should be private */ void processReplyHeader(const char *buf, int size); @@ -67,15 +73,19 @@ bool surrogateNoStore; void processSurrogateControl(HttpReply *); -private: +#if USE_HTMLPREFETCH + std::auto_ptr htmlpp; +#endif + + private: enum ConnectionStatus { - INCOMPLETE_MSG, - COMPLETE_PERSISTENT_MSG, - COMPLETE_NONPERSISTENT_MSG + INCOMPLETE_MSG, + COMPLETE_PERSISTENT_MSG, + COMPLETE_NONPERSISTENT_MSG }; ConnectionStatus statusIfComplete() const; ConnectionStatus persistentConnStatus() const; - void failReply (HttpReply *reply, http_status const &status); + void failReply(HttpReply * reply, http_status const &status); }; #endif /* SQUID_HTTP_H */ Index: squid3/src/protos.h =================================================================== RCS file: /cvsroot/squid-sf//squid3/src/protos.h,v retrieving revision 1.48 retrieving revision 1.48.2.1 diff -u -r1.48 -r1.48.2.1 --- squid3/src/protos.h 4 Jan 2005 03:13:39 -0000 1.48 +++ squid3/src/protos.h 12 Jan 2005 21:58:30 -0000 1.48.2.1 @@ -1,6 +1,6 @@ /* - * $Id: protos.h,v 1.48 2005/01/04 03:13:39 squidadm Exp $ + * $Id: protos.h,v 1.48.2.1 2005/01/12 21:58:30 nlewycky Exp $ * * * SQUID Web Proxy Cache http://www.squid-cache.org/ @@ -113,7 +113,7 @@ /* comm.c */ extern void comm_calliocallback(void); -extern bool comm_iocallbackpending(void); /* inline candidate */ +extern bool comm_iocallbackpending(void); /* inline candidate */ extern int comm_listen(int fd); SQUIDCEXTERN int commSetNonBlocking(int fd); @@ -165,7 +165,7 @@ SQUIDCEXTERN void packerAppend(Packer * p, const char *buf, int size); #if STDC_HEADERS SQUIDCEXTERN void -packerPrintf(Packer * p, const char *fmt,...) 
PRINTF_FORMAT_ARG2; +packerPrintf(Packer * p, const char *fmt, ...) PRINTF_FORMAT_ARG2; #else SQUIDCEXTERN void packerPrintf(); #endif @@ -179,7 +179,7 @@ SQUIDCEXTERN void _db_rotate_log(void); #if STDC_HEADERS -SQUIDCEXTERN void _db_print(const char *,...) PRINTF_FORMAT_ARG1; +SQUIDCEXTERN void _db_print(const char *, ...) PRINTF_FORMAT_ARG1; #else SQUIDCEXTERN void _db_print(); #endif @@ -197,7 +197,7 @@ void FreeObject(void *address) { - O *anObject = static_cast (address); + O *anObject = static_cast < O * >(address); delete anObject; } @@ -230,7 +230,7 @@ SQUIDCEXTERN void idnsPTRLookup(const struct in_addr, IDNSCB *, void *); -extern void eventAdd(const char *name, EVH * func, void *arg, double when, int, bool cbdata=true); +extern void eventAdd(const char *name, EVH * func, void *arg, double when, int, bool cbdata = true); SQUIDCEXTERN void eventAddIsh(const char *name, EVH * func, void *arg, double delta_ish, int); SQUIDCEXTERN void eventRun(void); SQUIDCEXTERN int eventNextTime(void); @@ -284,15 +284,15 @@ SQUIDCEXTERN int httpCachable(method_t); SQUIDCEXTERN void httpStart(FwdState *); SQUIDCEXTERN mb_size_t httpBuildRequestPrefix(HttpRequest * request, - HttpRequest * orig_request, - StoreEntry * entry, - MemBuf * mb, - http_state_flags); + HttpRequest * orig_request, + StoreEntry * entry, + MemBuf * mb, + http_state_flags); SQUIDCEXTERN void httpAnonInitModule(void); SQUIDCEXTERN int httpAnonHdrAllowed(http_hdr_type hdr_id); SQUIDCEXTERN int httpAnonHdrDenied(http_hdr_type hdr_id); SQUIDCEXTERN void httpBuildRequestHeader(HttpRequest *, HttpRequest *, StoreEntry *, HttpHeader *, http_state_flags); -SQUIDCEXTERN const char *httpMakeVaryMark(HttpRequest * request, HttpReply const * reply); +SQUIDCEXTERN const char *httpMakeVaryMark(HttpRequest * request, HttpReply const *reply); /* ETag */ SQUIDCEXTERN int etagParseInit(ETag * etag, const char *str); @@ -328,8 +328,8 @@ /* Http Surrogate Control Header Field */ extern void httpHdrScStatDumper(StoreEntry 
* sentry, int idx, double val, double size, int count); -extern void httpHdrScInitModule (void); -extern void httpHdrScCleanModule (void); +extern void httpHdrScInitModule(void); +extern void httpHdrScCleanModule(void); extern HttpHdrSc *httpHdrScCreate(void); extern HttpHdrSc *httpHdrScParseCreate(String const *); extern void httpHdrScDestroy(HttpHdrSc * sc); @@ -338,11 +338,11 @@ extern void httpHdrScJoinWith(HttpHdrSc *, const HttpHdrSc *); extern void httpHdrScSetMaxAge(HttpHdrSc *, char const *, int); extern void httpHdrScUpdateStats(const HttpHdrSc *, StatHist *); -extern HttpHdrScTarget * httpHdrScFindTarget (HttpHdrSc *sc, const char *target); -extern HttpHdrScTarget * httpHdrScGetMergedTarget (HttpHdrSc *sc, const char *ourtarget); +extern HttpHdrScTarget *httpHdrScFindTarget(HttpHdrSc * sc, const char *target); +extern HttpHdrScTarget * httpHdrScGetMergedTarget(HttpHdrSc * sc, const char *ourtarget); /* Http Surrogate control header field 'targets' */ -extern HttpHdrScTarget * httpHdrScTargetCreate (const char *); +extern HttpHdrScTarget *httpHdrScTargetCreate(const char *); extern void httpHdrScTargetDestroy(HttpHdrScTarget *); extern HttpHdrScTarget *httpHdrScTargetDup(const HttpHdrScTarget *); extern void httpHdrScTargetPackInto(const HttpHdrScTarget *, Packer *); @@ -479,8 +479,8 @@ #endif /* USE_WCCP */ SQUIDCEXTERN void ipcache_nbgethostbyname(const char *name, - IPH * handler, - void *handlerData); + IPH * handler, + void *handlerData); SQUIDCEXTERN EVH ipcache_purgelru; SQUIDCEXTERN const ipcache_addrs *ipcache_gethostbyname(const char *, int flags); SQUIDCEXTERN void ipcacheInvalidate(const char *); @@ -774,7 +774,7 @@ SQUIDCEXTERN void fatal(const char *message); #if STDC_HEADERS SQUIDCEXTERN void -fatalf(const char *fmt,...) PRINTF_FORMAT_ARG1; +fatalf(const char *fmt, ...) 
PRINTF_FORMAT_ARG1; #else SQUIDCEXTERN void fatalf(); #endif @@ -832,6 +832,10 @@ SQUIDCEXTERN char *urlCanonicalClean(const HttpRequest *); SQUIDCEXTERN char *urlHostname(const char *url); SQUIDCEXTERN void urlExtMethodConfigure(void); +SQUIDCEXTERN void urlParseRFC1808(const char *url, + char **fragment, char **scheme, + char **net_loc, char **query, char **params, char **path); +SQUIDCEXTERN char *urlResolveRelative(const char *embedded, const char *base); SQUIDCEXTERN void useragentOpenLog(void); SQUIDCEXTERN void useragentRotateLog(void); @@ -952,7 +956,7 @@ SQUIDCEXTERN void logfileFlush(Logfile * lf); #if STDC_HEADERS SQUIDCEXTERN void -logfilePrintf(Logfile * lf, const char *fmt,...) PRINTF_FORMAT_ARG2; +logfilePrintf(Logfile * lf, const char *fmt, ...) PRINTF_FORMAT_ARG2; #else SQUIDCEXTERN void logfilePrintf(va_alist); #endif @@ -960,7 +964,7 @@ /* * Removal Policies */ -SQUIDCEXTERN RemovalPolicy *createRemovalPolicy(RemovalPolicySettings * settings); +SQUIDCEXTERN RemovalPolicy *createRemovalPolicy(RemovalPolicySettings *settings); /* * prototypes for system functions missing from system includes Index: squid3/src/squid.h =================================================================== RCS file: /cvsroot/squid-sf//squid3/src/squid.h,v retrieving revision 1.18 retrieving revision 1.18.2.1 diff -u -r1.18 -r1.18.2.1 --- squid3/src/squid.h 7 Jan 2005 03:13:21 -0000 1.18 +++ squid3/src/squid.h 12 Jan 2005 21:58:30 -0000 1.18.2.1 @@ -1,6 +1,6 @@ /* - * $Id: squid.h,v 1.18 2005/01/07 03:13:21 squidadm Exp $ + * $Id: squid.h,v 1.18.2.1 2005/01/12 21:58:30 nlewycky Exp $ * * AUTHOR: Duane Wessels * @@ -37,6 +37,9 @@ #include "config.h" +#ifdef assert +#undef assert +#endif #if PURIFY #define assert(EX) ((void)0) #elif defined(NODEBUG) @@ -380,10 +383,10 @@ template inline A const & -min(A const & lhs, A const & rhs) +min(A const &lhs, A const &rhs) { if (rhs < lhs) - return rhs; + return rhs; return lhs; } @@ -394,10 +397,10 @@ #ifndef max template inline A 
const & -max(A const & lhs, A const & rhs) +max(A const &lhs, A const &rhs) { if (rhs > lhs) - return rhs; + return rhs; return lhs; } Index: squid3/src/structs.h =================================================================== RCS file: /cvsroot/squid-sf//squid3/src/structs.h,v retrieving revision 1.65 retrieving revision 1.65.2.1 diff -u -r1.65 -r1.65.2.1 --- squid3/src/structs.h 9 Jan 2005 03:13:53 -0000 1.65 +++ squid3/src/structs.h 12 Jan 2005 21:58:30 -0000 1.65.2.1 @@ -1,6 +1,6 @@ /* - * $Id: structs.h,v 1.65 2005/01/09 03:13:53 squidadm Exp $ + * $Id: structs.h,v 1.65.2.1 2005/01/12 21:58:30 nlewycky Exp $ * * * SQUID Web Proxy Cache http://www.squid-cache.org/ @@ -571,6 +571,13 @@ int check_hostnames; int via; int emailErrData; + +#if USE_HTMLPREFETCH + + int htmlPrefetch; + +#endif + } onoff; Index: squid3/src/url.cc =================================================================== RCS file: /cvsroot/squid-sf//squid3/src/url.cc,v retrieving revision 1.9 retrieving revision 1.9.6.1 diff -u -r1.9 -r1.9.6.1 --- squid3/src/url.cc 11 Aug 2003 02:13:03 -0000 1.9 +++ squid3/src/url.cc 12 Jan 2005 21:58:38 -0000 1.9.6.1 @@ -1,6 +1,6 @@ /* - * $Id: url.cc,v 1.9 2003/08/11 02:13:03 squidadm Exp $ + * $Id: url.cc,v 1.9.6.1 2005/01/12 21:58:38 nlewycky Exp $ * * DEBUG: section 23 URL Parsing * AUTHOR: Duane Wessels @@ -182,10 +182,10 @@ /* more cases? 
*/ } -method_t &operator++ (method_t &aMethod) +method_t & operator++(method_t & aMethod) { - int tmp = (int)aMethod; - aMethod = (method_t)(++tmp); + int tmp = (int) aMethod; + aMethod = (method_t) (++tmp); return aMethod; } @@ -711,10 +711,10 @@ { public: - char * extract(char const *url); + char *extract(char const *url); private: - static char Host [SQUIDHOSTNAMELEN]; + static char Host[SQUIDHOSTNAMELEN]; void init(char const *); void findHostStart(); void trimTrailingChars(); @@ -832,3 +832,387 @@ w = w->next; } } + +void +urlParseRFC1808(const char *url, + char **fragment, char **scheme, char **net_loc, + char **query, char **params, char **path) +{ + char *url_ = xstrdup(url); + + /* 2.4.1. Parsing the Fragment Identifier + * + * If the parse string contains a crosshatch "#" character, then + * the substring after the first (left-most) crosshatch "#" and up + * to the end of the parse string is the <fragment> identifier. If + * the crosshatch is the last character, or no crosshatch is + * present, then the fragment identifier is empty. The matched + * substring, including the crosshatch character, is removed from + * the parse string before continuing. + * + * Note that the fragment identifier is not considered part of the + * URL. However, since it is often attached to the URL, parsers + * must be able to recognize and set aside fragment identifiers as + * part of the process. + */ + char *p = strchr(url_, '#'); + + if (p) { + *p = 0; + p++; + size_t len = strlen(p); + *fragment = static_cast < char *>(xcalloc(len + 1, sizeof(char))); + strncpy(*fragment, p, len); + } else + *fragment = xstrdup(""); + + /* 2.4.2. Parsing the Scheme + * + * If the parse string contains a colon ":" after the first + * character and before any characters not allowed as part of a + * scheme name (i.e., any not an alphanumeric, plus "+", period + * ".", or hyphen "-"), the <scheme> of the URL is the substring + * of characters up to but not including the first colon.
These + * characters and the colon are then removed from the parse string + * before continuing. + */ + p = url_; + + while (*p && (isalnum(*p) || *p == '+' || *p == '.' || *p == '-')) + p++; + + if (*p == ':') { + *p = 0; + p++; + size_t len = strlen(url_); + *scheme = static_cast < char *>(xcalloc(len + 1, sizeof(char))); + strncpy(*scheme, url_, len); + } else { + *scheme = xstrdup(""); + p = url_; + } + + /* 2.4.3. Parsing the Network Location/Login + * + * If the parse string begins with a double-slash "//", then the + * substring of characters after the double-slash and up to, but + * not including, the next slash "/" character is the network + * location/login (<net_loc>) of the URL. If no trailing slash "/" + * is present, the entire remaining parse string is assigned to + * <net_loc>. The double- slash and <net_loc> are removed from the + * parse string before continuing. + */ + char *begin = p; + + if (begin[0] == '/' && begin[1] == '/') { // depends on short circuit + p = strchr(begin + 2, '/'); + *begin = 0; + *(begin + 1) = 0; + begin += 2; + size_t len = p ? p - begin : strlen(begin); + *net_loc = static_cast < char *>(xcalloc(len + 1, sizeof(char))); + strncpy(*net_loc, begin, len); + } else + *net_loc = xstrdup(""); + + /* 2.4.4. Parsing the Query Information + * + * If the parse string contains a question mark "?" character, + * then the substring after the first (left-most) question mark + * "?" and up to the end of the parse string is the <query> + * information. If the question mark is the last character, or no + * question mark is present, then the query information is + * empty. The matched substring, including the question mark + * character, is removed from the parse string before continuing.
+ */ + begin = p; + + if (p) + p = strchr(p, '?'); // eg., scheme://host?query + + if (p && *p) { // depends on short circuit (obvious this time) + *p = 0; + p++; + size_t len = strlen(p); + *query = static_cast < char *>(xcalloc(len + 1, sizeof(char))); + strncpy(*query, p, len); + } else + *query = xstrdup(""); + + /* 2.4.5. Parsing the Parameters + * + * If the parse string contains a semicolon ";" character, then + * the substring after the first (left-most) semicolon ";" and up + * to the end of the parse string is the parameters (<params>). + * If the semicolon is the last character, or no semicolon is + * present, then <params> is empty. The matched substring, + * including the semicolon character, is removed from the parse + * string before continuing. + */ + p = begin ? strchr(begin, ';') : NULL; + + if (p && *p) { + *p = 0; + p++; + size_t len = strlen(p); + *params = static_cast < char *>(xcalloc(len + 1, sizeof(char))); + strncpy(*params, p, len); + } else + *params = xstrdup(""); + + /* 2.4.6. Parsing the Path + * + * After the above steps, all that is left of the parse string is + * the URL <path> and the slash "/" that may precede it. Even + * though the initial slash is not part of the URL path, the + * parser must remember whether or not it was present so that + * later processes can differentiate between relative and absolute + * paths. Often this is done by simply storing the preceding + * slash along with the path. + */ + if (begin && *begin) { + size_t len = strlen(begin); + *path = static_cast < char *>(xcalloc(len + 1, sizeof(char))); + strncpy(*path, begin, len); + } else + *path = xstrdup(""); + + xfree(url_); +} + +// This isn't *exactly* RFC 1808, but it's very close. Specifically, +// it won't inherit param/query/fragment from the base into the +// embedded URL. Also, it doesn't check whether a path is a "complete +// path", so it won't emit ".." or "." in certain cases it should. +// Neither of these deviations are dangerous.
+char * +urlResolveRelative(const char *embedded, const char *base) +{ + char *base_scheme, *base_host, *base_path, *base_params, + *base_query, *base_fragment; + char *embd_scheme, *embd_host, *embd_path, *embd_params, + *embd_query, *embd_fragment; + + /* Step 1: The base URL is established according to the rules of + * Section 3. If the base URL is the empty string (unknown), the + * embedded URL is interpreted as an absolute URL and we are done. + */ + + if (strlen(base) == 0) + return xstrdup(embedded); + + /* Step 2: Both the base and embedded URLs are parsed into their + * component parts as described in Section 2.4. + * + * a) If the embedded URL is entirely empty, it inherits the + * entire base URL (i.e., is set equal to the base URL) and we are + * done. + */ + if (strlen(embedded) == 0) + return xstrdup(base); + + /* b) If the embedded URL starts with a scheme name, it is + * interpreted as an absolute URL and we are done. + */ + urlParseRFC1808(embedded, &embd_fragment, &embd_scheme, &embd_host, + &embd_query, &embd_params, &embd_path); + + if (strlen(embd_scheme) != 0) { + xfree(embd_scheme); + xfree(embd_host); + xfree(embd_path); + xfree(embd_params); + xfree(embd_query); + xfree(embd_fragment); + + return xstrdup(embedded); + } + + /* c) Otherwise, the embedded URL inherits the scheme of the base + * URL. + */ + urlParseRFC1808(base, &base_fragment, &base_scheme, &base_host, + &base_query, &base_params, &base_path); + + xfree(embd_scheme); + + embd_scheme = xstrdup(base_scheme); + + /* Step 3: If the embedded URL's <net_loc> is non-empty, we skip + * to Step 7. Otherwise, the embedded URL inherits the <net_loc> + * (if any) of the base URL. + */ + if (strlen(embd_host) == 0) { + xfree(embd_host); + embd_host = xstrdup(base_host); + + /* Step 4: If the embedded URL path is preceded by a slash + * "/", the path is not relative and we skip to Step 7.
+ */ + + if (embd_path[0] != '/') { + /* Step 5: If the embedded URL path is empty (and not + * preceded by a slash), then the embedded URL inherits + * the base URL path. + */ + // The RFC goes on to inherit <params> and <query>, + // however that sounds bogus to me, and Mozilla agrees, so + // we don't do that. + + if (strlen(embd_path) == 0) { + xfree(embd_path); + embd_path = xstrdup(base_path); + } else { + /* Step 6: The last segment of the base URL's path + * (anything following the rightmost slash "/", or the + * entire path if no slash is present) is removed and + * the embedded URL's path is appended in its place. + * The following operations are then applied, in + * order, to the new path: + */ + char *new_path = + static_cast < + char *>(xcalloc(strlen(base_path) + strlen(embd_path), + sizeof(char))); + + strcat(new_path, base_path); + + char *p = strrchr(new_path, '/'); + + if (!p) + p = new_path; + else + p++; + + *p = 0; + + strcat(new_path, embd_path); + + /* a) All occurrences of "./", where "." is a complete + * path segment, are removed. + */ + while ((p = strstr(new_path, "/./"))) { + memmove(p, p + 2, strlen(p + 2) + 1); + } + + while (new_path[0] == '.' && new_path[1] == '/') { + memmove(new_path, new_path + 2, strlen(new_path + 2) + 1); + } + + /* b) If the path ends with "." as a complete path + * segment, that "." is removed. + */ + if (strlen(new_path) == 1 && new_path[0] == '.') + new_path[0] = 0; + + while (strlen(new_path) >= 2 && + new_path[strlen(new_path) - 2] == '/' && + new_path[strlen(new_path) - 1] == '.') { + new_path[strlen(new_path) - 2] = 0; + } + + /* c) All occurrences of "<segment>/../", where + * <segment> is a complete path segment not equal to + * "..", are removed. Removal of these path segments + * is performed iteratively, removing the leftmost + * matching pattern on each iteration, until no + * matching pattern remains.
+ */ + p = new_path; + + while ((p = strstr(p, "/../"))) { + char *segment = new_path; + + if (p != new_path) { + segment = p - 1; + + while (segment != new_path && *segment != '/') + segment--; + } + + if (segment + 3 != p && + !(segment[0] == '.' && segment[1] == '.')) { + memmove(segment, p + 3, strlen(p + 3) + 1); + p = segment; + } + } + + /* d) If the path ends with "<segment>/..", where + * <segment> is a complete path segment not equal to + * "..", that "<segment>/.." is removed. + */ + while (strlen(new_path) >= 3 && + new_path[strlen(new_path) - 3] == '/' && + new_path[strlen(new_path) - 2] == '.' && + new_path[strlen(new_path) - 1] == '.') { + char *segment = new_path + strlen(new_path) - 4; + + while (segment != new_path && *segment != '/') + segment--; + + *segment = 0; + } + + xfree(embd_path); + embd_path = new_path; + } + } + } + + /* Step 7: The resulting URL components, including any inherited + * from the base URL, are recombined to give the absolute form of + * the embedded URL. + */ + char *new_url = + static_cast < + char *>(xcalloc(strlen(embd_scheme) + strlen(embd_host) + + strlen(embd_path) + strlen(embd_params) + strlen(embd_query) + + strlen(embd_fragment) + 8, + sizeof(char))); + + if (strlen(embd_scheme)) { + strcat(new_url, embd_scheme); + strcat(new_url, ":"); + } + + if (strlen(embd_host)) { + strcat(new_url, "//"); + strcat(new_url, embd_host); + + if (strlen(embd_path) && embd_path[0] != '/') + strcat(new_url, "/"); + } + + strcat(new_url, embd_path); + + if (strlen(embd_params)) { + strcat(new_url, ";"); + strcat(new_url, embd_params); + } + + if (strlen(embd_query)) { + strcat(new_url, "?"); + strcat(new_url, embd_query); + } + + if (strlen(embd_fragment)) { + strcat(new_url, "#"); + strcat(new_url, embd_fragment); + } + + xfree(base_scheme); + xfree(base_host); + xfree(base_path); + xfree(base_params); + xfree(base_query); + xfree(base_fragment); + + xfree(embd_scheme); + xfree(embd_host); + xfree(embd_path); + xfree(embd_params); + xfree(embd_query); +
xfree(embd_fragment); + + return new_url; +}