This patch is generated from the prefetching branch of HEAD in squid3 Mon Jan 23 03:19:07 2006 GMT See http://devel.squid-cache.org/ Index: squid3/configure.in diff -u squid3/configure.in:1.87 squid3/configure.in:1.62.2.5 --- squid3/configure.in:1.87 Wed Dec 28 19:13:07 2005 +++ squid3/configure.in Sat Jan 21 12:37:51 2006 @@ -711,6 +711,20 @@ fi ]) +AM_CONDITIONAL(USE_HTMLPREFETCH, false) +AC_ARG_ENABLE(html-analysis, + AC_HELP_STRING([--enable-html-analysis],[Enable HTML content analysis and prefetching. Requires libxml2.]), + ac_cv_use_htmlprefetch=$enableval, ac_cv_use_htmlprefetch=no) +AC_CACHE_CHECK(whether to enable HTML prefetching, ac_cv_use_htmlprefetch, + ac_cv_use_htmlprefetch=no) +if test "$ac_cv_use_htmlprefetch" = "yes" ; then + AC_DEFINE(USE_HTMLPREFETCH,1,[Compile the HTML analysis support]) + AM_CONDITIONAL(USE_HTMLPREFETCH, true) + XTRA_LIBS="$XTRA_LIBS -lxml2" +else + AC_DEFINE(USE_HTMLPREFETCH,0,[Compile the HTML analysis support]) +fi + AM_CONDITIONAL(USE_ESI, false) AC_ARG_ENABLE(esi, AC_HELP_STRING([--enable-esi],[Enable ESI for accelerators. Requires libexpat. Enabling ESI will cause squid to follow the Edge Acceleration Specification (www.esi.org). This causes squid to IGNORE client Cache-Control headers. DO NOT use this in a squid configured as a web proxy, ONLY use it in a squid configured for webserver acceleration.]), Index: squid3/doc/debug-sections.txt diff -u squid3/doc/debug-sections.txt:1.6 squid3/doc/debug-sections.txt:1.6.6.1 --- squid3/doc/debug-sections.txt:1.6 Tue Jul 22 19:12:50 2003 +++ squid3/doc/debug-sections.txt Wed Jan 12 13:58:27 2005 @@ -96,3 +96,4 @@ section 90 Store Client section 91 Http Surrogate-Control Header section 92 Store File System +section 93 HTML content analysis and fetcher Index: squid3/src/Debug.h diff -u squid3/src/Debug.h:1.8 squid3/src/Debug.h:1.7.6.3 --- squid3/src/Debug.h:1.8 Mon Dec 19 19:12:57 2005 +++ squid3/src/Debug.h Sat Jan 21 12:37:55 2006 @@ -36,6 +36,8 @@ #ifndef SQUID_DEBUG #define SQUID_DEBUG +#include "defines.h" + #include #undef assert #include Index: squid3/src/ESIExpatParser.h diff -u squid3/src/ESIExpatParser.h:1.4 squid3/src/ESIExpatParser.h:1.2.8.2 --- squid3/src/ESIExpatParser.h:1.4 Sun Jul 3 19:14:11 2005 +++ squid3/src/ESIExpatParser.h Thu Oct 13 20:16:26 2005 @@ -36,6 +36,11 @@ #include "ESIParser.h" #include "expat.h" +#ifdef XMLCALL +#define EXPAT_XMLCALL XMLCALL +#undef XMLCALL +#endif + class ESIExpatParser : public ESIParser { Index: squid3/src/ESIInclude.cc diff -u squid3/src/ESIInclude.cc:1.8 squid3/src/ESIInclude.cc:1.5.2.4 --- squid3/src/ESIInclude.cc:1.8 Sat Nov 5 15:03:11 2005 +++ squid3/src/ESIInclude.cc Sat Jan 21 12:37:58 2006 @@ -331,7 +331,7 @@ debug (86,5)("ESIIncludeStart: Starting subrequest with url '%s'\n", tempUrl); - if (clientBeginRequest(METHOD_GET, tempUrl, esiBufferRecipient, esiBufferDetach, stream.getRaw(), &tempheaders, stream->localbuffer->buf, HTTP_REQBUF_SZ)) { + if (clientBeginRequest(METHOD_GET, tempUrl, esiBufferRecipient, esiBufferDetach, stream.getRaw(), &tempheaders, stream->localbuffer->buf, HTTP_REQBUF_SZ, (ClientHttpRequest::flags_type){1,0,0,0,0}, no_addr)) { debug (86,0) ("starting new ESI subrequest failed\n"); } Index: squid3/src/ESILibxml2Parser.h diff -u squid3/src/ESILibxml2Parser.h:1.3 squid3/src/ESILibxml2Parser.h:1.1.4.2 --- squid3/src/ESILibxml2Parser.h:1.3 Sun Jul 3 19:14:11 2005 +++ squid3/src/ESILibxml2Parser.h Thu Oct 13 20:16:26 2005 @@ -51,6 +51,11 @@ #include #include +#ifdef XMLCALL +#define LIBXML2_XMLCALL XMLCALL +#undef XMLCALL +#endif + #ifdef OLD_FREE #define free OLD_FREE #endif Index: squid3/src/HTMLAnalysisStream.cc diff -u /dev/null squid3/src/HTMLAnalysisStream.cc:1.1.2.7 --- /dev/null Thu Jan 1 01:00:00 1970 +++ squid3/src/HTMLAnalysisStream.cc Mon May 23 18:22:11 2005 @@ -0,0 +1,405 @@ + +/* + * $Id$ + * + * DEBUG: section 93 HTML parsing and fetching + * AUTHOR: Nick Lewycky + * + * SQUID Web Proxy Cache http://www.squid-cache.org/ + * ---------------------------------------------------------- + * + * Squid is the result of efforts by numerous individuals from + * the Internet community; see the CONTRIBUTORS file for full + * details. Many organizations have provided support for Squid's + * development; see the SPONSORS file for full details. Squid is + * Copyrighted (C) 2001 by the Regents of the University of + * California; see the COPYRIGHT file for full details. Squid + * incorporates software developed and/or copyrighted by other + * sources; see the CREDITS file for full details. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. + * + * Copyright (c) 2004, Nick Lewycky + * + */ + +#include +#include + +#include + +#include "squid.h" +#include "Debug.h" +#include "HttpReply.h" +#include "HttpRequest.h" +#include "HTMLAnalysisStream.h" +#include "http.h" +#include "PrefetchStream.h" +#include "protos.h" + +using namespace std; + +CBDATA_CLASS_INIT(HTMLAnalysisStream); + +/* NOTE: this does get called after detach. */ +void +htmlBufferData(clientStreamNode *node, ClientHttpRequest *req, + HttpReply *reply, StoreIOBuffer buffer) +{ + assert(node->data.getRaw()); + debugs(93, 4, "(AS) bufferData"); + + HTMLAnalysisStream::Pointer self = + dynamic_cast(node->data.getRaw()); + if (!self.getRaw()) + { + /* detach time. but if it's not my node->data, whose is it? */ + debugs(93, 4, "(AS) this makes no sense at all!"); + return; + } + + debugs(93, 4, "(AS) length: " << buffer.length << + ", offset: " << buffer.offset); + self->parse(buffer.data, buffer.length, 1); + node->readBuffer.offset += buffer.length; + + //clientStreamCallback(node, req, reply, node->readBuffer); // detaches + clientStreamCallback(node, req, reply, buffer); +} + +void +htmlStreamRead(clientStreamNode *node, ClientHttpRequest *req) +{ + HTMLAnalysisStream::Pointer self = + dynamic_cast(node->data.getRaw()); + + if (!req) return; + + debugs(93, 4, "(AS) before read"); + clientStreamRead(node, req, node->readBuffer); + debugs(93, 4, "(AS) after read"); +} + +void +htmlStreamDetach(clientStreamNode *node, ClientHttpRequest *req) +{ + debugs(93, 4, "(AS) detach"); + clientStreamDetach(node, req); +} + +clientStream_status_t +htmlStreamStatus(clientStreamNode *node, ClientHttpRequest *req) +{ + debugs(93, 4, "(AS) status"); + return clientStreamStatus(node, req); +} + +HTMLAnalysisStream::HTMLAnalysisStream(const HttpRequest *req) + : + request(req), + parser(htmlCreatePushParserCtxt(&handler, static_cast(this), + NULL, 0, NULL, XML_CHAR_ENCODING_NONE)) +{ + if (!parser) + throw runtime_error("Unable to create parser."); + + xmlSubstituteEntitiesDefault(1); + + relative_url = urlCanonicalClean(request); + + debugs(93, 5, "(AS) depth: " << request->recursion_depth); + + debugs(93, 4, "(AS) analyzing " << relative_url); +} + +HTMLAnalysisStream::~HTMLAnalysisStream() +{ + htmlFreeParserCtxt(parser); +} + +inline void +HTMLAnalysisStream::prefetch(const string &url, bool allow_recursion) +{ + PrefetchStream::prefetch(url, relative_url, request, allow_recursion); +} + +void +HTMLAnalysisStream::parse(const char *document, size_t len, bool partial) +{ + debugs(93, 4, "(AS) chunk to parse, " << len << " long " << (partial?"partial":"")); + debugs(93, 4, "(AS) chunk is: " << string(document, len)); + htmlParseChunk(parser, document, len, partial ? 0 : 1); +} + +void +HTMLAnalysisStream::start_element_handler(void *userData, + const xmlChar * xname, const xmlChar ** xattr) +{ + const char *name = reinterpret_cast(xname); + const char **attr = reinterpret_cast(xattr); + + if (!name || !attr) + return; + + HTMLAnalysisStream *self = static_cast(userData); + + // Example: + // + if (strcasecmp(name, "img") == 0) { + while (*attr) { + if (strcasecmp(*attr, "src") == 0 && *(attr + 1)) { + self->prefetch(*(attr + 1), false); + return; + } + + attr += 2; + } + + return; + } + + // Example: + // + //