--------------------- PatchSet 1239 Date: 2005/02/25 05:43:31 Author: nlewycky Branch: prefetching Tag: (none) Log: * Make address check test on client's IP address when prefetching. * Generalize clientBeginRequest to take arbitrary flags in a struct. * Rewrite PrefetchStream::bufferData, based off the right esi function this time: - Fixes infinite looping problem. * Use node->readBuffer, not node->next()->readBuffer in HTMLAnalysisStream. * Make it compile even with ESI enabled. (Conflict between libxml2 and expat.) Still no idea why it doesn't work. detach() just isn't happening in cases where the server is keep-alive and not closing the connection. Members: src/ESIExpatParser.h:1.2->1.2.8.1 src/ESIInclude.cc:1.5.2.1->1.5.2.2 src/ESILibxml2Parser.h:1.1->1.1.4.1 src/HTMLAnalysisStream.cc:1.1.2.2->1.1.2.3 src/HTMLAnalysisStream.h:1.1.2.1->1.1.2.2 src/PrefetchStream.cc:1.1.2.4->1.1.2.5 src/client_side_request.cc:1.33.2.1->1.33.2.2 src/client_side_request.h:1.17.6.1->1.17.6.2 Index: squid3/src/ESIExpatParser.h =================================================================== RCS file: /cvsroot/squid-sf//squid3/src/ESIExpatParser.h,v retrieving revision 1.2 retrieving revision 1.2.8.1 diff -u -r1.2 -r1.2.8.1 --- squid3/src/ESIExpatParser.h 5 Aug 2003 02:12:48 -0000 1.2 +++ squid3/src/ESIExpatParser.h 25 Feb 2005 05:43:31 -0000 1.2.8.1 @@ -1,5 +1,5 @@ /* - * $Id: ESIExpatParser.h,v 1.2 2003/08/05 02:12:48 squidadm Exp $ + * $Id: ESIExpatParser.h,v 1.2.8.1 2005/02/25 05:43:31 nlewycky Exp $ * * * SQUID Web Proxy Cache http://www.squid-cache.org/ @@ -36,6 +36,11 @@ #include "ESIParser.h" #include "expat.h" +#ifdef XMLCALL +#define EXPAT_XMLCALL XMLCALL +#undef XMLCALL +#endif + class ESIExpatParser : public ESIParser { Index: squid3/src/ESIInclude.cc =================================================================== RCS file: /cvsroot/squid-sf//squid3/src/ESIInclude.cc,v retrieving revision 1.5.2.1 retrieving revision 1.5.2.2 diff -u -r1.5.2.1 -r1.5.2.2 --- squid3/src/ESIInclude.cc 12 Jan 2005 21:58:27 -0000 1.5.2.1 +++ squid3/src/ESIInclude.cc 25 Feb 2005 05:43:31 -0000 1.5.2.2 @@ -1,6 +1,6 @@ /* - * $Id: ESIInclude.cc,v 1.5.2.1 2005/01/12 21:58:27 nlewycky Exp $ + * $Id: ESIInclude.cc,v 1.5.2.2 2005/02/25 05:43:31 nlewycky Exp $ * * DEBUG: section 86 ESI processing * AUTHOR: Robert Collins @@ -331,7 +331,7 @@ debug (86,5)("ESIIncludeStart: Starting subrequest with url '%s'\n", tempUrl); - if (clientBeginRequest(METHOD_GET, tempUrl, esiBufferRecipient, esiBufferDetach, stream.getRaw(), &tempheaders, stream->localbuffer->buf, HTTP_REQBUF_SZ, true)) { + if (clientBeginRequest(METHOD_GET, tempUrl, esiBufferRecipient, esiBufferDetach, stream.getRaw(), &tempheaders, stream->localbuffer->buf, HTTP_REQBUF_SZ, (ClientHttpRequest::flags_type){1,0,0,0,0}, no_addr)) { debug (86,0) ("starting new ESI subrequest failed\n"); } Index: squid3/src/ESILibxml2Parser.h =================================================================== RCS file: /cvsroot/squid-sf//squid3/src/ESILibxml2Parser.h,v retrieving revision 1.1 retrieving revision 1.1.4.1 diff -u -r1.1 -r1.1.4.1 --- squid3/src/ESILibxml2Parser.h 10 Dec 2004 03:13:52 -0000 1.1 +++ squid3/src/ESILibxml2Parser.h 25 Feb 2005 05:43:31 -0000 1.1.4.1 @@ -1,5 +1,5 @@ /* - * $Id: ESILibxml2Parser.h,v 1.1 2004/12/10 03:13:52 squidadm Exp $ + * $Id: ESILibxml2Parser.h,v 1.1.4.1 2005/02/25 05:43:31 nlewycky Exp $ * * AUTHOR: Joachim Bauch (mail@joachim-bauch.de) * @@ -51,6 +51,11 @@ #include #include +#ifdef XMLCALL +#define LIBXML2_XMLCALL XMLCALL +#undef XMLCALL +#endif + #ifdef OLD_FREE #define free OLD_FREE #endif Index: squid3/src/HTMLAnalysisStream.cc =================================================================== RCS file: /cvsroot/squid-sf//squid3/src/Attic/HTMLAnalysisStream.cc,v retrieving revision 1.1.2.2 retrieving revision 1.1.2.3 diff -u -r1.1.2.2 -r1.1.2.3 --- squid3/src/HTMLAnalysisStream.cc 28 Jan 2005 19:47:19 -0000 1.1.2.2 +++ squid3/src/HTMLAnalysisStream.cc 25 Feb 2005 05:43:31 -0000 1.1.2.3 @@ -1,6 +1,6 @@ /* - * $Id: HTMLAnalysisStream.cc,v 1.1.2.2 2005/01/28 19:47:19 nlewycky Exp $ + * $Id: HTMLAnalysisStream.cc,v 1.1.2.3 2005/02/25 05:43:31 nlewycky Exp $ * * DEBUG: section 93 HTML parsing and fetching * AUTHOR: Nick Lewycky @@ -59,20 +59,24 @@ HttpReply *reply, StoreIOBuffer buffer) { assert(node->data.getRaw()); - debugs(93, 1, "bufferData"); - clientStreamCallback(node, req, reply, buffer); + debugs(93, 1, "(AS) bufferData"); + HTMLAnalysisStream::Pointer self = dynamic_cast(node->data.getRaw()); if (!self.getRaw()) { /* detach time. but if it's not my node->data, whose is it? */ + debugs(93, 1, "(AS) this makes no sense at all!"); return; } - //self->parse(buffer.data, buffer.length, 1 /*?*/); - self->parse(node->next()->readBuffer.data + (reply?reply->hdr_sz:0), - node->next()->readBuffer.length - (reply?reply->hdr_sz:0), - 1); + debugs(93, 1, "(AS) length: " << buffer.length << + ", offset: " << buffer.offset); + self->parse(buffer.data, buffer.length, 1); + node->readBuffer.offset += buffer.length; + + //clientStreamCallback(node, req, reply, node->readBuffer); // detaches + clientStreamCallback(node, req, reply, buffer); } void @@ -81,22 +85,24 @@ HTMLAnalysisStream::Pointer self = dynamic_cast(node->data.getRaw()); - debugs(93, 1, "before read"); - clientStreamRead(node, req, /*self->buffer*/ node->next()->readBuffer); - debugs(93, 1, "after read"); + if (!req) return; + + debugs(93, 1, "(AS) before read"); + clientStreamRead(node, req, node->readBuffer); + debugs(93, 1, "(AS) after read"); } void htmlStreamDetach(clientStreamNode *node, ClientHttpRequest *req) { - debugs(93, 1, "detach"); - clientStreamDetach(node, req); + debugs(93, 1, "(AS) detach"); + clientStreamDetach(node, req); } clientStream_status_t htmlStreamStatus(clientStreamNode *node, ClientHttpRequest *req) { - debugs(93, 1, "status"); + debugs(93, 1, "(AS) status"); return clientStreamStatus(node, req); } @@ -129,8 +135,8 @@ void HTMLAnalysisStream::parse(const char *document, size_t len, bool partial) { - debugs(93, 1, "chunk to parse, " << len << " long " << (partial?"partial":"")); - debugs(93, 1, "chunk is: " << string(document, len)); + debugs(93, 1, "(AS) chunk to parse, " << len << " long " << (partial?"partial":"")); + debugs(93, 1, "(AS) chunk is: " << string(document, len)); htmlParseChunk(parser, document, len, partial ? 0 : 1); } Index: squid3/src/HTMLAnalysisStream.h =================================================================== RCS file: /cvsroot/squid-sf//squid3/src/Attic/HTMLAnalysisStream.h,v retrieving revision 1.1.2.1 retrieving revision 1.1.2.2 diff -u -r1.1.2.1 -r1.1.2.2 --- squid3/src/HTMLAnalysisStream.h 26 Jan 2005 19:51:33 -0000 1.1.2.1 +++ squid3/src/HTMLAnalysisStream.h 25 Feb 2005 05:43:31 -0000 1.1.2.2 @@ -53,8 +53,6 @@ const HttpRequest *request; - StoreIOBuffer buffer; - htmlParserCtxtPtr parser; std::string relative_url; Index: squid3/src/PrefetchStream.cc =================================================================== RCS file: /cvsroot/squid-sf//squid3/src/Attic/PrefetchStream.cc,v retrieving revision 1.1.2.4 retrieving revision 1.1.2.5 diff -u -r1.1.2.4 -r1.1.2.5 --- squid3/src/PrefetchStream.cc 23 Feb 2005 14:04:48 -0000 1.1.2.4 +++ squid3/src/PrefetchStream.cc 25 Feb 2005 05:43:31 -0000 1.1.2.5 @@ -1,6 +1,6 @@ /* - * $Id: PrefetchStream.cc,v 1.1.2.4 2005/02/23 14:04:48 nlewycky Exp $ + * $Id: PrefetchStream.cc,v 1.1.2.5 2005/02/25 05:43:31 nlewycky Exp $ * * DEBUG: section 93 HTML parsing and fetching * AUTHOR: Nick Lewycky @@ -95,7 +95,7 @@ // Refuse re-download negatively cached entries ... by refusing to // fetch any entry found in the cache. - if (storeGetPublic(url, METHOD_GET) != NULL) { + if (storeGetPublic(url, METHOD_GET)) { debugs(93, 1, "No need to prefetch " << url); return; } @@ -122,7 +122,8 @@ // instead of the IP causing the prefetch. if (clientBeginRequest(METHOD_GET, url, PrefetchStream::BufferData, PrefetchStream::Detach, stream.getRaw(), &tempheaders, - stream->discard_buffer, /*256*/HTTP_REQBUF_SZ, false)) { + stream->discard_buffer, /*256*/HTTP_REQBUF_SZ, + (ClientHttpRequest::flags_type){0,0,0,0,0}, no_addr)) { debugs(93, 1, "Failed to prefetch " << url); return; } @@ -146,33 +147,40 @@ dynamic_cast(node->data.getRaw())->detach(node, req); } -/* OH NO! This isn't supposed to be akin to esiStreamRead, it's - actually esiProcessStream! Well that explains a lot. - NOTE: remember to review this code and make sure I've whacked the - stupid out of it. -*/ +/* based on esiBufferRecipient */ void PrefetchStream::bufferData(clientStreamNode *node, ClientHttpRequest *req, HttpReply *reply, StoreIOBuffer buffer) { - debugs(93, 1, "bufferData"); + debugs(93, 1, "(PS) bufferData"); //debugs(93, 1, "reply " << reply << " body " << string(buffer.data, buffer.length)); - assert(req); + assert(!req->getConn().getRaw()); + + if (req->out.offset) + assert(!reply); + else { + if (reply) { + if (reply->sline.status != HTTP_OK) { + debugs(93, 1, "Aborting on non-\"HTTP OK\""); + httpReplyDestroy(reply); + httpRequestFree(req); + return; + } +#if HEADERS_LOG + headersLog(0, 0, req->request->method, reply); +#endif - if (req->out.offset && reply) { - if (reply->sline.status != HTTP_OK) { - debugs(93, 1, "Aborting on non-\"HTTP OK\""); httpReplyDestroy(reply); - httpRequestFree(req); - return; + reply = NULL; } - httpReplyDestroy(reply); - reply = NULL; } if (buffer.data && buffer.length) + { + debugs(93, 1, "advancing"); req->out.offset += buffer.length; + } if (!reply && !buffer.data && !buffer.length) { debugs(93, 1, "EOF / Read error / aborted entry"); @@ -180,36 +188,41 @@ return; } + if (clientHttpRequestStatus(-1, req)) { + debugs(93, 1, "XXX some sort of weird overflow condition?"); + node->data = NULL; + return; + } + switch (clientStreamStatus(node, req)) { case STREAM_UNPLANNED_COMPLETE: debugs(93, 1, "stream_unplanned_complete"); - detach(node, req); + httpRequestFree(req); return; case STREAM_COMPLETE: debugs(93, 1, "stream_complete"); - detach(node, req); + httpRequestFree(req); return; case STREAM_FAILED: debugs(93, 1, "stream_failed"); - detach(node, req); + httpRequestFree(req); return; case STREAM_NONE: debugs(93, 1, "stream_none"); - break; + debugs(93, 1, "read"); + /*clientStreamRead(node, req, buffer);*/ + debugs(93, 1, "after read"); + return; default: debugs(93, 1, "default"); return; } - - debugs(93, 1, "read"); - clientStreamRead(node, req, buffer); - debugs(93, 1, "after read"); } void PrefetchStream::detach(clientStreamNode *node, ClientHttpRequest *req) { - debugs(93, 1, "detach"); + debugs(93, 1, "(PS) detach"); pending.erase(pending_element); clientStreamDetach(node, req); } Index: squid3/src/client_side_request.cc =================================================================== RCS file: /cvsroot/squid-sf//squid3/src/client_side_request.cc,v retrieving revision 1.33.2.1 retrieving revision 1.33.2.2 diff -u -r1.33.2.1 -r1.33.2.2 --- squid3/src/client_side_request.cc 12 Jan 2005 21:58:29 -0000 1.33.2.1 +++ squid3/src/client_side_request.cc 25 Feb 2005 05:43:31 -0000 1.33.2.2 @@ -1,6 +1,6 @@ /* - * $Id: client_side_request.cc,v 1.33.2.1 2005/01/12 21:58:29 nlewycky Exp $ + * $Id: client_side_request.cc,v 1.33.2.2 2005/02/25 05:43:31 nlewycky Exp $ * * DEBUG: section 85 Client-side Request Routines * AUTHOR: Robert Collins (Originally Duane Wessels in client_side.c) @@ -264,7 +264,7 @@ int /* returns nonzero on failure */ clientBeginRequest(method_t method, char const *url, CSCB * streamcallback, CSD * streamdetach, ClientStreamData streamdata, HttpHeader const *header, - char *tailbuf, size_t taillen, bool accel) + char *tailbuf, size_t taillen, ClientHttpRequest::flags_type flags, in_addr addr) { size_t url_sz; HttpVersion http_ver (1, 0); @@ -285,8 +285,7 @@ /* make it visible in the 'current acctive requests list' */ dlinkAdd(http, &http->active, &ClientActiveRequests); /* Set flags */ - /* TODO: accept flags */ - http->flags.accel = accel; + http->flags = flags; /* allow size for url rewriting */ url_sz = strlen(url) + Config.appendDomainLen + 5; http->uri = (char *)xcalloc(url_sz, 1); @@ -327,7 +326,7 @@ /* Internally created requests cannot have bodies today */ request->content_length = 0; - request->client_addr = no_addr; + request->client_addr = addr; request->my_addr = no_addr; /* undefined for internal requests */ Index: squid3/src/client_side_request.h =================================================================== RCS file: /cvsroot/squid-sf//squid3/src/client_side_request.h,v retrieving revision 1.17.6.1 retrieving revision 1.17.6.2 diff -u -r1.17.6.1 -r1.17.6.2 --- squid3/src/client_side_request.h 12 Jan 2005 21:58:29 -0000 1.17.6.1 +++ squid3/src/client_side_request.h 25 Feb 2005 05:43:31 -0000 1.17.6.2 @@ -1,6 +1,6 @@ /* - * $Id: client_side_request.h,v 1.17.6.1 2005/01/12 21:58:29 nlewycky Exp $ + * $Id: client_side_request.h,v 1.17.6.2 2005/02/25 05:43:31 nlewycky Exp $ * * * SQUID Web Proxy Cache http://www.squid-cache.org/ @@ -40,7 +40,6 @@ #include "AccessLogEntry.h" /* client_side_request.c - client side request related routines (pure logic) */ -extern int clientBeginRequest(method_t, char const *, CSCB *, CSD *, ClientStreamData, HttpHeader const *, char *, size_t, bool); class MemObject; @@ -99,7 +98,7 @@ HttpVersion http_ver; AccessLogEntry al; - struct + struct flags_type { unsigned int accel: @@ -143,6 +142,8 @@ ConnStateData::Pointer conn_; }; +extern int clientBeginRequest(method_t, char const *, CSCB *, CSD *, ClientStreamData, HttpHeader const *, char *, size_t, ClientHttpRequest::flags_type, in_addr = no_addr); + /* client http based routines */ SQUIDCEXTERN char *clientConstructTraceEcho(clientHttpRequest *); SQUIDCEXTERN ACLChecklist *clientAclChecklistCreate(const acl_access * acl,ClientHttpRequest * http);