--------------------- PatchSet 3882 Date: 2006/10/25 23:06:15 Author: rousskov Branch: squid3-icap Tag: (none) Log: - When ICAP transactions fail to connect to the service many times, the service is suspended until the next OPTIONS update. The limit is currently hard-coded to 10. Suspended service is a down service and will be skipped by the ACL service selection algorithm. - Allow ICAP transactions to query service state if the service has valid and fresh options, even if the service is not up. The service may be down because it has been invalidated on reconfigure or because it was suspended. There is probably no good reason to terminate existing service users when that happens. They still have a chance. For example, the service may not accept new connections due to overload but may still serve old connections just fine. - Documented that the 'bypass' flag is currently ignored. - Polished state reporting. - Renamed private lastUpdate data member into theLastUpdate for consistency reasons. - Removed unused "unreachable" data member. Members: src/ICAP/ICAPServiceRep.cc:1.1.2.7->1.1.2.8 src/ICAP/ICAPServiceRep.h:1.1.2.6->1.1.2.7 Index: squid3/src/ICAP/ICAPServiceRep.cc =================================================================== RCS file: /cvsroot/squid-sf//squid3/src/ICAP/ICAPServiceRep.cc,v retrieving revision 1.1.2.7 retrieving revision 1.1.2.8 diff -u -r1.1.2.7 -r1.1.2.8 --- squid3/src/ICAP/ICAPServiceRep.cc 25 Oct 2006 17:43:52 -0000 1.1.2.7 +++ squid3/src/ICAP/ICAPServiceRep.cc 25 Oct 2006 23:06:15 -0000 1.1.2.8 @@ -12,9 +12,14 @@ CBDATA_CLASS_INIT(ICAPServiceRep); +// XXX: move to squid.conf +const int ICAPServiceRep::TheSessionFailureLimit = 10; + ICAPServiceRep::ICAPServiceRep(): method(ICAP::methodNone), - point(ICAP::pointNone), port(-1), bypass(false), unreachable(false), - theOptions(NULL), lastUpdate(0), waiting(false), notifying(false), + point(ICAP::pointNone), port(-1), bypass(false), + theOptions(NULL), theLastUpdate(0), + theSessionFailures(0), isSuspended(0), + waiting(false), notifying(false), updateScheduled(false), self(NULL) {} @@ -170,15 +175,43 @@ // TODO: it would be nice to invalidate cbdata(this) when not destroyed } +void ICAPServiceRep::noteFailure() { + ++theSessionFailures; + debugs(93,4, "ICAPService failure " << theSessionFailures << + ", out of " << TheSessionFailureLimit << " allowed"); + + if (theSessionFailures > TheSessionFailureLimit) + suspend("too many failures"); + + // TODO: Should bypass setting affect how much Squid tries to talk to + // the ICAP service that is currently unusable and is likely to remain + // so for some time? The current code says "no". Perhaps the answer + // should be configurable. +} + +void ICAPServiceRep::suspend(const char *reason) { + if (isSuspended) { + debugs(93,4, "keeping ICAPService suspended, also for " << reason); + } else { + const bool wasOk = !probed() || up(); + isSuspended = reason; + debugs(93,2, "suspending ICAPService for " << reason); + announceStatusChange(wasOk, true); + } +} + bool ICAPServiceRep::probed() const { - return lastUpdate != 0; + return theLastUpdate != 0; +} + +bool ICAPServiceRep::hasOptions() const { + return theOptions && theOptions->valid() && theOptions->fresh(); } bool ICAPServiceRep::up() const { - return self != NULL && theOptions && - theOptions->valid() && theOptions->fresh(); + return self != NULL && !isSuspended && hasOptions(); } bool ICAPServiceRep::broken() const @@ -188,13 +221,13 @@ bool ICAPServiceRep::wantsUrl(const String &urlPath) const { - Must(up()); + Must(hasOptions()); return theOptions->transferKind(urlPath) != ICAPOptions::xferIgnore; } bool ICAPServiceRep::wantsPreview(const String &urlPath, size_t &wantedSize) const { - Must(up()); + Must(hasOptions()); if (theOptions->preview < 0) return false; @@ -209,7 +242,7 @@ bool ICAPServiceRep::allows204() const { - Must(up()); + Must(hasOptions()); return true; // in the future, we may have ACLs to prevent 204s } @@ -309,7 +342,9 @@ delete theOptions; theOptions = newOptions; - lastUpdate = squid_curtime; + theSessionFailures = 0; + isSuspended = 0; + theLastUpdate = squid_curtime; checkOptions(); announceStatusChange(wasOk, true); @@ -364,11 +399,10 @@ if (wasOk == up()) // no significant changes to announce return; + const char *what = bypass ? "essential" : "optional"; + const char *state = wasOk ? "down" : "up"; const int level = important ? 1 : 2; - if (wasOk) - debugs(93,level, "ICAP service is down: " << uri); - else - debugs(93,level, "ICAP service is now up: " << uri); + debugs(93,level, what << " ICAP service is " << state << ": " << uri); } static @@ -438,8 +472,8 @@ when = squid_curtime; const int minUpdateGap = 1*60; // seconds - if (when < lastUpdate + minUpdateGap) - when = lastUpdate + minUpdateGap; + if (when < theLastUpdate + minUpdateGap) + when = theLastUpdate + minUpdateGap; // TODO: keep the time of the last update to prevet too-frequent updates @@ -460,20 +494,26 @@ buf.reset(); buf.append("[", 1); - if (!self) - buf.append("!", 1); - if (up()) buf.append("up", 2); else buf.append("down", 4); + if (!self) + buf.append(",gone", 5); + if (waiting) buf.append(",wait", 5); if (notifying) buf.append(",notif", 6); + if (theSessionFailures > 0) + buf.Printf(",F%d", theSessionFailures); + + if (isSuspended) + buf.append(",susp", 5); + buf.append("]", 1); buf.terminate(); Index: squid3/src/ICAP/ICAPServiceRep.h =================================================================== RCS file: /cvsroot/squid-sf//squid3/src/ICAP/ICAPServiceRep.h,v retrieving revision 1.1.2.6 retrieving revision 1.1.2.7 diff -u -r1.1.2.6 -r1.1.2.7 --- squid3/src/ICAP/ICAPServiceRep.h 25 Oct 2006 17:43:52 -0000 1.1.2.6 +++ squid3/src/ICAP/ICAPServiceRep.h 25 Oct 2006 23:06:15 -0000 1.1.2.7 @@ -1,6 +1,6 @@ /* - * $Id: ICAPServiceRep.h,v 1.1.2.6 2006/10/25 17:43:52 rousskov Exp $ + * $Id: ICAPServiceRep.h,v 1.1.2.7 2006/10/25 23:06:15 rousskov Exp $ * * * SQUID Web Proxy Cache http://www.squid-cache.org/ @@ -95,6 +95,8 @@ bool wantsPreview(const String &urlPath, size_t &wantedSize) const; bool allows204() const; + void noteFailure(); // called by transactions to report service failure + public: String key; ICAP::Method method; @@ -106,9 +108,8 @@ int port; String resource; - // non-options flags; TODO: check that both are used. + // XXX: use it when selecting a service and handling ICAP errors! bool bypass; - bool unreachable; public: // treat these as private, they are for callbacks only void noteTimeToUpdate(); @@ -129,7 +130,11 @@ Clients theClients; // all clients waiting for a call back ICAPOptions *theOptions; - time_t lastUpdate; // time the options were last updated + time_t theLastUpdate; // time the options were last updated + + static const int TheSessionFailureLimit; + int theSessionFailures; + const char *isSuspended; // also stores suspension reason for debugging bool waiting; // for an OPTIONS transaction to finish bool notifying; // may be true in any state except for the initial @@ -139,6 +144,9 @@ ICAP::Method parseMethod(const char *) const; ICAP::VectPoint parseVectPoint(const char *) const; + void suspend(const char *reason); + + bool hasOptions() const; bool needNewOptions() const; void scheduleUpdate();