This patch is generated from the epoll-2_5 branch of s2_5 in squid Mon May 15 00:14:59 2006 GMT See http://devel.squid-cache.org/ Index: squid/acconfig.h diff -u squid/acconfig.h:1.13.2.4 squid/acconfig.h:1.13.2.4.10.1 --- squid/acconfig.h:1.13.2.4 Wed Jun 9 07:05:51 2004 +++ squid/acconfig.h Mon Apr 25 14:22:44 2005 @@ -402,6 +402,11 @@ */ #undef X_ACCELERATOR_VARY +/* + * Supports epoll + */ +#undef HAVE_EPOLL + @BOTTOM@ #endif /* __CONFIGURE_H__ */ Index: squid/configure.in diff -u squid/configure.in:1.42.2.82 squid/configure.in:1.42.2.59.4.10 --- squid/configure.in:1.42.2.82 Mon Mar 13 19:16:32 2006 +++ squid/configure.in Thu Mar 16 15:59:04 2006 @@ -706,6 +706,26 @@ esac ]) +dnl Enable epoll() +AC_ARG_ENABLE(epoll, +[ --enable-epoll Enable epoll() instead of poll() or select(). + epoll() is best where available, but must be + explicitly set at the moment. + --disable-epoll Disable the use of epoll().], +[ + case "$enableval" in + yes) + echo "Forcing epoll() to be enabled" + ac_cv_func_epoll='yes' + ac_cv_func_poll='no' + ;; + no) + echo "Forcing epoll() to be disabled" + ac_cv_func_epoll='no' + ;; + esac +]) + dnl Disable HTTP violations AC_ARG_ENABLE(http-violations, [ --disable-http-violations @@ -1672,6 +1692,21 @@ AC_CHECK_LIB(rt, aio_read) fi +LIB_EPOLL='' +dnl Check for libepoll +if test "$ac_cv_func_epoll" = "yes"; then + AC_CHECK_FUNCS(epoll_ctl,[AC_DEFINE_UNQUOTED(HAVE_EPOLL, 1)], + [AC_CHECK_LIB(epoll, epoll_ctl,[AC_DEFINE_UNQUOTED(HAVE_EPOLL, 1)] LIB_EPOLL="-lepoll", + [echo "Error - no epoll found"; + echo "Try running 'sh ./scripts/get_epoll-lib.sh'"; + echo "then run configure again"; + exit -1], + [-L ./lib] + )] + ) +fi +AC_SUBST(LIB_EPOLL) + dnl -lintl is needed on SCO version 3.2v4.2 for strftime() dnl Robert Side dnl Mon, 18 Jan 1999 17:48:00 GMT Index: squid/scripts/get_epoll-lib.sh diff -u /dev/null squid/scripts/get_epoll-lib.sh:1.1.2.1 --- /dev/null Thu Jan 1 01:00:00 1970 +++ squid/scripts/get_epoll-lib.sh Thu Mar 17 18:34:34 2005 @@ 
-0,0 +1,76 @@ +#!/bin/bash + +set -e + +EPOLL_URL="http://www.xmailserver.org/linux-patches/epoll-lib-0.11.tar.gz" +EPOLL_FILE="epoll-lib-0.11.tar.gz" +EPOLL_DIR="epoll-lib-0.11" + +KERNELSOURCE=$1 + +if [ "$0" != "./scripts/get_epoll-lib.sh" ] +then + echo + echo "You must run this program from the root squid source directory" + echo "ie /usr/src/squid-2.5.STABLE9" + echo "to run type:" + echo + echo "sh ./scripts/get_epoll-lib.sh" + echo + echo + echo + exit -1; +fi + +if [ ! -f $EPOLL_FILE ] +then + if [ ! -x "`which wget`" ] + then + echo + echo "This script uses wget to download the source file" + echo "Please either install wget, or download libepoll from:" + echo "$EPOLL_URL" + echo + echo + echo + exit -1 + fi + + wget $EPOLL_URL +fi + +if [ ! -d $EPOLL_DIR ] +then + tar -zxvf $EPOLL_FILE +fi + +pushd $EPOLL_DIR + set +e + if [ -z "$KERNELSOURCE" ] + then + make lib/libepoll.a PREFIX=.. + else + make lib/libepoll.a PREFIX=.. KERNELDIR=$KERNELSOURCE + fi + + if [ $? -ne 0 ] + then + echo + echo "epoll make failed" + echo "You may need to run $0 /usr/src/linux-2.6" + echo "(or give the correct path to a 2.6 kernel source)" + echo + popd + exit -1 + fi + + if [ ! -d ../include/sys ] + then + set -e + mkdir ../include/sys + set +e + fi + + make install PREFIX=.. 
2>/dev/null +popd + Index: squid/src/Makefile.am diff -u squid/src/Makefile.am:1.13.2.11 squid/src/Makefile.am:1.13.2.9.10.3 --- squid/src/Makefile.am:1.13.2.11 Wed Sep 28 19:13:47 2005 +++ squid/src/Makefile.am Sun Oct 9 18:06:24 2005 @@ -227,6 +227,7 @@ @SNMPLIB@ \ @LIB_MALLOC@ \ @SSLLIB@ \ + @LIB_EPOLL@ \ -lmiscutil \ @XTRA_LIBS@ Index: squid/src/client_side.c diff -u squid/src/client_side.c:1.47.2.76 squid/src/client_side.c:1.47.2.59.2.7 --- squid/src/client_side.c:1.47.2.76 Fri Mar 10 19:16:31 2006 +++ squid/src/client_side.c Thu Mar 16 15:59:12 2006 @@ -2963,10 +2963,35 @@ { fde *F = &fd_table[fd]; ConnStateData *conn = data; - if (conn->body.size_left && !F->flags.socket_eof) - return conn->in.offset >= conn->in.size - 1; + if (conn->body.size_left && !F->flags.socket_eof) { + if(conn->in.offset >= conn->in.size - 1) { +#if HAVE_EPOLL + /* The commResumeFD function is called in this file */ + conn->in.clientfd=fd; + commDeferFD(fd); +#endif + return 1; + } + else + { + return 0; + } + } else - return conn->defer.until > squid_curtime; + { + if (conn->defer.until > squid_curtime ) { +#if HAVE_EPOLL + /* This is a second resolution timer, so commEpollBackon will + handle the resume for this defer call */ + commDeferFD(fd); +#endif + return 1; + } + else + { + return 0; + } + } } static void @@ -3368,6 +3393,15 @@ conn->body.size_left -= size; /* Move any remaining data */ conn->in.offset -= size; +#if HAVE_EPOLL + /* Resume the fd if necessary */ + if ( conn->in.clientfd ) { + if(conn->in.offset < conn->in.size - 1) { + commResumeFD(conn->in.clientfd); + conn->in.clientfd=0; + } + } +#endif if (conn->in.offset > 0) xmemmove(conn->in.buf, conn->in.buf + size, conn->in.offset); /* Remove request link if this is the last part of the body, as Index: squid/src/comm.c diff -u squid/src/comm.c:1.18.6.6 squid/src/comm.c:1.18.6.5.4.4 --- squid/src/comm.c:1.18.6.6 Sat Sep 10 19:13:22 2005 +++ squid/src/comm.c Sun Oct 9 18:06:24 2005 @@ -374,6 +374,11 @@ if 
(F->flags.nodelay) commSetTcpNoDelay(cs->fd); #endif + +#if HAVE_EPOLL + // If we are using epoll(), we need to make sure that this fd will be polled + commSetSelect(cs->fd,0,NULL,NULL,0); +#endif if (Config.tcpRcvBufsz > 0) commSetTcpRcvbuf(cs->fd, Config.tcpRcvBufsz); return 1; @@ -699,6 +704,8 @@ F->defer_data = data; } +/* Epoll redefines this function in comm_select.c */ +#if !HAVE_EPOLL void commSetSelect(int fd, unsigned int type, PF * handler, void *client_data, time_t timeout) { @@ -719,6 +726,7 @@ if (timeout) F->timeout = squid_curtime + timeout; } +#endif void comm_add_close_handler(int fd, PF * handler, void *data) Index: squid/src/comm_select.c diff -u squid/src/comm_select.c:1.8.6.6 squid/src/comm_select.c:1.8.6.6.22.6 --- squid/src/comm_select.c:1.8.6.6 Sun May 11 19:14:21 2003 +++ squid/src/comm_select.c Tue Jan 10 10:14:03 2006 @@ -36,6 +36,8 @@ static int MAX_POLL_TIME = 1000; /* see also comm_quick_poll_required() */ +#if !HAVE_EPOLL + #ifndef howmany #define howmany(x, y) (((x)+((y)-1))/(y)) #endif @@ -133,15 +135,6 @@ #define commCheckHTTPIncoming (++http_io_events > (incoming_http_interval>> INCOMING_FACTOR)) static int -commDeferRead(int fd) -{ - fde *F = &fd_table[fd]; - if (F->defer_check == NULL) - return 0; - return F->defer_check(fd, F->defer_data); -} - -static int fdIsIcp(int fd) { if (fd == theInIcpConnection) @@ -1024,33 +1017,6 @@ #endif static void -checkTimeouts(void) -{ - int fd; - fde *F = NULL; - PF *callback; - for (fd = 0; fd <= Biggest_FD; fd++) { - F = &fd_table[fd]; - if (!F->flags.open) - continue; - if (F->timeout == 0) - continue; - if (F->timeout > squid_curtime) - continue; - debug(5, 5) ("checkTimeouts: FD %d Expired\n", fd); - if (F->timeout_handler) { - debug(5, 5) ("checkTimeouts: FD %d: Call timeout handler\n", fd); - callback = F->timeout_handler; - F->timeout_handler = NULL; - callback(fd, F->timeout_data); - } else { - debug(5, 5) ("checkTimeouts: FD %d: Forcing comm_close()\n", fd); - comm_close(fd); - } - } 
-} - -static void commIncomingStats(StoreEntry * sentry) { StatCounters *f = &statCounter; @@ -1106,6 +1072,442 @@ } } +#else /* HAVE_EPOLL */ +/* epoll structs */ +static int kdpfd; +static struct epoll_event *pevents; + +/* Array to keep track of backed off filedescriptors */ +static int backoff_fds[FD_SETSIZE]; + +static void checkTimeouts(void); +static int commDeferRead(int fd); + +static const char* +epolltype_atoi(int x) +{ + switch(x) { + + case EPOLL_CTL_ADD: + return "EPOLL_CTL_ADD"; + + case EPOLL_CTL_DEL: + return "EPOLL_CTL_DEL"; + + case EPOLL_CTL_MOD: + return "EPOLL_CTL_MOD"; + + default: + return "UNKNOWN_EPOLLCTL_OP"; + } +} + +/* Bring all fds back online */ +void +commEpollBackon() { + fde *F; + int i; + int fd; + int j=0; + + for(i=0;iepoll_backoff)) { + continue; + } + + /* If the fd is still meant to be backed off, add it to the start of + the list and continue */ + if(commDeferRead(fd) == 1) { + backoff_fds[j++]=fd; + continue; + } + + debug(5, 4) ("commEpollBackon: fd=%d\n",fd); + + /* Resume operations for this fd */ + commResumeFD(fd); + } + else + { + /* Once we hit a non-backed off FD, we can break */ + break; + } + } +} + + +/* Back off on the next epoll for the given fd */ +void +commEpollBackoff(int fd) { + commDeferFD(fd); +} + +/* Defer reads from this fd */ +void +commDeferFD(int fd) { + fde *F = &fd_table[fd]; + struct epoll_event ev; + int epoll_ctl_type = 0; + int i; + + /* Return if the fd is already backed off */ + if(F->epoll_backoff) { + return; + } + + for(i=0;iepoll_state); + assert(fd >= 0); + assert(F->flags.open); + + /* set up ev struct */ + ev.events = 0; + ev.data.fd = fd; + + /* If we were only waiting for reads, delete the fd, otherwise remove the + read event */ + if(F->epoll_state == (EPOLLIN | EPOLLHUP | EPOLLERR)) { + epoll_ctl_type = EPOLL_CTL_DEL; + } + else + { + epoll_ctl_type = EPOLL_CTL_MOD; + ev.events = (F->epoll_state - EPOLLIN); + } + debug(5, 5) ("commDeferFD: epoll_ctl_type=%s, fd=%d 
epoll_state=%d\n",epolltype_atoi(epoll_ctl_type),fd,F->epoll_state); + + if (epoll_ctl(kdpfd, epoll_ctl_type, fd, &ev) < 0) { + /* If an error occurs, log it */ + debug(5, 1) ("commDeferFD: epoll_ctl(,%s,,): failed on fd=%d: %s\n",epolltype_atoi(epoll_ctl_type), fd, xstrerror()); + } + else + { + backoff_fds[i]=fd; + F->epoll_backoff=1; + F->epoll_state=ev.events; + } +} + +/* Resume reading from the given fd */ +void +commResumeFD(int fd) { + struct epoll_event ev; + int epoll_ctl_type = 0; + fde *F; + + F=&fd_table[fd]; + + /* If the fd has been modified, do nothing and remove the flag */ + if(!(F->read_handler) || !(F->epoll_backoff)) { + debug(5, 2) ("commResumeFD: fd=%d ignoring read_handler=%p, epoll_backoff=%d\n",fd,F->read_handler,F->epoll_backoff); + F->epoll_backoff=0; + return; + } + + /* we need to re-add the fd to the epoll list with EPOLLIN set */ + ev.events = F->epoll_state | EPOLLIN | EPOLLHUP | EPOLLERR; + ev.data.fd = fd; + + /* If epoll_state is not set, then this fd is only waiting for + reads, and needs adding, otherwise we mod it to add EPOLLIN*/ + if(!(F->epoll_state)) { + epoll_ctl_type = EPOLL_CTL_ADD; + } + else + { + epoll_ctl_type = EPOLL_CTL_MOD; + } + debug(5, 5) ("commResumeFD: epoll_ctl_type=%s, fd=%d\n",epolltype_atoi(epoll_ctl_type),fd); + + /* Try and add the fd back into the epoll struct */ + if (epoll_ctl(kdpfd, epoll_ctl_type, fd, &ev) < 0) { + /* If an error occurs, log */ + debug(5, 1) ("commResumeFD: epoll_ctl(,%s,,): failed on fd=%d: %s\n",epolltype_atoi(epoll_ctl_type), fd, xstrerror()); + } + else + { + F->epoll_backoff=0; + F->epoll_state=ev.events; + } +} +void +comm_select_init() +{ + int i; + pevents = (struct epoll_event *) xmalloc(SQUID_MAXFD * sizeof(struct epoll_event)); + if (!pevents) { + fatalf("comm_select_init: xmalloc() failed: %s\n",xstrerror()); + } + + kdpfd = epoll_create(SQUID_MAXFD); + + if (kdpfd < 0) { + fatalf("comm_select_init: epoll_create(): %s\n",xstrerror()); + } + + for (i = 0; i < 
FD_SETSIZE; i++) { + backoff_fds[i]=0; + } +} + +void +commUpdateReadBits(int fd, PF *handler) +{ + /* Not implemented */ +} + +void +commUpdateWriteBits(int fd, PF *handler) +{ + /* Not implemented */ +} + +void +commSetSelect(int fd, unsigned int type, PF * handler, void *client_data, time_t timeout) + { + fde *F = &fd_table[fd]; + int epoll_ctl_type = 0; + struct epoll_event ev; + + assert(fd >= 0); + assert(F->flags.open); + debug(5, 8) ("commSetSelect(fd=%d,type=%u,handler=%p,client_data=%p,timeout=%ld)\n",fd,type,handler,client_data,timeout); + + ev.events = 0; + ev.data.fd = fd; + + if (type & COMM_SELECT_READ) { + /* Only add the epoll event if the fd is not backed off */ + if (handler && !(F->epoll_backoff)) { + ev.events |= EPOLLIN; + } + + F->read_handler = handler; + F->read_data = client_data; + + // Otherwise, use previously stored value if the fd is not backed off + } else if ((F->epoll_state & EPOLLIN) && (F->read_handler) && !(F->epoll_backoff)) { + ev.events |= EPOLLIN; + } + + if (type & COMM_SELECT_WRITE) { + if (handler) { + ev.events |= EPOLLOUT; + } + + F->write_handler = handler; + F->write_data = client_data; + + // Otherwise, use previously stored value + } else if ((F->epoll_state & EPOLLOUT) && (F->write_handler)){ + ev.events |= EPOLLOUT; + } + + if (ev.events) { + ev.events |= EPOLLHUP | EPOLLERR; + } + + /* If the type is 0, force adding the fd to the epoll set */ + if(!(type)) { + F->epoll_state=0; + } + + if (ev.events != F->epoll_state) { + // If the struct is already in epoll MOD or DEL, else ADD + if (F->epoll_state) { + epoll_ctl_type = ev.events ? 
EPOLL_CTL_MOD : EPOLL_CTL_DEL; + } + else + { + epoll_ctl_type = EPOLL_CTL_ADD; + } + + /* Update the state */ + F->epoll_state = ev.events; + + if (epoll_ctl(kdpfd, epoll_ctl_type, fd, &ev) < 0) { + debug(5, 1) ("commSetSelect: epoll_ctl(%s): failed on fd=%d: %s\n", + epolltype_atoi(epoll_ctl_type), fd, xstrerror()); + } + } + + if (timeout) + F->timeout = squid_curtime + timeout; +} + +int comm_epoll(int msec) +{ + struct timespec ts; + static time_t last_timeout = 0; + int i; + int num; + int fd; + fde *F; + PF *hdl; + struct epoll_event *cevents; + double timeout = current_dtime + (msec / 1000.0); + + if (msec > MAX_POLL_TIME) + msec = MAX_POLL_TIME; + + debug(50, 3)("comm_epoll: timeout %d\n", msec); + + do { +#if !ALARM_UPDATES_TIME + double start; + getCurrentTime(); + start = current_dtime; +#endif + ts.tv_sec = msec/1000; + ts.tv_nsec = (msec % 1000) * 1000; + + /* Check timeouts once per second */ + if (last_timeout < squid_curtime) { + last_timeout = squid_curtime; + checkTimeouts(); + + /* bring backed off connections back online */ + commEpollBackon(); + } + + /* Check for disk io callbacks */ + storeDirCallback(); + + for (;;) { + statCounter.syscalls.polls++; + num = epoll_wait(kdpfd, pevents, SQUID_MAXFD, msec); + statCounter.select_loops++; + + if (num >= 0) + break; + + if (ignoreErrno(errno)) + break; + + debug(5, 0) ("comm_epoll: epoll failure: %s\n", xstrerror()); + + return COMM_ERROR; + } + + statHistCount(&statCounter.select_fds_hist, num); + + if (num <= 0) + continue; + + for (i = 0, cevents = pevents; i < num; i++, cevents++) { + fd = cevents->data.fd; + F = &fd_table[fd]; + debug(5, 8) ("comm_epoll(): got fd=%d events=%x monitoring=%x F->read_handler=%p F->write_handler=%p\n" + ,fd,cevents->events,F->epoll_state,F->read_handler,F->write_handler); + if (cevents->events & (EPOLLIN|EPOLLHUP|EPOLLERR)) { + if((hdl = F->read_handler) != NULL) { + // If the descriptor is meant to be deferred, don't handle + if(commDeferRead(fd) == 1) { + 
if(!(F->epoll_backoff)) { + debug(5, 1) ("comm_epoll(): WARNING defer handler for fd=%d (desc=%s) does not call commDeferFD() - backing off manually\n",fd,F->desc); + commEpollBackoff(fd); + } + goto WRITE_EVENT; + } + + debug(5, 8) ("comm_epoll(): Calling read handler on fd=%d\n",fd); + F->read_handler = NULL; + hdl(fd, F->read_data); + statCounter.select_fds++; + if((F->read_handler == NULL) && (F->flags.open)) { + commSetSelect(fd, COMM_SELECT_READ, NULL, NULL, 0); + } + } else if(cevents->events & EPOLLIN) { + debug(5, 2) ("comm_epoll(): no read handler for fd=%d",fd); + if(F->flags.open) { + commSetSelect(fd, COMM_SELECT_READ, NULL, NULL, 0); + } + } + } + +WRITE_EVENT: + if (cevents->events & (EPOLLOUT|EPOLLHUP|EPOLLERR)) { + if((hdl = F->write_handler) != NULL) { + debug(5,8) ("comm_epoll(): Calling write handler on fd=%d\n",fd); + F->write_handler = NULL; + hdl(fd, F->write_data); + statCounter.select_fds++; + if((F->write_handler == NULL) && (F->flags.open)) { + commSetSelect(fd, COMM_SELECT_WRITE, NULL, NULL, 0); + } + } else if(cevents->events & EPOLLOUT) { + debug(5, 2) ("comm_epoll(): no write handler for fd=%d\n",fd); + if(F->flags.open) { + commSetSelect(fd, COMM_SELECT_WRITE, NULL, NULL, 0); + } + } + } + } +#if !ALARM_UPDATES_TIME + getCurrentTime(); + statCounter.select_time += (current_dtime - start); +#endif + return COMM_OK; + } + while (timeout > current_dtime); + + debug(5, 8) ("comm_epoll: time out: %ld.\n", (long int) squid_curtime); + return COMM_TIMEOUT; +} +#endif /* HAVE_EPOLL */ + +static int +commDeferRead(int fd) +{ + fde *F = &fd_table[fd]; + if (F->defer_check == NULL) + return 0; + return F->defer_check(fd, F->defer_data); +} + +static void +checkTimeouts(void) +{ + int fd; + fde *F = NULL; + PF *callback; + for (fd = 0; fd <= Biggest_FD; fd++) { + F = &fd_table[fd]; + if (!F->flags.open) + continue; + if (F->timeout == 0) + continue; + if (F->timeout > squid_curtime) + continue; + debug(5, 5) ("checkTimeouts: FD %d Expired\n", 
fd); + if (F->timeout_handler) { + debug(5, 5) ("checkTimeouts: FD %d: Call timeout handler\n", fd); + callback = F->timeout_handler; + F->timeout_handler = NULL; + callback(fd, F->timeout_data); + } else { + debug(5, 5) ("checkTimeouts: FD %d: Forcing comm_close()\n", fd); + comm_close(fd); + } + } +} + + /* Called by async-io or diskd to speed up the polling */ void comm_quick_poll_required(void) Index: squid/src/enums.h diff -u squid/src/enums.h:1.29.2.18 squid/src/enums.h:1.29.2.14.8.4 --- squid/src/enums.h:1.29.2.18 Fri Nov 11 19:13:48 2005 +++ squid/src/enums.h Thu Mar 16 15:59:12 2006 @@ -510,7 +510,8 @@ ENTRY_NEGCACHED, ENTRY_VALIDATED, ENTRY_BAD_LENGTH, - ENTRY_ABORTED + ENTRY_ABORTED, + ENTRY_DEFER_READ #if UNUSED_CODE ENTRY_DONT_LOG #endif Index: squid/src/fd.c diff -u squid/src/fd.c:1.7.12.1 squid/src/fd.c:1.7.12.1.16.1 --- squid/src/fd.c:1.7.12.1 Sun Dec 14 19:13:47 2003 +++ squid/src/fd.c Wed Mar 16 19:57:44 2005 @@ -84,6 +84,11 @@ assert(F->write_handler == NULL); } debug(51, 3) ("fd_close FD %d %s\n", fd, F->desc); +#if HAVE_EPOLL + /* the epoll code needs to update the descriptor before flags.open is 0 */ + commSetSelect(fd, COMM_SELECT_READ, NULL, NULL, 0); + commSetSelect(fd, COMM_SELECT_WRITE, NULL, NULL, 0); +#endif F->flags.open = 0; fdUpdateBiggest(fd, 0); Number_FD--; Index: squid/src/forward.c diff -u squid/src/forward.c:1.13.6.16 squid/src/forward.c:1.13.6.13.4.9 --- squid/src/forward.c:1.13.6.16 Fri Mar 10 19:16:31 2006 +++ squid/src/forward.c Sat May 13 22:42:21 2006 @@ -672,8 +672,13 @@ (void) 0; else { int i = delayMostBytesWanted(mem, INT_MAX); - if (0 == i) + if (0 == i) { +#if HAVE_EPOLL + mem->serverfd=fd; + commDeferFD(fd); +#endif return 1; + } /* was: rc = -(rc != INT_MAX); */ else if (INT_MAX == i) rc = 0; @@ -681,6 +686,8 @@ rc = -1; } #endif + if (EBIT_TEST(e->flags, ENTRY_DEFER_READ)) + return 1; if (EBIT_TEST(e->flags, ENTRY_FWD_HDR_WAIT)) return rc; if (EBIT_TEST(e->flags, RELEASE_REQUEST)) { @@ -689,11 +696,23 @@ * is disk 
clients pending on a too large object being fetched and a * few other corner cases. */ - if (mem->inmem_hi - mem->inmem_lo > SM_PAGE_SIZE + Config.Store.maxInMemObjSize + READ_AHEAD_GAP) + if (fd >= 0 && mem->inmem_hi - mem->inmem_lo > SM_PAGE_SIZE + Config.Store.maxInMemObjSize + READ_AHEAD_GAP) { + EBIT_SET(e->flags, ENTRY_DEFER_READ); +#if HAVE_EPOLL + mem->serverfd=fd; + commDeferFD(fd); +#endif return 1; + } } - if (mem->inmem_hi - storeLowestMemReaderOffset(e) > READ_AHEAD_GAP) + if (fd >= 0 && mem->inmem_hi - storeLowestMemReaderOffset(e) > READ_AHEAD_GAP) { + EBIT_SET(e->flags, ENTRY_DEFER_READ); +#if HAVE_EPOLL + mem->serverfd=fd; + commDeferFD(fd); +#endif return 1; + } return rc; } Index: squid/src/main.c diff -u squid/src/main.c:1.28.6.25 squid/src/main.c:1.28.6.19.2.5 --- squid/src/main.c:1.28.6.25 Mon Jun 27 19:16:51 2005 +++ squid/src/main.c Sun Oct 9 18:06:25 2005 @@ -741,7 +741,9 @@ eventRun(); if ((loop_delay = eventNextTime()) < 0) loop_delay = 0; -#if HAVE_POLL +#if HAVE_EPOLL + switch (comm_epoll(loop_delay)) { +#elif HAVE_POLL switch (comm_poll(loop_delay)) { #else switch (comm_select(loop_delay)) { Index: squid/src/protos.h diff -u squid/src/protos.h:1.41.6.34 squid/src/protos.h:1.41.6.22.4.9 --- squid/src/protos.h:1.41.6.34 Sat Feb 25 19:13:57 2006 +++ squid/src/protos.h Thu Mar 16 15:59:15 2006 @@ -159,6 +159,10 @@ extern int comm_openex(int, int, struct in_addr, u_short, int, unsigned char TOS, const char *); extern u_short comm_local_port(int fd); +#if HAVE_EPOLL +extern void commDeferFD(int fd); +extern void commResumeFD(int fd); +#endif extern void commSetSelect(int, unsigned int, PF *, void *, time_t); extern void comm_add_close_handler(int fd, PF *, void *); extern void comm_remove_close_handler(int fd, PF *, void *); @@ -181,7 +185,9 @@ * comm_select.c */ extern void comm_select_init(void); -#if HAVE_POLL +#if HAVE_EPOLL +extern int comm_epoll(int); +#elif HAVE_POLL extern int comm_poll(int); #else extern int comm_select(int); Index: 
squid/src/squid.h diff -u squid/src/squid.h:1.13.6.9 squid/src/squid.h:1.13.6.7.10.3 --- squid/src/squid.h:1.13.6.9 Fri Mar 10 19:16:31 2006 +++ squid/src/squid.h Thu Mar 16 15:59:15 2006 @@ -252,6 +252,10 @@ #endif /* HAVE_POLL_H */ #endif /* HAVE_POLL */ +#if HAVE_EPOLL +#include <sys/epoll.h> +#endif + #if defined(HAVE_STDARG_H) #include <stdarg.h> #define HAVE_STDARGS /* let's hope that works everywhere (mj) */ Index: squid/src/ssl.c diff -u squid/src/ssl.c:1.13.6.13 squid/src/ssl.c:1.13.6.7.10.6 --- squid/src/ssl.c:1.13.6.13 Fri Mar 10 19:16:31 2006 +++ squid/src/ssl.c Thu Mar 16 15:59:15 2006 @@ -132,14 +132,18 @@ #if DELAY_POOLS static int -sslDeferServerRead(int fdnotused, void *data) +sslDeferServerRead(int fd, void *data) { SslStateData *s = data; int i = delayBytesWanted(s->delay_id, 0, INT_MAX); if (i == INT_MAX) return 0; - if (i == 0) + if (i == 0) { +#if HAVE_EPOLL + commDeferFD(fd); +#endif return 1; + } return -1; } #endif Index: squid/src/stat.c diff -u squid/src/stat.c:1.13.6.14 squid/src/stat.c:1.13.6.11.4.3 --- squid/src/stat.c:1.13.6.14 Tue Mar 29 18:17:46 2005 +++ squid/src/stat.c Thu Apr 14 18:06:29 2005 @@ -835,7 +835,7 @@ storeAppendPrintf(sentry, "aborted_requests = %f/sec\n", XAVG(aborted_requests)); -#if HAVE_POLL +#if HAVE_POLL || HAVE_EPOLL storeAppendPrintf(sentry, "syscalls.polls = %f/sec\n", XAVG(syscalls.polls)); #else storeAppendPrintf(sentry, "syscalls.selects = %f/sec\n", XAVG(syscalls.selects)); Index: squid/src/store_client.c diff -u squid/src/store_client.c:1.9.6.4 squid/src/store_client.c:1.9.6.2.18.6 --- squid/src/store_client.c:1.9.6.4 Tue Apr 19 19:18:33 2005 +++ squid/src/store_client.c Tue Apr 18 19:32:37 2006 @@ -201,6 +201,12 @@ sc->copy_buf = buf; sc->copy_size = size; sc->copy_offset = copy_offset; + /* If the read is being deferred, run swapout in case this client has the + lowest seen_offset. 
storeSwapOut() frees the memory and clears the + ENTRY_DEFER_READ bit if necessary */ + if (EBIT_TEST(e->flags, ENTRY_DEFER_READ)) { + storeSwapOut(e); + } storeClientCopy2(e, sc); } @@ -271,6 +277,21 @@ if (e->store_status == STORE_PENDING && sc->seen_offset >= mem->inmem_hi) { /* client has already seen this, wait for more */ debug(20, 3) ("storeClientCopy3: Waiting for more\n"); + + /* If the read is backed off and all clients have seen all the data in + memory, re-poll the fd */ + if ( (EBIT_TEST(e->flags, ENTRY_DEFER_READ)) && + (storeLowestMemReaderOffset(e) == mem->inmem_hi) ) { + debug(20, 3) ("storeClientCopy3: %s - clearing ENTRY_DEFER_READ\n",e->mem_obj->url); + /* Clear the flag and re-poll the fd */ + EBIT_CLR(e->flags, ENTRY_DEFER_READ); +#if HAVE_EPOLL + if(mem->serverfd !=0) { + commResumeFD(mem->serverfd); + mem->serverfd=0; + } +#endif + } return; } /* Index: squid/src/store_swapout.c diff -u squid/src/store_swapout.c:1.11.2.10 squid/src/store_swapout.c:1.11.2.1.32.10 --- squid/src/store_swapout.c:1.11.2.10 Tue May 10 19:17:58 2005 +++ squid/src/store_swapout.c Wed May 18 00:20:35 2005 @@ -143,9 +143,25 @@ } if (new_mem_lo < mem->inmem_lo) new_mem_lo = mem->inmem_lo; - if (mem->inmem_lo != new_mem_lo) + if (mem->inmem_lo != new_mem_lo) { mem->inmem_lo = stmemFreeDataUpto(&mem->data_hdr, new_mem_lo); + /* If ENTRY_DEFER_READ is set, then the client side will continue to + flush until it has less than READ_AHEAD_GAP bytes in memory */ + if (EBIT_TEST(e->flags, ENTRY_DEFER_READ)) { + + if (mem->inmem_hi - mem->inmem_lo <= READ_AHEAD_GAP) { + EBIT_CLR(e->flags, ENTRY_DEFER_READ); +#if HAVE_EPOLL + if(mem->serverfd !=0) { + commResumeFD(mem->serverfd); + mem->serverfd=0; + } +#endif + } + } + } + return swapout_able; } Index: squid/src/structs.h diff -u squid/src/structs.h:1.48.2.46 squid/src/structs.h:1.48.2.34.2.8 --- squid/src/structs.h:1.48.2.46 Fri Mar 10 19:16:31 2006 +++ squid/src/structs.h Thu Mar 16 15:59:15 2006 @@ -789,6 +789,10 @@ 
squid_off_t bytes_read; squid_off_t bytes_written; int uses; /* ie # req's over persistent conn */ +#if HAVE_EPOLL + unsigned epoll_state; /* keep track of the epoll state */ + unsigned epoll_backoff; /* keep track of whether the fd is backed off */ +#endif struct _fde_disk { DWCB *wrt_handle; void *wrt_handle_data; @@ -1115,6 +1119,10 @@ char *buf; size_t offset; size_t size; +#if HAVE_EPOLL + int clientfd; /* Record the client's fd if we have too much + data waiting to send to the server */ +#endif } in; struct { squid_off_t size_left; /* How much body left to process */ @@ -1505,6 +1513,10 @@ mem_hdr data_hdr; squid_off_t inmem_hi; squid_off_t inmem_lo; +#if HAVE_EPOLL + int serverfd; /* Record the server's fd if we have too much + data waiting to send to the client */ +#endif dlink_list clients; int nclients; struct { @@ -1864,7 +1876,7 @@ int recvfroms; int sendtos; } sock; -#if HAVE_POLL +#if HAVE_POLL || HAVE_EPOLL int polls; #else int selects;