This patch is generated from the raid branch of tcp_async in squid Wed Sep 29 01:34:58 2004 GMT See http://devel.squid-cache.org/ Index: squid/acconfig.h diff -u squid/acconfig.h:1.4.6.1.2.1 squid/acconfig.h:1.4.6.1.2.1.2.1 --- squid/acconfig.h:1.4.6.1.2.1 Sun Dec 17 06:09:08 2000 +++ squid/acconfig.h Sun Dec 17 06:36:23 2000 @@ -286,10 +286,16 @@ /* Signal driven async network IO */ #undef TCP_ASYNC +/* Signal handling from signal handlers */ +#undef CIOCIOSAN_USE_SIGHANDLER + /* Periodically dump profile snapshots from running squid */ #undef PROFILE_SQUID #undef PROFIL_DELAY +/* Enable on-fly drive replacement */ +#undef DISK_REPL + /* * Define this if unlinkd is required * (strongly recommended for ufs storage type) Index: squid/configure.in diff -u squid/configure.in:1.7.6.1.2.1 squid/configure.in:1.7.6.1.2.1.2.1 --- squid/configure.in:1.7.6.1.2.1 Sun Dec 17 06:09:08 2000 +++ squid/configure.in Sun Dec 17 06:36:23 2000 @@ -754,6 +754,15 @@ fi ]) +dnl Enable signal handling from signal handlers +AC_ARG_ENABLE(ciociosan-handlers, +[ --enable-ciociosan-handlers Enable signal handling from signal handlers (requires ciociosan)], +[ if test "$enableval" = "yes" ; then + echo "Enabling signal handling from signal handlers" + AC_DEFINE(CIOCIOSAN_USE_SIGHANDLER) + fi +]) + dnl Enable gmon-based profiling AC_ARG_ENABLE(profiling, [ --enable-profiling Enable gmon-based profiling], @@ -784,6 +793,15 @@ echo "Profiling snapshot dumping interval is set to " $PROFILE_DELAY AC_DEFINE_UNQUOTED(PROFIL_DELAY,$PROFILE_DELAY) +dnl Enable on-fly drive replacement +AC_ARG_ENABLE(disk-repl, +[ --enable-disk-repl Enable on-fly drive replacement], +[ if test "$enableval" = "yes" ; then + echo "Enable on-fly drive replacement" + AC_DEFINE(DISK_REPL) + fi +]) + # Force some compilers to use ANSI features # case "$host" in Index: squid/doc/debug-sections.txt diff -u squid/doc/debug-sections.txt:1.2.22.1.2.1 squid/doc/debug-sections.txt:1.2.22.1.2.1.2.1 --- 
squid/doc/debug-sections.txt:1.2.22.1.2.1 Sun Dec 17 06:09:08 2000 +++ squid/doc/debug-sections.txt Sun Dec 17 06:36:23 2000 @@ -90,3 +90,4 @@ section 88 Storage Manager TREE Interface I/O Routines, async section 89 Storage Manager TREE Interface I/O Routines section 90 Signal-driven netword IO +section 91 RAID support Index: squid/src/Makefile.in diff -u squid/src/Makefile.in:1.3.22.1 squid/src/Makefile.in:1.3.22.1.2.2 --- squid/src/Makefile.in:1.3.22.1 Sun Dec 17 06:09:08 2000 +++ squid/src/Makefile.in Fri Dec 22 03:29:00 2000 @@ -101,6 +101,7 @@ comm.o \ comm_cio.o \ comm_select.o \ + raid.o \ debug.o \ @DELAY_OBJS@ \ disk.o \ Index: squid/src/abend.c diff -u /dev/null squid/src/abend.c:1.1.2.1 --- /dev/null Tue Sep 28 18:39:42 2004 +++ squid/src/abend.c Fri Dec 22 05:03:05 2000 @@ -0,0 +1,600 @@ +/* + * $Id$ + * + * DEBUG: section 92 Support for abnormal termination handling + * AUTHOR: Nikita Danilov + * COPYRIGHT: This file is Copyright 2000 by Hans Reiser + * + * SQUID Internet Object Cache http://squid.nlanr.net/Squid/ + * ---------------------------------------------------------- + * + * Squid is the result of efforts by numerous individuals from the + * Internet community. Development is led by Duane Wessels of the + * National Laboratory for Applied Network Research and funded by the + * National Science Foundation. Squid is Copyrighted (C) 1998 by + * the Regents of the University of California. Please see the + * COPYRIGHT file for full details. Squid incorporates software + * developed and/or copyrighted by other sources. Please see the + * CREDITS file for full details. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. + * + */ + +#include "squid.h" + +#define HAS_BFD +#define USE_DLADDR_DLSYM_FOR_BACKTRACE + +#if defined( HAS_BFD ) +#include <bfd.h> +#endif + +#if defined( USE_DLADDR_DLSYM_FOR_BACKTRACE ) +#include <dlfcn.h> +#endif + +#if defined( __GNUC__ ) +#define USE_BUILTIN_RETURN_ADDRESS +#endif + +/** debugging facilities */ +#define ABEND_OUTPUT( format, args... ) \ + ( CURRENT_FUNCTION_NAME " (%s:%i): " format "\n", \ + __FILE__, __LINE__ , ##args ) + +#define DEBUG( format, args... ) debug( 92, 9 ) ABEND_OUTPUT( format, ##args ) +#define ERROR( format, args... ) debug( 92, 1 ) ABEND_OUTPUT( format, ##args ) +#define PRINT( format, args... ) debug( 92, 2 ) ( format, ##args ) + +/** initialises abend sub-system. Should be called from main directly + (that is, not from a function called from main). */ +void abendInit( int argc, char **argv ); + +/** prints current stack trace to the debug( 92, 2 ). Tries to resolve + address to symbol name either through bfd interfaces or through + dlsym. If you see spurious `sigaction' in backtrace, read it as + __restore() --- function call inserted into stack by kernel when + vectoring control to the signal handler. Problem is that __restore + is not exported by libc. */ +void printBacktrace(); + +/** abnormal end. Prints backtrace, then either dumps core into + specified directory (/tmp by default --- XXX Unixism) or tries to + attach debugger or _exits */ +void abend(); + +/** resolves address to the symbol name. 
+ If exactAddress is not NULL, address of the beginning of the + symbol in question returned there. */ +const char *getNameByAddress( void *address, void **exactAddress ); + +/** resolves symbol name to the address */ +void *getAddressFor( char *name ); + +static void *getFrame( int n ); +static int isValidFrame( void *frame ); +static int isLastFrame( void *frame ); +static int initBfd( char *image ); +static int compare_by_address( const void *symb1, const void *symb2 ); +static int stackGrowth(); +static const char *getBfdNameByAddress( void *address, void **exactAddress ); +static const char *getDlNameByAddress( void *address, void **exactAddress ); + +#define IMAGE "/home/god/run/squidng/bin/squid" +#define DEBUGGER_COMMAND "/usr/bin/gdb " IMAGE " %2$li > /dev/pts/1 < /dev/pts/1 2> /dev/pts/1" + +#define START_MAGIC 0xacc01ade +#define END_MAGIC 0x1abe11ed +static struct data_s +{ + unsigned long startMagic; + void *symbolJustBeforeMain; + void *symbolJustAfterMain; + void *mainEntryAddress; + unsigned int dumpCore :1; + unsigned int attachDebugger :1; + unsigned int stackGrowsUp :1; + char coredumpDir[ MAXPATHLEN + 1 ]; + char debuggerCommand[ 1024 ]; + struct + { + int tableLoaded; + int wasBFDError; + int tableSize; + void **addresses; + char **names; + } bfd; + unsigned long endMagic; +} data; + +void abendInit( int argc, char **argv ) +{ + extern void funJustBeforeMain(); + extern void funJustAfterMain(); + + DEBUG( "Initializing abend" ); + data.startMagic = START_MAGIC; + data.symbolJustBeforeMain = NULL; + data.symbolJustAfterMain = NULL; + data.mainEntryAddress = NULL; + data.stackGrowsUp = stackGrowth(); + data.endMagic = END_MAGIC; +#if defined( USE_BUILTIN_RETURN_ADDRESS ) + data.mainEntryAddress = __builtin_return_address( 1 ); +#endif + data.symbolJustBeforeMain = funJustBeforeMain; + data.symbolJustAfterMain = funJustAfterMain; + snprintf( data.debuggerCommand, sizeof data.debuggerCommand, + DEBUGGER_COMMAND, argv[ 0 ], ( long ) getpid() ); + 
strcpy( data.coredumpDir, "/tmp" ); + data.dumpCore = 1; + data.attachDebugger = 0; + /* this is to avoid inlining by smart compilers */ + if( ( ( char * ) &data ) + 1 == NULL ) + { + DEBUG( "dying early" ); + /* take address */ + argv[ 0 ] = ( char * ) abendInit; + abendInit( argc - 1, argv ); + /* tail recursive function can be inlined? */ + DEBUG( "hehe" ); + } + data.bfd.tableLoaded = 0; + data.bfd.wasBFDError = 0; + initBfd( IMAGE ); +} + +void printBacktrace() +{ + int i; + void *frame; + + PRINT( "Stack backtrace:\n" ); + for( i = 0 ; isValidFrame( frame = getFrame( i ) ) ; ++i ) + { + void *baseAddress; + const char *name; + + name = getNameByAddress( frame, &baseAddress ); + if( name != NULL ) + { + PRINT( "%i: %p: %s %c 0x%x\n", i, frame, name, + ( baseAddress != NULL ) ? '+' : ' ', + ( baseAddress != NULL ) ? frame - baseAddress : 0 ); + } + if( isLastFrame( frame ) ) + { + break; + } + } + PRINT( "End backtrace.\n" ); +} + +void abend() +{ + int endOfEternity; + + if( ( data.startMagic == START_MAGIC ) && + ( data.endMagic == END_MAGIC ) ) + { + printBacktrace(); + if( data.dumpCore ) + { + struct rlimit infiniteCore; + + /* we dont' care about errors from following calls, because + what can we do anyway? 
*/ + signal( SIGABRT, SIG_DFL ); + /* this is to lay down ghosts of Linux */ + getrlimit( RLIMIT_CORE, &infiniteCore ); + infiniteCore.rlim_cur = infiniteCore.rlim_max = RLIM_INFINITY; + setrlimit( RLIMIT_CORE, &infiniteCore ); + chdir( data.coredumpDir ); + kill( getpid(), SIGABRT ); + } + else if( data.attachDebugger ) + { + if( !fork() ) + { + system( data.debuggerCommand ); + _exit( 1 ); + } + } + else + { + _exit( 1 ); + } + } + else + { + /* someone danced fandango on our core */ + } + endOfEternity = 0; + while( !endOfEternity ) + {} + /* yes, we _can_ get here */ +} + +static void *getFrame( int n ) +{ +#if defined( USE_BUILTIN_RETURN_ADDRESS ) +#define GET_FRAME( n ) case n: return __builtin_return_address( n ) + + switch( n ) + { + GET_FRAME( 0 ); + GET_FRAME( 1 ); + GET_FRAME( 2 ); + GET_FRAME( 3 ); + GET_FRAME( 4 ); + GET_FRAME( 5 ); + GET_FRAME( 6 ); + GET_FRAME( 7 ); + GET_FRAME( 8 ); + GET_FRAME( 9 ); + GET_FRAME( 10 ); + GET_FRAME( 11 ); + GET_FRAME( 12 ); + GET_FRAME( 13 ); + GET_FRAME( 14 ); + GET_FRAME( 15 ); + GET_FRAME( 16 ); + GET_FRAME( 17 ); + GET_FRAME( 18 ); + GET_FRAME( 19 ); + GET_FRAME( 20 ); + GET_FRAME( 21 ); + GET_FRAME( 22 ); + GET_FRAME( 23 ); + GET_FRAME( 24 ); + GET_FRAME( 25 ); + GET_FRAME( 26 ); + GET_FRAME( 27 ); + GET_FRAME( 28 ); + GET_FRAME( 29 ); + GET_FRAME( 30 ); + GET_FRAME( 31 ); + GET_FRAME( 32 ); + default: + return NULL; + } +#undef GET_FRAME +#else + void *local; + + return ( &local )[ data.stackGrowsUp ? 
-n : +n ]; +#endif +} + +static int isValidFrame( void *frame ) +{ +#if defined( USE_BUILTIN_RETURN_ADDRESS ) + return frame != NULL; +#else + return 1; +#endif +} + +static int isLastFrame( void *frame ) +{ +#if defined( USE_BUILTIN_RETURN_ADDRESS ) + if( frame == NULL ) + { + return 1; + } + else if( frame == data.mainEntryAddress ) + { + return 1; + } + else +#else + if( ( data.symbolJustBeforeMain <= frame ) && + ( frame <= data.symbolJustAfterMain ) ) + { + return 1; + } + else +#endif + { + return 0; + } +} + +const char *getNameByAddress( void *address, void **exactAddress ) +{ + if( address != NULL ) + { + const char *result; + + result = getBfdNameByAddress( address, exactAddress ); + if( result == NULL ) + { + result = getDlNameByAddress( address, exactAddress ); + } + return result; + } + else + { + return "NIL"; + } +} + +static const char *getBfdNameByAddress( void *address, void **exactAddress ) +{ +#if defined( HAS_BFD ) + int low; + int high; + + low = 0; + high = data.bfd.tableSize - 1; + + if( data.bfd.addresses[ high ] <= address ) + { + return NULL; + } + if( data.bfd.addresses[ 0 ] > address ) + { + return NULL; + } + + /* binary serach */ + while( high - low > 1 ) + { + int median; + + median = ( high + low ) / 2; + if( data.bfd.addresses[ median ] > address ) + { + high = median; + } + else + { + low = median; + } + assert( high > low ); + } + assert( ( high == low + 1 ) && + ( data.bfd.addresses[ low ] <= address ) && + ( address < data.bfd.addresses[ high ] ) ); + if( exactAddress != NULL ) + { + *exactAddress = data.bfd.addresses[ low ]; + } + return data.bfd.names[ low ]; +#else + return NULL; +#endif +} + +static char dlSymbolName[ 1024 ]; + +static const char *getDlNameByAddress( void *address, void **exactAddress ) +{ +#if defined( USE_DLADDR_DLSYM_FOR_BACKTRACE ) + Dl_info symbolInfo; + + if( dladdr( address, &symbolInfo ) == 0 ) + { + return NULL; + } + else if( symbolInfo.dli_sname != NULL ) + { + if( exactAddress != NULL ) + { + 
*exactAddress = symbolInfo.dli_saddr; + } + snprintf( dlSymbolName, sizeof dlSymbolName, "(%s@%p) %s", + ( symbolInfo.dli_fname != NULL ) ? symbolInfo.dli_fname : "", + symbolInfo.dli_fbase, + symbolInfo.dli_sname ); + return dlSymbolName; + } +#endif + return NULL; +} + +void *getAddressFor( char *name ) +{ + void *result; + + result = NULL; +#if defined( USE_DLADDR_DLSYM_FOR_BACKTRACE ) + + result = dlsym( RTLD_DEFAULT, name ); +#endif +#if defined( HAS_BFD ) + if( result == NULL ) + { + int i; + + for( i = 0 ; i < data.bfd.tableSize ; ++i ) + { + if( !strcmp( name, data.bfd.names[ i ] ) ) + { + result = data.bfd.addresses[ i ]; + break; + } + } + } +#endif + return result; +} + +static int initBfd( char *image ) +{ +#if defined( HAS_BFD ) +#define BFD_ERROR( text ) bfd_perror( text ) ; ERROR( text ) + + bfd *abfd; + int storage_needed = 0; + asymbol **symbol_table = ( asymbol ** ) NULL; + int number_of_symbols; + int i; + int j; + + if( data.bfd.tableLoaded ) + { + return 1; + } + if( data.bfd.wasBFDError ) + { + return 0; + } + + bfd_init(); + if( bfd_get_error() != bfd_error_no_error ) + { + BFD_ERROR( "bfd_init()" ); + data.bfd.wasBFDError = 1; + return 0; + } + + abfd = bfd_openr( image, "default" ); + if( bfd_get_error() != bfd_error_no_error ) + { + BFD_ERROR( "bfd_fdopenr()" ); + data.bfd.wasBFDError = 1; + return 0; + } + if( ! 
bfd_check_format( abfd, bfd_object ) ) + { + BFD_ERROR( "bfd_check_format()" ); + data.bfd.wasBFDError = 1; + return 0; + } + + storage_needed = bfd_get_symtab_upper_bound( abfd ); + + if( storage_needed < 0 ) + { + BFD_ERROR( "bfd_get_symtab_upper_bound(): storage_needed < 0" ); + data.bfd.wasBFDError = 1; + return 0; + } + if( storage_needed == 0 ) + { + DEBUG( "no symbols" ); + data.bfd.wasBFDError = 1; + return 0; + } + symbol_table = xmalloc( sizeof( asymbol ) *storage_needed ); + + number_of_symbols = bfd_canonicalize_symtab( abfd, symbol_table ); + if( number_of_symbols < 0 ) + { + BFD_ERROR( "bfd_canonicalize_symtab()" ); + xfree( symbol_table ); + data.bfd.wasBFDError = 1; + return 0; + } + + qsort( symbol_table, number_of_symbols, sizeof symbol_table[ 0 ], + compare_by_address ); + + data.bfd.tableSize = 0; + for( i = 0 ; i < number_of_symbols ; ++i ) + { + if( bfd_asymbol_bfd( symbol_table[ i ] ) == ( bfd * ) NULL ) + { + continue; + } + /* unnamed sections and other garbage */ + if( symbol_table[ i ] -> name[ 0 ] == ( char ) 0 ) + { + continue; + } + ++data.bfd.tableSize; + } + + data.bfd.addresses = xmalloc( data.bfd.tableSize * sizeof( void * ) ); + data.bfd.names = xmalloc( data.bfd.tableSize * sizeof( char * ) ); + + DEBUG( "bfd tables with %i entries allocated", data.bfd.tableSize ); + + j = 0; + for( i = 0 ; i < number_of_symbols ; ++i ) + { + if( bfd_asymbol_bfd( symbol_table[ i ] ) == NULL ) + { + continue; + } + if( symbol_table[ i ] -> name[ 0 ] == ( char ) 0 ) + { + continue; + } + data.bfd.addresses[ j ] = + ( void * ) bfd_asymbol_value( symbol_table[ i ] ); + data.bfd.names[ j ] = xstrdup( symbol_table[ i ] -> name ); + ++j; + } + + xfree( symbol_table ); + if( ! 
bfd_close( abfd ) ) + { + BFD_ERROR( "bfd_close()" ); + data.bfd.wasBFDError = 1; + /* return 1 anyway */ + } + data.bfd.tableLoaded = 1; +#undef BFD_ERROR +#endif + return 1; +} + +static int compare_by_address( const void *symb1, const void *symb2 ) +{ +#if defined( HAS_BFD ) + return( bfd_asymbol_value( *( asymbol ** ) symb1 ) - + bfd_asymbol_value( *( asymbol ** ) symb2 ) ); +#else + return 1; +#endif +} + +static int stackGrowthCalled( int *localOfTheCaller ) +{ + int bar; + + return &bar > localOfTheCaller; +} + +static int stackGrowth() +{ + int foo; + return stackGrowthCalled( &foo ); +} + + +/* + * $Log$ + * Revision 1.1.2.1 2000/12/22 13:03:05 nikitadanilov + * abend.c contains functions to handle abnormal program termination. + * Useful exported functions: + * + * void printBacktrace(): prints stack backtrace of current + * thread. Tries to resolve addresses to symbol names using either + * BFD library interfaces or dlsym and friends. If compiler has no + * support for stack traversing, interprets stack as array of + * pointers and tries to resolve each. This will dump garbage into + * output: all automatic variables will be treated as return + * addresses. + * + * void abend(): prints backtrace and either dumps core, attaches debugger + * or _exits. Choice is hardcoded in abendInit() for now. If you want + * to attach debugger you should arrange its stdout and stdin yourself. 
+ * + * + */ Index: squid/src/access_log.c diff -u squid/src/access_log.c:1.5 squid/src/access_log.c:1.5.18.1 --- squid/src/access_log.c:1.5 Tue Nov 14 05:03:47 2000 +++ squid/src/access_log.c Fri Dec 22 03:30:33 2000 @@ -328,6 +328,8 @@ accessLogInit(void) { assert(sizeof(log_tags) == (LOG_TYPE_MAX + 1) * sizeof(char *)); + if (strcasecmp (Config.Log.access, "none") == 0) + return; logfile = logfileOpen(Config.Log.access, MAX_URL << 1, 1); LogfileStatus = LOG_ENABLE; #if HEADERS_LOG Index: squid/src/cache_cf.c diff -u squid/src/cache_cf.c:1.6.4.1 squid/src/cache_cf.c:1.6.4.1.6.2 --- squid/src/cache_cf.c:1.6.4.1 Sun Dec 17 00:20:04 2000 +++ squid/src/cache_cf.c Fri Dec 22 03:33:28 2000 @@ -199,7 +199,7 @@ int i; ssize_t ms = -1; - for (i = 0; i < Config.cacheSwap.n_configured; i++) { + FOREACHSDINDEX(i) { if (Config.cacheSwap.swapDirs[i].max_objsize > ms) ms = Config.cacheSwap.swapDirs[i].max_objsize; } @@ -968,6 +968,9 @@ swap->swapDirs = NULL; swap->n_allocated = 0; swap->n_configured = 0; +#if defined( DISK_REPL ) + swap->n_offline = 0; +#endif } const char * Index: squid/src/cf.data.pre diff -u squid/src/cf.data.pre:1.5 squid/src/cf.data.pre:1.5.12.1 --- squid/src/cf.data.pre:1.5 Tue Dec 12 15:21:17 2000 +++ squid/src/cf.data.pre Sun Dec 17 06:36:23 2000 @@ -3306,5 +3306,40 @@ force fresh content. DOC_END +NAME: watch_failure +IFDEF: DISK_REPL +TYPE: wordlist +LOC: Config.disk_repl_watch_list +DEFAULT: none +DOC_START + List of the form + ... + when device_file fails, cache_dir goes offline. +DOC_END + +NAME: offline_notify_cmd +IFDEF: DISK_REPL +TYPE: string +LOC: Config.offline_notify_cmd +DEFAULT: none +DOC_START + Command to be executed when some cache_dir goes + offline. This command will be executed with three parameters: + path to the cache_dir, cache_dir index and device file on which dir + is mounted. 
+DOC_END + +NAME: online_notify_cmd +IFDEF: DISK_REPL +TYPE: string +LOC: Config.online_notify_cmd +DEFAULT: none +DOC_START + Command to be executed when some cache_dir goes back + online. This command will be executed with three parameters: + path to the cache_dir, cache_dir index and device file on which dir + is mounted. +DOC_END + EOF Index: squid/src/client_side.c diff -u squid/src/client_side.c:1.10.2.1.4.1 squid/src/client_side.c:1.10.2.1.4.1.2.1 --- squid/src/client_side.c:1.10.2.1.4.1 Sun Dec 17 06:09:08 2000 +++ squid/src/client_side.c Sun Dec 17 06:36:23 2000 @@ -2530,11 +2530,10 @@ int len = conn->in.size - conn->in.offset - 1; debug(33, 4) ("clientReadRequest: FD %d: reading request...\n", fd); statCounter.syscalls.sock.reads++; - size = read(fd, conn->in.buf + conn->in.offset, len); + size = comm_read_wrapper(fd, conn->in.buf + conn->in.offset, len); if (size > 0) { fd_bytes(fd, size, FD_READ); kb_incr(&statCounter.client_http.kbytes_in, size); - CLEAR_CAN_READ_IF_EMPTY( len, size, fd ); } /* * Don't reset the timeout value here. 
The timeout value will be @@ -2567,7 +2566,6 @@ comm_close(fd); return; } else { - CLEAR_CAN_READ( fd ); if (conn->in.offset == 0) { debug(50, 2) ("clientReadRequest: FD %d: no data to process (%s)\n", fd, xstrerror()); return; Index: squid/src/comm.c diff -u squid/src/comm.c:1.4.14.1.4.1 squid/src/comm.c:1.4.14.1.4.1.2.2 --- squid/src/comm.c:1.4.14.1.4.1 Sun Dec 17 06:09:08 2000 +++ squid/src/comm.c Fri Dec 22 03:39:08 2000 @@ -73,6 +73,7 @@ static int commResetFD(ConnectStateData * cs); static int commRetryConnect(ConnectStateData * cs); CBDATA_TYPE(ConnectStateData); +static SyscallCBInvoc syscallCallback[ SYSCALL_MAX ][ MAX_SYSCALLCB ]; static MemPool *comm_write_pool = NULL; static MemPool *conn_close_pool = NULL; @@ -832,13 +833,18 @@ fd_table[ fd ].flags.async_tcp = 1; fd_table[ fd ].flags.can_read = 1; fd_table[ fd ].flags.can_write = 1; + setLostIOBitmap( fd ); } } /* TCP_ASYNC */ #endif - +#if defined( DISK_REPL ) +/* for crash simulation */ +SimulateCrashData swatAButterfly; +/* DISK_REPL */ +#endif void comm_init(void) @@ -850,9 +856,23 @@ * Since Squid_MaxFD can be as high as several thousand, don't waste them */ RESERVED_FD = XMIN(100, Squid_MaxFD / 4); CBDATA_INIT_TYPE(ConnectStateData); -#ifdef TCP_ASYNC +#if TCP_ASYNC our_pid = getpid(); #endif + memset( syscallCallback, 0, sizeof syscallCallback ); +#if DISK_REPL + { + /* hard-coded */ + swatAButterfly.swapDirNo = 0; + /* 20 seconds, полет нормальный... 
*/ + swatAButterfly.crashTime = time( NULL ) + 20; + +/* addReadHook( simulateCrash, &swatAButterfly ); */ +/* addWriteHook( simulateCrash, &swatAButterfly ); */ + } + raidInit(); +/* DISK_REPL */ +#endif comm_write_pool = memPoolCreate("CommWriteStateData", sizeof(CommWriteStateData)); conn_close_pool = memPoolCreate("close_handler", sizeof(close_handler)); } @@ -870,7 +890,7 @@ fd, (int) state->offset, state->size); nleft = state->size - state->offset; - len = write(fd, state->buf + state->offset, nleft); + len = comm_write_wrapper(fd, state->buf + state->offset, nleft); debug(5, 5) ("commHandleWrite: write() returns %d\n", len); fd_bytes(fd, len, FD_WRITE); statCounter.syscalls.sock.writes++; @@ -890,7 +910,6 @@ } else if (ignoreErrno(errno)) { debug(50, 10) ("commHandleWrite: FD %d: write failure: %s.\n", fd, xstrerror()); - CLEAR_CAN_WRITE( fd ); commSetSelect(fd, COMM_SELECT_WRITE, commHandleWrite, @@ -902,7 +921,6 @@ CommWriteStateCallbackAndFree(fd, COMM_ERROR); } } else { - CLEAR_CAN_WRITE_IF_FULL( nleft, len, fd ); /* A successful write, continue */ state->offset += len; if (state->offset < state->size) { @@ -954,6 +972,48 @@ comm_write(fd, mb.buf, mb.size, handler, handler_data, memBufFreeFunc(&mb)); } +/* wrapper around syscall */ +int comm_read_wrapper( int fd, char *buf, int size ) +{ + int ret; + + assert( buf != NULL ); + assert( size >= 0 ); + + ret = read( fd, buf, size ); +#if defined( TCP_ASYNC ) + if( ( ( ret == -1 ) && ignoreErrno( errno ) ) || + ( 0 && ( ret > 0 ) && ( ret < size ) ) ) + { + CLEAR_CAN_READ( fd ); + } +/* TCP_ASYNC */ +#endif + callReadHook( fd, &errno ); + return ret; +} + +/* wrapper around syscall */ +int comm_write_wrapper( int fd, char *buf, int size ) +{ + int ret; + + assert( buf != NULL ); + assert( size >= 0 ); + + ret = write( fd, buf, size ); +#if defined( TCP_ASYNC ) + if( ( ( ret == -1 ) && ignoreErrno( errno ) ) || + ( 0 && ( ret > 0 ) && ( ret < size ) ) ) + { + CLEAR_CAN_WRITE( fd ); + } +/* TCP_ASYNC */ 
+#endif + callWriteHook( fd, &errno ); + return ret; +} + /* * hm, this might be too general-purpose for all the places we'd * like to use it. @@ -1005,3 +1065,106 @@ } } } + +/* syscall callbacks */ +#define SCB_DEBUG( format, args... ) \ + debug( 5, 9 )( CURRENT_FUNCTION_NAME " (%s:%i): " format "\n", \ + __FILE__, __LINE__ , ##args ) + +#define SCB_TRACE SCB_DEBUG( "" ) + +int addSyscallCB( SyscallCBInvoc *hook, SyscallCallBack cb, void *data ) +{ + int i; + + assert( hook != NULL ); + + SCB_TRACE; + for( i = 0 ; i < MAX_SYSCALLCB ; ++i ) + { + if( hook[ i ].cb == NULL ) + { + hook[ i ].cb = cb; + hook[ i ].data = data; + return 1; + } + } + return 0; +} + +int delSyscallCB( SyscallCBInvoc *hook, SyscallCallBack cb ) +{ + int i; + + assert( hook != NULL ); + + SCB_TRACE; + for( i = 0 ; i < MAX_SYSCALLCB ; ++i ) + { + if( hook[ i ].cb == cb ) + { + hook[ i ].cb = NULL; + hook[ i ].data = NULL; + return 1; + } + } + return 0; +} + +int addReadHook( SyscallCallBack cb, void *data ) +{ + SCB_TRACE; + return addSyscallCB( syscallCallback[ SYSCALL_READ ], cb, data ); +} + +int addWriteHook( SyscallCallBack cb, void *data ) +{ + SCB_TRACE; + return addSyscallCB( syscallCallback[ SYSCALL_WRITE ], cb, data ); +} + +syscallcb_result callSyscallCBHook( SyscallCBInvoc *hook, + syscall_type call, + int fd, int *errNo ) +{ + assert( hook != NULL ); + + CHECK_FD( fd ); + SCB_TRACE; + for( ; hook -> cb != NULL ; ++hook ) + { + syscallcb_result result; + + result = ( hook -> cb )( call, fd, errNo, hook -> data ); + if( result == SYSCALLCB_ABORT ) + { + return SYSCALLCB_ABORT; + } + } + return SYSCALLCB_CONTINUE; +} + +syscallcb_result callReadHook( int fd, int *errNo ) +{ + SCB_TRACE; + CHECK_FD( fd ); + return callSyscallCBHook( syscallCallback[ SYSCALL_READ ], + SYSCALL_READ, fd, errNo ); +} + +syscallcb_result callWriteHook( int fd, int *errNo ) +{ + SCB_TRACE; + CHECK_FD( fd ); + return callSyscallCBHook( syscallCallback[ SYSCALL_WRITE ], + SYSCALL_WRITE, fd, errNo ); +} + 
+syscallcb_result echoSyscallCB( syscall_type call, + int fd, int *errNo, + void *data ) +{ + SCB_DEBUG( "call: %i fd: %i errno: %i data: %p", call, fd, *errNo, data ); + return SYSCALLCB_CONTINUE; +} + Index: squid/src/comm_cio.c diff -u squid/src/comm_cio.c:1.1.2.1 squid/src/comm_cio.c:1.1.2.1.2.2 --- squid/src/comm_cio.c:1.1.2.1 Sun Dec 17 06:09:08 2000 +++ squid/src/comm_cio.c Fri Dec 22 04:41:25 2000 @@ -136,9 +136,6 @@ #include -/** if we use ioctl in stead of sigtimedwait, ioctl_fd is descriptor - of reiserfs directory on which ioctl is issued. */ -static int ioctl_fd = -1; /* SIGRTMAX is not constant?! */ /** global array of per-signal-number callbacks */ static CioCallback callbacks[ _NSIG ]; @@ -153,6 +150,61 @@ handlers, taking deferred reads into account */ extern int cioStdCallback( siginfo_t *info ); +#if defined( __GNUC__ ) +#define BITMAP_TYPE long long +#define BITMAP_BIT 1LL +#else +#define BITMAP_TYPE long +#define BITMAP_BIT 1L +#endif + +static BITMAP_TYPE *lostIOBitmap; + +#define INDEX_IN_BITMAP( offset ) ( offset / sizeof lostIOBitmap[ 0 ] ) +#define OFFSET_IN_BYTE( offset ) ( offset % sizeof lostIOBitmap[ 0 ] ) + +#define MASK_IN_BITMAP( offset ) ( BITMAP_BIT << OFFSET_IN_BYTE( offset ) ) +#define SET_BIT_IN_BITMAP( offset ) \ + lostIOBitmap[ INDEX_IN_BITMAP( offset ) ] |= MASK_IN_BITMAP( offset ) + +#define CLN_BIT_IN_BITMAP( offset ) \ + lostIOBitmap[ INDEX_IN_BITMAP( offset ) ] &= ~MASK_IN_BITMAP( offset ) + + +#if defined( CIOCIOSAN_USE_SIGHANDLER ) + +#include +#include + +#define SIGINFOS_IN_QUEUE ( 1024 + 42 ) /* XXX just some arbitrary number */ +static volatile struct CioSiginfoQueue { + /* + XXX sig_atomic_t is only guaranteed to work correctly + on the uniprocessor. Hence I will use atomic_t in it stead. 
+ */ + volatile atomic_t count; + volatile siginfo_t info[ SIGINFOS_IN_QUEUE ]; +} queue; + +static void cioSigHandler( int sigNum, siginfo_t *info, void *data ) +{ + if( atomic_read( &queue.count ) < SIGINFOS_IN_QUEUE ) + { + memcpy( ( void * )&queue.info[ atomic_read( &queue.count ) ], info, + sizeof *info ); + atomic_inc( ( atomic_t * )&queue.count ); + } +} + +#else + +/** if we use ioctl in stead of sigtimedwait, ioctl_fd is descriptor + of reiserfs directory on which ioctl is issued. */ +static int ioctl_fd = -1; + +/* CIOCIOSAN_USE_SIGHANDLER */ +#endif + /** call this to register your own callback. SIG_NUM will be blocked by this call. */ int commCioAddCB( int sig_num, CioCallback callback ) @@ -160,10 +212,21 @@ TRACE; if( callbacks[ sig_num ] == NULL ) { - callbacks[ sig_num ] = callback; +#if !defined( CIOCIOSAN_USE_SIGHANDLER ) sigaddset( &listenOn, sig_num ); sigprocmask( SIG_BLOCK, &listenOn, NULL ); - +#else + struct sigaction act; + + act.sa_sigaction = cioSigHandler; + act.sa_flags = SA_SIGINFO; + if( sigaction( sig_num, &act, NULL ) == -1 ) + { + DEBUG( "commCioAddCB: sigaction( %i ) fails: %s\n", + sig_num, xstrerror() ); + } +#endif + callbacks[ sig_num ] = callback; return 1; } else @@ -175,12 +238,37 @@ /** initialize Cio Cio San */ int initCommCio() { + /* prepare global data structures */ + memset( callbacks, 0, sizeof callbacks ); + sigemptyset( &listenOn ); + statCounter.syscalls.async.signals = 0; + statCounter.syscalls.async.read_sigs = 0; + statCounter.syscalls.async.write_sigs = 0; + statCounter.syscalls.async.loops = 0; + statCounter.syscalls.async.avg_ready_read = 0.0; + statCounter.syscalls.async.avg_ready_write = 0.0; + statCounter.syscalls.async.tail_reads = 0; + statCounter.syscalls.async.tail_writes = 0; + statCounter.syscalls.async.drain_reads = 0; + statCounter.syscalls.async.drain_writes = 0; + + lostIOBitmap = + xcalloc( SQUID_MAXFD / sizeof lostIOBitmap[ 0 ] / 8 + 1, + sizeof lostIOBitmap[ 0 ] ); + assert( lostIOBitmap != 
NULL ); + commCioAddCB( SIGCIOCIOSAN, cioStdCallback ); + return 1; +} + +int initCommCio2() +{ +#if !defined( CIOCIOSAN_USE_SIGHANDLER ) int i; /* scan all configured cache dirs to find butterfly. We need butterfly as it always works on top of a reiserfs directory and we need one reiserfs directory to use our magic ioctl. */ - for( i = 0 ; i < Config.cacheSwap.n_configured ; i++ ) + FOREACHSDINDEX( i ) { SwapDir *sd; @@ -198,34 +286,56 @@ ioctl_fd = open( "/cache", O_RDONLY ); } - if( ioctl_fd > 0 ) - { - /* prepare global data structures */ - memset( callbacks, 0, sizeof callbacks ); - sigemptyset( &listenOn ); - - statCounter.syscalls.async.signals = 0; - statCounter.syscalls.async.read_sigs = 0; - statCounter.syscalls.async.write_sigs = 0; - statCounter.syscalls.async.loops = 0; - statCounter.syscalls.async.avg_ready_read = 0.0; - statCounter.syscalls.async.avg_ready_write = 0.0; - statCounter.syscalls.async.tail_reads = 0; - statCounter.syscalls.async.tail_writes = 0; - statCounter.syscalls.async.drain_reads = 0; - statCounter.syscalls.async.drain_writes = 0; - - TRACE; - commCioAddCB( SIGCIOCIOSAN, cioStdCallback ); - return 1; - } - else + if( ioctl_fd < 0 ) { /* will use normal sigtimedwait */ DEBUG( "initCommCio: Keine butterfly cache_dir gefunden." 
" Cio Cio San will use sigtimedwait(2).\n" ); return 0; } +#else + DEBUG( "initCommCio: will use notification through signal handler\n" ); + + atomic_set( &queue.count, 0 ); + memset( ( void * ) queue.info, 0, sizeof queue.info ); + +/* CIOCIOSAN_USE_SIGHANDLER */ +#endif + return 1; +} + +static void processOneSiginfo( siginfo_t *info ) +{ + CHECK_FD( info -> si_fd ); + + if( callbacks[ info -> si_signo ] != NULL ) + { + if( fd_table[ info -> si_fd ].flags.async_tcp ) + { + TRACE0( "callback: fd: %i, signo: %i", + info -> si_fd, info -> si_signo ); + callbacks[ info -> si_signo ]( info ); + ++statCounter.syscalls.async.signals; + } + else if( !fd_table[ info -> si_fd ].flags.open ) + { + DEBUG + ( "commCioLoop: got signal: %i for already closed fd: %i\n", + info -> si_signo, info -> si_fd ); + } + else + { + DEBUG + ( "commCioLoop: got unexpected signal: %i for fd: %i\n", + info -> si_signo, info -> si_fd ); + } + } + else + { + DEBUG + ( "commCioLoop: got unexpected signal: %i\n", + info -> si_signo ); + } } /* main signal polling loop. Uses either ioctl( REISERFS_IOC_TIMEDWAIT ) @@ -233,11 +343,12 @@ signals received. Sets can_* flags in fds. 
*/ int commCioLoop() { + int ranOnce; +#if !defined( CIOCIOSAN_USE_SIGHANDLER ) struct reiserfs_sigtimedwait_arg arg; siginfo_t siginfo[ SIGINFO_PER_CALL ]; - struct timespec timeout = {0,0}; + struct timespec timeout = { 0, 0 }; int numOfSigs; - int ranOnce; arg.uthese = &listenOn; arg.sigsetsize = 8; /* sizeof mask; */ @@ -262,37 +373,7 @@ ranOnce = 1; for( i = 0 ; i < numOfSigs ; ++i ) { - siginfo_t *info; - - info = &siginfo[ i ]; - if( callbacks[ info -> si_signo ] != NULL ) - { - if( fd_table[ info -> si_fd ].flags.async_tcp ) - { - TRACE0( "callback: fd: %i, signo: %i", - info -> si_fd, info -> si_signo ); - callbacks[ info -> si_signo ]( info ); - ++statCounter.syscalls.async.signals; - } - else if( !fd_table[ info -> si_fd ].flags.open ) - { - DEBUG - ( "commCioLoop: got signal: %i for already closed fd: %i\n", - info -> si_signo, info -> si_fd ); - } - else - { - DEBUG - ( "commCioLoop: got unexpected signal: %i for fd: %i\n", - info -> si_signo, info -> si_fd ); - } - } - else - { - DEBUG - ( "commCioLoop: got unexpected signal: %i\n", - info -> si_signo ); - } + processOneSiginfo( &siginfo[ i ] ); } } if( ranOnce ) @@ -315,6 +396,33 @@ return COMM_ERROR; } } +/* CIOCIOSAN_USE_SIGHANDLER */ +#else + ranOnce = 0; + if( atomic_read( &queue.count ) >= SIGINFOS_IN_QUEUE ) + { + debug( 90, 1 ) ( "commCioLoop: queue overrun.\n" ); + atomic_dec( ( atomic_t * ) &queue.count ); + } + else while( atomic_read( &queue.count ) > 0 ) + { + /* blammm, handler breaks in here, but it only increases count, ok */ + siginfo_t info; + ranOnce = 1; + /* blammm, handler breaks in during memcpy, ok */ + assert( ( atomic_read( &queue.count ) > 0 ) && + ( atomic_read( &queue.count ) < SIGINFOS_IN_QUEUE ) ); + memcpy + ( &info, ( void * ) &queue.info[ atomic_read( &queue.count ) - 1 ], + sizeof info ); + /* blammm, handler breaks in here, increases count, ok */ + atomic_dec( ( atomic_t * ) &queue.count ); + /* blammm, handler breaks in here, increases count, ok */ + 
processOneSiginfo( &info ); + } + return ranOnce ? COMM_OK : COMM_TIMEOUT; +/* CIOCIOSAN_USE_SIGHANDLER */ +#endif } /* taken from comm_select.c */ @@ -322,6 +430,8 @@ { fde *F = &fd_table[fd]; + CHECK_FD( fd ); + if( F->defer_check == NULL ) { return 0; @@ -336,10 +446,12 @@ ,,special processing`` means we have to call callbacks with special parameters (ask someone else why) */ -static int fdIsHttp( int fd ) +static inline int fdIsHttp( int fd ) { int j; + CHECK_FD( fd ); + for( j = 0 ; j < NHttpSockets ; j++ ) { if( fd == HttpSockets[ j ] ) @@ -350,31 +462,63 @@ return 0; } -static int fdIsIcp( int fd ) +static inline int fdIsIcp( int fd ) { - if( fd == theInIcpConnection ) - { - return 1; - } - if( fd == theOutIcpConnection ) - { - return 1; - } - return 0; + CHECK_FD( fd ); + + return( ( fd == theInIcpConnection ) || ( fd == theOutIcpConnection ) ); +} + +static inline int fdIsDns( int fd ) +{ + CHECK_FD( fd ); + + return( fd == DnsSocket ); +} + +static int inline isDatalessSocket( int fd ) +{ + CHECK_FD( fd ); + + return( fdIsHttp( fd ) || fdIsIcp( fd ) || fdIsDns( fd ) ); } -static int fdIsDns( int fd ) +static inline void cioProcessReadFD( int index ) { - if( fd == DnsSocket ) + fde *F; + PF *handler; + int dummy; + + CHECK_FD( index ); + assert( fd_table[ index ].flags.can_read ); + + F = &fd_table[ index ]; + DEBUG( "cioProcessReadFD: FD %d lost read", index ); + if( !commCioDeferRead( index ) && + ( ( handler = F -> read_handler ) != NULL ) ) { - return 1; + F -> read_handler = NULL; + handler( index, isDatalessSocket( index ) ? 
&dummy : F -> read_data ); } - return 0; } -int isDatalessSocket( int fd ) +static inline void cioProcessWriteFD( int index ) { - return( fdIsHttp( fd ) || fdIsIcp( fd ) || fdIsDns( fd ) ); + fde *F; + PF *handler; + int dummy; + + CHECK_FD( index ); + assert( fd_table[ index ].flags.can_write ); + + F = &fd_table[ index ]; + + DEBUG( "cioProcessWriteFD: FD %d lost write", index ); + if( ( handler = F -> write_handler ) != NULL ) + { + F -> write_handler = NULL; + handler( index, isDatalessSocket( index ) ? &dummy : F -> write_data ); + } } /* see description in declaration */ @@ -387,21 +531,25 @@ fd = info -> si_fd; f = &fd_table[ fd ]; + CHECK_FD( fd ); assert( fd >= 0 ); assert( f -> flags.open ); + SET_BIT_IN_BITMAP( fd ); result = 0; if( info -> si_band & ( POLLRDNORM | POLLIN | POLLHUP | POLLERR ) ) { /* non-blocking read is possible */ fd_table[ fd ].flags.can_read = 1; ++statCounter.syscalls.async.read_sigs; + cioProcessReadFD( fd ); } if( info -> si_band & ( POLLWRNORM | POLLOUT | POLLHUP | POLLERR ) ) { /* non-blocking write is possible */ fd_table[ fd ].flags.can_write = 1; ++statCounter.syscalls.async.write_sigs; + cioProcessWriteFD( fd ); } /* Just sanity check. */ if( ( !( info -> si_band & @@ -449,11 +597,19 @@ for( i = 0 ; i <= Biggest_FD ; i++ ) { fde *F; - int dummy; - PF *handler; - int incoming; + + /* run through bitmap */ + for( ; + lostIOBitmap[ INDEX_IN_BITMAP( i ) ] == 0 ; + i += sizeof lostIOBitmap[ 0 ] ) + {} + if( i > Biggest_FD ) + { + break; + } F = &fd_table[ i ]; + CHECK_FD( i ); /* skip internal descriptors */ @@ -467,28 +623,15 @@ { /* is this is socket listening for incoming HTTP connections or DNS socket etc.? */ - incoming = -1; - if( F -> flags.can_read && ( handler = F -> read_handler ) ) + if( F -> flags.can_read ) { ++readReady; - incoming = isDatalessSocket( i ); - DEBUG( "cioCatchLostIO: FD %d lost read", i ); - if( !commCioDeferRead( i ) ) - { - F -> read_handler = NULL; - handler( i, incoming ? 
&dummy : F -> read_data ); - } + cioProcessReadFD( i ); } - if( F -> flags.can_write && ( handler = F -> write_handler ) ) + if( F -> flags.can_write ) { ++writeReady; - if( incoming == -1 ) - { - incoming = isDatalessSocket( i ); - } - DEBUG( "cioCatchLostIO: FD %d lost write", i ); - F -> write_handler = NULL; - handler( i, incoming ? &dummy : F -> write_data ); + cioProcessWriteFD( i ); } } } @@ -505,6 +648,24 @@ return 1; } +void clearLostIOBitmap( int fd ) +{ + assert( lostIOBitmap != NULL ); + CLN_BIT_IN_BITMAP( fd ); +} + +void setLostIOBitmap( int fd ) +{ + assert( lostIOBitmap != NULL ); + debug( 91, 9 )( "setLostIOBitmap: fd: %i, index: %i, " + "offset: %i, mask: %x, bitmap: %p\n", + ( int )fd, + ( int ) INDEX_IN_BITMAP( fd ), + ( int ) OFFSET_IN_BYTE( fd ), + ( int ) MASK_IN_BITMAP( fd ), lostIOBitmap ); + SET_BIT_IN_BITMAP( fd ); +} + /* TCP_ASYNC */ #endif @@ -515,6 +676,22 @@ /* * $Log$ + * Revision 1.1.2.1.2.2 2000/12/22 12:41:25 nikitadanilov + * Ciociosan initialization separated into two pieces. + * Checks for fd and bitmap validness inserted. + * Nasty bug with lostIOBitmap allocation corrected: + * lostIOBitmap was allocated with enough sizeof (long long) chunks to + * accomodate for Biggest_FD files. Under light load this worked ok, but started + * to crash in random places as number of active fds grew. Fortunately, some + * glibc list (tzstring_list) was allocated right after lostIOBitmap. This list + * is traversed on each call to localtime(), in particular from each _db_print. + * This allowed find error rather quickly. As lostIOBitmap have to be allocated + * rather early, one cannot relay on Squid_MaxFD to be correct and SQUID_MAXFD is + * used in stead. 
+ * + * Revision 1.1.2.1.2.1 2000/12/17 14:36:23 hno + * Imported raid from squidng + * * Revision 1.1.2.1 2000/12/17 14:09:08 hno * Imported tcp_async from squidng * Index: squid/src/debug.c diff -u squid/src/debug.c:1.3.22.1 squid/src/debug.c:1.3.22.1.2.1 --- squid/src/debug.c:1.3.22.1 Sun Dec 17 06:09:08 2000 +++ squid/src/debug.c Fri Dec 22 04:43:01 2000 @@ -244,7 +244,7 @@ { debug(0, 0) ("assertion failed: %s:%d: \"%s\"\n", file, line, msg); if (!shutting_down) - abort(); + abend(); } /* Index: squid/src/defines.h diff -u squid/src/defines.h:1.3.16.1.2.1 squid/src/defines.h:1.3.16.1.2.1.4.2 --- squid/src/defines.h:1.3.16.1.2.1 Sun Dec 17 05:55:39 2000 +++ squid/src/defines.h Fri Dec 22 04:44:42 2000 @@ -287,3 +287,95 @@ #define CBDATA_TYPE(type) static cbdata_type CBDATA_##type = 0 #define CBDATA_INIT_TYPE(type) (CBDATA_##type = cbdataAddType(CBDATA_##type, #type, sizeof(type))) +/* cio cio san */ +#if defined( TCP_ASYNC ) + +/* It's Butterfly's real name */ +#define SIGCIOCIOSAN ( SIGRTMIN + 8 ) + +#define SIGINFO_PER_CALL 128 + +#if defined( __GNUC__ ) +#define CURRENT_FUNCTION_NAME __PRETTY_FUNCTION__ +#else +#define CURRENT_FUNCTION_NAME __FUNC__ +#endif + +#define CLEAR_CAN_READ_INTERNAL( fd ) \ + debug( 90, 9 )( CURRENT_FUNCTION_NAME ": cleared can_read.\n" ); \ + fd_table[ fd ].flags.can_read = 0; \ + if( !fd_table[ fd ].flags.can_write ) clearLostIOBitmap( fd ) + +#define CLEAR_CAN_WRITE_INTERNAL( fd ) \ + debug( 90, 9 )( CURRENT_FUNCTION_NAME ": cleared can_write.\n" ); \ + fd_table[ fd ].flags.can_write = 0; \ + if( !fd_table[ fd ].flags.can_read ) clearLostIOBitmap( fd ) + +#define CLEAR_CAN_READ( fd ) do { \ + CLEAR_CAN_READ_INTERNAL( fd ); \ + ++statCounter.syscalls.async.tail_reads; } while( 0 ) + +#define CLEAR_CAN_WRITE( fd ) do { \ + CLEAR_CAN_WRITE_INTERNAL( fd ); \ + ++statCounter.syscalls.async.tail_writes; } while( 0 ) + +#define CLEAR_CAN_READ_IF_EMPTY( requestedSize, actuallyRead, fd ) \ + do \ + { \ + if( ( actuallyRead > 0 ) && ( 
actuallyRead < requestedSize ) ) \ + { \ + /* CLEAR_CAN_READ_INTERNAL( fd ); */ \ + ++statCounter.syscalls.async.drain_reads; \ + } \ + } \ + while( 0 ) + +#define CLEAR_CAN_WRITE_IF_FULL( requestedSize, actuallyWritten, fd ) \ + do \ + { \ + if( ( actuallyWritten > 0 ) && ( actuallyWritten < requestedSize ) ) \ + { \ + /* CLEAR_CAN_WRITE_INTERNAL( fd ); */ \ + ++statCounter.syscalls.async.drain_writes; \ + } \ + } \ + while( 0 ) + +/* !TCP_ASYNC */ +#else + +#define CLEAR_CAN_READ( fd ) do {} while( 0 ) +#define CLEAR_CAN_WRITE( fd ) do {} while( 0 ) +#define CLEAR_CAN_READ_IF_EMPTY( requestedSize, actuallyRead, fd ) \ + do {} while( 0 ) +#define CLEAR_CAN_WRITE_IF_FULL( requestedSize, actuallyWritten, fd ) \ + do {} while( 0 ) + +/* TCP_ASYNC */ +#endif + +#define CHECK_FD( fd ) assert( ( 0 <= fd ) && ( fd < Squid_MaxFD ) ) + + +#if defined( DISK_REPL ) + +/* errno returned if drive dies */ +#define DISK_DIES_ERRNO EIO /* ENXIO */ +/* how many DISK_DIES_ERRNOs tolerate before declaring drive dead */ +#define ERRORS_THRESHOLD 2 + +#define RAID_CTRL_SIG SIGRTMIN + 9 + +/* RAID_REPL */ +#endif + +/* how many global syscall callbacks can be registered */ +#define MAX_SYSCALLCB 128 + +#define FOREACHSDINDEX( var ) \ + for( var = 0 ; var < Config.cacheSwap.n_configured ; ++var ) + +#define FOREACHSD( var ) \ + for( var = INDEXSD( 0 ) ; \ + var < INDEXSD( Config.cacheSwap.n_configured ) ; ++var ) + Index: squid/src/disk.c diff -u squid/src/disk.c:1.4.20.1 squid/src/disk.c:1.4.20.1.2.1 --- squid/src/disk.c:1.4.20.1 Sun Dec 17 06:09:08 2000 +++ squid/src/disk.c Sun Dec 17 06:36:23 2000 @@ -71,7 +71,7 @@ xstrerror()); fd = DISK_ERROR; } else { - debug(6, 5) ("file_open: FD %d\n", fd); + debug(91, 9) ("file_open: FD %d (%s)\n", fd, path); commSetCloseOnExec(fd); fd_open(fd, FD_FILE, path); } @@ -192,7 +192,7 @@ errno = 0; if (fdd->write_q->file_offset != -1) lseek(fd, fdd->write_q->file_offset, SEEK_SET); - len = write(fd, + len = comm_write_wrapper(fd, fdd->write_q->buf + 
fdd->write_q->buf_offset, fdd->write_q->len - fdd->write_q->buf_offset); debug(6, 3) ("diskHandleWrite: FD %d len = %d\n", fd, len); @@ -233,17 +233,8 @@ } } while ((q = fdd->write_q)); } - else - { - CLEAR_CAN_WRITE( fd ); - } len = 0; } - else - { - CLEAR_CAN_WRITE_IF_FULL( fdd->write_q->len - fdd->write_q->buf_offset, - len, fd ); - } if (q != NULL) { /* q might become NULL from write failure above */ q->buf_offset += len; Index: squid/src/enums.h diff -u squid/src/enums.h:1.6.4.1.2.1 squid/src/enums.h:1.6.4.1.2.1.4.1 --- squid/src/enums.h:1.6.4.1.2.1 Sun Dec 17 05:55:39 2000 +++ squid/src/enums.h Sun Dec 17 06:36:24 2000 @@ -705,3 +705,15 @@ CBDATA_storeIOState, CBDATA_FIRST_CUSTOM_TYPE } cbdata_type; + +typedef enum { + SYSCALLCB_CONTINUE, + SYSCALLCB_ABORT +} syscallcb_result; + +typedef enum { + SYSCALL_READ = 0, + SYSCALL_WRITE, + SYSCALL_MAX +} syscall_type; + Index: squid/src/fd.c diff -u squid/src/fd.c:1.4.10.1 squid/src/fd.c:1.4.10.1.2.2 --- squid/src/fd.c:1.4.10.1 Sun Dec 17 06:09:08 2000 +++ squid/src/fd.c Fri Dec 22 04:45:44 2000 @@ -88,14 +88,18 @@ commUpdateWriteBits(fd, NULL); memset(F, '\0', sizeof(fde)); F->timeout = 0; +#if defined( DISK_REPL ) + F->swap_dir = -1; +#endif + CLEAR_CAN_READ( fd ); + CLEAR_CAN_WRITE( fd ); } void fd_open(int fd, unsigned int type, const char *desc) { - fde *F; + fde *F = &fd_table[fd]; assert(fd >= 0); - F = &fd_table[fd]; if (F->flags.open) { debug(51, 1) ("WARNING: Closing open FD %4d\n", fd); fd_close(fd); @@ -108,6 +112,9 @@ if (desc) xstrncpy(F->desc, desc, FD_DESC_SZ); Number_FD++; +#if defined( DISK_REPL ) + F->swap_dir = -1; +#endif } void Index: squid/src/ftp.c diff -u squid/src/ftp.c:1.5.10.1.4.1 squid/src/ftp.c:1.5.10.1.4.1.2.1 --- squid/src/ftp.c:1.5.10.1.4.1 Sun Dec 17 06:09:08 2000 +++ squid/src/ftp.c Sun Dec 17 06:36:24 2000 @@ -879,7 +879,7 @@ #endif memset(ftpState->data.buf + ftpState->data.offset, '\0', read_sz); statCounter.syscalls.sock.reads++; - len = read(fd, ftpState->data.buf + 
ftpState->data.offset, read_sz); + len = comm_read_wrapper(fd, ftpState->data.buf + ftpState->data.offset, read_sz); if (len > 0) { fd_bytes(fd, len, FD_READ); #if DELAY_POOLS @@ -888,7 +888,6 @@ kb_incr(&statCounter.server.all.kbytes_in, len); kb_incr(&statCounter.server.ftp.kbytes_in, len); ftpState->data.offset += len; - CLEAR_CAN_READ_IF_EMPTY( read_sz, len, fd ); } debug(9, 5) ("ftpDataRead: FD %d, Read %d bytes\n", fd, len); if (len > 0) { @@ -903,7 +902,6 @@ if (len < 0) { debug(50, ignoreErrno(errno) ? 3 : 1) ("ftpDataRead: read error: %s\n", xstrerror()); if (ignoreErrno(errno)) { - CLEAR_CAN_READ( fd ); commSetSelect(fd, COMM_SELECT_READ, ftpDataRead, @@ -1251,21 +1249,18 @@ } assert(ftpState->ctrl.offset < ftpState->ctrl.size); statCounter.syscalls.sock.reads++; - len = read(fd, + len = comm_read_wrapper(fd, ftpState->ctrl.buf + ftpState->ctrl.offset, ftpState->ctrl.size - ftpState->ctrl.offset); if (len > 0) { fd_bytes(fd, len, FD_READ); kb_incr(&statCounter.server.all.kbytes_in, len); kb_incr(&statCounter.server.ftp.kbytes_in, len); - CLEAR_CAN_READ_IF_EMPTY( ftpState->ctrl.size - ftpState->ctrl.offset, - len, fd ); } debug(9, 5) ("ftpReadControlReply: FD %d, Read %d bytes\n", fd, len); if (len < 0) { debug(50, ignoreErrno(errno) ? 
3 : 1) ("ftpReadControlReply: read error: %s\n", xstrerror()); if (ignoreErrno(errno)) { - CLEAR_CAN_READ( fd ); ftpScheduleReadControlReply(ftpState, 0); } else { ftpFailed(ftpState, ERR_READ_ERROR); Index: squid/src/gopher.c diff -u squid/src/gopher.c:1.5.4.1.4.1 squid/src/gopher.c:1.5.4.1.4.1.2.1 --- squid/src/gopher.c:1.5.4.1.4.1 Sun Dec 17 06:09:08 2000 +++ squid/src/gopher.c Sun Dec 17 06:36:24 2000 @@ -612,7 +612,7 @@ #endif /* leave one space for \0 in gopherToHTML */ statCounter.syscalls.sock.reads++; - len = read(fd, buf, read_sz); + len = comm_read_wrapper(fd, buf, read_sz); if (len > 0) { fd_bytes(fd, len, FD_READ); #if DELAY_POOLS @@ -620,7 +620,6 @@ #endif kb_incr(&statCounter.server.all.kbytes_in, len); kb_incr(&statCounter.server.other.kbytes_in, len); - CLEAR_CAN_READ_IF_EMPTY( read_sz, len, fd ); commSetTimeout(fd, Config.Timeout.read, NULL, NULL); IOStats.Gopher.reads++; for (clen = len - 1, bin = 0; clen; bin++) @@ -631,7 +630,6 @@ if (len < 0) { debug(50, 1) ("gopherReadReply: error reading: %s\n", xstrerror()); if (ignoreErrno(errno)) { - CLEAR_CAN_READ( fd ); commSetSelect(fd, COMM_SELECT_READ, gopherReadReply, data, 0); } else if (entry->mem_obj->inmem_hi == 0) { ErrorState *err; Index: squid/src/http.c diff -u squid/src/http.c:1.5.10.1.4.1 squid/src/http.c:1.5.10.1.4.1.2.1 --- squid/src/http.c:1.5.10.1.4.1 Sun Dec 17 06:09:08 2000 +++ squid/src/http.c Sun Dec 17 06:36:24 2000 @@ -483,7 +483,7 @@ read_sz = delayBytesWanted(delay_id, 1, read_sz); #endif statCounter.syscalls.sock.reads++; - len = read(fd, buf, read_sz); + len = comm_read_wrapper(fd, buf, read_sz); debug(11, 5) ("httpReadReply: FD %d: len %d.\n", fd, len); if (len > 0) { fd_bytes(fd, len, FD_READ); @@ -497,7 +497,6 @@ for (clen = len - 1, bin = 0; clen; bin++) clen >>= 1; IOStats.Http.read_hist[bin]++; - CLEAR_CAN_READ_IF_EMPTY( read_sz, len, fd ); } if (!httpState->reply_hdr && len > 0) { /* Skip whitespace */ @@ -513,7 +512,6 @@ debug(50, 2) ("httpReadReply: FD %d: read 
failure: %s.\n", fd, xstrerror()); if (ignoreErrno(errno)) { - CLEAR_CAN_READ( fd ); commSetSelect(fd, COMM_SELECT_READ, httpReadReply, httpState, 0); } else if (entry->mem_obj->inmem_hi == 0) { ErrorState *err; Index: squid/src/main.c diff -u squid/src/main.c:1.6.4.1.4.1 squid/src/main.c:1.6.4.1.4.1.2.2 --- squid/src/main.c:1.6.4.1.4.1 Sun Dec 17 06:09:08 2000 +++ squid/src/main.c Fri Dec 22 04:47:53 2000 @@ -493,6 +493,10 @@ refererOpenLog(); httpHeaderInitModule(); /* must go before any header processing (e.g. the one in errorInitialize) */ httpReplyInitModule(); /* must go before accepting replies */ +#if defined( TCP_ASYNC ) + /* must go before error text loading */ + initCommCio(); +#endif errorInitialize(); accessLogInit(); #if USE_IDENT @@ -564,12 +568,17 @@ } #if defined( TCP_ASYNC ) - initCommCio(); + initCommCio2(); +#endif +#if defined( DISK_REPL ) + raidInitListener(); #endif - configured_once = 1; } +void funJustBeforeMain() +{} + int main(int argc, char **argv) { @@ -622,6 +631,8 @@ mainParseOptions(argc, argv); + abendInit( argc, argv ); + /* parse configuration file * note: in "normal" case this used to be called from mainInitialize() */ { @@ -722,7 +733,7 @@ if ((loop_delay = eventNextTime()) < 0) loop_delay = 0; #if defined( TCP_ASYNC ) - /* cannot remove poll/select loops at this time, bacause disk IO + /* cannot remove poll/select loops at this time, because disk IO doesn't use signals yet */ switch( commCioLoop() ) { case COMM_OK: @@ -751,10 +762,11 @@ /* PROFILE_SQUID */ #endif #if HAVE_POLL - switch (comm_poll(loop_delay)) { + switch (comm_poll(loop_delay)) #else - switch (comm_select(loop_delay)) { + switch (comm_select(loop_delay)) #endif + { case COMM_OK: errcount = 0; /* reset if successful */ break; @@ -778,6 +790,9 @@ return 0; } +void funJustAfterMain() +{} + static void sendSignal(void) { Index: squid/src/pinger.c diff -u squid/src/pinger.c:1.3 squid/src/pinger.c:1.3.24.1 --- squid/src/pinger.c:1.3 Mon Oct 23 08:04:21 2000 +++ 
squid/src/pinger.c Fri Dec 22 04:49:29 2000 @@ -352,6 +352,9 @@ } +void funJustBeforeMain() +{} + int main(int argc, char *argv[]) { @@ -409,4 +412,9 @@ fprintf(stderr, "%s: ICMP support not compiled in.\n", argv[0]); return 1; } + #endif /* USE_ICMP */ + +void funJustAfterMain() +{} + Index: squid/src/protos.h diff -u squid/src/protos.h:1.6.4.1.2.1.2.1 squid/src/protos.h:1.6.4.1.2.1.2.1.2.2 --- squid/src/protos.h:1.6.4.1.2.1.2.1 Sun Dec 17 06:09:08 2000 +++ squid/src/protos.h Fri Dec 22 04:50:18 2000 @@ -149,6 +149,8 @@ CWCB * handler, void *handler_data, FREE *); +extern int comm_read_wrapper( int fd, char *buf, int size ); +extern int comm_write_wrapper( int fd, char *buf, int size ); extern void comm_write_mbuf(int fd, MemBuf mb, CWCB * handler, void *handler_data); extern void commCallCloseHandlers(int fd); extern int commSetTimeout(int fd, int, PF *, void *); @@ -156,6 +158,34 @@ extern int ignoreErrno(int); extern void commCloseAllSockets(void); +extern int addSyscallCB( SyscallCBInvoc *hook, SyscallCallBack cb, void *data ); +extern int delSyscallCB( SyscallCBInvoc *hook, SyscallCallBack cb ); +extern int addReadHook( SyscallCallBack cb, void *data ); +extern int addWriteHook( SyscallCallBack cb, void *data ); +extern syscallcb_result callSyscallCBHook( SyscallCBInvoc *hook, + syscall_type call, + int fd, int *errNo ); +extern syscallcb_result callReadHook( int fd, int *errNo ); +extern syscallcb_result callWriteHook( int fd, int *errNo ); +extern syscallcb_result echoSyscallCB( syscall_type call, + int fd, int *errNo, + void *data ); + +/* raid.c */ +#if defined( DISK_REPL ) +extern syscallcb_result watchDiskDies( syscall_type call, + int fd, int *errNo, + void *data ); +extern int offlineSwapDir( int index ); +extern int onlineSwapDir( int index ); +syscallcb_result simulateCrash( syscall_type call, + int fd, int *errNo, void *data ); +int findFirstOnlineSD(); +int raidInit(); +void raidInitListener(); +/* DISK_REPL */ +#endif + /* * comm_select.c @@ 
-1229,6 +1259,9 @@ extern int commCioLoop(); extern int cioCatchLostIO(); extern int initCommCio(); +extern int initCommCio2(); +extern void clearLostIOBitmap( int fd ); +extern void setLostIOBitmap( int fd ); #endif extern int checkAsyncFlag( int fd ); /* @@ -1249,3 +1282,31 @@ #if URL_CHECKSUM_DEBUG extern unsigned int url_checksum(const char *url); #endif + +/* + * abend.c + */ +/** initialises abend sub-system. Should be called from main directly + (that is, not from function called from main. */ +void abendInit( int argc, char **argv ); + +/** prints current stack trace to the debug( 92, 2 ). Tries to resolve + address to symbol name either through bfd interfaces or through + dlsym. If you see spurious `sigaction' in backtrace, read it as + __restore() --- function call inserted into stack by kernel when + vectoring control to the signal handler. Problem is that __restore + is not exported by libc. */ +void printBacktrace(); + +/** abnormal end. Prints backtrace them either dumps core into + specified directory (/tmp by default --- XXX Unixism) or tries to + attach debugger or _exits */ +void abend(); + +/** resolves address to the symbol name. + If exactAddress is not NULL, address of the beginning of the + symbol in question returned there. 
*/ +const char *getNameByAddress( void *address, void **exactAddress ); + +/** resolves symbol name to the address */ +void *getAddressFor( char *name ); Index: squid/src/pump.c diff -u squid/src/pump.c:1.4.14.1.4.1 squid/src/pump.c:1.4.14.1.4.1.2.1 --- squid/src/pump.c:1.4.14.1.4.1 Sun Dec 17 06:09:08 2000 +++ squid/src/pump.c Sun Dec 17 06:36:24 2000 @@ -230,17 +230,15 @@ int len = 0; errno = 0; statCounter.syscalls.sock.reads++; - len = read(fd, buf, bytes_to_read); + len = comm_read_wrapper(fd, buf, bytes_to_read); fd_bytes(fd, len, FD_READ); debug(61, 5) ("pumpReadFromClient: FD %d: len %d.\n", fd, len); if (len > 0) { (void) 0; /* continue */ - CLEAR_CAN_READ_IF_EMPTY( bytes_to_read, len, fd ); } else if (len < 0) { debug(61, 2) ("pumpReadFromClient: FD %d: read failure: %s.\n", fd, xstrerror()); if (ignoreErrno(errno)) { - CLEAR_CAN_READ( fd ); debug(61, 5) ("pumpReadFromClient: FD %d: len %d and ignore!\n", fd, len); commSetSelect(fd, Index: squid/src/raid.c diff -u /dev/null squid/src/raid.c:1.1.2.2 --- /dev/null Tue Sep 28 18:39:43 2004 +++ squid/src/raid.c Fri Dec 22 04:52:24 2000 @@ -0,0 +1,660 @@ + +/* + * $Id$ + * + * DEBUG: section 91 RAID support + * AUTHOR: Nikita Danilov + * COPYRIGHT: This file is Copyright 2000 by Hans Reiser + * + * SQUID Internet Object Cache http://squid.nlanr.net/Squid/ + * ---------------------------------------------------------- + * + * Squid is the result of efforts by numerous individuals from the + * Internet community. Development is led by Duane Wessels of the + * National Laboratory for Applied Network Research and funded by the + * National Science Foundation. Squid is Copyrighted (C) 1998 by the + * Regents of the University of California. Please see the COPYRIGHT + * file for full details. Squid incorporates software developed + * and/or copyrighted by other sources such as this file. Please see + * the CREDITS file for full details. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. + * */ + +/* + + This is main part of squid sub-system through which one can + dynamically switch squid cache_dirs offline and back online. + + The main and initial reason for this was support for the disk + hot-replacement (when supported by hardware), but this can be used + for any purpose. + + OUTLINE: + + (*) kernel or user notifies squid about cache directory state + change. This is done through signals. Such signal should come with + siginfo structure. Hence you cannot use kill. Simple utility ss (for + Send-signal, not Schutzstaffel) is provided for this purpose. + + (*) from the value in siginfo struct, squid extracts number of + cache_dir (index in Config.cacheSwap.swapDirs) and direction of + status change (00 --- goes offline, 01 --- goes online). + + (*) squid marks cache_dir offline, updates total number of online + caches and calls back-end specific callback (through function + pointer in _SwapDir.offline, _SwapDir.online). If all caches are + offline, squid dies miserably (this is not logically necessary: we + can instead just serve all requests from network, but this will + require changes all over the code, I guess). + + (*) squid executes command, specified by new config option: + offline_notify_cmd or online_notify_cmd.
This command is supplied + with 3 arguments: path of the cache_dir in question, its index and + device where it's mounted (as indicated by watch_failure + option). Reasonable command may send message to the operator and + umount drive. + + (*) from this moment on, store dir selection functions + (storeDirSelectSwapDirLeastLoad() and + storeDirSelectSwapDirRoundRobin()) will skip offline cache and + Butterfly reselection will try to re-hash requests to different + directories. + + HOT-REPLACEMENT: + + This is mainly in raidInitListener(). It instructs kernel (by + issuing special ioctls) to notify Squid when SCSI drive dies. + + + There is also old errno-based error detection, described below. Per + dir errnos and thresholds were added. + + ---------------- old outdated description --------------------- + + Here is general outline: + [posted as <14822.56117.262849.688030@beta.namesys.com> in + squidng@cacheboy.net] + + * In struct _fde I have added backpointer to the swapdir this file + descriptor serves. This is currently only maintained for the butterfly + fds but this ought to be improved. + + * All calls to read/write were replaced with calls to + comm_(read|write)_wrapper (This is logical thing to do anyway, + right?). + + * Wrappers call watchDiskDies(). These calls are done through some + callback mechanism, so anyone can dynamically add function(s) to be + called on each read or write. + + * watchDiskDies() checks for some magic errno (I use ENXIO, but + ENODEV, ENOTSUP or ENOMEDIUM are possible candidates too.). + + * If there were more than ERRORS_THRESHOLD errnos watchDiskDies() + declares drive dead (offline) and calls offlineSwapDir() taking swap + dir number from the fde. + + * offlineSwapDir() just updates flags in swapdir. These flags are + taken into account by storeBfSelectSwapDir(), that chooses only + online dirs. Other backends should be updated too. + + There are obvious deficiencies in this prototype: only global + counter of errnos is kept.
Thus if there are two drives dying at the + same time we cannot distinguish their errnos. I am not sure this is + at all important. Also, there are insufficient provisions for + adequate debugging yet. Some way should be devised to force + reads/writes from certain device to return given errno. This can be + put into kernel (as special block device driver and I have seen + something about this on fsdevel list, or as patch for reiserfs) or + we can do this in the squid. This is simpler but we have then keep + track which device fd is attached to. I think this can be done as + special read/write-callback fired before watchDiskDies() and setting + errno. But how to control it? Signals are over-used already. */ + +/* TODO: + + \begin{itemize} + \item proper debugging (in progress) + \item store information about online/offline state. Think squid + crashes when some disks are offline and restarts. We would + like to start without going through errnos for these disks + again + \item per drive errno counters (and settings: DISK_DIES_ERRNO and + ERRORS_THRESHOLD) + \item install fde.swap_dir in backends other than butterfly + \item update cache_dir selection functions other than storeBfSelectSwapDir + \end{itemize} +*/ + +#include "squid.h" + +#if defined( DISK_REPL ) + +#include + +#include +#define CONFIG_SCSI_HOT_REPLACE +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG( format, args... 
) \ + debug( 91, 9 )( CURRENT_FUNCTION_NAME " (%s:%i): " format "\n", \ + __FILE__, __LINE__ , ##args ) + +#define TRACE DEBUG( "" ) +#define LOGERROR debug( 91, 3 ) + +typedef enum { goOffline = 0, goOnline = 1, wrongEvent } CtrlEvent; + +static sig_atomic_t hasCtrlEvent = 0; +static CtrlEvent ctrlEvent = wrongEvent; +static int ctrlSwapDir = -1; + +void raidCtrl( int sigNum, siginfo_t *info, void *data ); +static char *getDeviceByCache( char *path ); +EVH doRaidCtrl; +static void registerRaidCtrlEvent(); + +static void notifyAboutOffline( SwapDir *dir ); +static void notifyAboutOnline( SwapDir *dir ); +static void notifyAboutStateSwitch( char *cmd, SwapDir *dir ); + +int raidInit() +{ + struct sigaction act; + + act.sa_sigaction = raidCtrl; + act.sa_flags = SA_SIGINFO; + if( sigaction( RAID_CTRL_SIG, &act, NULL ) == 0 ) + { + registerRaidCtrlEvent(); + } + else + { + DEBUG( "sigaction failed: %s", xstrerror() ); + } + /* addReadHook( watchDiskDies, NULL ); + addWriteHook( watchDiskDies, NULL ); */ + return 1; +} + +void raidInitListener() +{ + wordlist *watcher; + + for( watcher = Config.disk_repl_watch_list ; + watcher != NULL ; watcher = watcher -> next ) + { + if( ( watcher -> key != NULL ) && + ( watcher -> next != NULL ) && ( watcher -> next -> key != NULL ) ) + { + int fd; + + fd = open( watcher -> key, O_RDONLY ); + if( fd != -1 ) + { + struct SCSI_hot_replace repl; + char *path; + int swap; + + repl.offline_code = ( int ) watcher; + repl.signo = RAID_CTRL_SIG; + repl.pid = getpid(); + repl.sigval = -1; + + path = watcher -> next -> key; + FOREACHSDINDEX( swap ) + { + if( !strcmp( INDEXSD( swap ) -> path, path ) ) + { + if( swap > 0xff ) + { + LOGERROR + ( "raidInitListener: sorry, index too big: %i\n", + swap ); + } + else + { + repl.sigval = ( goOffline << 8 ) | swap; + } + break; + } + } + + if( repl.sigval != -1 ) + { + if( ioctl( fd, SCSI_IOCTL_ADD_FAILURE_NOTIFY, &repl ) != -1 ) + { + DEBUG( "adding watcher: %s -> %s (%i)", + watcher -> key, path, 
swap );
+                    }
+                    else
+                    {
+                        LOGERROR( "raidInitListener: ioctl: %s\n", xstrerror() );
+                    }
+                }
+                else
+                {
+                    LOGERROR
+                        ( "raidInitListener: cannot find swap_dir: %s\n", path );
+                }
+                close( fd );
+            }
+            else
+            {
+                LOGERROR( "raidInitListener: error opening %s: %s\n",
+                          watcher -> key, xstrerror() );
+            }
+        }
+        else
+        {
+            LOGERROR
+                ( "raidInitListener: wrong syntax of `watch_failure' field\n" );
+            break;
+        }
+        watcher = watcher -> next;
+    }
+}
+
+/*
+ * watchDiskDies
+ *
+ * Syscall callback. For a descriptor that belongs to a cache_dir
+ * (fd_table[ fd ].swap_dir != -1) it counts how often the syscall failed
+ * with that cache_dir's raid.offline_errno; once raid.errno_threshold
+ * such failures were counted the cache_dir is turned offline and the
+ * counter is reset.
+ *
+ *   call  - syscall being reported (not used here)
+ *   fd    - descriptor the syscall worked on
+ *   errNo - errno reported for the syscall
+ *   data  - callback cookie (not used here)
+ *
+ * Returns SYSCALLCB_CONTINUE. CHECK_FD() is defined outside this part of
+ * the file and may leave the function earlier.
+ */
+syscallcb_result watchDiskDies( syscall_type call,
+                                int fd, int *errNo,
+                                void *data )
+{
+    /* DEBUG( "swap_dir: %i errno: %i (%i)", fd_table[ fd ].swap_dir, *errNo, errno ); */
+    CHECK_FD( fd );
+
+    if( fd_table[ fd ].swap_dir != -1 )
+    {
+        SwapDir *dir;
+
+        assert( ( 0 <= fd_table[ fd ].swap_dir ) &&
+                ( fd_table[ fd ].swap_dir < Config.cacheSwap.n_configured ) );
+        dir = INDEXSD( fd_table[ fd ].swap_dir );
+        /* the counter is only bumped when the errno matches, thanks to
+           short-circuit evaluation */
+        if( ( *errNo != 0 ) &&
+            ( *errNo == dir -> raid.offline_errno ) &&
+            ( ++( dir -> raid.errnos_detected ) >= dir -> raid.errno_threshold ) )
+        {
+            debug( 91, 1 )( "Cache_dir %i is dying...\n", dir -> index );
+            offlineSwapDir( dir -> index );
+            dir -> raid.errnos_detected = 0;
+        }
+    }
+#if 0
+    else if( *errNo == DISK_DIES_ERRNO )
+    {
+        /* If a system drive (not a cache dir drive) dies, we are toast,
+           no hope of continuing without interruption, so print an
+           informative message and bail out. -hans */
+        debug( 91, 1 )( "System drive died. Finis.\n" );
+        shut_down( 0 );
+        return SYSCALLCB_ABORT;
+    }
+#endif
+    return SYSCALLCB_CONTINUE;
+}
+
+/*
+ * offlineSwapDir
+ *
+ * Turn cache_dir `index' offline: close its swap log, leave its
+ * directory, call the store's offline callback (if any) and run the
+ * configured notification command. If this would leave no cache_dir
+ * online, squid is shut down instead and the dir is not flagged.
+ *
+ * Returns 0 when index is out of range or the dir is already offline,
+ * 1 otherwise.
+ */
+int offlineSwapDir( int index )
+{
+    if( index < 0 )
+    {
+        LOGERROR( "offlineSwapDir: %i < 0\n", index );
+        return 0;
+    }
+    else if( index >= Config.cacheSwap.n_configured )
+    {
+        LOGERROR( "offlineSwapDir: %i >= %i\n",
+                  index, Config.cacheSwap.n_configured );
+        return 0;
+    }
+
+    TRACE;
+    if( INDEXSD( index ) -> flags.offline )
+    {
+        DEBUG( "%i already offline", index );
+        return 0;
+    }
+    ++Config.cacheSwap.n_offline;
+    if( Config.cacheSwap.n_offline >= Config.cacheSwap.n_configured )
+    {
+        debug( 91, 1 )( "Last drive dies. Fare thee well.\n" );
+        /* just to maintain invariant that n_offline is the number of
+           swapdirs with (offline == 1) */
+        --Config.cacheSwap.n_offline;
+        shut_down( 0 );
+    }
+    else
+    {
+        SwapDir *dir;
+
+        dir = INDEXSD( index );
+        dir -> flags.offline = 1;
+        dir -> log.close( dir );
+        /* to get out of umount's way; a failure is not fatal but the
+           operator should know why the drive stays busy */
+        if( chdir( "/" ) != 0 )
+        {
+            LOGERROR( "offlineSwapDir: chdir: %s\n", xstrerror() );
+        }
+        if( dir -> offline != NULL )
+        {
+            dir -> offline( dir );
+        }
+        debug( 91, 1 )( "Cache_dir %i turned offline...\n", index );
+        notifyAboutOffline( dir );
+    }
+    return 1;
+}
+
+/*
+ * onlineSwapDir
+ *
+ * Bring cache_dir `index' back online through the store's online
+ * callback. On success the swap log is re-opened, the notification
+ * command is run and the failure notification for the underlying device
+ * (looked up in the watch list) is installed again.
+ *
+ * Returns 0 when index is out of range or the dir is already online,
+ * 1 otherwise -- also when the store could not bring the dir online.
+ */
+int onlineSwapDir( int index )
+{
+    SwapDir *dir;
+
+    if( index < 0 )
+    {
+        LOGERROR( "onlineSwapDir: %i < 0\n", index );
+        return 0;
+    }
+    else if( index >= Config.cacheSwap.n_configured )
+    {
+        LOGERROR( "onlineSwapDir: %i >= %i\n",
+                  index, Config.cacheSwap.n_configured );
+        return 0;
+    }
+
+
+    DEBUG( "index: %i", index );
+    dir = INDEXSD( index );
+    if( !dir -> flags.offline )
+    {
+        DEBUG( "%i already online", index );
+        return 0;
+    }
+    if( ( dir -> online != NULL ) && dir -> online( dir ) )
+    {
+        char *device;
+
+        debug( 91, 1 )( "Cache_dir %i turned online...\n", index );
+        dir -> log.open( dir );
+        dir -> flags.offline = 0;
+        dir -> raid.errnos_detected = 0;
+        /* flags.offline can apparently be set by a store's init code
+           without n_offline being counted; never go below zero */
+        if( Config.cacheSwap.n_offline > 0 )
+        {
+            --Config.cacheSwap.n_offline;
+        }
+        notifyAboutOnline( dir );
+        device = getDeviceByCache( dir -> path );
+        if( device != NULL )
+        {
+            int fd;
+
+            fd = open( device, O_RDONLY );
+            if( fd != -1 )
+            {
+                struct SCSI_hot_replace repl;
+
+                /* NOTE(review): a pointer is squeezed into an int here;
+                   confirm what the driver expects in offline_code */
+                repl.offline_code = ( int ) device;
+                repl.signo = RAID_CTRL_SIG;
+                repl.pid = getpid();
+                /* same encoding raidCtrl() decodes: event in the upper
+                   bits, swap dir index in the low byte */
+                repl.sigval = ( goOffline << 8 ) | index;
+
+                if( ioctl( fd, SCSI_IOCTL_ADD_FAILURE_NOTIFY, &repl ) != -1 )
+                {
+                    DEBUG( "re-adding watcher: %s -> %s (%i)",
+                           device, dir -> path, index );
+                }
+                else
+                {
+                    LOGERROR( "onlineSwapDir: ioctl: %s\n", xstrerror() );
+                }
+                close( fd );
+            }
+            else
+            {
+                LOGERROR( "Cannot re-establish fault notification at %s:%s (%s)\n",
+                          dir -> path, device, xstrerror() );
+            }
+        }
+    }
+    else
+    {
+        debug( 91, 1 )( "Cannot turn online cache_dir %i...\n", index );
+    }
+    return 1;
+}
+
+/*
+ * simulateCrash
+ *
+ * Syscall callback used for crash simulation. Once squid_curtime has
+ * reached crash->crashTime, every syscall on a descriptor of cache_dir
+ * crash->swapDirNo gets its errno replaced by that dir's
+ * raid.offline_errno. `data' must point to a SimulateCrashData.
+ *
+ * Always returns SYSCALLCB_CONTINUE.
+ */
+syscallcb_result simulateCrash( syscall_type call,
+                                int fd, int *errNo, void *data )
+{
+    SimulateCrashData *crash = ( SimulateCrashData * ) data;
+
+    /* DEBUG( "fd: %i swap_dir: %i time: %li fireTime: %li", */
+    /*         fd, fd_table[ fd ].swap_dir, squid_curtime, crash -> crashTime ); */
+    if( ( fd_table[ fd ].swap_dir == crash -> swapDirNo ) &&
+        ( squid_curtime >= crash -> crashTime ) )
+    {
+        static int sparseLogging = 0;
+
+        *errNo = INDEXSD( crash -> swapDirNo ) -> raid.offline_errno;
+        ++sparseLogging;
+        /* log only when the hit counter is a power of two */
+        if( !( sparseLogging & ( sparseLogging - 1 ) ) )
+        {
+            DEBUG( "And therefore I claim swap_dir %i have to be ruined (%i:%i)",
+                   crash -> swapDirNo, sparseLogging, *errNo );
+        }
+    }
+    return SYSCALLCB_CONTINUE;
+}
+
+/*
+ * findFirstOnlineSD
+ *
+ * Returns the index of the first cache_dir that is not offline.
+ * Asserts when every cache_dir is offline.
+ */
+int findFirstOnlineSD()
+{
+    int i;
+
+    assert( Config.cacheSwap.n_configured > Config.cacheSwap.n_offline );
+
+    FOREACHSDINDEX( i )
+    {
+        if( !( INDEXSD( i ) -> flags.offline ) )
+        {
+            return i;
+        }
+    }
+    assert( 0 );
+    return 0;
+}
+
+/*
+ * raidCtrl
+ *
+ * Signal handler. Stores the request carried in the signal value
+ * (event in the upper bits, swap dir index in the low byte) for
+ * doRaidCtrl(). Only one request is remembered: while one is pending,
+ * further signals are dropped.
+ */
+void raidCtrl( int sigNum, siginfo_t *info, void *data )
+{
+    /* don't risk asserts from signal handler, Just for documentation. */
+    assert( 1 || ( sigNum == RAID_CTRL_SIG ) );
+    assert( 1 || ( info != NULL ) );
+    assert( 1 || ( info -> si_code == SI_QUEUE ) );
+
+    /* don't bother to queue events. */
+    if( hasCtrlEvent == 0 )
+    {
+        ++hasCtrlEvent;
+        ctrlEvent = info -> si_value.sival_int >> 8;
+        ctrlSwapDir = info -> si_value.sival_int & 0xff;
+    }
+}
+
+/*
+ * doRaidCtrl
+ *
+ * Event callback: executes the request left by raidCtrl(), if any, and
+ * re-registers itself through registerRaidCtrlEvent().
+ */
+void doRaidCtrl( void *data )
+{
+    DEBUG( "event: %i dir: %i", ctrlEvent, ctrlSwapDir );
+    if( hasCtrlEvent )
+    {
+        /* Take a private copy before the slot is released: as soon as
+           hasCtrlEvent is 0 the signal handler may overwrite ctrlEvent
+           and ctrlSwapDir with the next request. */
+        int event = ctrlEvent;
+        int swapDir = ctrlSwapDir;
+
+        hasCtrlEvent = 0;
+        switch( event )
+        {
+        case goOffline:
+            DEBUG( "offlineSwapDir" );
+            offlineSwapDir( swapDir );
+            break;
+        case goOnline:
+            DEBUG( "onlineSwapDir" );
+            onlineSwapDir( swapDir );
+            break;
+        default:
+            DEBUG( "Wrong RAID control request" );
+            break;
+        }
+    }
+    registerRaidCtrlEvent();
+}
+
+/* Schedule the next doRaidCtrl() run through eventAdd(). */
+static void registerRaidCtrlEvent()
+{
+    eventAdd( "RAID control", doRaidCtrl, NULL, 1, 0 );
+}
+
+/* Run Config.offline_notify_cmd (when configured) for `dir'. */
+static void notifyAboutOffline( SwapDir *dir )
+{
+    assert( dir != NULL );
+
+    if( Config.offline_notify_cmd != NULL )
+    {
+        notifyAboutStateSwitch( Config.offline_notify_cmd, dir );
+    }
+}
+
+/* Run Config.online_notify_cmd (when configured) for `dir'. */
+static void notifyAboutOnline( SwapDir *dir )
+{
+    assert( dir != NULL );
+
+    if( Config.online_notify_cmd != NULL )
+    {
+        notifyAboutStateSwitch( Config.online_notify_cmd, dir );
+    }
+}
+
+/*
+ * getDeviceByCache
+ *
+ * Config.disk_repl_watch_list is walked as a flat list of
+ * (device, cache_dir path) pairs. Returns the device entry whose
+ * partner equals `path', or NULL when there is none. The returned
+ * string belongs to the list.
+ */
+static char *getDeviceByCache( char *path )
+{
+    wordlist *watcher;
+
+    assert( path != NULL );
+
+    for( watcher = Config.disk_repl_watch_list ; watcher != NULL ;
+         watcher = watcher -> next )
+    {
+        if( watcher -> next == NULL )
+        {
+            /* dangling device without a cache_dir: stop here, stepping
+               over it would dereference NULL in the loop increment */
+            break;
+        }
+        if( ( watcher -> key != NULL ) && ( watcher -> next -> key != NULL ) &&
+            !strcmp( watcher -> next -> key, path ) )
+        {
+            return watcher -> key;
+        }
+        /* skip the path half of the pair; the increment above then moves
+           on to the next device */
+        watcher = watcher -> next;
+    }
+    return NULL;
+}
+
+/*
+ * notifyAboutStateSwitch
+ *
+ * Runs "<cmd> <cache_dir path> <cache_dir index> <device>" through
+ * system(); <device> is empty when the watch list has no device for the
+ * cache_dir. A non-zero result of system() is logged.
+ */
+static void notifyAboutStateSwitch( char *cmd, SwapDir *dir )
+{
+    int len;
+    char *cmd_buf;
+    char *dev;
+    int ret;
+
+    assert( cmd != NULL );
+    assert( dir != NULL );
+
+    dev = getDeviceByCache( dir -> path );
+    dev = ( dev == NULL ) ? "" : dev;
+
+    /* 20 covers the separators, the decimal index and the final NUL */
+    len = strlen( cmd ) + strlen( dir -> path ) + 20 + strlen( dev );
+    cmd_buf = xmalloc( len );
+
+    snprintf( cmd_buf, len, "%s %s %i %s", cmd, dir -> path, dir -> index, dev );
+    DEBUG( "about to execute `%s'", cmd_buf );
+    /* XXX system is insecure. Will replace to
+       fork()/dup()/close()/exec() later.
+       Famous last words. */
+    if( ( ret = system( cmd_buf ) ) != 0 )
+    {
+        LOGERROR( "notifyAboutStateSwitch: cannot execute `%s': %i: %s\n",
+                  cmd_buf, ret, xstrerror() );
+    }
+    xfree( cmd_buf );
+}
+
+/* DISK_REPL */
+#else
+
+/* Without DISK_REPL no cache_dir can go offline: the first one will do. */
+int findFirstOnlineSD()
+{
+    return 0;
+}
+
+/* DISK_REPL */
+#endif
+
+/*
+ * $Log$
+ * Revision 1.1.2.2 2000/12/22 12:52:24 nikitadanilov
+ * Errno monitoring commented out. fd checks added. findFirstOnlineSD() have
+ * to be implemented even if DISK_REPL is undefined (required bu store_dir selection
+ * algorithms).
+ *
+ * Revision 1.1.2.1 2000/12/17 14:36:24 hno
+ * Imported raid from squidng
+ *
+ * Revision 1.1.2.11 2000/12/15 18:48:21 nikitadanilov
+ * Re-install fault notification when cache goes back online. Don't monitor errnos beyond cache_dirs.
+ *
+ * Revision 1.1.2.10 2000/12/15 14:45:23 nikitadanilov
+ * Bug fixes in ciociosan: clear can_* flags on file close; corrected work with fd-bitmap etc.
+ *
+ * Revision 1.1.2.9 2000/12/07 16:47:36 nikitadanilov
+ * new code commented
+ *
+ * Revision 1.1.2.8 2000/11/24 20:41:48 nikitadanilov
+ * Execute command specified in squid.conf when drive goes offline
+ *
+ * Revision 1.1.2.7 2000/11/22 14:42:59 nikitadanilov
+ * generally improved
+ *
+ * Revision 1.1.2.6 2000/11/08 11:49:07 nikitadanilov
+ * Dynamic switching cache_dirs offline and back through signals
+ *
+ * Revision 1.1.2.5 2000/10/19 09:57:12 nikitadanilov
+ * Added manual online/offline switching through signals (Signals with siginfo, e.g., sent by sigqueue.).
+ *
+ * Revision 1.1.2.4 2000/10/17 19:28:47 nikitadanilov
+ * Switched to per-swapdir errno settings and counters. 
Fallback findFirstOnlineSD() added: used by storeBfSelectSwapDir() + * + * Revision 1.1.2.3 2000/10/17 12:13:42 nikitadanilov + * Comments added. Check for last drive death corrected. + * + * Revision 1.1.2.2 2000/10/13 15:43:18 nikitadanilov + * RAID disk replcement changes: added per-fs callbacks called on on/of-line switch; Crash simulation added; Code commented. It started to work (with crash simulation). Alpha release state reached. + * + */ + +/* + * Make Linus happy. + * --------------------------------------------------------------------------- + * Local variables: + * c-indent-level: 4 + * c-brace-imaginary-offset: 0 + * c-brace-offset: -4 + * c-argdecl-indent: 4 + * c-label-offset: -4 + * c-continued-statement-offset: 4 + * c-continued-brace-offset: 0 + * indent-tabs-mode: nil + * tab-width: 8 + * End: + */ + Index: squid/src/squid.h diff -u squid/src/squid.h:1.3.16.1.2.1.2.1 squid/src/squid.h:1.3.16.1.2.1.2.1.2.1 --- squid/src/squid.h:1.3.16.1.2.1.2.1 Sun Dec 17 06:09:08 2000 +++ squid/src/squid.h Sun Dec 17 06:36:24 2000 @@ -89,7 +89,13 @@ #define assert(EX) ((EX)?((void)0):xassert("EX", __FILE__, __LINE__)) #endif -#define LINGERING_CLOSE 1 +#define LINGERING_CLOSE 0 + +#if defined( __GNUC__ ) +#define CURRENT_FUNCTION_NAME __PRETTY_FUNCTION__ +#else +#define CURRENT_FUNCTION_NAME "" +#endif /* SIGIO-driven IO hack. Don't look here. @@ -98,11 +104,6 @@ /* #define TCP_ASYNC */ #if defined( TCP_ASYNC ) -/* It's Butterfly's real name */ -#define SIGCIOCIOSAN ( SIGRTMIN + 8 ) - -#define SIGINFO_PER_CALL 128 - /* this should be done before #include , because we need F_SETSIG. 
*/ #define _GNU_SOURCE @@ -111,63 +112,11 @@ #include #endif -#if defined( __GNUC__ ) -#define CURRENT_FUNCTION_NAME __PRETTY_FUNCTION__ -#else -#define CURRENT_FUNCTION_NAME __FUNC__ -#endif - -#define CLEAR_CAN_READ_INTERNAL( fd ) \ - debug( 90, 9 )( CURRENT_FUNCTION_NAME ": cleared can_read.\n" ); \ - fd_table[ fd ].flags.can_read = 0 - -#define CLEAR_CAN_WRITE_INTERNAL( fd ) \ - debug( 90, 9 )( CURRENT_FUNCTION_NAME ": cleared can_write.\n" ); \ - fd_table[ fd ].flags.can_write = 0 - -#define CLEAR_CAN_READ( fd ) do { \ - CLEAR_CAN_READ_INTERNAL( fd ); \ - ++statCounter.syscalls.async.tail_reads; } while( 0 ) - -#define CLEAR_CAN_WRITE( fd ) do { \ - CLEAR_CAN_WRITE_INTERNAL( fd ); \ - ++statCounter.syscalls.async.tail_writes; } while( 0 ) - -#define CLEAR_CAN_READ_IF_EMPTY( requestedSize, actuallyRead, fd ) \ - do \ - { \ - if( ( actuallyRead > 0 ) && ( actuallyRead < requestedSize ) ) \ - { \ - /* CLEAR_CAN_READ_INTERNAL( fd ); */ \ - ++statCounter.syscalls.async.drain_reads; \ - } \ - } \ - while( 0 ) - -#define CLEAR_CAN_WRITE_IF_FULL( requestedSize, actuallyWritten, fd ) \ - do \ - { \ - if( ( actuallyWritten > 0 ) && ( actuallyWritten < requestedSize ) ) \ - { \ - /* CLEAR_CAN_WRITE_INTERNAL( fd ); */ \ - ++statCounter.syscalls.async.drain_writes; \ - } \ - } \ - while( 0 ) - -#else - -#define CLEAR_CAN_READ( fd ) do {} while( 0 ) -#define CLEAR_CAN_WRITE( fd ) do {} while( 0 ) -#define CLEAR_CAN_READ_IF_EMPTY( requestedSize, actuallyRead, fd ) \ - do {} while( 0 ) -#define CLEAR_CAN_WRITE_IF_FULL( requestedSize, actuallyWritten, fd ) \ - do {} while( 0 ) - /* TCP_ASYNC */ #endif + #if HAVE_UNISTD_H #include #endif Index: squid/src/ssl.c diff -u squid/src/ssl.c:1.3.16.1.4.1 squid/src/ssl.c:1.3.16.1.4.1.2.1 --- squid/src/ssl.c:1.3.16.1.4.1 Sun Dec 17 06:09:08 2000 +++ squid/src/ssl.c Sun Dec 17 06:36:24 2000 @@ -200,7 +200,7 @@ read_sz = delayBytesWanted(sslState->delay_id, 1, read_sz); #endif statCounter.syscalls.sock.reads++; - len = read(fd, 
sslState->server.buf + sslState->server.len, read_sz); + len = comm_read_wrapper(fd, sslState->server.buf + sslState->server.len, read_sz); debug(26, 3) ("sslReadServer: FD %d, read %d bytes\n", fd, len); if (len > 0) { fd_bytes(fd, len, FD_READ); @@ -210,7 +210,6 @@ kb_incr(&statCounter.server.all.kbytes_in, len); kb_incr(&statCounter.server.other.kbytes_in, len); sslState->server.len += len; - CLEAR_CAN_READ_IF_EMPTY( read_sz, len, fd ); } cbdataLock(sslState); if (len < 0) { @@ -219,7 +218,6 @@ if (!ignoreErrno(errno)) comm_close(fd); } else { - CLEAR_CAN_READ( fd ); if (len == 0) { comm_close(sslState->server.fd); } @@ -240,7 +238,7 @@ fd, SQUID_TCP_SO_RCVBUF - sslState->client.len, sslState->client.len); statCounter.syscalls.sock.reads++; - len = read(fd, + len = comm_read_wrapper(fd, sslState->client.buf + sslState->client.len, SQUID_TCP_SO_RCVBUF - sslState->client.len); debug(26, 3) ("sslReadClient: FD %d, read %d bytes\n", fd, len); @@ -248,8 +246,6 @@ fd_bytes(fd, len, FD_READ); kb_incr(&statCounter.client_http.kbytes_in, len); sslState->client.len += len; - CLEAR_CAN_READ_IF_EMPTY( SQUID_TCP_SO_RCVBUF - sslState->client.len, - len, fd ); } cbdataLock(sslState); if (len < 0) { @@ -260,7 +256,6 @@ #endif if (ignoreErrno(errno)) { - CLEAR_CAN_READ( fd ); level = 3; } debug(50, level) ("sslReadClient: FD %d: read failure: %s\n", @@ -285,7 +280,7 @@ debug(26, 3) ("sslWriteServer: FD %d, %d bytes to write\n", fd, sslState->client.len); statCounter.syscalls.sock.writes++; - len = write(fd, + len = comm_write_wrapper(fd, sslState->client.buf, sslState->client.len); debug(26, 3) ("sslWriteServer: FD %d, %d bytes written\n", fd, len); @@ -294,7 +289,6 @@ kb_incr(&statCounter.server.all.kbytes_out, len); kb_incr(&statCounter.server.other.kbytes_out, len); assert(len <= sslState->client.len); - CLEAR_CAN_WRITE_IF_FULL( sslState->client.len, len, fd ); sslState->client.len -= len; if (sslState->client.len > 0) { /* we didn't write the whole thing */ @@ -309,10 +303,6 
@@ ("sslWriteServer: FD %d: write failure: %s.\n", fd, xstrerror()); if (!ignoreErrno(errno)) comm_close(fd); - else - { - CLEAR_CAN_WRITE( fd ); - } } if (cbdataValid(sslState)) sslSetSelect(sslState); @@ -329,7 +319,7 @@ debug(26, 3) ("sslWriteClient: FD %d, %d bytes to write\n", fd, sslState->server.len); statCounter.syscalls.sock.writes++; - len = write(fd, + len = comm_write_wrapper(fd, sslState->server.buf, sslState->server.len); debug(26, 3) ("sslWriteClient: FD %d, %d bytes written\n", fd, len); @@ -337,7 +327,6 @@ fd_bytes(fd, len, FD_WRITE); kb_incr(&statCounter.client_http.kbytes_out, len); assert(len <= sslState->server.len); - CLEAR_CAN_WRITE_IF_FULL( sslState->server.len, len, fd ); sslState->server.len -= len; /* increment total object size */ if (sslState->size_ptr) @@ -355,10 +344,6 @@ ("sslWriteClient: FD %d: write failure: %s.\n", fd, xstrerror()); if (!ignoreErrno(errno)) comm_close(fd); - else - { - CLEAR_CAN_WRITE( fd ); - } } if (cbdataValid(sslState)) sslSetSelect(sslState); Index: squid/src/store.c diff -u squid/src/store.c:1.6.4.1.2.1 squid/src/store.c:1.6.4.1.2.1.4.1 --- squid/src/store.c:1.6.4.1.2.1 Sun Dec 17 05:55:39 2000 +++ squid/src/store.c Sun Dec 17 06:36:24 2000 @@ -780,7 +780,7 @@ static time_t last_warn_time = 0; /* walk each fs */ - for (i = 0; i < Config.cacheSwap.n_configured; i++) { + FOREACHSDINDEX( i ) { /* call the maintain function .. 
*/ SD = INDEXSD(i); /* XXX FixMe: This should be done "in parallell" on the different Index: squid/src/store_client.c diff -u squid/src/store_client.c:1.4.14.1.2.1 squid/src/store_client.c:1.4.14.1.2.1.4.1 --- squid/src/store_client.c:1.4.14.1.2.1 Sun Dec 17 05:55:39 2000 +++ squid/src/store_client.c Fri Dec 22 04:55:03 2000 @@ -388,7 +388,7 @@ tlv *tlv_list; tlv *t; int swap_object_ok = 1; - assert(sc->entry->key); /* --sizif */ + assert(sc->entry->hash.key); /* --sizif */ assert(sc->flags.disk_io_pending); sc->flags.disk_io_pending = 0; assert(sc->callback != NULL); Index: squid/src/store_dir.c diff -u squid/src/store_dir.c:1.4.18.1 squid/src/store_dir.c:1.4.18.1.4.1 --- squid/src/store_dir.c:1.4.18.1 Sun Dec 17 05:55:39 2000 +++ squid/src/store_dir.c Sun Dec 17 06:36:24 2000 @@ -48,10 +48,8 @@ void storeDirInit(void) { - int i; SwapDir *sd; - for (i = 0; i < Config.cacheSwap.n_configured; i++) { - sd = &Config.cacheSwap.swapDirs[i]; + FOREACHSD( sd ) { sd->init(sd); } if (0 == strcasecmp(Config.store_dir_select_algorithm, "round-robin")) { @@ -66,14 +64,14 @@ void storeCreateSwapDirectories(void) { - int i; SwapDir *sd; pid_t pid; int status; - for (i = 0; i < Config.cacheSwap.n_configured; i++) { + FOREACHSD( sd ) { + if( sd -> flags.offline ) + continue; if (fork()) continue; - sd = &Config.cacheSwap.swapDirs[i]; sd->newfs(sd); exit(0); } @@ -136,7 +134,11 @@ if (++dirn >= Config.cacheSwap.n_configured) dirn = 0; sd = &Config.cacheSwap.swapDirs[dirn]; - if (sd->cur_size > sd->max_size) + if ((sd->cur_size > sd->max_size) +#if defined( DISK_REPL ) + || sd->flags.offline +#endif + ) continue; break; } @@ -146,9 +148,15 @@ act together like hash buckets. They use a hash based on URL to select which dir the object should go to -- this allows to have only one open() in storeTreeGet(). */ + debug( 47, 9 )( "storeDirSelectSwapDirRoundRobin: fdirn: %i\n", dirn ); + /* may be we should not store re-selected cache_dir index back into + dirn? 
Otherwise, if we have mixture of storetree and usual + caches, round-robin will somewhat stuck into storetree, + giving unfair amount of load to it? --nikita */ if (sd->reselectdir) dirn = sd->reselectdir(e)->index; #endif + debug( 47, 9 )( "storeDirSelectSwapDirRoundRobin: dirn: %i\n", dirn ); return dirn; } @@ -184,9 +192,13 @@ /* Initial defaults */ least_size = Config.cacheSwap.swapDirs[0].cur_size; least_objsize = Config.cacheSwap.swapDirs[0].max_objsize; - for (i = 0; i < Config.cacheSwap.n_configured; i++) { + FOREACHSDINDEX( i ) { SD = &Config.cacheSwap.swapDirs[i]; SD->flags.selected = 0; +#if defined( DISK_REPL ) + if (SD->flags.offline) + continue; +#endif if (SD->flags.read_only) continue; /* Valid for object size check */ @@ -212,6 +224,7 @@ if (dirn >= 0) Config.cacheSwap.swapDirs[dirn].flags.selected = 1; + debug( 47, 9 )( "storeDirSelectSwapDirLeastLoad: dirn: %i\n", dirn ); return dirn; } @@ -251,7 +264,10 @@ e->swap_dirn, e->swap_filen); sd = &Config.cacheSwap.swapDirs[e->swap_dirn]; - sd->log.write(sd, e, op); + if( !sd -> flags.offline ) + { + sd->log.write(sd, e, op); + } } void @@ -284,7 +300,7 @@ percent((int) (Config.Swap.maxSize - store_swap_size), (int) Config.Swap.maxSize)); /* Now go through each swapdir, calling its statfs routine */ - for (i = 0; i < Config.cacheSwap.n_configured; i++) { + FOREACHSDINDEX( i ) { storeAppendPrintf(sentry, "\n"); SD = &(Config.cacheSwap.swapDirs[i]); storeAppendPrintf(sentry, "Store Directory #%d (%s): %s\n", i, SD->type, @@ -297,10 +313,8 @@ storeDirConfigure(void) { SwapDir *SD; - int i; Config.Swap.maxSize = 0; - for (i = 0; i < Config.cacheSwap.n_configured; i++) { - SD = &Config.cacheSwap.swapDirs[i]; + FOREACHSD( SD ) { Config.Swap.maxSize += SD->max_size; SD->low_size = (int) (((float) SD->max_size * (float) Config.Swap.lowWaterMark) / 100.0); @@ -322,10 +336,12 @@ void storeDirOpenSwapLogs(void) { - int dirn; SwapDir *sd; - for (dirn = 0; dirn < Config.cacheSwap.n_configured; dirn++) { - sd = 
&Config.cacheSwap.swapDirs[dirn]; + FOREACHSD( sd ) { + if( sd -> flags.offline ) + { + continue; + } sd->log.open(sd); } } @@ -333,10 +349,12 @@ void storeDirCloseSwapLogs(void) { - int dirn; SwapDir *sd; - for (dirn = 0; dirn < Config.cacheSwap.n_configured; dirn++) { - sd = &Config.cacheSwap.swapDirs[dirn]; + FOREACHSD( sd ) { + if( sd -> flags.offline ) + { + continue; + } sd->log.close(sd); } } @@ -360,7 +378,6 @@ double dt; SwapDir *sd; RemovalPolicyWalker **walkers; - int dirn; int notdone = 1; if (store_dirs_rebuilding) { debug(20, 1) ("Not currently OK to rewrite swap log.\n"); @@ -371,8 +388,11 @@ getCurrentTime(); start = current_time; walkers = xcalloc(Config.cacheSwap.n_configured, sizeof *walkers); - for (dirn = 0; dirn < Config.cacheSwap.n_configured; dirn++) { - sd = &Config.cacheSwap.swapDirs[dirn]; + FOREACHSD( sd ) { + if( sd -> flags.offline ) + { + continue; + } if (sd->log.clean.start(sd) < 0) { debug(20, 1) ("log.clean.start() failed for dir #%d\n", sd->index); continue; @@ -380,8 +400,11 @@ } while (notdone) { notdone = 0; - for (dirn = 0; dirn < Config.cacheSwap.n_configured; dirn++) { - sd = &Config.cacheSwap.swapDirs[dirn]; + FOREACHSD( sd ) { + if( sd -> flags.offline ) + { + continue; + } if (NULL == sd->log.clean.write) continue; e = sd->log.clean.nextentry(sd); @@ -408,8 +431,11 @@ } } /* Flush */ - for (dirn = 0; dirn < Config.cacheSwap.n_configured; dirn++) { - sd = &Config.cacheSwap.swapDirs[dirn]; + FOREACHSD( sd ) { + if( sd -> flags.offline ) + { + continue; + } sd->log.clean.done(sd); } if (reopen) @@ -429,11 +455,13 @@ void storeDirSync(void) { - int i; SwapDir *SD; - for (i = 0; i < Config.cacheSwap.n_configured; i++) { - SD = &Config.cacheSwap.swapDirs[i]; + FOREACHSD( SD ) { + if( SD -> flags.offline ) + { + continue; + } if (SD->sync != NULL) SD->sync(SD); } @@ -450,12 +478,16 @@ static int ndir = 0; do { j = 0; - for (i = 0; i < Config.cacheSwap.n_configured; i++) { + FOREACHSDINDEX( i ) { if (ndir >= 
Config.cacheSwap.n_configured) ndir = ndir % Config.cacheSwap.n_configured; SD = &Config.cacheSwap.swapDirs[ndir++]; if (NULL == SD->callback) continue; + if( SD -> flags.offline ) + { + continue; + } j += SD->callback(SD); } } while (j > 0); Index: squid/src/store_io.c diff -u squid/src/store_io.c:1.2 squid/src/store_io.c:1.2.26.1 --- squid/src/store_io.c:1.2 Sat Oct 21 08:16:13 2000 +++ squid/src/store_io.c Sun Dec 17 06:36:24 2000 @@ -62,13 +62,18 @@ void *callback_data) { SwapDir *SD = &Config.cacheSwap.swapDirs[e->swap_dirn]; - return SD->obj.open(SD, e, file_callback, callback, callback_data); + if( SD -> flags.offline ) + return NULL; + else + return SD->obj.open(SD, e, file_callback, callback, callback_data); } void storeClose(storeIOState * sio) { SwapDir *SD = &Config.cacheSwap.swapDirs[sio->swap_dirn]; + if( SD -> flags.offline ) + return; if (sio->flags.closing) return; sio->flags.closing = 1; @@ -79,6 +84,8 @@ storeRead(storeIOState * sio, char *buf, size_t size, off_t offset, STRCB * callback, void *callback_data) { SwapDir *SD = &Config.cacheSwap.swapDirs[sio->swap_dirn]; + if( SD -> flags.offline ) + return; SD->obj.read(SD, sio, buf, size, offset, callback, callback_data); } @@ -86,6 +93,8 @@ storeWrite(storeIOState * sio, char *buf, size_t size, off_t offset, FREE * free_func) { SwapDir *SD = &Config.cacheSwap.swapDirs[sio->swap_dirn]; + if( SD -> flags.offline ) + return; SD->obj.write(SD, sio, buf, size, offset, free_func); } @@ -93,6 +102,8 @@ storeUnlink(StoreEntry * e) { SwapDir *SD = INDEXSD(e->swap_dirn); + if( SD -> flags.offline ) + return; SD->obj.unlink(SD, e); } Index: squid/src/store_swapout.c diff -u squid/src/store_swapout.c:1.4.14.1.2.1 squid/src/store_swapout.c:1.4.14.1.2.1.4.1 --- squid/src/store_swapout.c:1.4.14.1.2.1 Sun Dec 17 05:55:39 2000 +++ squid/src/store_swapout.c Sun Dec 17 06:36:24 2000 @@ -280,7 +280,7 @@ assert(e->swap_status == SWAPOUT_WRITING); cbdataFree(c); if (errflag) { - debug(20, 1) ("storeSwapOutFileClosed: 
dirno %d, swapfile %08X, errflag=%d\n\t%s\n", + debug(20, 1) ("storeSwapOutFileClosed: dirno %d, swapfile %08X, errflag=%d: %s\n", e->swap_dirn, e->swap_filen, errflag, xstrerror()); if (errflag == DISK_NO_SPACE_LEFT) { storeDirDiskFull(e->swap_dirn); Index: squid/src/structs.h diff -u squid/src/structs.h:1.8.8.1.2.1 squid/src/structs.h:1.8.8.1.2.1.2.1 --- squid/src/structs.h:1.8.8.1.2.1 Sun Dec 17 06:09:08 2000 +++ squid/src/structs.h Sun Dec 17 06:36:24 2000 @@ -463,6 +463,9 @@ SwapDir *swapDirs; int n_allocated; int n_configured; +#if defined( DISK_REPL ) + int n_offline; +#endif } cacheSwap; char *fake_ua; struct { @@ -517,6 +520,11 @@ size_t high_memory; } warnings; char *store_dir_select_algorithm; +#if defined( DISK_REPL ) + wordlist *disk_repl_watch_list; + char *offline_notify_cmd; + char *online_notify_cmd; +#endif }; struct _SquidConfig2 { @@ -628,6 +636,9 @@ DEFER *defer_check; /* check if we should defer read */ void *defer_data; CommWriteStateData *rwstate; /* State data for comm_write */ +#if defined( DISK_REPL ) + int swap_dir; +#endif }; struct _fileMap { @@ -1360,6 +1371,7 @@ struct { unsigned int selected:1; unsigned int read_only:1; + unsigned int offline:1; } flags; STINIT *init; /* Initialise the fs */ STNEWFS *newfs; /* Create a new fs */ @@ -1374,6 +1386,10 @@ STUNREFOBJ *unrefobj; /* Unreference this object */ STCALLBACK *callback; /* Handle pending callbacks */ STSYNC *sync; /* Sync the directory */ +#if defined( DISK_REPL ) + STONOFF *offline; /* Go offline */ + STONOFF *online; /* Go online */ +#endif /* The following are for storetree */ STHALFRELEASE *halfrelease; /* Release StoreEntry but keep disk object */ STRESELECTDIR *reselectdir; /* Swap dir redirection function */ @@ -1398,6 +1414,14 @@ } clean; int writes_since_clean; } log; +#if defined( DISK_REPL ) + struct + { + int offline_errno; + int errno_threshold; + int errnos_detected; + } raid; +#endif void *fsdata; }; Index: squid/src/tools.c diff -u squid/src/tools.c:1.5 
squid/src/tools.c:1.5.12.1 --- squid/src/tools.c:1.5 Tue Dec 12 15:21:20 2000 +++ squid/src/tools.c Fri Dec 22 04:43:01 2000 @@ -287,7 +287,7 @@ else puts(dead_msg()); } - abort(); + abend(); } @@ -343,7 +343,7 @@ if (0 == store_dirs_rebuilding) storeDirWriteCleanLogs(0); fatal_common(message); - exit(shutting_down ? 0 : 1); + abend(); } /* printf-style interface for fatal */ @@ -387,7 +387,7 @@ fatal_common(message); if (opt_catch_signals) storeDirWriteCleanLogs(0); - abort(); + abend(); } void Index: squid/src/typedefs.h diff -u squid/src/typedefs.h:1.4.10.1.2.1.2.1 squid/src/typedefs.h:1.4.10.1.2.1.2.1.2.1 --- squid/src/typedefs.h:1.4.10.1.2.1.2.1 Sun Dec 17 06:09:08 2000 +++ squid/src/typedefs.h Sun Dec 17 06:36:24 2000 @@ -247,6 +247,7 @@ typedef void STDONE(void); typedef int STCALLBACK(SwapDir *); typedef void STSYNC(SwapDir *); +typedef int STONOFF(SwapDir *); typedef void STHALFRELEASE(StoreEntry *); typedef SwapDir *STRESELECTDIR(const StoreEntry *); @@ -319,7 +320,22 @@ /* * comm_cio.h */ - typedef int ( *CioCallback ) ( siginfo_t *info ); +typedef int ( *CioCallback ) ( siginfo_t *info ); +typedef syscallcb_result ( *SyscallCallBack )( syscall_type call, + int fd, int *errNo, + void *data ); +typedef struct { + SyscallCallBack cb; + void *data; +} SyscallCBInvoc; + +#if defined( DISK_REPL ) +typedef struct { + int swapDirNo; + time_t crashTime; +} SimulateCrashData; +/* DISK_REPL */ +#endif #endif /* _TYPEDEFS_H_ */ Index: squid/src/wais.c diff -u squid/src/wais.c:1.3.16.1.4.1 squid/src/wais.c:1.3.16.1.4.1.2.1 --- squid/src/wais.c:1.3.16.1.4.1 Sun Dec 17 06:09:08 2000 +++ squid/src/wais.c Sun Dec 17 06:36:24 2000 @@ -103,7 +103,7 @@ read_sz = delayBytesWanted(delay_id, 1, read_sz); #endif statCounter.syscalls.sock.reads++; - len = read(fd, buf, read_sz); + len = comm_read_wrapper(fd, buf, read_sz); if (len > 0) { fd_bytes(fd, len, FD_READ); #if DELAY_POOLS @@ -119,13 +119,11 @@ for (clen = len - 1, bin = 0; clen; bin++) clen >>= 1; 
IOStats.Wais.read_hist[bin]++; - CLEAR_CAN_READ_IF_EMPTY( read_sz, len, fd ); } if (len < 0) { debug(50, 1) ("waisReadReply: FD %d: read failure: %s.\n", fd, xstrerror()); if (ignoreErrno(errno)) { - CLEAR_CAN_READ( fd ); /* reinstall handlers */ /* XXX This may loop forever */ commSetSelect(fd, COMM_SELECT_READ, Index: squid/src/whois.c diff -u squid/src/whois.c:1.3.16.1.4.1 squid/src/whois.c:1.3.16.1.4.1.2.1 --- squid/src/whois.c:1.3.16.1.4.1 Sun Dec 17 06:09:08 2000 +++ squid/src/whois.c Sun Dec 17 06:36:24 2000 @@ -92,7 +92,7 @@ MemObject *mem = entry->mem_obj; int len; statCounter.syscalls.sock.reads++; - len = read(fd, buf, 4095); + len = comm_read_wrapper(fd, buf, 4095); buf[len] = '\0'; debug(75, 3) ("whoisReadReply: FD %d read %d bytes\n", fd, len); debug(75, 5) ("{%s}\n", buf); @@ -104,12 +104,10 @@ kb_incr(&statCounter.server.http.kbytes_in, len); storeAppend(entry, buf, len); commSetSelect(fd, COMM_SELECT_READ, whoisReadReply, p, Config.Timeout.read); - CLEAR_CAN_READ_IF_EMPTY( 4095, len, fd ); } else if (len < 0) { debug(50, 2) ("whoisReadReply: FD %d: read failure: %s.\n", fd, xstrerror()); if (ignoreErrno(errno)) { - CLEAR_CAN_READ( fd ); commSetSelect(fd, COMM_SELECT_READ, whoisReadReply, p, Config.Timeout.read); } else if (mem->inmem_hi == 0) { ErrorState *err; Index: squid/src/fs/butterfly/store_dir_bf.c diff -u squid/src/fs/butterfly/store_dir_bf.c:1.1.2.1.2.1 squid/src/fs/butterfly/store_dir_bf.c:1.1.2.1.2.1.2.2 --- squid/src/fs/butterfly/store_dir_bf.c:1.1.2.1.2.1 Sun Dec 17 06:09:08 2000 +++ squid/src/fs/butterfly/store_dir_bf.c Fri Dec 22 04:55:03 2000 @@ -54,6 +54,7 @@ }; int n_bf_dirs = 0; +int n_bf_offline = 0; static int *bf_dir_index = NULL; MemPool *bf_state_pool = NULL; MemPool *bf_op_pool = NULL; @@ -73,6 +74,10 @@ static void storeBfDirInit(SwapDir * sd); static void storeBfStats(StoreEntry * sentry); static void storeBfDirSync(SwapDir * SD); +#if defined( DISK_REPL ) +static int storeBfDirOffline(SwapDir * SD); +static int 
storeBfDirOnline(SwapDir * SD); +#endif static int storeBfDirCallback(SwapDir * SD); static void storeBfRelease(StoreEntry *e); static SwapDir *storeBfReselectDir (const StoreEntry *e); @@ -92,6 +97,7 @@ static void storeBfDirFree(SwapDir * s); static int storeBfCleanupDoubleCheck(SwapDir * sd, StoreEntry * e); static void storeBfDirParse(SwapDir * sd, int index, char *path); +static int findNthBf( int n ); int @@ -217,38 +223,52 @@ storeBfDirInit(SwapDir * sd) { bfinfo_t *bfi = sd->fsdata; - static const char *errmsg = - "\tFailed to verify one of the swap directories, Check cache.log\n" - "\tfor details. Run 'squid -z' to create swap directories\n" - "\tif needed, or if running Squid for the first time."; - if (!getenv(AIO_THREAD_ENVIRON)) { static char aio_nthreads[30]; snprintf(aio_nthreads, sizeof(aio_nthreads), "%d", bf_nthreads); setenv(AIO_THREAD_ENVIRON, aio_nthreads, 0); } - if (storeBfDirVerifyCacheDirs(sd) < 0) + if (storeBfDirVerifyCacheDirs(sd) < 0) { +#if defined( DISK_REPL ) + sd -> flags.offline = 1; + bfi->dir = -1; +#else fatal(errmsg); - - bfi->dir = open (sd->path, O_RDONLY); - if (bfi->dir < 0) { +#endif + } else { + bfi->dir = open (sd->path, O_RDONLY); + if (bfi->dir < 0) { debug(50, 0) ("storeBfInit: open(%s): %s\n", sd->path, xstrerror()); +#if defined( DISK_REPL ) + sd -> flags.offline = 1; +#else fatal ("cache_dir open failed"); - } +#endif + } + else + { #if 0 /* TODO: consider legalizing dir fd? 
*/
-    fd_note(bfi->dir, sd->path);
-    commSetTimeout(bfi->dir, -1, NULL, NULL);
-    commSetNonBlocking(bfi->dir);
-#endif
-    /* We are doing polled I/O, don't wait too long in comm_poll */
-    comm_quick_poll_required();
-    storeBfDirOpenSwapLog(sd);
-    storeBfDirRebuild(sd);
+            fd_note(bfi->dir, sd->path);
+            commSetTimeout(bfi->dir, -1, NULL, NULL);
+            commSetNonBlocking(bfi->dir);
+#endif
+            /* We are doing polled I/O, don't wait too long in comm_poll */
+            comm_quick_poll_required();
+            storeBfDirOpenSwapLog(sd);
+            storeBfDirRebuild(sd);
+        }
+    }
 #if 0 && defined( TCP_ASYNC )
     debug(50, 0) ("storeBfDirInit: commCioAddCB\n");
     commCioAddCB( SIGBUTTERFLY, storeBfDirCioCallback );
 /* TCP_ASYNC */
 #endif
+#if defined( DISK_REPL )
+    sd -> raid.offline_errno = DISK_DIES_ERRNO;
+    sd -> raid.errno_threshold = 2;
+    sd -> raid.errnos_detected = 0;
+#endif
+    assert( n_bf_dirs >= 0 );
 }
 
@@ -301,6 +321,75 @@
 #endif
 }
 
+#if defined( DISK_REPL )
+/*
+ * storeBfDirOffline
+ *
+ * "offline" callback of a Butterfly cache_dir: closes the descriptor of
+ * the cache directory and counts the dir as offline. Always returns 1.
+ */
+static int storeBfDirOffline(SwapDir * SD)
+{
+    bfinfo_t *bfi;
+
+    /* assert before SD is dereferenced, otherwise the check is useless */
+    assert( SD != NULL );
+    bfi = SD -> fsdata;
+    debug( 86, 9 ) ( "storeBfDirOffline: offline: %i total: %i\n",
+                     n_bf_offline, n_bf_dirs );
+
+    /* dir is set to -1 whenever the cache directory is not open */
+    if( bfi -> dir >= 0 )
+    {
+        close( bfi -> dir );
+    }
+    bfi -> dir = -1;
+
+    ++n_bf_offline;
+    /* NOTE(review): n_bf_dirs is not changed by this callback, so this
+       test looks unreachable here; it presumably was meant to compare
+       against n_bf_offline -- confirm the intended counter semantics. */
+    if( 0 >= n_bf_dirs )
+    {
+        debug( 86, 8 ) ( "storeBfDirOffline: all Butterfly dirs are gone.\n" );
+    }
+    return 1;
+}
+
+/*
+ * storeBfDirOnline
+ *
+ * "online" callback of a Butterfly cache_dir: re-opens the cache
+ * directory. Returns 1 on success and 0 when the directory cannot be
+ * opened (the offline counter is left alone in that case).
+ */
+static int storeBfDirOnline(SwapDir * SD)
+{
+    bfinfo_t *bfi;
+
+    /* assert before SD is dereferenced, otherwise the check is useless */
+    assert( SD != NULL );
+    assert( n_bf_offline >= 0 );
+    bfi = SD -> fsdata;
+
+    bfi -> dir = open( SD -> path, O_RDONLY );
+    if( bfi -> dir < 0 )
+    {
+        debug(50, 0) ("storeBfDirOnline: open(%s): %s\n",
+                      SD -> path, xstrerror() );
+        return 0;
+    }
+    /* storeBfDirInit() marks a dir offline without counting it in
+       n_bf_offline, so never let the counter go negative here */
+    if( n_bf_offline > 0 )
+    {
+        --n_bf_offline;
+    }
+    return 1;
+}
+
+#endif
+
 /* Write down op duration profiling data */
 
 static void
@@ -451,7 +540,7 @@
 static SwapDir *
 storeBfReselectDir (const StoreEntry *e)
 {
-    return INDEXSD(storeBfSelectSwapDir(e));
+  return INDEXSD(storeBfSelectSwapDir(e));
 }
 
 
@@ -575,6 +664,11 @@
 	return NULL;
     if (strncasecmp (url, 
"cache_object://", sizeof "cache_object://"-1) == 0)
 	return NULL;
+#if defined( DISK_REPL )
+    if (n_storetree_dirs <= n_bf_offline)
+        return NULL;
+#endif
+
     debug(86, 5) ("StoreTreeGet for %s %s Callers %p %p %p %p\n",
 		  url,
 		  storeKeyText (key),
@@ -678,41 +747,122 @@
 static int
 storeBfSelectSwapDir(const StoreEntry * e)
 {
-    int dirn;
-    int x;
-
+    int shift;
+
     if (! e->id_valid) {
 	assert (e->mem_obj);
 	assert (e->mem_obj->url);
-	e->id = make_id (e->key, e->mem_obj->url, strlen (e->mem_obj->url));
-	e->id_valid = 1;
-    }
-
-    x = *(unsigned *)e->key % n_bf_dirs;
-
-    if (Config.cacheSwap.n_configured == n_bf_dirs)
-	return x;
-    else for (dirn = 0; dirn < Config.cacheSwap.n_configured; dirn++) {
-	SwapDir *sd = INDEXSD(dirn);
-	if (storeBfDirIs(sd)) {
-	    if (x == 0)
+        /* To avoid constness complaints from cc */
+        ((StoreEntry *)e)->id = make_id
+            (e->hash.key, e->mem_obj->url, strlen (e->mem_obj->url));
+        ((StoreEntry *)e)->id_valid = 1;
+    }
+
+    /* it is not very clean way of coding (it is not clean at all)
+       but keep with it just for the time being (and being and being and...) */
+    /* This was updated to handle the case when some disks are offline.
+       We take sizeof( unsigned ) bytes of the MD5 checksum of the URL,
+       reduce them modulo the number of butterfly cache_dirs and see if
+       the result points to an online dir. If it does not we just move
+       one byte further into the checksum, otherwise return this dir.
+    */
+    for( shift = 0 ; shift < MD5_DIGEST_CHARS - sizeof( unsigned ) ; ++shift )
+    {
+        int dirn;
+        int x;
+
+        /* NOTE(review): this assumes n_bf_dirs counts only online dirs
+           (so that n_bf_dirs + n_bf_offline is the total); the offline/
+           online callbacks do not appear to adjust n_bf_dirs -- confirm. */
+        x = *(unsigned *)(e->hash.key + shift) % ( n_bf_dirs + n_bf_offline );
+        /* All caches are butterfly and all of them are online. Normal case. */
+        if((Config.cacheSwap.n_configured == n_bf_dirs) &&
+           (n_bf_offline == 0))
+        {
+            debug( 91, 9 ) ("storeBfSelectSwapDir(shortcut): dirn: %i\n", x );
+            return x;
+        }
+        /* Some cache_dirs are offline */
+        else if( n_bf_dirs > 0 )
+        {
+            dirn = findNthBf( x );
+            assert( dirn >= 0 );
+#if defined( DISK_REPL )
+            if( !INDEXSD(dirn) -> flags.offline )
+            {
 		return dirn;
-	    x--;
+            }
+            else
+            {
+                /* try next portion of md5 */
+                debug( 86, 9 )
+                    ("storeBfSelectSwapDir: skipped dirn: %i:%i\n", dirn, x );
+                continue;
+            }
+#else
+            return dirn;
+#endif
+        }
+        /* All butterfly caches are offline. */
+        else
+        {
+            break;
+        }
+    }
+    /*
+      All butterfly caches are offline or we tried all possible
+      hashes and always hit offline dir. Forget about load balancing
+      etc., and just return the first online butterfly directory that
+      we can find. The frequency of this code being invoked should
+      be very small, causing insignificant distortion of optimal load
+      balancing. --hans
+    */
+    if( n_bf_dirs > 0 )
+    {
+        SwapDir *sd;
+
+        FOREACHSD( sd )
+        {
+            if( storeBfDirIs( sd ) && !sd -> flags.offline )
+            {
+                return sd -> index;
+            }
+        }
+    }
+    /* No online butterfly dir: as a last resort try to use any online
+       cache whatsoever instead of asserting. */
+    /* Will assert(0) if there are no online caches. */
+    return findFirstOnlineSD();
+}
+
+/* Return the cache_dir index of the n-th (0-based) Butterfly swap dir in
+   configuration order, online or not; -1 when there are fewer than
+   n + 1 of them. */
+static int findNthBf( int n )
+{
+    SwapDir *sd;
+
+    FOREACHSD( sd )
+    {
+        if( storeBfDirIs( sd ) )
+        {
+            if( n == 0 )
+            {
+                return sd -> index;
+            }
+            n--;
 	}
     }
-    assert (0);
-    return 0;
+    return -1;
 }
 
-
 /*
  * storeBfDirRefObj
  *
  * This routine is called whenever an object is referenced, so we can
  * maintain replacement information within the storage fs.
  *
- * TODO put reiserfs_raw setattr here!
- */
+ * TODO put reiserfs_raw setattr here! */
 static void
 storeBfDirRefObj(SwapDir *sd, StoreEntry * e)
 {
@@ -867,7 +1017,8 @@
     if (! 
e->id_valid) { assert (e->mem_obj); assert (e->mem_obj->url); - e->id = make_id (e->key, e->mem_obj->url, strlen (e->mem_obj->url)); + e->id = make_id (e->hash.key, e->mem_obj->url, + strlen (e->mem_obj->url)); e->id_valid = 1; } ra->id1 = (unsigned) e->id; @@ -943,6 +1094,10 @@ sd->unrefobj = storeBfDirUnrefObj; sd->callback = storeBfDirCallback; sd->sync = storeBfDirSync; +#if defined( DISK_REPL ) + sd->online = storeBfDirOnline; + sd->offline = storeBfDirOffline; +#endif sd->halfrelease = storeBfRelease; sd->reselectdir = storeBfReselectDir; sd->obj.create = storeBfCreate; @@ -1003,9 +1158,60 @@ /* * $Log$ + * Revision 1.1.2.1.2.1.2.2 2000/12/22 12:55:03 nikitadanilov + * StoreEntry->key converted to StoreEntry->hash.key. Unused errmsg removed from storeBfDirInit(). + * + * Revision 1.1.2.1.2.1.2.1 2000/12/17 14:36:24 hno + * Imported raid from squidng + * * Revision 1.1.2.1.2.1 2000/12/17 14:09:08 hno * Imported tcp_async from squidng * + * --- below are from squidng --- + * + * Revision 1.1.2.7.2.3.2.12 2000/12/07 16:49:31 nikitadanilov + * storeBfDirInit: asserts and n_bf_dirs handling corrected (Sizif noted that + * it crashed if first dir was initially offline) + * + * Revision 1.1.2.7.2.3.2.11 2000/11/28 16:01:39 nikitadanilov + * Missing braces (noted by Sizif) inserted. + * + * Revision 1.1.2.7.2.3.2.10 2000/11/28 11:52:16 sizif + * Missing {}s in the recent change + * + * Revision 1.1.2.7.2.3.2.9 2000/11/27 19:55:56 nikitadanilov + * if storeBfDirVerifyCacheDirs() returns -1 just mark cache_dir as offline, + * don't bail + * + * Revision 1.1.2.7.2.3.2.8 2000/11/24 20:43:34 nikitadanilov + * If Butterfly cache_dir is inaccessible during startup, mark it offline, not + * bail out (NOT TESTED) + * + * Revision 1.1.2.7.2.3.2.7 2000/11/22 14:49:01 nikitadanilov + * Online/offline callbacks improved. 
Hash function changed for new semantics + * of n_bf_dirs and n_bf_offline + * + * Revision 1.1.2.7.2.3.2.6 2000/10/20 12:05:04 nikitadanilov + * Correct invalid return value from storeBfSelectSwapDir() when compiled w/o + * DISK_REPL + * + * Revision 1.1.2.7.2.3.2.5 2000/10/19 09:59:48 nikitadanilov + * storeBfSelectSwapDir corrected + * + * Revision 1.1.2.7.2.3.2.4 2000/10/17 19:44:00 nikitadanilov + * Move .raid initialisation into storeBfDirInit() + * + * Revision 1.1.2.7.2.3.2.3 2000/10/17 19:31:26 nikitadanilov + * storeBfSelectSwapDir() should be more stable to disk crashes + * + * Revision 1.1.2.7.2.3.2.2 2000/10/13 15:43:19 nikitadanilov + * RAID disk replcement changes: added per-fs callbacks called on on/of-line s + * witch; Crash simulation added; Code commented. It started to work (with + * crash simulation). Alpha release state reached. + * + * Revision 1.1.2.7.2.3.2.1 2000/10/12 21:02:21 nikitadanilov + * Prototype of put-disk-offline-on-the-fly sub-system + * * Revision 1.1.2.7.2.3 2000/10/12 15:06:25 nikitadanilov * Small typo corrected * Index: squid/src/fs/butterfly/store_io_bf.c diff -u squid/src/fs/butterfly/store_io_bf.c:1.1.2.1 squid/src/fs/butterfly/store_io_bf.c:1.1.2.1.4.2 --- squid/src/fs/butterfly/store_io_bf.c:1.1.2.1 Sun Dec 17 05:55:40 2000 +++ squid/src/fs/butterfly/store_io_bf.c Fri Dec 22 04:55:03 2000 @@ -99,7 +99,7 @@ bfinfo_t *bfi = sd->fsdata; bfop_t *op; - debug(81, 3) ("storeBfOpen: key %s\n", storeKeyText(e->key)); + debug(81, 3) ("storeBfOpen: key %s\n", storeKeyText(e->hash.key)); sio = CBDATA_ALLOC(storeIOState, storeBfIOFreeEntry); sio->fsstate = bfstate = memPoolAlloc(bf_state_pool); @@ -143,7 +143,7 @@ bfop_t *op; debug(81, 3) ("storeBfCreate: key %s flags %s\n", - storeKeyText(e->key), storeEntryFlags (e)); + storeKeyText(e->hash.key), storeEntryFlags (e)); /* Avoid disk subsystem overload. May lead to losing hits, so don't set the limit too low. 
*/ @@ -189,7 +189,7 @@ bfstate_t *bfstate = sio->fsstate; debug(81, 3) ("storeBfClose: dirno %d, key %s\n", sd->index, - storeKeyText (sio->e->key)); + storeKeyText (sio->e->hash.key)); bfstate->close_request = 1; @@ -269,7 +269,7 @@ storeBfUnlink(SwapDir *sd, StoreEntry *e) { debug(81, 3) ("storeBfUnlink: dirno %d, key %s\n", - sd->index, storeKeyText(e->key)); + sd->index, storeKeyText(e->hash.key)); /* storeBfDirReplRemove(e); */ storeBfUnlink2 (sd, e, NULL); } @@ -326,7 +326,7 @@ bfop_t *op; debug(81, 3) ("storeBfSetattr: dirno %d, key %s\n", - sd->index, storeKeyText(e->key)); + sd->index, storeKeyText(e->hash.key)); ra = xcalloc (1, sizeof (reiserfs_raw_ioctl_arg)); storeBfDirFillRa(e, ra); @@ -541,7 +541,7 @@ sio->swap_dirn, x); if (bfstate->unlink_attempted) debug(81, 1) ("storeBfCompletion: BFCREAT: dir%d, lingering unlink %s\n", - sio->swap_dirn, storeKeyText(sio->e->key)); + sio->swap_dirn, storeKeyText(sio->e->hash.key)); else { int x = storeBfUnlink2 (INDEXSD (sio->swap_dirn), sio->e, sio); if (x >= 0) { @@ -563,6 +563,15 @@ sio->e->swap_file_sz = bfstate->ra.size; store_open_disk_fd++; debug(81, 4) ("storeBfOpCompletion: sio %p fd %d opened\n", sio, bfstate->fd); +#if defined( DISK_REPL ) + fd_table[ bfstate->fd ].swap_dir = sio -> swap_dirn; +#endif + debug( 81, 9 )( "butterfly fd: %i in swap_dir: %i\n", + bfstate->fd, sio -> swap_dirn ); + if( op -> type == BFCREAT ) + { + fd_note( bfstate->fd, "Butterfly KAIO" ); + } } break; @@ -572,6 +581,18 @@ rlen = (x == 0)? aio_return (&op->aio): -1; op->rlen = (x == 0)? aio_return (&op->aio): 0; storeBfUpdateOffset (bfstate); +#if defined( DISK_REPL ) + if( fd_table[ bfstate->fd ].swap_dir != sio -> swap_dirn ) + { + debug( 81, 9 )( "BFREAD: swap_dir anomaly: fd: %i\n", bfstate->fd ); + print_fde( &fd_table[ bfstate->fd ] ); + debug( 81, 9 ) ( "op: %s", storeBfOpDump( op ) ); + } + else +#endif + { + callReadHook( bfstate->fd, &x ); + } if (! 
bfstate->aborting && cbdataValid(op->callback_data)) op->callback.read (op->callback_data, op->aio.aio_buf, rlen); cbdataUnlock (op->callback_data); @@ -582,6 +603,18 @@ storeBfIOAbort (sio, op->type, x); op->rlen = (x == 0)? aio_return (&op->aio): 0; storeBfUpdateOffset (bfstate); +#if defined( DISK_REPL ) + if( fd_table[ bfstate->fd ].swap_dir != sio -> swap_dirn ) + { + debug( 81, 9 )( "BFWRITE: swap_dir anomaly: fd: %i\n", bfstate->fd ); + print_fde( &fd_table[ bfstate->fd ] ); + debug( 81, 9 ) ( "op: %s", storeBfOpDump( op ) ); + } + else +#endif + { + callWriteHook( bfstate->fd, &x ); + } if (op->callback.write) op->callback.write (op->aio.aio_buf); /* free_func */ break; Index: squid/src/fs/ufs/store_dir_ufs.c diff -u squid/src/fs/ufs/store_dir_ufs.c:1.5.4.1 squid/src/fs/ufs/store_dir_ufs.c:1.5.4.1.6.1 --- squid/src/fs/ufs/store_dir_ufs.c:1.5.4.1 Sun Dec 17 00:20:06 2000 +++ squid/src/fs/ufs/store_dir_ufs.c Sun Dec 17 06:36:24 2000 @@ -862,6 +862,10 @@ char *new_path = xstrdup(storeUfsDirSwapLogFile(sd, ".new")); int fd; file_close(ufsinfo->swaplog_fd); + if( sd -> flags.offline ) + { + return; + } #ifdef _SQUID_OS2_ if (unlink(swaplog_path) < 0) { debug(50, 0) ("%s: %s\n", swaplog_path, xstrerror()); @@ -893,6 +897,10 @@ struct stat clean_sb; FILE *fp; int fd; + if( sd -> flags.offline ) + { + return; + } if (stat(swaplog_path, &log_sb) < 0) { debug(47, 1) ("Cache Dir #%d: No log file\n", sd->index); safe_free(swaplog_path); @@ -952,6 +960,11 @@ { struct _clean_state *state = xcalloc(1, sizeof(*state)); struct stat sb; + + if( sd -> flags.offline ) + { + return; + } sd->log.clean.write = NULL; sd->log.clean.state = NULL; state->new = xstrdup(storeUfsDirSwapLogFile(sd, ".clean")); @@ -1001,6 +1014,11 @@ storeSwapLogData s; static size_t ss = sizeof(storeSwapLogData); struct _clean_state *state = sd->log.clean.state; + + if( sd -> flags.offline ) + { + return; + } memset(&s, '\0', ss); s.op = (char) SWAP_LOG_ADD; s.swap_filen = e->swap_filen; @@ -1035,6 
+1053,8 @@ storeUfsDirWriteCleanDone(SwapDir * sd) { struct _clean_state *state = sd->log.clean.state; + if (sd->flags.offline) + return; if (NULL == state) return; if (state->fd < 0) @@ -1095,7 +1115,10 @@ storeUfsDirSwapLog(const SwapDir * sd, const StoreEntry * e, int op) { ufsinfo_t *ufsinfo = (ufsinfo_t *) sd->fsdata; - storeSwapLogData *s = memAllocate(MEM_SWAP_LOG_DATA); + storeSwapLogData *s; + if (sd->flags.offline) + return; + s = memAllocate(MEM_SWAP_LOG_DATA); s->op = (char) op; s->swap_filen = e->swap_filen; s->timestamp = e->timestamp; @@ -1118,6 +1141,10 @@ static void storeUfsDirNewfs(SwapDir * sd) { + if( sd -> flags.offline ) + { + return; + } debug(47, 3) ("Creating swap space in %s\n", sd->path); storeUfsDirCreateDirectory(sd->path, 0); storeUfsDirCreateSwapSubDirs(sd); @@ -1153,6 +1180,10 @@ N0 = n_ufs_dirs; D0 = ufs_dir_index[swap_index % N0]; SD = &Config.cacheSwap.swapDirs[D0]; + if( SD -> flags.offline ) + { + return; + } ufsinfo = (ufsinfo_t *) SD->fsdata; N1 = ufsinfo->l1; D1 = (swap_index / N0) % N1; @@ -1229,6 +1260,10 @@ ufs_dir_index = xcalloc(n_ufs_dirs, sizeof(*ufs_dir_index)); for (i = 0, n = 0; i < Config.cacheSwap.n_configured; i++) { sd = &Config.cacheSwap.swapDirs[i]; + if( sd -> flags.offline ) + { + continue; + } if (!storeUfsDirIs(sd)) continue; ufs_dir_index[n++] = i; @@ -1309,6 +1344,10 @@ double f; RemovalPurgeWalker *walker; /* We can't delete objects while rebuilding swap */ + if( SD -> flags.offline ) + { + return; + } if (store_dirs_rebuilding) { return; } else { @@ -1356,6 +1395,10 @@ return -1; } #endif + if( SD -> flags.offline ) + { + return -1; + } /* Return 999 (99.9%) constant load */ return 999; } @@ -1371,6 +1414,10 @@ { debug(1, 3) ("storeUfsDirRefObj: referencing %p %d/%d\n", e, e->swap_dirn, e->swap_filen); + if( SD -> flags.offline ) + { + return; + } if (SD->repl->Referenced) SD->repl->Referenced(SD->repl, e, &e->repl); } @@ -1385,6 +1432,10 @@ { debug(1, 3) ("storeUfsDirUnrefObj: referencing %p 
%d/%d\n", e, e->swap_dirn, e->swap_filen); + if( SD -> flags.offline ) + { + return; + } if (SD->repl->Dereferenced) SD->repl->Dereferenced(SD->repl, e, &e->repl); } @@ -1401,6 +1452,10 @@ { debug(79, 3) ("storeUfsDirUnlinkFile: unlinking fileno %08X\n", f); /* storeUfsDirMapBitReset(SD, f); */ + if( SD -> flags.offline ) + { + return; + } unlinkdUnlink(storeUfsDirFullPath(SD, f, NULL)); } @@ -1414,6 +1469,10 @@ { debug(20, 4) ("storeUfsDirReplAdd: added node %p to dir %d\n", e, SD->index); + if( SD -> flags.offline ) + { + return; + } SD->repl->Add(SD->repl, e, &e->repl); } @@ -1424,6 +1483,10 @@ SwapDir *SD = INDEXSD(e->swap_dirn); debug(20, 4) ("storeUfsDirReplRemove: remove node %p from dir %d\n", e, SD->index); + if( SD -> flags.offline ) + { + return; + } SD->repl->Remove(SD->repl, e, &e->repl); } @@ -1494,6 +1557,10 @@ int l2; unsigned int read_only = 0; + if( sd -> flags.offline ) + { + return; + } i = GetInteger(); size = i << 10; /* Mbytes to kbytes */ if (size <= 0) @@ -1529,13 +1596,13 @@ storeUfsDirDump(StoreEntry * entry, const char *name, SwapDir * s) { ufsinfo_t *ufsinfo = (ufsinfo_t *) s->fsdata; - storeAppendPrintf(entry, "%s %s %s %d %d %d\n", + storeAppendPrintf(entry, "%s %s %s %d %d %d [%s]\n", name, "ufs", s->path, s->max_size >> 10, ufsinfo->l1, - ufsinfo->l2); + ufsinfo->l2, s -> flags.offline ? "offline" : "online"); } /* @@ -1582,6 +1649,8 @@ storeUfsCleanupDoubleCheck(SwapDir * sd, StoreEntry * e) { struct stat sb; + if (sd->flags.offline) + return -1; if (stat(storeUfsDirFullPath(sd, e->swap_filen, NULL), &sb) < 0) { debug(20, 0) ("storeUfsCleanupDoubleCheck: MISSING SWAP FILE\n"); debug(20, 0) ("storeUfsCleanupDoubleCheck: FILENO %08X\n", e->swap_filen);