This patch is generated from the docs branch of HEAD in squid3 Tue Apr 22 00:24:34 2008 GMT See http://devel.squid-cache.org/ Index: squid3/doc/Programming-Guide/.cvsignore diff -u squid3/doc/Programming-Guide/.cvsignore:1.3 squid3/doc/Programming-Guide/.cvsignore:1.3.30.1 --- squid3/doc/Programming-Guide/.cvsignore:1.3 Sun Jun 1 02:23:30 2003 +++ squid3/doc/Programming-Guide/.cvsignore Sat Jan 12 05:44:52 2008 @@ -1,37 +1,2 @@ -prog-guide-10.html -prog-guide-11.html -prog-guide-12.html -prog-guide-13.html -prog-guide-14.html -prog-guide-15.html -prog-guide-16.html -prog-guide-17.html -prog-guide-18.html -prog-guide-19.html -prog-guide-1.html -prog-guide-20.html -prog-guide-21.html -prog-guide-22.html -prog-guide-23.html -prog-guide-24.html -prog-guide-25.html -prog-guide-26.html -prog-guide-27.html -prog-guide-28.html -prog-guide-29.html -prog-guide-2.html -prog-guide-30.html -prog-guide-3.html -prog-guide-4.html -prog-guide-5.html -prog-guide-6.html -prog-guide-7.html -prog-guide-8.html -prog-guide-9.html -prog-guide.html -prog-guide.txt -prog-guide.tex -prog-guide.ps -prog-guide.pdf -prog-guide.dvi +html Makefile Index: squid3/doc/Programming-Guide/07_MainLoop.dox diff -u /dev/null squid3/doc/Programming-Guide/07_MainLoop.dox:1.1.2.5 --- /dev/null Thu Jan 1 01:00:00 1970 +++ squid3/doc/Programming-Guide/07_MainLoop.dox Thu Aug 9 17:12:56 2007 @@ -0,0 +1,131 @@ +/** +\page 07_MainLoop The Main Loop: comm_select() +\par + At the core of Squid is the select(2) system call. + Squid uses select() or poll(2) to process I/O on + all open file descriptors. Hereafter we'll only use + "select" to refer generically to either system call. +\par + The select() and poll() system calls work by + waiting for I/O events on a set of file descriptors. Squid + only checks for \em read and \em write events. Squid + knows that it should check for reading or writing when + there is a read or write handler registered for a given + file descriptor. 
Handler functions are registered with + the commSetSelect function. For example: +\code + commSetSelect(fd, COMM_SELECT_READ, clientReadRequest, conn, 0); +\endcode + In this example, fd is a TCP socket to a client + connection. When there is data to be read from the socket, + then the select loop will execute +\code + clientReadRequest(fd, conn); +\endcode +\todo DOCS: find out if poll() is still used and get it linking to docs. + +\par + The I/O handlers are reset every time they are called. In + other words, a handler function must re-register itself + with commSetSelect() if it wants to continue reading or + writing on a file descriptor. The I/O handler may be + canceled before being called by providing NULL arguments, + e.g.: +\code + commSetSelect(fd, COMM_SELECT_READ, NULL, NULL, 0); +\endcode +\par + These I/O handlers (and others) and their associated callback + data pointers are saved in the fde data structure: +\code + struct _fde { + ... + PF *read_handler; + void *read_data; + PF *write_handler; + void *write_data; + close_handler *close_handler; + DEFER *defer_check; + void *defer_data; + }; +\endcode + read_handler and write_handler are called when + the file descriptor is ready for reading or writing, + respectively. The close_handler is called when the + filedescriptor is closed. The close_handler is + actually a linked list of callback functions to be called. +\todo DOCS: make _fde code example a grab straight from the current source file + +\par + In some situations we want to defer reading from a + filedescriptor, even though it has data for us to read. + This may be the case when data arrives from the server-side + faster than it can be written to the client-side. Before + adding a filedescriptor to the "read set" for select, we + call defer_check (if it is non-NULL). If defer_check + returns 1, then we skip the filedescriptor for that time + through the select loop. +\todo DOCS: update name defer_check to current one used in code. 
+ +\par + These handlers are stored in the FD_ENTRY structure + as defined in comm.h. fd_table[] is the global + array of FD_ENTRY structures. The handler functions + are of type PF, which is a typedef: +\code + typedef void (*PF) (int, void *); +\endcode + The close handler is really a linked list of handler + functions. Each handler also has an associated pointer + (void *data) to some kind of data structure. +\todo DOCS: update FD_ENTRY (macro?) linking to current details. +\todo DOCS: get fd_table[] documenting and linking properly. + +\par + comm_select() is the function which issues the select() + system call. It scans the entire fd_table[] array + looking for handler functions. Each file descriptor with + a read handler will be set in the fd_set read bitmask. + Similarly, write handlers are scanned and bits set for the + write bitmask. select() is then called, and the return + read and write bitmasks are scanned for descriptors with + pending I/O. For each ready descriptor, the handler is + called. Note that the handler is cleared from the + FD_ENTRY before it is called. + +\par + After each handler is called, comm_select_incoming() + is called to process new HTTP and ICP requests. +\todo DOCS: what has replaced comm_select_incoming() + +\par + Typical read handlers are + httpReadReply(), + diskHandleRead(), + icpHandleUdp(), + and ipcache_dnsHandleRead(). + Typical write handlers are + commHandleWrite(), + diskHandleWrite(), + and icpUdpReply(). + The handler function is set with commSetSelect(), with the + exception of the close handlers, which are set with + comm_add_close_handler(). +\todo DOCS: what has replaced httpReadReply() as callback. +\todo DOCS: what has replaced ipcache_dnsHandleRead() as callback. +\todo DOCS: what has replaced icpUdpReply() as callback + +\par + The close handlers are normally called from comm_close(). + The job of the close handlers is to deallocate data structures + associated with the file descriptor. 
For this reason + comm_close() must normally be the last function in a + sequence to prevent accessing just-freed memory. +\todo DOCS: what has replaced comm_close() as close callback handler + +\par + The timeout and lifetime handlers are called for file + descriptors which have been idle for too long. They are + further discussed in \link ClientStreamAPI Client Streams. \endlink + + */ Index: squid3/doc/Programming-Guide/10_DelayPools.dox diff -u /dev/null squid3/doc/Programming-Guide/10_DelayPools.dox:1.1.2.2 --- /dev/null Thu Jan 1 01:00:00 1970 +++ squid3/doc/Programming-Guide/10_DelayPools.dox Mon Jul 30 21:29:06 2007 @@ -0,0 +1,49 @@ +/** +\page 10_DelayPools Delay Pools + +\section Introduction Introduction +\par + A DelayPool is a Composite used to manage bandwidth for any request + assigned to the pool by an access expression. DelayId's are used + to manage the bandwidth on a given request, whereas a DelayPool + manages the bandwidth availability and assigned DelayId's. + +\section ExtendingDelayPools Extending Delay Pools +\par + A CompositePoolNode is the base type for all members of a DelayPool. + Any child must implement the RefCounting primitives, as well as five + delay pool functions: + \li stats() - provide cachemanager statistics for itself. + \li dump() - generate squid.conf syntax for the current configuration of the item. + \li update() - allocate more bandwidth to all buckets in the item. + \li parse() - accept squid.conf syntax for the item, and configure for use appropriately. + \li id() - return a DelayId entry for the current item. + +\par + A DelayIdComposite is the base type for all delay Id's. Concrete + Delay Id's must implement the refcounting primitives, as well as two + delay id functions: + \li bytesWanted() - return the largest amount of bytes that this delay id allows by policy. + \li bytesIn() - record the use of bandwidth by the request(s) that this delayId is monitoring. 
+ +\par + Composite creation is currently under design review, so see the + DelayPool class and follow the parse() code path for details. + +\section NeatExtensions Neat things that could be done. +\par + With the composite structure, some neat things have become possible. + For instance: + +\par Dynamically defined pool arrangements. + For instance an aggregate (class 1) combined with the per-class-C-net tracking of a + class 3 pool, without the individual host tracking. This differs + from a class 3 pool with -1/-1 in the host bucket, because no memory + or cpu would be used on hosts, whereas with a class 3 pool, they are + allocated and used. + +\par Per request bandwidth limits. + A delayId that contains its own bucket could limit each request + independently to a given policy, with no aggregate restrictions. + + */ Index: squid3/doc/Programming-Guide/11_StorageManager.dox diff -u /dev/null squid3/doc/Programming-Guide/11_StorageManager.dox:1.1.2.2 --- /dev/null Thu Jan 1 01:00:00 1970 +++ squid3/doc/Programming-Guide/11_StorageManager.dox Thu Aug 16 20:40:27 2007 @@ -0,0 +1,48 @@ +/** + \defgroup StorageManager Storage Manager + \ingroup Components + * + \par + The Storage Manager is the glue between client and server + sides. Every object saved in the cache is allocated a + StoreEntry structure. While the object is being + accessed, it also has a MemObject structure. + +\par + Squid can quickly locate cached objects because it keeps + (in memory) a hash table of all StoreEntry's. The + keys for the hash table are MD5 checksums of the object's + URI. In addition there is also a storage policy such + as LRU that keeps track of the objects and determines + the removal order when space needs to be reclaimed. + For the LRU policy this is implemented as a doubly linked + list. + +\par + For each object the StoreEntry maps to a cache_dir + and location via sdirn and sfilen. 
For the "ufs" store + this file number (sfilen) is converted to a disk pathname + by a simple modulo of L2 and L1, but other storage drivers may + map sfilen in other ways. A cache swap file consists + of two parts: the cache metadata, and the object data. + Note the object data includes the full HTTP reply---headers + and body. The HTTP reply headers are not the same as the + cache metadata. + +\par + Client-side requests register themselves with a StoreEntry + to be notified when new data arrives. Multiple clients + may receive data via a single StoreEntry. For POST + and PUT requests, this process works in reverse. Server-side + functions are notified when additional data is read from + the client. + +\section ObjectStorage Object Storage +\par +\todo DOCS: write section about object storage + +\section ObjectRetrieval Object Retrieval +\par +\todo DOCS: write section about object retrieval + + */ Index: squid3/doc/Programming-Guide/12_StorageInterface.dox diff -u /dev/null squid3/doc/Programming-Guide/12_StorageInterface.dox:1.1.2.4 --- /dev/null Thu Jan 1 01:00:00 1970 +++ squid3/doc/Programming-Guide/12_StorageInterface.dox Mon Jul 30 21:29:06 2007 @@ -0,0 +1,741 @@ +/** +\page 12_StorageInterface Storage Interface + +\section Introduction Introduction +\par + Traditionally, Squid has always used the Unix filesystem (UFS) + to store cache objects on disk. Over the years, the + poor performance of UFS has become very obvious. In most + cases, UFS limits Squid to about 30-50 requests per second. + Our work indicates that the poor performance is mostly + due to the synchronous nature of open() and unlink() + system calls, and perhaps thrashing of inode/buffer caches. + +\par + We want to try out our own, customized filesystems with Squid. + In order to do that, we need a well-defined interface + for the bits of Squid that access the permanent storage + devices. 
We also require tighter control of the replacement + policy by each storage module, rather than a single global + replacement policy. + +\section BuildStructure Build structure +\par + The storage types live in src/fs/ . Each subdirectory corresponds + to the name of the storage type. When a new storage type is implemented + configure.in must be updated to autogenerate a Makefile in + src/fs/foo/ from a Makefile.in file. +\todo DOCS: add template addition to configure.in for storage module addition. + +\par + configure will take a list of storage types through the + --enable-store-io parameter. This parameter takes a list of + space-separated storage types. For example, + --enable-store-io="ufs coss" . + +\par + Each storage type must create an archive file + in src/fs/foo/.a . This file is automatically linked into + squid at compile time. + +\par + Each storefs must export a function named storeFsSetup_foo(). + This function is called at runtime to initialise each storage type. + The list of storage types is passed through store_modules.sh + to generate the initialisation function storeFsSetup(). This + function lives in store_modules.c. +\todo DOCS: find out what has replaced storeFsSetup() and store_modules.c + + +\par Example of the automatically generated file: + automatically generated by ./store_modules.sh ufs coss do not edit +\code + #include "squid.h" + + extern STSETUP storeFsSetup_ufs; + extern STSETUP storeFsSetup_coss; + void storeFsSetup(void) + { + storeFsAdd("ufs", storeFsSetup_ufs); + storeFsAdd("coss", storeFsSetup_coss); + } +\endcode + + +\section InitStorageType Initialization of a storage type +\par + Each storage type initializes through the storeFsSetup_foo() + function. The storeFsSetup_foo() function takes a single + argument - a storefs_entry_t pointer. This pointer references + the storefs_entry to initialise. 
A typical setup function is as + follows: +\code + void + storeFsSetup_foo(storefs_entry_t *storefs) + { + assert(!foo_initialised); + storefs->parsefunc = storeFooDirParse; + storefs->reconfigurefunc = storeFooDirReconfigure; + storefs->donefunc = storeFooDirDone; + foo_state_pool = memPoolCreate("FOO IO State data", sizeof(foostate_t)); + foo_initialised = 1; + } +\endcode + +\par + There are five function pointers in the storefs_entry which require + initializing. In this example, some protection is made against the + setup function being called twice, and a memory pool is initialised + for use inside the storage module. + +\par + Each function will be covered below. + + +\subsection done done +\par +\code + typedef void + STFSSHUTDOWN(void); +\endcode + +\par + This function is called whenever the storage system is to be shut down. + It should take care of deallocating any resources currently allocated. +\code + typedef void STFSPARSE(SwapDir *SD, int index, char *path); + typedef void STFSRECONFIGURE(SwapDir *SD, int index, char *path); +\endcode + +\par + These functions handle configuring and reconfiguring a storage + directory. Additional arguments from the cache_dir configuration + line can be retrieved through calls to strtok() and GetInteger(). + +\par STFSPARSE + has the task of initialising a new swapdir. It should + parse the remaining arguments on the cache_dir line, initialise the + relevant function pointers and data structures, and choose the + replacement policy. STFSRECONFIGURE deals with reconfiguring an + active swapdir. It should parse the remaining arguments on the + cache_dir line and change any active configuration parameters. The + actual storage initialisation is done through the STINIT function + pointer in the SwapDir. + +\par +\code + struct _SwapDir { + char *type; // Pointer to the store dir type string + int cur_size; // Current swapsize in kb + int low_size; // ?? 
+ int max_size; // Maximum swapsize in kb + char *path; // Path to store + int index; // This entry's index into the swapDir array + int suggest; // Suggestion for UFS style stores (??) + size_t max_objsize; // Maximum object size for this store + union { // Replacement policy-specific fields + #ifdef HEAP_REPLACEMENT + struct { + heap *heap; + } heap; + #endif + struct { + dlink_list list; + dlink_node *walker; + } lru; + } repl; + int removals; + int scanned; + struct { + unsigned int selected:1; // Currently selected for write + unsigned int read_only:1; // This store is read only + } flags; + STINIT *init; // Initialise the fs + STNEWFS *newfs; // Create a new fs + STDUMP *dump; // Dump fs config snippet + STFREE *freefs; // Free the fs data + STDBLCHECK *dblcheck; // Double check the obj integrity + STSTATFS *statfs; // Dump fs statistics + STMAINTAINFS *maintainfs; // Replacement maintainence + STCHECKOBJ *checkob; // Check if the fs will store an object, and get the FS load + // These two are notifications + STREFOBJ *refobj; // Reference this object + STUNREFOBJ *unrefobj; // Unreference this object + STCALLBACK *callback; // Handle pending callbacks + STSYNC *sync; // Sync the directory + struct { + STOBJCREATE *create; // Create a new object + STOBJOPEN *open; // Open an existing object + STOBJCLOSE *close; // Close an open object + STOBJREAD *read; // Read from an open object + STOBJWRITE *write; // Write to a created object + STOBJUNLINK *unlink; // Remove the given object + } obj; + struct { + STLOGOPEN *open; // Open the log + STLOGCLOSE *close; // Close the log + STLOGWRITE *write; // Write to the log + struct { + STLOGCLEANOPEN *open; // Open a clean log + STLOGCLEANWRITE *write; // Write to the log + void *state; // Current state + } clean; + } log; + void *fsdata; // FS-specific data + }; +\endcode + +\section OperationOfStorageModules Operation of a Storage Module +\par + Squid understands the concept of multiple diverse storage directories. 
+ Each storage directory provides a caching object store, with object + storage, retrieval, indexing and replacement. + +\par + Each open object has associated with it a storeIOState object. The + storeIOState object is used to record the state of the current + object. Each storeIOState can have a storage module specific data + structure containing information private to the storage module. + +\par +\code + struct _storeIOState { + sdirno swap_dirn; // SwapDir index + sfileno swap_filen; // Unique file index number + StoreEntry *e; // Pointer to parent StoreEntry + mode_t mode; // Mode - O_RDONLY or O_WRONLY + size_t st_size; // Size of the object if known + off_t offset; // current _on-disk_ offset pointer + STFNCB *file_callback; // called on delayed sfileno assignments + STIOCB *callback; // IO Error handler callback + void *callback_data; // IO Error handler callback data + struct { + STRCB *callback; // Read completion callback + void *callback_data; // Read completion callback data + } read; + struct { + unsigned int closing:1; // debugging aid + } flags; + void *fsstate; // pointer to private fs state + }; +\endcode + +\par + Each SwapDir has the concept of a maximum object size. This is used + as a basic hint to the storage layer in first choosing a suitable + SwapDir. The checkobj function is then called for suitable + candidate SwapDirs to find out whether it wants to store a + given StoreEntry. A maxobjsize of -1 means 'any size'. + +\par + The specific filesystem operations listed in the SwapDir object are + covered below. + +\subsection initfs initfs +\code + typedef void STINIT(SwapDir *SD); +\endcode + +\par + Initialise the given SwapDir. Operations such as verifying and + rebuilding the storage and creating any needed bitmaps are done + here. + + +\subsection newfs newfs +\code + typedef void STNEWFS(SwapDir *SD); +\endcode + +\par + Called for each configured SwapDir to perform filesystem + initialisation. 
This happens when '-z' is given to squid on the + command line. + +\subsection dumpfs dumpfs +\code + typedef void STDUMP(StoreEntry *e, SwapDir *SD); +\endcode + +\par + Dump the FS specific configuration data of the current SwapDir + to the given StoreEntry. Used to grab a configuration file dump + from the cachemgr interface. + +\remark Note: The printed options should start with a space character to + separate them from the cache_dir path. + +\subsection freefs freefs +\code + typedef void STFREE(SwapDir *SD); +\endcode + +\par + Free the SwapDir filesystem information. This routine should + deallocate SD->fsdata. + + +\subsection doublecheckfs doublecheckfs +\code + typedef int STDBLCHECK(SwapDir *SD, StoreEntry *e); +\endcode + +\par + Double-check the given object for validity. Called during rebuild if + the '-S' flag is given to squid on the command line. Returns 1 if the + object is indeed valid, and 0 if the object is found invalid. + +\subsection statfs statfs +\code + typedef void STSTATFS(SwapDir *SD, StoreEntry *e); +\endcode + +\par + Called to retrieve filesystem statistics, such as usage, load and + errors. The information should be appended to the passed + StoreEntry e. + +\subsection maintainfs maintainfs +\code + typedef void STMAINTAINFS(SwapDir *SD); +\endcode + +\par + Called periodically to replace objects. The active replacement policy + should be used to timeout unused objects in order to make room for + new objects. + +\subsection callback callback +\code + typedef void + STCALLBACK(SwapDir *SD); +\endcode + +\par + This function is called inside the comm_select/comm_poll loop to handle + any callbacks pending. + +\subsection sync sync +\code + typedef void + STSYNC(SwapDir *SD); +\endcode + +\par + This function is called whenever a sync to disk is required. This + function should not return until all pending data has been flushed to + disk. 
+ + +\subsection parse-reconfigure parse/reconfigure + +\subsection checkobj checkobj +\code + typedef int + STCHECKOBJ(SwapDir *SD, const StoreEntry *e); +\endcode + +\par + Called by storeDirSelectSwapDir() to determine whether the + SwapDir will store the given StoreEntry object. If the + SwapDir is not willing to store the given StoreEntry + -1 should be returned. Otherwise, a value between 0 and 1000 should + be returned indicating the current IO load. A value of 1000 indicates + the SwapDir has an IO load of 100%. This is used by + storeDirSelectSwapDir() to choose the SwapDir with the + lowest IO load. + +\subsection referenceobj referenceobj +\code + typedef void + STREFOBJ(SwapDir *SD, StoreEntry *e); +\endcode + +\par + Called whenever an object is locked by storeLockObject(). + It is typically used to update the object's position in the replacement + policy. + +\subsection unreferenceobj unreferenceobj +\code + typedef void + STUNREFOBJ(SwapDir *SD, StoreEntry *e); +\endcode + +\par + Called whenever the object is unlocked by storeUnlockObject() + and the lock count reaches 0. It is also typically used to update the + object's position in the replacement policy. + +\subsection createobj createobj +\code + typedef storeIOState * + STOBJCREATE(SwapDir *SD, StoreEntry *e, STFNCB *file_callback, STIOCB *io_callback, void *io_callback_data); +\endcode + +\par + Create an object in the SwapDir *SD. file_callback is called + whenever the filesystem allocates or reallocates the swap_filen. + Note - STFNCB is called with a generic cbdata pointer, which + points to the StoreEntry e. The StoreEntry should not be + modified EXCEPT for the replacement policy fields. + +\par + The IO callback should be called when an error occurs and when the + object is closed. Once the IO callback is called, the storeIOState + becomes invalid. + +\par + STOBJCREATE returns a storeIOState suitable for writing on + success, or NULL if an error occurs. 
+ +\subsection openobj openobj +\code + typedef storeIOState * + STOBJOPEN(SwapDir *SD, StoreEntry *e, STFNCB *file_callback, STIOCB *io_callback, void *io_callback_data); +\endcode + +\par + Open the StoreEntry in SwapDir *SD for reading. Much the + same is applicable from STOBJCREATE, the major difference being + that the data passed to file_callback is the relevant store_client. + +\subsection closeobj closeobj +\code + typedef void + STOBJCLOSE(SwapDir *SD, storeIOState *sio); +\endcode + +\par + Close an opened object. The STIOCB callback should be called at + the end of this routine. + +\subsection readobj readobj +\code + typedef void + STOBJREAD(SwapDir *SD, storeIOState *sio, char *buf, size_t size, off_t offset, STRCB *read_callback, void *read_callback_data); +\endcode + +\par + Read part of the object into buf. It is safe to request a read + when there are other pending reads or writes. STRCB is called at + completion. + +\par + If a read operation fails, the filesystem layer notifies the + calling module by calling the STIOCB callback with an + error status code. + +\subsection writeobj writeobj +\code + typedef void + STOBJWRITE(SwapDir *SD, storeIOState *sio, char *buf, size_t size, off_t offset, FREE *freefunc); +\endcode + +\par + Write the given block of data to the given store object. buf is + allocated by the caller. When the write is complete, the data is freed + through free_func. + +\par + If a write operation fails, the filesystem layer notifies the + calling module by calling the STIOCB callback with an + error status code. + +\subsection unlinkobj unlinkobj +\code + typedef void STOBJUNLINK(SwapDir *SD, StoreEntry *e); +\endcode + +\par + Remove the StoreEntry e from the SwapDir SD and the + replacement policy. + + +\section StoreIOCalls Store IO calls +\par + These routines are used inside the storage manager to create and + retrieve objects from a storage directory. 
+ +\subsection storeCreate storeCreate() +\code + storeIOState * + storeCreate(StoreEntry *e, STIOCB *file_callback, STIOCB *close_callback, void * callback_data) +\endcode + +\par + storeCreate is called to store the given StoreEntry in + a storage directory. + +\par + callback is a function that will be called either when + an error is encountered, or when the object is closed (by + calling storeClose()). If the open request is + successful, there is no callback. The calling module must + assume the open request will succeed, and may begin reading + or writing immediately. + +\par + storeCreate() may return NULL if the requested object + can not be created. In this case the callback function + will not be called. + +\subsection storeOpen storeOpen() +\code + storeIOState * + storeOpen(StoreEntry *e, STFNCB * file_callback, STIOCB * callback, void *callback_data) +\endcode + +\par + storeOpen is called to open the given StoreEntry from + the storage directory it resides on. + +\par + callback is a function that will be called either when + an error is encountered, or when the object is closed (by + calling storeClose()). If the open request is + successful, there is no callback. The calling module must + assume the open request will succeed, and may begin reading + or writing immediately. + +\par + storeOpen() may return NULL if the requested object + can not be opened. In this case the callback function + will not be called. + +\subsection storeRead storeRead() +\code + void + storeRead(storeIOState *sio, char *buf, size_t size, off_t offset, STRCB *callback, void *callback_data) +\endcode + +\par + storeRead() is more complicated than the other functions + because it requires its own callback function to notify the + caller when the requested data has actually been read. + buf must be a valid memory buffer of at least size + bytes. offset specifies the byte offset where the + read should begin. 
Note that with the Swap Meta Headers + prepended to each cache object, this offset does not equal + the offset into the actual object data. + +\par + The caller is responsible for allocating and freeing buf. + +\subsection storeWrite storeWrite() +\code + void + storeWrite(storeIOState *sio, char *buf, size_t size, off_t offset, FREE *free_func) +\endcode + +\par + storeWrite() submits a request to write a block + of data to the disk store. + The caller is responsible for allocating buf, but since + there is no per-write callback, this memory must be freed by + the lower filesystem implementation. Therefore, the caller + must specify the free_func to be used to deallocate + the memory. + +\par + If a write operation fails, the filesystem layer notifies the + calling module by calling the STIOCB callback with an + error status code. + +\subsection storeUnlink storeUnlink() +\code + void + storeUnlink(StoreEntry *e) +\endcode + +\par + storeUnlink() removes the cached object from the disk + store. There is no callback function, and the object + does not need to be opened first. The filesystem + layer will remove the object if it exists on the disk. + +\subsection storeOfset storeOffset() +\code + off_t storeOffset(storeIOState *sio) +\endcode + +\par + storeOffset() returns the current _ondisk_ offset. This is used to + determine how much of an object's memory can be freed to make way for + other in-transit and cached objects. You must make sure that the + storeIOState->offset refers to the ondisk offset, or undefined + results will occur. For reads, this returns the current offset of + successfully read data, not including queued reads. + + +\section Callbacks Callbacks + +\subsection STIOCB STIOCB callback +\code + void + stiocb(void *data, int errorflag, storeIOState *sio) +\endcode + +\par + The stiocb function is passed as a parameter to + storeOpen(). The filesystem layer calls stiocb + either when an I/O error occurs, or when the disk + object is closed. 
+ +\par + errorflag is one of the following: +\code + #define DISK_OK (0) + #define DISK_ERROR (-1) + #define DISK_EOF (-2) + #define DISK_NO_SPACE_LEFT (-6) +\endcode + +\par + Once the stiocb function has been called, + the sio structure should not be accessed further. + +\subsection STRCB STRCB callback +\code + void + strcb(void *data, const char *buf, size_t len) +\endcode + +\par + The strcb function is passed as a parameter to + storeRead(). The filesystem layer calls strcb + after a block of data has been read from the disk and placed + into buf. len indicates how many bytes were + placed into buf. The strcb function is only + called if the read operation is successful. If it fails, + then the STIOCB callback will be called instead. + + +\section StateLogging State Logging + +\par + These functions deal with state + logging and related tasks for a squid storage system. + These functions are used (called) in store_dir.c. + +\par + Each storage system must provide the functions described + in this section, although it may be a no-op (null) function + that does nothing. Each function is accessed through a + function pointer stored in the SwapDir structure: +\code + struct _SwapDir { + ... + STINIT *init; + STNEWFS *newfs; + struct { + STLOGOPEN *open; + STLOGCLOSE *close; + STLOGWRITE *write; + struct { + STLOGCLEANOPEN *open; + STLOGCLEANWRITE *write; + void *state; + } clean; + } log; + .... + }; +\endcode + +\subsection log.open log.open() +\code + void + STLOGOPEN(SwapDir *); +\endcode + +\par + The log.open() function, of type STLOGOPEN, + is used to open or initialize the state-holding log + files (if any) for the storage system. For UFS this + opens the swap.state files. + +\par + The log.open() function may be called any number of + times during Squid's execution. For example, the + process of rotating, or writing clean logfiles closes + the state log and then re-opens them. A squid -k reconfigure + does the same. 
+ +\subsection log.close log.close() +\code + void + STLOGCLOSE(SwapDir *); +\endcode + +\par + The log.close function, of type STLOGCLOSE, is + obviously the counterpart to log.open. It must close + the open state-holding log files (if any) for the storage + system. + +\subsection log.write log.write() +\code + void + STLOGWRITE(const SwapDir *, const StoreEntry *, int op); +\endcode + +\par + The log.write function, of type STLOGWRITE, is + used to write an entry to the state-holding log file. The + op argument is either SWAP_LOG_ADD or SWAP_LOG_DEL. + This feature may not be required by some storage systems + and can be implemented as a null-function (no-op). + +\subsection log.clean.start log.clean.start() +\code + int + STLOGCLEANSTART(SwapDir *); +\endcode + +\par + The log.clean.start function, of type STLOGCLEANSTART, + is used for the process of writing "clean" state-holding + log files. The clean-writing procedure is initiated by + the squid -k rotate command. This is a special case + because we want to optimize the process as much as possible. + This might be a no-op for some storage systems that don't + have the same logging issues as UFS. + +\par + The log.clean.state pointer may be used to + keep state information for the clean-writing process, but + should not be accessed by upper layers. + +\subsection log.clean.nextentry log.clean.nextentry() +\code + StoreEntry * + STLOGCLEANNEXTENTRY(SwapDir *); +\endcode + +\par + Gets the next entry that is a candidate for the clean log. + Returns NULL when there are no more objects to log. + +\subsection log.clean.write log.clean.write() +\code + void + STLOGCLEANWRITE(SwapDir *, const StoreEntry *); +\endcode + +\par + The log.clean.write() function, of type STLOGCLEANWRITE, + writes an entry to the clean log file (if any). 
+ +\subsection log.clean.done log.clean.done() +\code + void + STLOGCLEANDONE(SwapDir *); +\endcode + +\par + Indicates the end of the clean-writing process and signals + the storage system to close the clean log, and rename or + move it to become the official state-holding log ready + to be opened. + + */ Index: squid3/doc/Programming-Guide/12b_RemovalPolicy.dox diff -u /dev/null squid3/doc/Programming-Guide/12b_RemovalPolicy.dox:1.1.2.1 --- /dev/null Thu Jan 1 01:00:00 1970 +++ squid3/doc/Programming-Guide/12b_RemovalPolicy.dox Wed Jul 11 17:27:59 2007 @@ -0,0 +1,305 @@ +/** +\page 12b_RemovalPolicy Replacement Policy Implementation + +\par +The replacement policy can be updated during STOBJREAD/STOBJWRITE/STOBJOPEN/ +STOBJCLOSE as well as STREFOBJ and STUNREFOBJ. Care should be taken to +only modify the relevant replacement policy entries in the StoreEntry. +The responsibility of replacement policy maintenance has been moved into +each SwapDir so that the storage code can have tight control of the +replacement policy. Cyclic filesystems such as COSS require this tight +coupling between the storage layer and the replacement policy. + +\section RemovalPolicyAPI Removal policy API + +\par + The removal policy is responsible for determining in which order + objects are deleted when Squid needs to reclaim space for new objects. + Such a policy is used by an object storage for maintaining the stored + objects and determining what to remove to reclaim space for new objects. + (Together they implement a replacement policy.) + +\par + It is implemented as a modular API where a storage directory or + memory creates a policy of choice for maintaining its objects, + and modules register to be used by this API. 
+ +\subsection createRemovalPolicy createRemovalPolicy() + +\code + RemovalPolicy policy = createRemovalPolicy(const char *type, const char *args) +\endcode + +\par + Creates a removal policy instance where object priority can be + maintained. + +\par + The returned RemovalPolicy instance is cbdata registered. + +\subsection policy.free policy.Free() + +\code + policy->Free(RemovalPolicy *policy) +\endcode + +\par + Destroys the policy instance and frees all related memory. + +\subsection policy.Add policy.Add() + +\code + policy->Add(RemovalPolicy *policy, StoreEntry *, RemovalPolicyNode *node) +\endcode + +\par + Adds a StoreEntry to the policy instance. + +\par + node is a pointer to where policy specific data can be stored + for the store entry, currently the size of one (void *) pointer. + +\subsection policy.Remove policy.Remove() +\code + policy->Remove(RemovalPolicy *policy, StoreEntry *, RemovalPolicyNode *node) +\endcode + +\par + Removes a StoreEntry from the policy instance out of + policy order. For example when an object is replaced + by a newer one or is manually purged from the store. + +\par + node is a pointer to where policy specific data is stored + for the store entry, currently the size of one (void *) pointer. + +\subsection policy.Referenced policy.Referenced() +\code + policy->Referenced(RemovalPolicy *policy, const StoreEntry *, RemovalPolicyNode *node) +\endcode + +\par + Tells the policy that a StoreEntry is going to be referenced. Called + whenever an entry gets locked. + +\par + node is a pointer to where policy specific data is stored + for the store entry, currently the size of one (void *) pointer. + +\subsection policy.Dereferenced policy.Dereferenced() +\code + policy->Dereferenced(RemovalPolicy *policy, const StoreEntry *, RemovalPolicyNode *node) +\endcode + +\par + Tells the policy that a StoreEntry has been referenced. Called when + an access to the entry has finished. 
+ +\par + node is a pointer to where policy specific data is stored + for the store entry, currently the size of one (void *) pointer. + +\subsection policy.WalkInit policy.WalkInit() +\code + RemovalPolicyWalker walker = policy->WalkInit(RemovalPolicy *policy) +\endcode + +\par + Initiates a walk of all objects in the policy instance. + The objects are returned in an order suitable for using + as reinsertion order when rebuilding the policy. + +\par + The returned RemovalPolicyWalker instance is cbdata registered. + +\note The walk must be performed as an atomic operation + with no other policy actions intervening, or the outcome + will be undefined. + +\subsection walker.Next walker.Next() +\code + const StoreEntry *entry = walker->Next(RemovalPolicyWalker *walker) +\endcode + +\par + Gets the next object in the walk chain. + +\par + Returns NULL when there are no further objects. + +\subsection walker.Done walker.Done() +\code + walker->Done(RemovalPolicyWalker *walker) +\endcode + +\par + Finishes a walk of the maintained objects, destroys + walker. + +\subsection policy.PurgeInit policy.PurgeInit() +\code + RemovalPurgeWalker purgewalker = policy->PurgeInit(RemovalPolicy *policy, int max_scan) +\endcode + +\par + Initiates a search for removal candidates. Search depth is indicated + by max_scan. + +\par + The returned RemovalPurgeWalker instance is cbdata registered. + +\note The walk must be performed as an atomic operation + with no other policy actions intervening, or the outcome + will be undefined. + +\subsection purgewalker.Next purgewalker.Next() +\code + StoreEntry *entry = purgewalker->Next(RemovalPurgeWalker *purgewalker) +\endcode + +\par + Gets the next object to purge. The purgewalker will remove each + returned object from the policy. + +\par + It is the policy's responsibility to verify that the object + isn't locked or otherwise prevented from being removed. 
What this + means is that the policy must not return objects where + storeEntryLocked() is true. + +\par + Returns NULL when there are no further purgeable objects in the policy. + +\subsection purgewalker.Done purgewalker.Done() + +\code + purgewalker->Done(RemovalPurgeWalker *purgewalker) +\endcode + +\par + Finishes a walk of the maintained objects, destroys + walker and restores the policy to its normal state. + +\subsection policy.Stats policy.Stats() + +\code + policy->Stats(RemovalPolicy *policy, StoreEntry *entry) +\endcode + +\par + Appends statistics about the policy to the given entry. + +\section SourceLayout Source layout + +\par + Policy implementations reside in src/repl/<name>/, and a make in + such a directory must result in an object archive src/repl/<name>.a + containing all the objects implementing the policy. + +\section InternalStructures Internal structures + +\subsection RemovalPolicy RemovalPolicy + +\code + typedef struct _RemovalPolicy RemovalPolicy; + struct _RemovalPolicy { + char *_type; + void *_data; + void (*add)(RemovalPolicy *policy, StoreEntry *); + ... // see the API definition above + }; +\endcode + +\par + The _type member is mainly for debugging and diagnostics purposes, and + should be a pointer to the name of the policy (same name as used for + creation). + +\par + The _data member is for storing policy specific information. + +\subsection RemovalPolicyWalker RemovalPolicyWalker + +\code + typedef struct _RemovalPolicyWalker RemovalPolicyWalker; + struct _RemovalPolicyWalker { + RemovalPolicy *_policy; + void *_data; + StoreEntry *(*next)(RemovalPolicyWalker *); + ... // see the API definition above + }; +\endcode + + +\subsection RemovalPolicyNode _RemovalPolicyNode + +\code + typedef struct _RemovalPolicyNode RemovalPolicyNode; + struct _RemovalPolicyNode { + void *data; + }; +\endcode + +\par + Stores policy specific information about an entry. 
Currently + there is only space for a single pointer, but plans are to + maybe later provide more space here to allow simple policies + to store all their data "inline" to preserve some memory. + +\section PolicyRegistration Policy Registration + +\par + Policies are automatically registered in the Squid binary from the + policy selection made by the user building Squid. In the future this + might get extended to support loadable modules. All registered + policies are available to object stores which wish to use them. + +\section PolicyInstanceCreation Policy instance creation + +\par + Each policy must implement a "create/new" function "RemovalPolicy * + createRemovalPolicy_<name>(char *arguments)". This function + creates the policy instance and populates it with at least the API + methods supported. Currently all API calls are mandatory, but the + policy implementation must make sure to NULL fill the structure prior + to populating it in order to assure future API compatibility. + +\par + It should also populate the _data member with a pointer to policy + specific data. + +\section Walker Walker + +\par + When a walker is created the policy populates it with at least the API + methods supported. Currently all API calls are mandatory, but the + policy implementation must make sure to NULL fill the structure prior + to populating it in order to assure future API compatibility. + +\section DesignNotes Design notes/bugs + +\par + The RemovalPolicyNode design is incomplete/insufficient. The intention + was to abstract the location of the index pointers from the policy + implementation to allow the policy to work on both on-disk and memory + caches, but unfortunately the purge method for HEAP based policies + needs to update this, and it is also preferable if the purge method + in general knows how to clear the information. I think the agreement + was that the current design of tightly coupling the two together + on one StoreEntry is not the best design possible. 
+ +\par + It is debated if the design of having the policy index control the + clean index writes is the correct approach. Perhaps not. Perhaps a + more appropriate design is to do the store indexing + completely outside the policy implementation (i.e. using the hash + index), and only ask the policy to dump its state somehow. + +\par + The Referenced/Dereferenced() calls are today mapped to lock/unlock + which is an approximation of when they are intended to be called. + However, the real intention is to have Referenced() called whenever + an object is referenced, and Dereferenced() only called when the + object has actually been used for anything good. + + */ Index: squid3/doc/Programming-Guide/13_ForwardingSelection.dox diff -u /dev/null squid3/doc/Programming-Guide/13_ForwardingSelection.dox:1.1.2.1 --- /dev/null Thu Jan 1 01:00:00 1970 +++ squid3/doc/Programming-Guide/13_ForwardingSelection.dox Tue Jul 10 22:32:06 2007 @@ -0,0 +1,8 @@ +/** +\page 13_ForwardingSelection Forwarding Selection + +\section Infrastructure Infrastructure + +\todo Write documentation about Forwarding Selection + + */ Index: squid3/doc/Programming-Guide/24_RefCountDataAllocator.dox diff -u /dev/null squid3/doc/Programming-Guide/24_RefCountDataAllocator.dox:1.1.2.2 --- /dev/null Thu Jan 1 01:00:00 1970 +++ squid3/doc/Programming-Guide/24_RefCountDataAllocator.dox Wed Jul 11 16:35:00 2007 @@ -0,0 +1,151 @@ +/** +\page 24_RefCountDataAllocator Reference Counting Data Allocator (C++ Only) + +\note This is only available in Squid 3.x C++ code. + +\par + Manual reference counting such as cbdata uses is error prone, + and time consuming for the programmer. C++'s operator overloading + allows us to create automatic reference counting pointers, that will + free objects when they are no longer needed. With some care these + objects can be passed to functions needing Callback Data pointers. 
+ +\section API API +\par + There are two classes involved in the automatic refcounting - a + RefCountable class that provides the mechanics for reference + counting a given derived class, and a RefCount class that is the + smart pointer, and handles const correctness, and tells the RefCountable + class of references and dereferences. + +\subsection RefCountable RefCountable +\par + The RefCountable base class defines one abstract function - + deleteSelf(). You must implement deleteSelf for each concrete + class. deleteSelf() is a workaround for 'operator delete' not + being virtual. deleteSelf typically looks like: +\code + void deleteSelf() const { delete this; } +\endcode + +\subsection RefCount RefCount +\par + The RefCount template class replaces pointers as parameters and + variables of the class being reference counted. Typically one creates + a typedef to aid users. + +\code + class MyConcrete : public RefCountable { + public: + typedef RefCount<MyConcrete> Pointer; + void deleteSelf() const {delete this;} + }; +\endcode + Now, one can pass objects of MyConcrete::Pointer around. + +\subsection CBDATA CBDATA +\par + To make a refcounting CBDATA class, you need to overload new and delete, + include a macro in your class definition, and ensure that everyone + who would call you directly (not as a cbdata callback, but as a normal + use), holds a RefCount<> smart pointer to you. + +\code + class MyConcrete : public RefCountable { + public: + typedef RefCount<MyConcrete> Pointer; + void * operator new(size_t); + void operator delete (void *); + void deleteSelf() const {delete this;} + private: + CBDATA_CLASS(MyConcrete); + }; + + ... 
+ // In your .cc file + CBDATA_CLASS_INIT(MyConcrete); + + void * + MyConcrete::operator new (size_t) + { + CBDATA_INIT_TYPE(MyConcrete); + MyConcrete *result = cbdataAlloc(MyConcrete); + // Mark result as being owned - we want the refcounter to do the delete call + cbdataReference(result); + return result; + } + + void + MyConcrete::operator delete (void *address) + { + MyConcrete *t = static_cast<MyConcrete *>(address); + cbdataFree(address); + // And allow the memory to be freed + cbdataReferenceDone (t); + } +\endcode + +\par + When no RefCount<MyConcrete> smart pointers exist, the object's + delete method will be called. This will run the object destructor, + freeing any foreign resources it holds. Then cbdataFree + will be called, marking the object as invalid for all the cbdata + functions that it may have queued. When they all return, the actual + memory will be returned to the pool. + +\subsection UsingRefCounter Using the Refcounter +\par + Allocation and deallocation of refcounted objects (including those of + the RefCount template class) must be done via new() and delete(). If a + class that will hold an instance of a RefCount <foo> variable + does not use delete(), you must assign NULL to the variable before + it is freed. Failure to do this will result in memory leaks. You HAVE + been warned. + +\par + Never call delete or deleteSelf on a RefCountable object. You will + create a large number of dangling references and squid will segfault + eventually. + +\par + Always create at least one RefCount smart pointer, so that the + reference counting mechanism will delete the object when it's not + needed. + +\par + Do not pass RefCount smart pointers outside the squid memory space. + They will invariably segfault when copied. + +\par + If, in a method, all other smart pointer holding objects may be deleted + or may set their smart pointers to NULL, then you will be deleted + partway through the method (and thus crash). 
To prevent this, assign + a smart pointer to yourself: + +\code + void + MyConcrete::aMethod(){ + // This holds a reference to us + Pointer aPointer(this); + // This is a method that may mean we don't need to exist anymore + someObject->someMethod(); + // This prevents aPointer being optimised away before this point, + // and must be the last line in our method + aPointer = NULL; + } +\endcode + +\par + Calling methods via smart pointers is easy: just dereference via -> +\code + void + SomeObject::someFunction() { + myConcretePointer->someOtherMethod(); + } +\endcode + +\par + When passing RefCount smart pointers, always pass them as their + native type, never as '*' or as '&'. + + */ Index: squid3/doc/Programming-Guide/26_HTTPHeaders.dox diff -u /dev/null squid3/doc/Programming-Guide/26_HTTPHeaders.dox:1.1.2.2 --- /dev/null Thu Jan 1 01:00:00 1970 +++ squid3/doc/Programming-Guide/26_HTTPHeaders.dox Wed Jul 11 16:35:00 2007 @@ -0,0 +1,235 @@ +/** +\page 26_HTTPHeaders HTTP Headers + +\par Files: + \li HttpHeader.c + \li HttpHeaderTools.c + \li HttpHdrCc.c + \li HttpHdrContRange.c + \li HttpHdrExtField.c + \li HttpHdrRange.c + +\par + The HttpHeader class encapsulates methods and data for HTTP header + manipulation. HttpHeader can be viewed as a collection of HTTP + header-fields with such common operations as add, delete, and find. + Compared to an ascii "string" representation, HttpHeader performs + those operations without rebuilding the underlying structures from + scratch or searching through the entire "string". + +\section General General remarks +\par + HttpHeader is a collection (or array) of HTTP header-fields. A header + field is represented by an HttpHeaderEntry object. HttpHeaderEntry is + an (id, name, value) triplet. Meaningful "Id"s are defined for + "well-known" header-fields like "Connection" or "Content-Length". + When Squid fails to recognize a field, it uses a special "id", + HDR_OTHER. 
Ids are formed by capitalizing the corresponding HTTP + header-field name and replacing dashes ('-') with underscores ('_'). +\par + Most operations on HttpHeader require a "known" id as a parameter. The + rationale behind this restriction is that a Squid programmer should + operate on "known" fields only. If a new field is being added to + header processing, it must be given an id. + +\section LifeCycle Life cycle +\par + HttpHeader follows a common pattern for object initialization and + cleaning: + +\code + // declare + HttpHeader hdr; + + // initialize (as an HTTP Request header) + httpHeaderInit(&hdr, hoRequest); + + // do something + ... + + // cleanup + httpHeaderClean(&hdr); +\endcode + +\par + Prior to use, an HttpHeader must be initialized. A + programmer must specify if a header belongs to a request + or reply message. The "ownership" information is used mostly + for statistical purposes. + +\par + Once initialized, the HttpHeader object must be, + eventually, cleaned. Failure to do so will result in a + memory leak. + +\par + Note that there are no methods for "creating" or "destroying" + a "dynamic" HttpHeader object. It appears that headers are + always stored as a part of another object or as a temporary + variable. Thus, dynamic allocation of headers is not needed. + +\section HeaderManipulation Header Manipulation +\par + The most common operations on HTTP headers are testing + for a particular header-field (httpHeaderHas()), + extracting field-values (httpHeaderGet*()), and adding + new fields (httpHeaderPut*()). + +\par + httpHeaderHas(hdr, id) returns true if at least one + header-field specified by "id" is present in the header. + Note that using HDR_OTHER as an id is prohibited. + There is usually no reason to know if there are "other" + header-fields in a header. + +\par + httpHeaderGet<Type>(hdr, id) returns the value + of the specified header-field. The "Type" must match + header-field type. 
If a header is not present a "null" + value is returned. "Null" values depend on field-type, of + course. + +\par + Special care must be taken when several header-fields with + the same id are present in the header. If HTTP protocol + allows only one copy of the specified field per header + (e.g. "Content-Length"), httpHeaderGet<Type>() + will return one of the field-values (chosen semi-randomly). + If HTTP protocol allows for several values (e.g. "Accept"), + a "String List" will be returned. + +\par + It is prohibited to ask for a List of values when only one + value is permitted, and vice versa. This restriction prevents + a programmer from processing one value of a header-field + while ignoring other valid values. + +\par + httpHeaderPut<Type>(hdr, id, value) will add a + header-field with a specified field-name (based on "id") + and field-value. The location of the newly added field in + the header array is undefined, but it is guaranteed to be + after all fields with the same "id" if any. Note that old + header-fields with the same id (if any) are not altered in + any way. + +\par + The value being put using one of the httpHeaderPut() + methods is converted to and stored as a String object. + +\par Example: + Add our own Age: field if none was added before +\code + int age = ... + if (!httpHeaderHas(hdr, HDR_AGE)) + httpHeaderPutInt(hdr, HDR_AGE, age); +\endcode + +\par + There are two ways to delete a field from a header. To + delete a "known" field (a field with "id" other than + HDR_OTHER), use httpHeaderDelById() function. + Sometimes, it is convenient to delete all fields with a + given name ("known" or not) using httpHeaderDelByName() + method. Both methods will delete ALL fields specified. + +\par + The httpHeaderGetEntry(hdr, pos) function can be used + for iterating through all fields in a given header. Iteration + is controlled by the pos parameter. Thus, several + concurrent iterations over one hdr are possible. It + is also safe to delete/add fields from/to hdr while + iteration is in progress. + +\code + // delete all fields with a given name + HttpHeaderPos pos = HttpHeaderInitPos; + HttpHeaderEntry *e; + while ((e = httpHeaderGetEntry(hdr, &pos))) { + if (!strCaseCmp(e->name, name)) + ... 
// delete entry + } +\endcode + +\note httpHeaderGetEntry() is a low level function + and must not be used if high level alternatives are available. + For example, to delete an entry with a given name, use the + httpHeaderDelByName() function rather than the loop + above. + +\section HeaderIO I/O and Headers +\par + To store a header in a file or socket, pack it using + httpHeaderPackInto() method and a corresponding + "Packer". Note that httpHeaderPackInto will pack only + header-fields; request-lines and status-lines are not + prepended, and CRLF is not appended. Remember that neither + of them is a part of HTTP message header as defined by the + HTTP protocol. + +\section AddingNewHeaderFieldIDs Adding new header-field ids +\par + Adding new ids is simple. First add a new HDR_ entry to the + http_hdr_type enumeration in enums.h. Then describe the new + header-field's attributes in the HeadersAttrs array located + in HttpHeader.c. The last attribute specifies field + type. Six types are supported: integer (ftInt), string + (ftStr), date in RFC 1123 format (ftDate_1123), + cache control field (ftPCc), range field (ftPRange), + and content range field (ftPContRange). Squid uses + type information to convert internal binary representation + of fields to their string representation (httpHeaderPut + functions) and vice versa (httpHeaderGet functions). + +\par + Finally, add the new id to one of the following arrays: + GeneralHeadersArr, EntityHeadersArr, + ReplyHeadersArr, RequestHeadersArr. Use HTTP + specs to determine the applicable array. If your header-field + is an "extension-header", its place is in ReplyHeadersArr + and/or in RequestHeadersArr. You can also use + EntityHeadersArr for "extension-header"s that can be + used both in replies and requests. Header fields other + than "extension-header"s must go to one and only one of + the arrays mentioned above. + +\par + Also, if the new field is a "list" header, add it to the + ListHeadersArr array. 
A "list" field-header is the + one that is defined (or can be defined) using the "#" BNF + construct described in the HTTP specs. Essentially, a field + that may have more than one valid field-value in a single + header is a "list" field. + +\par + In most cases, if you forget to include a new field id in + one of the required arrays, you will get a run-time assertion. + For rarely used fields, however, it may take a long time + for an assertion to be triggered. + +\par + There is virtually no limit on the number of fields supported + by Squid. If current mask sizes cannot fit all the ids (you + will get an assertion if that happens), simply enlarge + the HttpHeaderMask type in typedefs.h. + +\section Efficiency A Word on Efficiency +\par + httpHeaderHas() is a very cheap (fast) operation + implemented using a bit mask lookup. + +\par + Adding new fields is somewhat expensive if they require + complex conversions to a string. + +\par + Deleting existing fields requires a scan of all the entries + and comparing their "id"s (faster) or "names" (slower) with + the one specified for deletion. + +\par + Most of the operations are faster than their "ascii string" + equivalents. + + */ Index: squid3/doc/Programming-Guide/27_MiscOther.dox diff -u /dev/null squid3/doc/Programming-Guide/27_MiscOther.dox:1.1.2.7 --- /dev/null Thu Jan 1 01:00:00 1970 +++ squid3/doc/Programming-Guide/27_MiscOther.dox Sun Aug 19 20:07:44 2007 @@ -0,0 +1,78 @@ +/** +\page 27_MiscOther Miscellaneous Other Details + +\section leakFinder leakFinder +\par + src/leakfinder.c contains some routines useful for debugging + and finding memory leaks. It is not enabled by default. To enable + it, use +\code + configure --enable-leakfinder ... +\endcode + +\par + The module has three public functions: leakAdd, + leakFree, and leakTouch. Note, these are actually + macros that insert __FILE__ and __LINE__ arguments to the real + functions. + +\par + leakAdd should be called when a pointer is first created. 
+ Usually this follows immediately after a call to malloc or some + other memory allocation function. For example: +\code + ... + void *p; + p = malloc(100); + leakAdd(p); + ... +\endcode + +\par + leakFree is the opposite. Call it just before releasing + the pointer memory, such as a call to free. For example: +\code + ... + leakFree(foo); + free(foo); + return; +\endcode +\note leakFree aborts with an assertion if you give it a pointer that was never added with leakAdd. + +\par + The definition of a leak is memory that was allocated but never + freed. Thus, to find a leak we need to track the pointer between + the time it got allocated and the time when it should have been + freed. Use leakTouch to accomplish this. You can sprinkle + leakTouch calls throughout the code where the pointer is + used. For example: +\code +void +myfunc(void *ptr) +{ + ... + leakTouch(ptr); + ... +} +\endcode +\note leakTouch aborts with an assertion if you give it + a pointer that was never added with leakAdd, or if the + pointer was already freed. + +\par + For each pointer tracked, the module remembers the filename, line + number, and time of last access. You can view this data with the + cache manager by selecting the leaks option. You can also + do it from the command line: +\code +% client mgr:leaks | less +\endcode + +\par + The way to identify possible leaks is to look at the time of last + access. Pointers that haven't been accessed for a long time are + candidates for leaks. The filename and line numbers tell you where + that pointer was last accessed. If there is a leak, then the bug + occurs somewhere after that point of the code. 
+ + */ Index: squid3/doc/Programming-Guide/AuthenticationFramework.dox diff -u /dev/null squid3/doc/Programming-Guide/AuthenticationFramework.dox:1.1.2.8 --- /dev/null Thu Jan 1 01:00:00 1970 +++ squid3/doc/Programming-Guide/AuthenticationFramework.dox Thu Aug 9 21:14:46 2007 @@ -0,0 +1,208 @@ +/** +\defgroup AuthAPI Authentication Framework +\ingroup Components + +\par + Squid's authentication system is responsible for reading + authentication credentials from HTTP requests and deciding + whether or not those credentials are valid. This functionality + resides in two separate components: Authentication Schemes + and Authentication Modules. + +\par + An Authentication Scheme describes how Squid gets the + credentials (i.e. username, password) from user requests. + Squid currently supports two authentication schemes: Basic + and NTLM. Basic authentication uses the WWW-Authenticate + HTTP header. The Authentication Scheme code is implemented + inside Squid itself. + +\par + An Authentication Module takes the credentials received + from a client's request and tells Squid if they + are valid. Authentication Modules are implemented + externally from Squid, as child helper processes. + Authentication Modules interface with various types of + authentication databases, such as LDAP, PAM, NCSA-style + password files, and more. + +\section AuthenticationSchemeAPI Authentication Scheme API + +\subsection DefinitionOfAuthenticationScheme Definition of an Authentication Scheme + +\par + An auth scheme in squid is the collection of functions required to + manage the authentication process for a given HTTP authentication + scheme. Existing auth schemes in squid are Basic and NTLM. Other HTTP + schemes (see for example RFC 2617) have been published and could be + implemented in squid. The terms auth scheme and auth module are + interchangeable. 
An auth module is not to be confused with an + authentication helper, which is a scheme specific external program used + by a specific scheme to perform data manipulation external to squid. + Typically this involves comparing the browser submitted credentials with + those in the organization's user directory. + +\par + Auth modules SHOULD NOT perform access control functions. Squid has + advanced caching \link ACLAPI access control \endlink functionality already. + Future work in squid will allow an auth scheme helper to return group information + for a user, to allow Squid to more seamlessly implement access control. + +\subsection Functions Function typedefs + +\todo Migrate the function typedef documentation to the relevant code positions. + +\par + Each function related to the general case of HTTP authentication has + a matching typedef. There are some additional function types used to + register/initialize, deregister/shutdown and provide stats on auth + modules: + +\par typedef void AUTHSSETUP(authscheme_entry_t *); + Functions of type AUTHSSETUP are used to register an + auth module with squid. The registration function MUST be + named "authSchemeSetup_SCHEME" where SCHEME is the auth_scheme + as defined by RFC 2617. Only one auth scheme registered in + squid can provide functionality for a given auth_scheme. + (I.e. only one auth module can handle Basic, only one can + handle Digest and so forth). The Setup function is responsible + for registering the functions in the auth module into the + passed authscheme_entry_t. The authscheme_entry_t will + never be NULL. If it is NULL the auth module should log an + error and do nothing. The other functions can have any + desired name that does not collide with any statically + linked function name within Squid. It is recommended to + use names of the form "authe_SCHEME_FUNCTIONNAME" (for + example authenticate_NTLM_Active is the Active() function + for the NTLM auth module). 
+ +\par typedef void AUTHSSHUTDOWN(void); + Functions of type AUTHSSHUTDOWN are responsible for + freeing any resources used by the auth modules. The shutdown + function will be called before squid reconfigures, and + before squid shuts down. + +\par typedef void AUTHSINIT(authScheme *); + Functions of type AUTHSINIT are responsible for allocating + any needed resources for the authentication module. AUTHSINIT + functions are called after each configuration takes place + before any new requests are made. + +\par typedef void AUTHSPARSE(authScheme *, int, char *); + Functions of type AUTHSPARSE are responsible for parsing + authentication parameters. The function currently needs a + scheme scope data structure to store the configuration in. + The passed scheme's scheme_data pointer should point to + the local data structure. Future development will allow + all authentication schemes direct access to their configuration + data without a locally scope structure. The parse function + is called by Squid's config file parser when a auth_param + scheme_name entry is encountered. + +\par typedef void AUTHSFREECONFIG(authScheme *); + Functions of type AUTHSFREECONFIG are called by squid + when freeing configuration data. The auth scheme should + free any memory allocated that is related to parse data + structures. The scheme MAY take advantage of this call to + remove scheme local configuration dependent data. (Ie cached + user details that are only relevant to a config setting). + +\par typedef void AUTHSSTATS(StoreEntry *); + Functions of type AUTHSSTATS are called by the cachemgr + to provide statistics on the authmodule. Current modules + simply provide the statistics from the back end helpers + (number of requests, state of the helpers), but more detailed + statistics are possible - for example unique users seen or + failed authentication requests. + +\par + The next set of functions + work on the data structures used by the authentication schemes. 
+ +\par typedef void AUTHSREQFREE(auth_user_request_t *); + The AUTHSREQFREE function is called when an AuthUserRequest is being + freed by the authentication framework, and scheme specific data was + present. The function should free any scheme related data and MUST set + the scheme_data pointer to NULL. Failure to unlink the scheme data will + result in squid dying. + +\par + The next set of functions perform the actual + authentication. The functions are used by squid for both + WWW- and Proxy- authentication. Therefore they MUST NOT + assume the authentication will be based on the Proxy-* + Headers. + +\par typedef void AUTHSAUTHUSER(AuthUserRequest_t *, request_t *, ConnStateData *, http_hdr_type); + Functions of type AUTHSAUTHUSER are called when Squid + has a request that needs authentication. If needed the auth + scheme can alter the auth_user pointer (usually to point + to a previous instance of the user whose name is discovered + late in the auth process. For an example of this see the + NTLM scheme). These functions are responsible for performing + any in-squid routines for the authentication of the user. + The AuthUserRequest struct that is passed around is only + persistent for the current request. If the auth module + requires access to the structure in the future it MUST lock + it, and implement some method for identifying it in the + future. For example the NTLM module implements a connection + based authentication scheme, so the AuthUserRequest struct + gets referenced from the ConnStateData. + +\par typedef void AUTHSFIXERR(auth_user_request_t *, HttpReply *, http_hdr_type, request_t *); + Functions of type AUTHSFIXERR are used by squid to add scheme + specific challenges when returning a 401 or 407 error code. On requests + where no authentication information was provided, all registered auth + modules will have their AUTHSFIXERR function called. 
When the client + makes a request with an authentication header, on subsequent calls only the matching + AUTHSFIXERR function is called (and then only if the auth module + indicated it had a new challenge to send the client). If no auth schemes + match the request, the authentication credentials in the request are + ignored - and all auth modules are called. + +\par typedef void AUTHSFREE(auth_user_t *); + These functions are responsible for freeing scheme specific data from + the passed auth_user_t structure. This should only be called by squid + when there are no outstanding requests linked to the auth user. This includes + removing the user from any scheme specific memory caches. + +\par typedef void AUTHSADDTRAILER(auth_user_request_t *, HttpReply *, int); + These functions are responsible for adding any authentication + specific header(s) or trailer(s) OTHER THAN the WWW-Authenticate and + Proxy-Authenticate headers to the passed HttpReply. The int indicates + whether the request was an accelerated request or a proxied request. For + example operation see the digest auth scheme. (Digest uses an + Authentication-Info header.) This function is called whenever an + AuthUserRequest exists in a request when the reply is constructed, or + after the body is sent on chunked replies, respectively. + +\par typedef void AUTHSONCLOSEC(ConnStateData *); + This function type is called when an AuthUserRequest is + linked into a ConnStateData struct, and the connection is closed. If any + scheme specific activities related to the request or connection are in + progress, this function MUST clear them. + +\subsection HowToAddAuthenitcationSchemes How to add a new Authentication Scheme + +\par + Copy the nearest existing auth scheme and modify to receive the + appropriate scheme headers. Now step through the acl.c MatchAclProxyUser + function's code path and see how the functions call down through + authenticate.c to your scheme.
Write a helper to provide your scheme with + any backend existence it needs. Remember any blocking code must go in + AUTHSSTART function(s) and _MUST_ use callbacks. + +\subsection HowToHookInNewFunctions How to "hook in" new functions to the API + +\par + Start off by figuring out the code path that will result in + the function being called, and what data it will need. Then + create a typedef for the function, add an entry to the + authscheme_entry struct. Add a wrapper function to + authenticate.c (or if appropriate cf_cache.c) that calls + the scheme specific function if it exists. Test it. Test + it again. Now port to all the existing auth schemes, or at + least add a setting of NULL for the function for each + scheme. + + */ Index: squid3/doc/Programming-Guide/ExternalPrograms.dox diff -u /dev/null squid3/doc/Programming-Guide/ExternalPrograms.dox:1.1.2.3 --- /dev/null Thu Jan 1 01:00:00 1970 +++ squid3/doc/Programming-Guide/ExternalPrograms.dox Sat Oct 27 03:11:22 2007 @@ -0,0 +1,10 @@ +/** +\defgroup ExternalPrograms External Programs + +\section redirector redirector +\par + A redirector process reads URLs on stdin and writes (possibly + changed) URLs on stdout. It is implemented as an external + process to maximize flexibility. + + */ Index: squid3/src/AuthUserRequest.h diff -u squid3/src/AuthUserRequest.h:1.12 squid3/src/AuthUserRequest.h:1.8.2.9 --- squid3/src/AuthUserRequest.h:1.12 Thu Mar 20 18:22:17 2008 +++ squid3/src/AuthUserRequest.h Tue Mar 25 05:40:57 2008 @@ -128,8 +128,8 @@ char const * getDenyMessage(); size_t refCount() const; - void _lock(); // please use AUTHUSERREQUESTLOCK() - void _unlock(); // please use AUTHUSERREQUESTUNLOCK() + void _lock(); /// \note please use AUTHUSERREQUESTLOCK() + void _unlock(); /// \note please use AUTHUSERREQUESTUNLOCK() /** * Squid does not make assumptions about where the username is stored.
Index: squid3/src/authenticate.h diff -u squid3/src/authenticate.h:1.16 squid3/src/authenticate.h:1.14.14.3 --- squid3/src/authenticate.h:1.16 Thu Mar 20 18:22:18 2008 +++ squid3/src/authenticate.h Tue Mar 25 05:41:06 2008 @@ -65,7 +65,6 @@ MEMPROXY_CLASS_INLINE(AuthUserHashPointer); class ConnStateData; - class AuthScheme; /** Index: squid3/src/cbdata.cc diff -u squid3/src/cbdata.cc:1.29 squid3/src/cbdata.cc:1.27.2.3 --- squid3/src/cbdata.cc:1.29 Fri Mar 21 19:52:16 2008 +++ squid3/src/cbdata.cc Tue Mar 25 05:41:06 2008 @@ -158,7 +158,8 @@ */ void cbdata::operator delete(void *where, void *where2) -{; +{ + ; // empty. } long @@ -169,7 +170,7 @@ return (long)dataOffset; } #else -MEMPROXY_CLASS_INLINE(cbdata) /**DOCS_NOSEMI*/ +MEMPROXY_CLASS_INLINE(cbdata); #endif static OBJH cbdataDump; Index: squid3/src/errorpage.cc diff -u squid3/src/errorpage.cc:1.52 squid3/src/errorpage.cc:1.46.2.5 --- squid3/src/errorpage.cc:1.52 Fri Mar 21 19:52:22 2008 +++ squid3/src/errorpage.cc Tue Mar 25 05:41:08 2008 @@ -72,7 +72,7 @@ /* local constant and vars */ /** -/// \ingroup ErrorPageInternal + \ingroup ErrorPageInternal * \note hard coded error messages are not appended with %S * automagically to give you more control on the format Index: squid3/src/auth/negotiate/auth_negotiate.h diff -u squid3/src/auth/negotiate/auth_negotiate.h:1.12 squid3/src/auth/negotiate/auth_negotiate.h:1.8.2.4 --- squid3/src/auth/negotiate/auth_negotiate.h:1.12 Fri Mar 21 19:52:26 2008 +++ squid3/src/auth/negotiate/auth_negotiate.h Tue Mar 25 05:41:16 2008 @@ -54,7 +54,7 @@ dlink_list proxy_auth_list; }; -MEMPROXY_CLASS_INLINE(NegotiateUser) /**DOCS_NOSEMI*/ +MEMPROXY_CLASS_INLINE(NegotiateUser); /// \ingroup AuthNegotiateAPI typedef class NegotiateUser negotiate_user_t; Index: squid3/src/fs/ufs/ufscommon.h diff -u squid3/src/fs/ufs/ufscommon.h:1.15 squid3/src/fs/ufs/ufscommon.h:1.9.10.7 --- squid3/src/fs/ufs/ufscommon.h:1.15 Fri Mar 21 19:52:28 2008 +++ squid3/src/fs/ufs/ufscommon.h Tue Mar 25 05:41:16 
2008 @@ -288,7 +288,6 @@ MEMPROXY_CLASS_INLINE(UFSStoreState::_queued_write); - #include "StoreSearch.h" /// \ingroup UFS