Date: Fri, 25 Apr 2003 10:32:20 +1000
From: Lachlan Andrew <lha@users.sourceforge.net>
To: Ted Stresen-Reuter <tedmasterweb@mac.com>
Cc: htdig-dev@lists.sourceforge.net
Subject: [htdig-dev] New patch [was: new release of htdig]

On Fri, 25 Apr 2003 00:30, Ted Stresen-Reuter wrote:
> I meant to express my interest in testing your patch too. I
> have OS X as well. I would need instructions for
> how to apply a patch.

Thanks.  The revised patch is attached.

To apply it, start from a freshly checked out CVS, and cd to the top 
directory.  Then type
    patch -p0 < your/path/to/dbase.patch1
et voila.

The '-p0' is because this particular patch specifies the destination 
files (e.g., the last path on the first line of the patch) exactly 
as you would specify them from the directory you have 'cd'd to.  If 
you want patch to strip the first leading path component (say the 
file said "test/db/mp.h"), you would use '-p1' instead.

Let me know if you have any problems.

Thanks again,
Lachlan

diff -cr /home/lha/devel/htdig/cvs/htdig/db/mp.h db/mp.h
*** /home/lha/devel/htdig/cvs/htdig/db/mp.h	Sun Feb  3 05:18:05 2002
--- db/mp.h	Thu Apr 24 23:36:37 2003
***************
*** 45,50 ****
--- 45,54 ----
  
  	int	    nc_reg;		/* N underlying cache regions. */
  	REGINFO	   *c_reginfo;		/* Underlying cache regions. */
+ 
+ 	/* I'm not sure if these need to be thread-protected... */
+ 	int         recursion_level;	/* limit recur'n from weak compr'n */
+ 
  };
  
  /*
***************
*** 167,172 ****
--- 171,183 ----
  
  #define	MP_LSN_RETRY	0x01		/* Retry all BH_WRITE buffers. */
  	u_int32_t  flags;
+ 
+ 	/* HACK!! */
+ 	/* A pointer allocated for this structure is (erroneously?) used */
+ 	/* in CDB___memp_alloc() to refer to an MCACHE structure.  Make sure */
+ 	/* the allocation is big enough. */
+ 	int	    dummy [100];
+ 
  };
  
  /*
diff -cr /home/lha/devel/htdig/cvs/htdig/db/mp_alloc.c db/mp_alloc.c
*** /home/lha/devel/htdig/cvs/htdig/db/mp_alloc.c	Sun Feb  3 05:18:05 2002
--- db/mp_alloc.c	Thu Apr 24 23:39:38 2003
***************
*** 12,23 ****
--- 12,50 ----
  
  #ifndef NO_SYSTEM_INCLUDES
  #include <sys/types.h>
+ #include <errno.h>
  #endif
  
  #include "db_int.h"
  #include "db_shash.h"
  #include "mp.h"
  
+ #if 0
+ #  define DEBUG
+ #endif
+ 
+ /*
+  * CDB___memp_alloc  allocates memory from the memory pool.  If mfp
+  * is non-NULL, its page size specifies the amount of memory to allocate.
+  * This memory is essentially a disk cache, and it seems pages can be freed
+  * at will (after writing to disk, if the page is dirty).
+  *
+  * On-the-fly compression (CDB___memp_cmpr...) causes problems when writing
+  * dirty pages.  CDB___memp_cmpr_alloc_chain  calls  CDB___memp_alloc,
+  * potentially causing infinite recursion.  This should eventually be fixed
+  * in  CDB___memp_cmpr_alloc_chain but a hack has been put here for now.
+  * An explicit count of the recursion level is kept.  In the outer call,
+  * any page can be swapped out, whether dirty or not.  During a recursive call,
+  * only clean pages are swapped out.  To ensure that there are sufficient
+  * clean pages,  CDB___memp_clean_page  is called whenever over 1/3 of the
+  * pages are dirty.  (Note: that is 1/3 of the *number* of pages, irrespective
+  * of their sizes.  The pages allocated by  CDB___memp_cmpr_alloc_chain
+  * itself are very small and always seem clean, so the actual amount of clean
+  * memory may be quite small.)
+  */
+ 
+ static int CDB___memp_clean_page __P((DB_MPOOL *, REGINFO *));
+ 
  /*
   * CDB___memp_alloc --
   *	Allocate some space in the mpool region.
***************
*** 38,49 ****
--- 65,126 ----
  	MCACHE *mc;
  	MPOOL *mp;
  	MPOOLFILE *bh_mfp;
+ 	u_int32_t failed_writes, pages_reviewed;
  	size_t total;
  	int nomore, restart, ret, wrote;
  	void *p;
  
+ 
+ 	dbmp->recursion_level++;
+ 	if (dbmp->recursion_level > 2)	/* bhwrite (used by us) calls us. */
+ 	{				/* Disallow deeper recursions. */
+ 	    fprintf (stderr,
+ 		"Can't allocate %lu bytes: Recursive call (mfp %p)\n",
+ 		(u_long)len, mfp);
+ #ifdef DEBUG
+ 	    static flag = 1;
+ 	    if (flag)
+ 	    {
+ 		flag = 0;
+ 		/* Obtain a backtrace and print it to stderr. */
+ 		{
+ 		  void *array[100];
+ 		  size_t size;
+ 		  char **strings;
+ 		  size_t i;
+ 
+ 		  size = backtrace (array, 100);
+ 		  strings = backtrace_symbols (array, size);
+ 
+ 		  printf ("Obtained %zd stack frames.\n", size);
+ 
+ 		  for (i = 0; i < size; i++)
+ 		     fprintf (stderr, "%s\n", strings[i]);
+ 
+ 		  free (strings);
+ 		}
+ 	    }
+ #endif
+ 	    ret = ENOMEM;
+ 	    goto err;
+ 	}
+ 
  	mp = dbmp->reginfo.primary;
+ 
+ 	/* This next statement is dodgy.  The memory allocation in which */
+ 	/* memreg->primary is located is not (quite) large enough. */
+ 	/* The same void pointer is used in  CDB___memp_open() to point to */
+ 	/* an MPOOL, rather than an MCACHE as it does here. */
  	mc = memreg->primary;
+ 	failed_writes = 0;
+ 
+ #ifdef DEBUG
+ #  ifndef HEAVY_DEBUG
+ 	if (dbmp->recursion_level > 1)
+ #  endif
+ 	    fprintf(stderr,"Depth %d D %d C %d\n", dbmp->recursion_level,
+ 			mc->stat.st_page_dirty, mc->stat.st_page_clean);
+ #endif
  
  	/*
  	 * If we're allocating a buffer, and the one we're discarding is the
***************
*** 60,80 ****
  		if (offsetp != NULL)
  			*offsetp = R_OFFSET(memreg, p);
  		*(void **)retp = p;
! 		return (0);
  	}
  	if (nomore) {
  		CDB___db_err(dbmp->dbenv,
  	    "Unable to allocate %lu bytes from mpool shared region: %s\n",
  		    (u_long)len, CDB_db_strerror(ret));
! 		return (ret);
  	}
  
! retry:	/* Find a buffer we can flush; pure LRU. */
! 	restart = total = 0;
  	for (bhp =
  	    SH_TAILQ_FIRST(&mc->bhq, __bh); bhp != NULL; bhp = nbhp) {
  		nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
  
  		/* Ignore pinned or locked (I/O in progress) buffers. */
  		if (bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED))
  			continue;
--- 137,172 ----
  		if (offsetp != NULL)
  			*offsetp = R_OFFSET(memreg, p);
  		*(void **)retp = p;
! #ifdef HEAVY_DEBUG
! 		fprintf(stderr,"D %d C %d\n",
! 			mc->stat.st_page_dirty, mc->stat.st_page_clean);
! #endif
! 		if (mc->stat.st_page_dirty > 2*mc->stat.st_page_clean) {
! 		    /* if over 2/3 pages dirty, write some out */
! 		    /* since "write" with compression on can itself */
! 		    /* need to allocate memory... */
! 		    CDB___memp_clean_page(dbmp, memreg);
! 		}
! 		ret = 0;
! 		goto err;
  	}
  	if (nomore) {
  		CDB___db_err(dbmp->dbenv,
  	    "Unable to allocate %lu bytes from mpool shared region: %s\n",
  		    (u_long)len, CDB_db_strerror(ret));
! 		goto err;
  	}
  
! retry:	/* Find a buffer we can flush; LRU, skipping unwritable dirty pages */
! 	restart = 0;
! 	total = 0;
! 	pages_reviewed = 0;
  	for (bhp =
  	    SH_TAILQ_FIRST(&mc->bhq, __bh); bhp != NULL; bhp = nbhp) {
  		nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
  
+ 		++pages_reviewed;
+ 
  		/* Ignore pinned or locked (I/O in progress) buffers. */
  		if (bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED))
  			continue;
***************
*** 84,95 ****
  
  		/* Write the page if it's dirty. */
  		if (F_ISSET(bhp, BH_DIRTY)) {
  			++bhp->ref;
! 			if ((ret = CDB___memp_bhwrite(dbmp,
! 			    bh_mfp, bhp, &restart, &wrote)) != 0)
! 				return (ret);
  			--bhp->ref;
  
  			/*
  			 * Another process may have acquired this buffer and
  			 * incremented the ref count after we wrote it.
--- 176,229 ----
  
  		/* Write the page if it's dirty. */
  		if (F_ISSET(bhp, BH_DIRTY)) {
+ 		        /* If called from within  CDB___memp_cmpr_write()... */
+ 		        if (dbmp->recursion_level > 1)
+ 			    continue;
+ 
  			++bhp->ref;
! 			ret = CDB___memp_bhwrite(dbmp,
! 			    bh_mfp, bhp, &restart, &wrote);
  			--bhp->ref;
  
+ 			if (ret != 0) {
+ 				/*
+ 				 * Count the number of writes that have
+ 				 * failed.  If the number of writes that
+ 				 * have failed, total, plus the number
+ 				 * of pages we've reviewed on this pass
+ 				 * equals the number of buffers there
+ 				 * currently are, we've most likely
+ 				 * run out of buffers that are going to
+ 				 * succeed, and it's time to fail.
+ 				 * (We chuck failing buffers to the
+ 				 * end of the list.) [#0637]
+ 				 */
+ 				failed_writes++;
+ #ifdef DEBUG
+ 				fprintf(stderr,"D %d C %d R %d F %d\n",
+ 					mc->stat.st_page_dirty,
+ 					mc->stat.st_page_clean,
+ 					pages_reviewed, failed_writes);
+ #endif
+ 				if (failed_writes + pages_reviewed >=
+ 					mc->stat.st_page_dirty +
+ 					mc->stat.st_page_clean)
+ 				    goto err;
+ 
+ 				/*
+ 				 * Otherwise, relocate this buffer
+ 				 * to the end of the LRU queue
+ 				 * so we're less likely to encounter
+ 				 * it again, and try again.
+ 				 */
+ 				SH_TAILQ_REMOVE(&mc->bhq, bhp, q, __bh);
+ 				SH_TAILQ_INSERT_TAIL(&mc->bhq, bhp, q);
+ #ifdef DEBUG
+ 				fprintf(stderr,"Retrying...\n");
+ #endif
+ 				goto retry;
+ 			}
+ 
  			/*
  			 * Another process may have acquired this buffer and
  			 * incremented the ref count after we wrote it.
***************
*** 130,136 ****
  			if (offsetp != NULL)
  				*offsetp = R_OFFSET(memreg, bhp);
  			*(void **)retp = bhp;
! 			return (0);
  		}
  
  		/* Note how much space we've freed, and free the buffer. */
--- 264,283 ----
  			if (offsetp != NULL)
  				*offsetp = R_OFFSET(memreg, bhp);
  			*(void **)retp = bhp;
! 
! #ifdef HEAVY_DEBUG
! 			fprintf(stderr,"d %d c %d\n",
! 				mc->stat.st_page_dirty, mc->stat.st_page_clean);
! #endif
! 			if (mc->stat.st_page_dirty > 2*mc->stat.st_page_clean) {
! 			    /* if over 2/3 pages dirty, write some out */
! 			    /* since "write" with compression on can itself */
! 			    /* need to allocate memory... */
! 			    CDB___memp_clean_page(dbmp, memreg);
! 			}
! 
! 			ret = 0;
! 			goto err;
  		}
  
  		/* Note how much space we've freed, and free the buffer. */
***************
*** 151,154 ****
--- 298,359 ----
  	}
  	nomore = 1;
  	goto alloc;
+ err:
+ 	dbmp->recursion_level--;
+ 	return (ret);
+ }
+ 
+ 
+ /*
+  * CDB___memp_clean_page --
+  *	Find dirty pages in MPOOL region and flush them.  Doesn't flush all,
+  *	but ensures sufficient clean pages that compressed writes will not
+  *	be stranded if the compression process needs to allocate memory.
+  *
+  *	I think  CDB_memp_trickle()  was intended for this purpose, but it
+  *	seems not to be in use.  See also  CDB_memp_sync().
+  *
+  * PRIVATE: int CDB___memp_clean_page __P((DB_MPOOL *, REGINFO *));
+  */
+ int
+ CDB___memp_clean_page(dbmp, memreg)
+ 	DB_MPOOL *dbmp;
+ 	REGINFO *memreg;
+ {
+ 	BH *bhp, *nbhp;
+ 	MCACHE *mc;
+ 	MPOOLFILE *bh_mfp;
+ 	int restart, cleaned, wrote;
+ 
+ 	mc = memreg->primary;
+ 	cleaned = 0;
+ #ifdef DEBUG
+ 	fprintf(stderr,"Trying to clean %d pages (%d already clean)\n",
+ 		mc->stat.st_page_dirty, mc->stat.st_page_clean);
+ #endif
+ 
+ 	for (bhp = SH_TAILQ_FIRST(&mc->bhq, __bh); bhp != NULL; bhp = nbhp) {
+ 
+ 	    nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
+ 
+ 	    /* Ignore pinned or locked (I/O in progress) buffers. */
+ 	    if (bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED))
+ 		    continue;
+ 
+ 	    /* Find the associated MPOOLFILE. */
+ 	    bh_mfp = R_ADDR(&dbmp->reginfo, bhp->mf_offset);
+ 
+ 	    /* Only write the page if it's dirty. */
+ 	    if (!F_ISSET(bhp, BH_DIRTY))
+ 		continue;
+ 
+ 	    ++bhp->ref;		// are these needed?  I just copied from above!
+ 	    if (!CDB___memp_bhwrite(dbmp, bh_mfp, bhp, &restart, &wrote))
+ 		cleaned++;
+ 	    --bhp->ref;
+     }
+ #ifdef DEBUG
+ 	fprintf(stderr,"Cleaned %d\n", cleaned);
+ #endif
+     return (0);
  }
diff -cr /home/lha/devel/htdig/cvs/htdig/db/mp_cmpr.c db/mp_cmpr.c
*** /home/lha/devel/htdig/cvs/htdig/db/mp_cmpr.c	Mon Oct 28 02:47:55 2002
--- db/mp_cmpr.c	Thu Apr 24 23:41:30 2003
***************
*** 3,9 ****
--- 3,33 ----
   *
   * Copyright (c) 1999, 2000
   *	Loic Dachary.  All rights reserved.
+  *
+  * Overview of the code (by Lachlan Andrew, lha@users.sourceforge.net):
+  *
+  * This code compresses pages on-the-fly, either using a built-in algorithm,
+  * or using the  zlib  library.  The compressed page is stored in pages of
+  * size  CMPR_MULTIPLY(db_io->pagesize)  -- a fixed multiple of the true
+  * page size,  db_io->pagesize.  If the compressed page requires multiple
+  * pages, extra pages are allocated at the end of the file, and "chained"
+  * on to the original page.  The chain is specified as an array in the first
+  * page (not a linked list).  If a subsequent write of the page requires
+  * a shorter chain, the spare pages are recorded as "free" and listed in
+  * the weak-compression database (with suffix given by DB_CMPR_SUFFIX).
+  *
+  * When writing a compressed page, extra memory may need to be allocated if
+  * chaining occurs.  This can cause recursive calls to  CDB___memp_alloc(),
+  * since the latter may write dirty cache pages to satisfy the request.
+  * There is currently an explicit check for recursive calls, both in
+  * CDB___memp_alloc()  and  CDB___memp_cmpr_write(), but a more elegant
+  * solution would be nice.
+  *
+  * There also seems to be an issue with the memory allocation for the chain
+  * array.  The small allocations seem to cause fragmentation in the memory
+  * pool (seen as very many small clean blocks, which don't go away).
   * 
+  *
   * TODO:
   *   Keith Bostic says:
   *   The only change I'd probably think about is if
***************
*** 44,51 ****
  
  #ifndef NO_SYSTEM_INCLUDES
  #include <sys/types.h>
- 
  #include <errno.h>
  #endif
  
  #include "db_int.h"
--- 68,75 ----
  
  #ifndef NO_SYSTEM_INCLUDES
  #include <sys/types.h>
  #include <errno.h>
+ #include <string.h>
  #endif
  
  #include "db_int.h"
***************
*** 268,273 ****
--- 292,298 ----
        }
        /*
         * Keep the chain in buffer header.
+        * Freed when  bhp  freed in  CDB___memp_bhfree().
         */
        CDB___memp_cmpr_alloc_chain(dbmfp->dbmp, bhp, BH_CMPR_POOL);
  
***************
*** 405,410 ****
--- 430,440 ----
       * overflow! the compressed buffer is too big -> get extra page
       */
      if(length > copy_length) {
+       if (dbmfp->dbmp->recursion_level >= 2 ) {
+ 	  fprintf(stderr,"CDB___memp_cmpr_write: Wanted %d > %d bytes\n", length, copy_length);
+ 	  ret = EBUSY;
+ 	  goto err;
+       }
        chain_length++;
        if(chain_length >= CMPR_MAX) {
  	  CDB___db_err(dbmfp->dbmp->dbenv, "CDB___memp_cmpr_write: chain_length overflow");
diff -cr /home/lha/devel/htdig/cvs/htdig/db/mp_region.c db/mp_region.c
*** /home/lha/devel/htdig/cvs/htdig/db/mp_region.c	Sun Feb  3 05:18:05 2002
--- db/mp_region.c	Thu Apr 24 23:39:06 2003
***************
*** 60,65 ****
--- 60,68 ----
  	TAILQ_INIT(&dbmp->dbmfq);
  	dbmp->dbenv = dbenv;
  
+ 	/* Clear locking for avoiding mp_alloc recursion */
+ 	dbmp->recursion_level = 0;
+ 
  	/*
  	 * Join/create the mpool region.  If this is a local region we don't
  	 * need much space because the most we'll store there is the pair of
