From 4347571f74648938e345d3e43e8ecf1ee0801888 Mon Sep 17 00:00:00 2001 From: Tom Yu Date: Fri, 23 Aug 2002 17:55:33 +0000 Subject: [PATCH] * Makefile.in (LIBMINOR): Bump due to addition of bt_rseq() * hash/hash_debug.c: Remove inclusion of compat.h, as we don't have it in our build system. * btree/extern.h: Add missing prototypes/renames for __bt_dmpage(). Add renames for bt_rseq() support functions. * btree/bt_seq.c (bt_rseq): New function; like __bt_seq() but does recursive descent rather than using the prev/next pointers. This will catch some pages that might be missed if the database is inconsistent. Added support functions for bt_rseq() as well. * btree/bt_page.c (__bt_free): Set B_METADIRTY when updating free list. (__bt_new): Set B_METADIRTY when updating free list. [patch from www.sleepycat.com] * btree/bt_debug.c (__bt_dump): Bound loop by number of pages actually in file to avoid getting a nigh-infinite number of all-zeroes pages. (__bt_dmpage): Print a newline after dumping the meta page. (__bt_dpage): Add DB* parameter; use this to get pagesize in order to limit dumping of page contents, in case NEXTINDEX(h) happens to be bogus. (__bt_stat): Bound loop by number of pages actually in file so as to stop counting pages after the actual end of file. * btree/bt_close.c (__bt_sync): Apply a Kerbnet fix from long ago; don't return prematurely when B_METADIRTY is set but B_MODIFIED is clear. git-svn-id: svn://anonsvn.mit.edu/krb5/trunk@14752 dc483132-0cff-0310-8789-dd5450dbe970 --- src/util/db2/ChangeLog | 34 +++ src/util/db2/Makefile.in | 2 +- src/util/db2/btree/bt_close.c | 3 +- src/util/db2/btree/bt_debug.c | 21 +- src/util/db2/btree/bt_page.c | 2 + src/util/db2/btree/bt_seq.c | 410 +++++++++++++++++++++++++++++++++ src/util/db2/btree/extern.h | 13 +- src/util/db2/hash/hash_debug.c | 1 - 8 files changed, 474 insertions(+), 12 deletions(-) diff --git a/src/util/db2/ChangeLog b/src/util/db2/ChangeLog index 7c787e20b..520b270c1 100644 --- a/src/util/db2/ChangeLog +++ b/src/util/db2/ChangeLog @@ -1,3 +1,37 @@ +2002-08-23 Tom Yu + + * Makefile.in (LIBMINOR): Bump due to addition of bt_rseq(). + + * hash/hash_debug.c: Remove inclusion of compat.h, as we don't + have it in our build system. + + * btree/extern.h: Add missing prototypes/renames for + __bt_dmpage(). Add renames for bt_rseq() support functions. + + * btree/bt_seq.c (bt_rseq): New function; like __bt_seq() but does + recursive descent rather than using the prev/next pointers. This + will catch some pages that might be missed if the database is + inconsistent. Added support functions for bt_rseq() as well. + + * btree/bt_page.c (__bt_free): Set B_METADIRTY when updating free + list. + (__bt_new): Set B_METADIRTY when updating free list. + [patch from www.sleepycat.com] + + * btree/bt_debug.c (__bt_dump): Bound loop by number of pages + actually in file to avoid getting a nigh-infinite number of + all-zeroes pages. + (__bt_dmpage): Print a newline after dumping the meta page. + (__bt_dpage): Add DB* parameter; use this to get pagesize in order + to limit dumping of page contents, in case NEXTINDEX(h) happens to + be bogus. + (__bt_stat): Bound loop by number of pages actually in file so as + to stop counting pages after the actual end of file. + + * btree/bt_close.c (__bt_sync): Apply a Kerbnet fix from long ago; + don't return prematurely when B_METADIRTY is set but B_MODIFIED is + clear. + 2002-08-14 Ken Raeburn * Makefile.in (SUBDIROBJLISTS): New variable. diff --git a/src/util/db2/Makefile.in b/src/util/db2/Makefile.in index 1143db8fd..a1e2975ce 100644 --- a/src/util/db2/Makefile.in +++ b/src/util/db2/Makefile.in @@ -6,7 +6,7 @@ LOCAL_SUBDIRS=hash btree db mpool recno clib test LIB=db LIBMAJOR=1 -LIBMINOR=0 +LIBMINOR=1 STOBJLISTS=hash/OBJS.ST btree/OBJS.ST db/OBJS.ST mpool/OBJS.ST \ recno/OBJS.ST clib/OBJS.ST SUBDIROBJLISTS=$(STOBJLISTS) diff --git a/src/util/db2/btree/bt_close.c b/src/util/db2/btree/bt_close.c index b731dcb5d..11be13411 100644 --- a/src/util/db2/btree/bt_close.c +++ b/src/util/db2/btree/bt_close.c @@ -137,7 +137,8 @@ __bt_sync(dbp, flags) return (RET_ERROR); } - if (F_ISSET(t, B_INMEM | B_RDONLY) || !F_ISSET(t, B_MODIFIED)) + if (F_ISSET(t, B_INMEM | B_RDONLY) + || !F_ISSET(t, B_MODIFIED | B_METADIRTY)) return (RET_SUCCESS); if (F_ISSET(t, B_METADIRTY) && bt_meta(t) == RET_ERROR) diff --git a/src/util/db2/btree/bt_debug.c b/src/util/db2/btree/bt_debug.c index 8cf1cdaf5..d36256b3a 100644 --- a/src/util/db2/btree/bt_debug.c +++ b/src/util/db2/btree/bt_debug.c @@ -114,10 +114,9 @@ __bt_dump(dbp) (void)fprintf(tracefp, ")\n"); } #undef X - - for (i = P_ROOT; + for (i = P_ROOT; i < t->bt_mp->npages && (h = mpool_get(t->bt_mp, i, MPOOL_IGNOREPIN)) != NULL; ++i) - __bt_dpage(h); + __bt_dpage(dbp, h); (void)fflush(tracefp); return (0); } @@ -156,6 +155,7 @@ __bt_dmpage(h) X(R_RECNO, "RECNO"); (void)fprintf(tracefp, ")"); } + (void)fprintf(tracefp, "\n"); (void)fflush(tracefp); return (0); } @@ -178,7 +178,7 @@ __bt_dnpage(dbp, pgno) t = dbp->internal; if ((h = mpool_get(t->bt_mp, pgno, MPOOL_IGNOREPIN)) != NULL) - __bt_dpage(h); + __bt_dpage(dbp, h); (void)fflush(tracefp); return (0); } @@ -190,14 +190,16 @@ __bt_dnpage(dbp, pgno) * h: pointer to the PAGE */ int -__bt_dpage(h) +__bt_dpage(dbp, h) + DB *dbp; PAGE *h; { BINTERNAL *bi; BLEAF *bl; RINTERNAL *ri; RLEAF *rl; - indx_t cur, top; + u_long pgsize; + indx_t cur, top, lim; char *sep; __bt_dinit(); @@ -223,10 +225,13 @@ __bt_dpage(h) if (h->flags & P_OVERFLOW) return; + pgsize = ((BTREE *)dbp->internal)->bt_mp->pagesize; + lim = (pgsize - BTDATAOFF) / sizeof(indx_t); top = NEXTINDEX(h); + lim = top > lim ? lim : top; (void)fprintf(tracefp, " lower %3d upper %3d nextind %d\n", h->lower, h->upper, top); - for (cur = 0; cur < top; cur++) { + for (cur = 0; cur < lim; cur++) { (void)fprintf(tracefp, "\t[%03d] %4d ", cur, h->linp[cur]); switch (h->flags & P_TYPE) { case P_BINTERNAL: @@ -307,7 +312,7 @@ __bt_stat(dbp) t = dbp->internal; pcont = pinternal = pleaf = 0; nkeys = ifree = lfree = 0; - for (i = P_ROOT; + for (i = P_ROOT; i < t->bt_mp->npages && (h = mpool_get(t->bt_mp, i, MPOOL_IGNOREPIN)) != NULL; ++i) switch (h->flags & P_TYPE) { case P_BINTERNAL: diff --git a/src/util/db2/btree/bt_page.c b/src/util/db2/btree/bt_page.c index cb65040a7..3663cf7f9 100644 --- a/src/util/db2/btree/bt_page.c +++ b/src/util/db2/btree/bt_page.c @@ -65,6 +65,7 @@ __bt_free(t, h) h->prevpg = P_INVALID; h->nextpg = t->bt_free; t->bt_free = h->pgno; + F_SET(t, B_METADIRTY); /* Make sure the page gets written back. */ return (mpool_put(t->bt_mp, h, MPOOL_DIRTY)); @@ -92,6 +93,7 @@ __bt_new(t, npg) (h = mpool_get(t->bt_mp, t->bt_free, 0)) != NULL) { *npg = t->bt_free; t->bt_free = h->nextpg; + F_SET(t, B_METADIRTY); return (h); } return (mpool_new(t->bt_mp, npg, MPOOL_PAGE_NEXT)); diff --git a/src/util/db2/btree/bt_seq.c b/src/util/db2/btree/bt_seq.c index d3924d862..d691a62ad 100644 --- a/src/util/db2/btree/bt_seq.c +++ b/src/util/db2/btree/bt_seq.c @@ -1,3 +1,27 @@ +/* + * Copyright (C) 2002 by the Massachusetts Institute of Technology. + * All rights reserved. + * + * Export of this software from the United States of America may + * require a specific license from the United States Government. + * It is the responsibility of any person or organization contemplating + * export to obtain such a license before exporting. + * + * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and + * distribute this software and its documentation for any purpose and + * without fee is hereby granted, provided that the above copyright + * notice appear in all copies and that both that copyright notice and + * this permission notice appear in supporting documentation, and that + * the name of M.I.T. not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. Furthermore if you modify this software you must label + * your software as modified software and not distribute it in such a + * fashion that it might be confused with the original M.I.T. software. + * M.I.T. makes no representations about the suitability of + * this software for any purpose. It is provided "as is" without express + * or implied warranty. + */ + /*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. @@ -488,3 +512,389 @@ __bt_setcur(t, pgno, idx) t->bt_cursor.pg.index = idx; F_SET(&t->bt_cursor, CURS_INIT); } + +/* Recursive descent cursor. */ +typedef struct rcursor_ { + CURSOR cursor; + size_t ssize; + EPGNO *stack; + EPGNO *sp; +} RCURSOR; +#define RCURSOR_MINSS 64 + +static int bt_rcinit(void **); +static void bt_rcdestroy(void **); +static int bt_rcpush(RCURSOR *, db_pgno_t, u_int); +static EPGNO *bt_rcpop(RCURSOR *); +static void bt_rcclr(RCURSOR *); +static int bt_rcgrowstk(RCURSOR *); +static int bt_rseqset(BTREE *, EPG *, DBT *, RCURSOR *, int); +static int bt_rseqadv(BTREE *, EPG *, RCURSOR *, int); + +static int +bt_rcinit(curs) + void **curs; +{ + RCURSOR *rc; + + rc = *curs = malloc(sizeof(RCURSOR)); + if (rc == NULL) { + errno = ENOMEM; + return RET_ERROR; + } + memset(rc, 0, sizeof(*rc)); + + rc->ssize = RCURSOR_MINSS; + rc->stack = malloc(rc->ssize * sizeof(EPGNO)); + if (rc->stack == NULL) { + free(rc); + errno = ENOMEM; + return RET_ERROR; + } + bt_rcclr(rc); + return RET_SUCCESS; +} + +static void +bt_rcdestroy(curs) + void **curs; +{ + RCURSOR *rc; + + rc = *curs; + free(rc->stack); + free(rc); + *curs = NULL; +} + +static int +bt_rcpush(rc, p, i) + RCURSOR *rc; + db_pgno_t p; + u_int i; +{ + int status; + + rc->sp->pgno = p; + rc->sp->index = i; + if (++rc->sp > rc->stack + rc->ssize) { + status = bt_rcgrowstk(rc); + if (status != RET_SUCCESS) + return status; + } + return RET_SUCCESS; +} + +static EPGNO * +bt_rcpop(rc) + RCURSOR *rc; +{ + return (rc->sp == rc->stack) ? NULL : --rc->sp; +} + +static void +bt_rcclr(rc) + RCURSOR *rc; +{ + rc->sp = rc->stack; +} + +static int +bt_rcgrowstk(rc) + RCURSOR *rc; +{ + size_t osize; + EPGNO *e; + + osize = rc->ssize; + rc->ssize *= 2; + e = realloc(rc->stack, rc->ssize * sizeof(EPGNO)); + if (e == NULL) { + rc->ssize = osize; + errno = ENOMEM; + return RET_ERROR; + } + rc->stack = e; + return RET_SUCCESS; +} + +/* + * bt_rseq -- + * Like __bt_seq but does recursive descent tree traversal + * instead of using the prev/next pointers. + */ +int +bt_rseq(dbp, key, data, curs, flags) + const DB *dbp; + DBT *key, *data; + void **curs; + u_int flags; +{ + RCURSOR *rc; + BTREE *t; + EPG e; + int status; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + if (curs == NULL) { + errno = EINVAL; + return RET_ERROR; + } + if (*curs == NULL) { + status = bt_rcinit(curs); + if (status != RET_SUCCESS) + return RET_ERROR; + } + rc = *curs; + + /* + * If scan unitialized as yet, or starting at a specific record, set + * the scan to a specific key. Both bt_rseqset and bt_rseqadv pin + * the page the cursor references if they're successful. + */ + switch (flags) { + case R_NEXT: + case R_PREV: + if (F_ISSET(&rc->cursor, CURS_INIT)) { + status = bt_rseqadv(t, &e, rc, flags); + break; + } + /* FALLTHROUGH */ + case R_FIRST: + case R_LAST: + case R_CURSOR: + status = bt_rseqset(t, &e, key, rc, flags); + break; + default: + errno = EINVAL; + return (RET_ERROR); + } + + if (status == RET_SUCCESS) { + status = + __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); + + /* + * If the user is doing concurrent access, we copied the + * key/data, toss the page. + */ + if (F_ISSET(t, B_DB_LOCK)) + mpool_put(t->bt_mp, e.page, 0); + else + t->bt_pinned = e.page; + } else if (status == RET_SPECIAL) + bt_rcdestroy(curs); + return (status); +} + +/* + * bt_rseqset -- + * Set the sequential scan to a specific key. + * + * Parameters: + * t: tree + * ep: storage for returned key + * key: key for initial scan position + * rc: recursion cursor + * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV + * + * Side effects: + * Pins the page the cursor references. + * Updates rc's stack and cursor. + * + * Returns: + * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. + */ +static int +bt_rseqset(t, ep, key, rc, flags) + BTREE *t; + EPG *ep; + DBT *key; + RCURSOR *rc; + int flags; +{ + PAGE *h; + db_pgno_t pg; + int status; + + /* + * Find the first, last or specific key in the tree and point the + * cursor at it. The cursor may not be moved until a new key has + * been found. + */ + switch (flags) { + case R_CURSOR: /* Not implemented. */ + errno = EINVAL; + return RET_ERROR; + case R_FIRST: /* First record. */ + case R_NEXT: + bt_rcclr(rc); + /* Walk down the left-hand side of the tree. */ + for (pg = P_ROOT;;) { + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + + /* Check for an empty tree. */ + if (NEXTINDEX(h) == 0) { + mpool_put(t->bt_mp, h, 0); + return (RET_SPECIAL); + } + + if (h->flags & (P_BLEAF | P_RLEAF)) + break; + pg = GETBINTERNAL(h, 0)->pgno; + status = bt_rcpush(rc, h->pgno, 0); + mpool_put(t->bt_mp, h, 0); + if (status != RET_SUCCESS) + return status; + } + ep->page = h; + ep->index = 0; + break; + case R_LAST: /* Last record. */ + case R_PREV: + bt_rcclr(rc); + /* Walk down the right-hand side of the tree. */ + for (pg = P_ROOT;;) { + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + + /* Check for an empty tree. */ + if (NEXTINDEX(h) == 0) { + mpool_put(t->bt_mp, h, 0); + return (RET_SPECIAL); + } + + if (h->flags & (P_BLEAF | P_RLEAF)) + break; + pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; + status = bt_rcpush(rc, h->pgno, NEXTINDEX(h) - 1); + mpool_put(t->bt_mp, h, 0); + if (status != RET_SUCCESS) + return status; + } + ep->page = h; + ep->index = NEXTINDEX(h) - 1; + break; + } + rc->cursor.pg.pgno = ep->page->pgno; + rc->cursor.pg.index = ep->index; + F_CLR(&rc->cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); + F_SET(&rc->cursor, CURS_INIT); + return (RET_SUCCESS); +} + +/* + * bt_rseqadvance -- + * Advance the sequential scan. + * + * Parameters: + * t: tree + * ep: return page + * rc: recursion cursor + * flags: R_NEXT, R_PREV + * + * Side effects: + * Pins the page the new key/data record is on. + * Updates rc's stack and cursor. + * + * Returns: + * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. + */ +static int +bt_rseqadv(t, ep, rc, flags) + BTREE *t; + EPG *ep; + RCURSOR *rc; + int flags; +{ + CURSOR *c; + PAGE *h; + indx_t idx; + db_pgno_t pg; + int status; + EPGNO *e; + + /* + * There are a couple of states that we can be in. The cursor has + * been initialized by the time we get here, but that's all we know. + */ + c = &rc->cursor; + + /* Get the page referenced by the cursor. */ + if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) + return (RET_ERROR); + + /* + * Find the next/previous record in the tree and point the cursor at + * it. The cursor may not be moved until a new key has been found. + */ + switch (flags) { + case R_NEXT: /* Next record. */ + idx = c->pg.index; + while (++idx == NEXTINDEX(h)) { + /* Crawl up if we hit the right edge. */ + e = bt_rcpop(rc); + mpool_put(t->bt_mp, h, 0); + if (e == NULL) /* Hit the right edge of root. */ + return RET_SPECIAL; + idx = e->index; + pg = e->pgno; + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + } + while (!(h->flags & (P_BLEAF | P_RLEAF))) { + /* Crawl down the left until we hit a leaf. */ + status = bt_rcpush(rc, h->pgno, idx); + pg = GETBINTERNAL(h, idx)->pgno; + mpool_put(t->bt_mp, h, 0); + if (status != RET_SUCCESS) + return status; + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + idx = 0; + } + break; + case R_PREV: /* Previous record. */ + idx = c->pg.index; + while (!idx) { + /* Crawl up if we hit the left edge. */ + e = bt_rcpop(rc); + mpool_put(t->bt_mp, h, 0); + if (e == NULL) /* Hit the left edge of root. */ + return RET_SPECIAL; + idx = e->index; + pg = e->pgno; + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + } + idx--; + while (!(h->flags & (P_BLEAF | P_RLEAF))) { + /* Crawl down the right until we hit a leaf. */ + status = bt_rcpush(rc, h->pgno, idx); + pg = GETBINTERNAL(h, idx)->pgno; + mpool_put(t->bt_mp, h, 0); + if (status != RET_SUCCESS) + return status; + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + idx = NEXTINDEX(h) - 1; + } + break; + } + + ep->page = h; + ep->index = idx; + c->pg.pgno = h->pgno; + c->pg.index = idx; + F_CLR(c, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); + F_SET(c, CURS_INIT); + return (RET_SUCCESS); +} diff --git a/src/util/db2/btree/extern.h b/src/util/db2/btree/extern.h index 70a88072b..3aa88417e 100644 --- a/src/util/db2/btree/extern.h +++ b/src/util/db2/btree/extern.h @@ -58,10 +58,20 @@ #define __ovfl_get __kdb2_ovfl_get #define __ovfl_put __kdb2_ovfl_put #define __bt_dnpage __kdb2_bt_dnpage +#define __bt_dmpage __kdb2_bt_dmpage #define __bt_dpage __kdb2_bt_dpage #define __bt_dump __kdb2_bt_dump #define __bt_stat __kdb2_bt_stat +#define bt_rcinit kdb2_bt_rcinit +#define bt_rcdestroy kdb2_bt_rcdestroy +#define bt_rcpush kdb2_bt_rcpush +#define bt_rcpop kdb2_bt_rcpop +#define bt_rcclr kdb2_bt_rcclr +#define bt_rcgrowstk kdb2_bt_rcgrowstk +#define bt_rseqset kdb2_bt_rseqset +#define bt_rseqadv kdb2_bt_rseqadv + int __bt_close __P((DB *)); int __bt_cmp __P((BTREE *, const DBT *, EPG *)); int __bt_crsrdel __P((BTREE *, EPGNO *)); @@ -91,7 +101,8 @@ int __ovfl_put __P((BTREE *, const DBT *, db_pgno_t *)); #ifdef DEBUG int __bt_dnpage __P((DB *, db_pgno_t)); -int __bt_dpage __P((PAGE *)); +int __bt_dpage __P((DB *, PAGE *)); +int __bt_dmpage __P((PAGE *)); int __bt_dump __P((DB *)); #endif #ifdef STATISTICS diff --git a/src/util/db2/hash/hash_debug.c b/src/util/db2/hash/hash_debug.c index ed99c6932..69229fc8d 100644 --- a/src/util/db2/hash/hash_debug.c +++ b/src/util/db2/hash/hash_debug.c @@ -56,7 +56,6 @@ static char sccsid[] = "@(#)hash_debug.c 8.4 (Berkeley) 11/7/95"; #include "hash.h" #include "page.h" #include "extern.h" -#include "compat.h" void __dump_bucket(hashp, bucket) -- 2.26.2