00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041 #include "db_config.h"
00042
00043 #ifndef NO_SYSTEM_INCLUDES
00044 #include <sys/types.h>
00045
00046 #include <string.h>
00047 #endif
00048
00049 #include "db_int.h"
00050 #include "dbinc/db_page.h"
00051 #ifndef HAVE_FTRUNCATE
00052 #include "dbinc/db_shash.h"
00053 #endif
00054 #include "dbinc/hash.h"
00055 #ifndef HAVE_FTRUNCATE
00056 #include "dbinc/lock.h"
00057 #include "dbinc/mp.h"
00058 #endif
00059 #include "dbinc/log.h"
00060 #include "dbinc/fop.h"
00061 #include "dbinc/txn.h"
00062
00063 #ifndef HAVE_FTRUNCATE
00064 static int __db_limbo_fix __P((DB *, DB_TXN *,
00065 DB_TXNLIST *, db_pgno_t *, DBMETA *, db_limbo_state));
00066 static int __db_limbo_bucket __P((DB_ENV *,
00067 DB_TXN *, DB_TXNLIST *, db_limbo_state));
00068 static int __db_limbo_move __P((DB_ENV *, DB_TXN *, DB_TXN *, DB_TXNLIST *));
00069 static int __db_limbo_prepare __P(( DB *, DB_TXN *, DB_TXNLIST *));
00070 static int __db_lock_move __P((DB_ENV *,
00071 u_int8_t *, db_pgno_t, db_lockmode_t, DB_TXN *, DB_TXN *));
00072 static int __db_txnlist_pgnoadd __P((DB_ENV *, DB_TXNHEAD *,
00073 int32_t, u_int8_t *, char *, db_pgno_t));
00074 #endif
00075 static int __db_txnlist_find_internal __P((DB_ENV *, DB_TXNHEAD *,
00076 db_txnlist_type, u_int32_t, u_int8_t *, DB_TXNLIST **,
00077 int, u_int32_t *));
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092 int
00093 __db_dispatch(dbenv, dtab, dtabsize, db, lsnp, redo, info)
00094 DB_ENV *dbenv;
00095 int (**dtab)__P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
00096 size_t dtabsize;
00097 DBT *db;
00098 DB_LSN *lsnp;
00099 db_recops redo;
00100 DB_TXNHEAD *info;
00101 {
00102 DB_LSN prev_lsn;
00103 u_int32_t rectype, status, txnid;
00104 int make_call, ret;
00105
00106 memcpy(&rectype, db->data, sizeof(rectype));
00107 memcpy(&txnid, (u_int8_t *)db->data + sizeof(rectype), sizeof(txnid));
00108 make_call = ret = 0;
00109
00110
00111 DB_ASSERT(dtab != NULL);
00112
00113
00114
00115
00116
00117
00118
00119 switch (redo) {
00120 case DB_TXN_ABORT:
00121 case DB_TXN_APPLY:
00122 case DB_TXN_PRINT:
00123 make_call = 1;
00124 break;
00125 case DB_TXN_OPENFILES:
00126
00127
00128
00129
00130
00131
00132
00133 memcpy(&prev_lsn, (u_int8_t *)db->data +
00134 sizeof(rectype) + sizeof(txnid), sizeof(prev_lsn));
00135 if (txnid != 0 && prev_lsn.file == 0 && (ret =
00136 __db_txnlist_add(dbenv, info, txnid, TXN_OK, NULL)) != 0)
00137 return (ret);
00138
00139
00140 case DB_TXN_POPENFILES:
00141 if (rectype == DB___dbreg_register ||
00142 rectype == DB___txn_child ||
00143 rectype == DB___txn_ckp || rectype == DB___txn_recycle)
00144 return (dtab[rectype](dbenv, db, lsnp, redo, info));
00145 break;
00146 case DB_TXN_BACKWARD_ROLL:
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169 switch (rectype) {
00170
00171
00172
00173
00174
00175 case DB___txn_regop:
00176 case DB___txn_recycle:
00177 case DB___txn_ckp:
00178 make_call = 1;
00179 break;
00180
00181
00182
00183
00184 case DB___txn_child:
00185 case DB___db_noop:
00186 case DB___fop_file_remove:
00187 case DB___dbreg_register:
00188 make_call = 1;
00189
00190
00191 default:
00192 if (txnid == 0)
00193 break;
00194
00195 ret = __db_txnlist_find(dbenv, info, txnid, &status);
00196
00197
00198 if (ret == DB_NOTFOUND)
00199 return (__db_txnlist_add(dbenv,
00200 info, txnid, TXN_IGNORE, lsnp));
00201 if (ret != 0)
00202 return (ret);
00203
00204
00205
00206
00207
00208
00209
00210 if (status == TXN_IGNORE && rectype != DB___txn_child) {
00211 make_call = 0;
00212 break;
00213 }
00214 if (status == TXN_COMMIT)
00215 break;
00216
00217
00218 make_call = 1;
00219 if (status == TXN_OK &&
00220 (ret = __db_txnlist_update(dbenv,
00221 info, txnid, rectype == DB___txn_xa_regop ?
00222 TXN_PREPARE : TXN_ABORT, NULL, &status, 0)) != 0)
00223 return (ret);
00224 }
00225 break;
00226 case DB_TXN_FORWARD_ROLL:
00227
00228
00229
00230
00231
00232
00233
00234 switch (rectype) {
00235 case DB___txn_recycle:
00236 case DB___txn_ckp:
00237 case DB___db_noop:
00238 make_call = 1;
00239 break;
00240
00241 default:
00242 if (txnid == 0)
00243 status = 0;
00244 else {
00245 ret = __db_txnlist_find(dbenv,
00246 info, txnid, &status);
00247
00248 if (ret == DB_NOTFOUND)
00249
00250 ;
00251 else if (ret != 0)
00252 return (ret);
00253 else if (status == TXN_COMMIT) {
00254 make_call = 1;
00255 break;
00256 }
00257 }
00258
00259 #ifndef HAVE_FTRUNCATE
00260 if (status != TXN_IGNORE &&
00261 (rectype == DB___ham_metagroup ||
00262 rectype == DB___ham_groupalloc ||
00263 rectype == DB___db_pg_alloc)) {
00264
00265
00266
00267
00268
00269
00270
00271 make_call = 1;
00272 redo = DB_TXN_BACKWARD_ALLOC;
00273 } else
00274 #endif
00275 if (rectype == DB___dbreg_register) {
00276
00277
00278
00279
00280
00281
00282
00283 if (txnid == 0)
00284 make_call = 1;
00285 }
00286 }
00287 break;
00288 case DB_TXN_BACKWARD_ALLOC:
00289 default:
00290 return (__db_unknown_flag(
00291 dbenv, "__db_dispatch", (u_int32_t)redo));
00292 }
00293
00294 if (make_call) {
00295
00296
00297
00298
00299
00300
00301
00302
00303 if (rectype & DB_debug_FLAG) {
00304 if (redo == DB_TXN_PRINT)
00305 rectype &= ~DB_debug_FLAG;
00306 else {
00307 memcpy(lsnp,
00308 (u_int8_t *)db->data +
00309 sizeof(rectype) +
00310 sizeof(txnid), sizeof(*lsnp));
00311 return (0);
00312 }
00313 }
00314 if (rectype >= DB_user_BEGIN && dbenv->app_dispatch != NULL)
00315 return (dbenv->app_dispatch(dbenv, db, lsnp, redo));
00316 else {
00317
00318
00319
00320
00321
00322 if (rectype > dtabsize || dtab[rectype] == NULL) {
00323 __db_err(dbenv,
00324 "Illegal record type %lu in log",
00325 (u_long)rectype);
00326 return (EINVAL);
00327 }
00328 return (dtab[rectype](dbenv, db, lsnp, redo, info));
00329 }
00330 }
00331
00332 return (0);
00333 }
00334
00335
00336
00337
00338
00339
00340
00341
00342 int
00343 __db_add_recovery(dbenv, dtab, dtabsize, func, ndx)
00344 DB_ENV *dbenv;
00345 int (***dtab) __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
00346 size_t *dtabsize;
00347 int (*func) __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
00348 u_int32_t ndx;
00349 {
00350 size_t i, nsize;
00351 int ret;
00352
00353
00354 if (ndx >= *dtabsize) {
00355 nsize = ndx + 40;
00356 if ((ret =
00357 __os_realloc(dbenv, nsize * sizeof((*dtab)[0]), dtab)) != 0)
00358 return (ret);
00359 for (i = *dtabsize; i < nsize; ++i)
00360 (*dtab)[i] = NULL;
00361 *dtabsize = nsize;
00362 }
00363
00364 (*dtab)[ndx] = func;
00365 return (0);
00366 }
00367
00368
00369
00370
00371
00372
00373
00374
00375 int
00376 __db_txnlist_init(dbenv, low_txn, hi_txn, trunc_lsn, retp)
00377 DB_ENV *dbenv;
00378 u_int32_t low_txn, hi_txn;
00379 DB_LSN *trunc_lsn;
00380 DB_TXNHEAD **retp;
00381 {
00382 DB_TXNHEAD *headp;
00383 u_int32_t size, tmp;
00384 int ret;
00385
00386
00387
00388
00389
00390
00391
00392
00393
00394 if (low_txn == 0)
00395 size = 1;
00396 else {
00397 if (hi_txn < low_txn) {
00398 tmp = hi_txn;
00399 hi_txn = low_txn;
00400 low_txn = tmp;
00401 }
00402 tmp = hi_txn - low_txn;
00403
00404 if (tmp > (TXN_MAXIMUM - TXN_MINIMUM) / 2)
00405 tmp = (low_txn - TXN_MINIMUM) + (TXN_MAXIMUM - hi_txn);
00406 size = tmp / 5;
00407 if (size < 100)
00408 size = 100;
00409 }
00410 if ((ret = __os_malloc(dbenv,
00411 sizeof(DB_TXNHEAD) + size * sizeof(headp->head), &headp)) != 0)
00412 return (ret);
00413
00414 memset(headp, 0, sizeof(DB_TXNHEAD) + size * sizeof(headp->head));
00415 headp->maxid = hi_txn;
00416 headp->generation = 0;
00417 headp->nslots = size;
00418 headp->gen_alloc = 8;
00419 if ((ret = __os_malloc(dbenv, headp->gen_alloc *
00420 sizeof(headp->gen_array[0]), &headp->gen_array)) != 0) {
00421 __os_free(dbenv, headp);
00422 return (ret);
00423 }
00424 headp->gen_array[0].generation = 0;
00425 headp->gen_array[0].txn_min = TXN_MINIMUM;
00426 headp->gen_array[0].txn_max = TXN_MAXIMUM;
00427 if (trunc_lsn != NULL) {
00428 headp->trunc_lsn = *trunc_lsn;
00429 headp->maxlsn = *trunc_lsn;
00430 } else {
00431 ZERO_LSN(headp->trunc_lsn);
00432 ZERO_LSN(headp->maxlsn);
00433 }
00434 ZERO_LSN(headp->ckplsn);
00435
00436 *retp = headp;
00437 return (0);
00438 }
00439
00440
00441
00442
00443
00444
00445
00446
00447 int
00448 __db_txnlist_add(dbenv, hp, txnid, status, lsn)
00449 DB_ENV *dbenv;
00450 DB_TXNHEAD *hp;
00451 u_int32_t txnid, status;
00452 DB_LSN *lsn;
00453 {
00454 DB_TXNLIST *elp;
00455 int ret;
00456
00457 if ((ret = __os_malloc(dbenv, sizeof(DB_TXNLIST), &elp)) != 0)
00458 return (ret);
00459
00460 LIST_INSERT_HEAD(&hp->head[DB_TXNLIST_MASK(hp, txnid)], elp, links);
00461
00462 elp->type = TXNLIST_TXNID;
00463 elp->u.t.txnid = txnid;
00464 elp->u.t.status = status;
00465 elp->u.t.generation = hp->generation;
00466 if (txnid > hp->maxid)
00467 hp->maxid = txnid;
00468 if (lsn != NULL && IS_ZERO_LSN(hp->maxlsn) && status == TXN_COMMIT)
00469 hp->maxlsn = *lsn;
00470
00471 DB_ASSERT(lsn == NULL ||
00472 status != TXN_COMMIT || log_compare(&hp->maxlsn, lsn) >= 0);
00473
00474 return (0);
00475 }
00476
00477
00478
00479
00480
00481
00482
00483 int
00484 __db_txnlist_remove(dbenv, hp, txnid)
00485 DB_ENV *dbenv;
00486 DB_TXNHEAD *hp;
00487 u_int32_t txnid;
00488 {
00489 DB_TXNLIST *entry;
00490 u_int32_t status;
00491
00492 return (__db_txnlist_find_internal(dbenv,
00493 hp, TXNLIST_TXNID, txnid, NULL, &entry, 1, &status));
00494 }
00495
00496
00497
00498
00499
00500
00501
00502
00503
00504
00505
00506 void
00507 __db_txnlist_ckp(dbenv, hp, ckp_lsn)
00508 DB_ENV *dbenv;
00509 DB_TXNHEAD *hp;
00510 DB_LSN *ckp_lsn;
00511 {
00512
00513 COMPQUIET(dbenv, NULL);
00514
00515 if (IS_ZERO_LSN(hp->ckplsn) && !IS_ZERO_LSN(hp->maxlsn) &&
00516 log_compare(&hp->maxlsn, ckp_lsn) >= 0)
00517 hp->ckplsn = *ckp_lsn;
00518 }
00519
00520
00521
00522
00523
00524
00525
00526 void
00527 __db_txnlist_end(dbenv, hp)
00528 DB_ENV *dbenv;
00529 DB_TXNHEAD *hp;
00530 {
00531 u_int32_t i;
00532 DB_TXNLIST *p;
00533
00534 if (hp == NULL)
00535 return;
00536
00537 for (i = 0; i < hp->nslots; i++)
00538 while (hp != NULL && (p = LIST_FIRST(&hp->head[i])) != NULL) {
00539 switch (p->type) {
00540 case TXNLIST_LSN:
00541 __os_free(dbenv, p->u.l.lsn_stack);
00542 break;
00543 case TXNLIST_DELETE:
00544 case TXNLIST_PGNO:
00545 case TXNLIST_TXNID:
00546 default:
00547
00548
00549
00550
00551 break;
00552 }
00553 LIST_REMOVE(p, links);
00554 __os_free(dbenv, p);
00555 }
00556
00557 if (hp->gen_array != NULL)
00558 __os_free(dbenv, hp->gen_array);
00559 __os_free(dbenv, hp);
00560 }
00561
00562
00563
00564
00565
00566
00567
00568
00569
00570
00571
00572
00573 int
00574 __db_txnlist_find(dbenv, hp, txnid, statusp)
00575 DB_ENV *dbenv;
00576 DB_TXNHEAD *hp;
00577 u_int32_t txnid, *statusp;
00578 {
00579 DB_TXNLIST *entry;
00580
00581 if (txnid == 0)
00582 return (DB_NOTFOUND);
00583
00584 return (__db_txnlist_find_internal(dbenv, hp,
00585 TXNLIST_TXNID, txnid, NULL, &entry, 0, statusp));
00586 }
00587
00588
00589
00590
00591
00592
00593
00594
00595
00596 int
00597 __db_txnlist_update(dbenv, hp, txnid, status, lsn, ret_status, add_ok)
00598 DB_ENV *dbenv;
00599 DB_TXNHEAD *hp;
00600 u_int32_t txnid, status;
00601 DB_LSN *lsn;
00602 u_int32_t *ret_status;
00603 int add_ok;
00604 {
00605 DB_TXNLIST *elp;
00606 int ret;
00607
00608 if (txnid == 0)
00609 return (DB_NOTFOUND);
00610
00611 ret = __db_txnlist_find_internal(dbenv,
00612 hp, TXNLIST_TXNID, txnid, NULL, &elp, 0, ret_status);
00613
00614 if (ret == DB_NOTFOUND && add_ok) {
00615 *ret_status = status;
00616 return (__db_txnlist_add(dbenv, hp, txnid, status, lsn));
00617 }
00618 if (ret != 0)
00619 return (ret);
00620
00621 if (*ret_status == TXN_IGNORE)
00622 return (0);
00623
00624 elp->u.t.status = status;
00625
00626 if (lsn != NULL && IS_ZERO_LSN(hp->maxlsn) && status == TXN_COMMIT)
00627 hp->maxlsn = *lsn;
00628
00629 return (ret);
00630 }
00631
00632
00633
00634
00635
00636
00637
00638
00639 static int
00640 __db_txnlist_find_internal(dbenv,
00641 hp, type, txnid, uid, txnlistp, delete, statusp)
00642 DB_ENV *dbenv;
00643 DB_TXNHEAD *hp;
00644 db_txnlist_type type;
00645 u_int32_t txnid;
00646 u_int8_t uid[DB_FILE_ID_LEN];
00647 DB_TXNLIST **txnlistp;
00648 int delete;
00649 u_int32_t *statusp;
00650 {
00651 struct __db_headlink *head;
00652 DB_TXNLIST *p;
00653 u_int32_t generation, hash, i;
00654 int ret;
00655
00656 ret = 0;
00657
00658 if (hp == NULL)
00659 return (DB_NOTFOUND);
00660
00661 switch (type) {
00662 case TXNLIST_TXNID:
00663 hash = txnid;
00664
00665 for (i = 0; i <= hp->generation; i++)
00666
00667 if (hp->gen_array[i].txn_min <
00668 hp->gen_array[i].txn_max ?
00669 (txnid >= hp->gen_array[i].txn_min &&
00670 txnid <= hp->gen_array[i].txn_max) :
00671 (txnid >= hp->gen_array[i].txn_min ||
00672 txnid <= hp->gen_array[i].txn_max))
00673 break;
00674 DB_ASSERT(i <= hp->generation);
00675 generation = hp->gen_array[i].generation;
00676 break;
00677 case TXNLIST_PGNO:
00678 memcpy(&hash, uid, sizeof(hash));
00679 generation = 0;
00680 break;
00681 case TXNLIST_DELETE:
00682 case TXNLIST_LSN:
00683 default:
00684 return (__db_panic(dbenv, EINVAL));
00685 }
00686
00687 head = &hp->head[DB_TXNLIST_MASK(hp, hash)];
00688
00689 for (p = LIST_FIRST(head); p != NULL; p = LIST_NEXT(p, links)) {
00690 if (p->type != type)
00691 continue;
00692 switch (type) {
00693 case TXNLIST_TXNID:
00694 if (p->u.t.txnid != txnid ||
00695 generation != p->u.t.generation)
00696 continue;
00697 *statusp = p->u.t.status;
00698 break;
00699
00700 case TXNLIST_PGNO:
00701 if (memcmp(uid, p->u.p.uid, DB_FILE_ID_LEN) != 0)
00702 continue;
00703 break;
00704 case TXNLIST_DELETE:
00705 case TXNLIST_LSN:
00706 default:
00707 return (__db_panic(dbenv, EINVAL));
00708 }
00709 if (delete == 1) {
00710 LIST_REMOVE(p, links);
00711 __os_free(dbenv, p);
00712 *txnlistp = NULL;
00713 } else if (p != LIST_FIRST(head)) {
00714
00715 LIST_REMOVE(p, links);
00716 LIST_INSERT_HEAD(head, p, links);
00717 *txnlistp = p;
00718 } else
00719 *txnlistp = p;
00720 return (ret);
00721 }
00722
00723 return (DB_NOTFOUND);
00724 }
00725
00726
00727
00728
00729
00730
00731
00732
00733 int
00734 __db_txnlist_gen(dbenv, hp, incr, min, max)
00735 DB_ENV *dbenv;
00736 DB_TXNHEAD *hp;
00737 int incr;
00738 u_int32_t min, max;
00739 {
00740 int ret;
00741
00742
00743
00744
00745
00746
00747
00748
00749
00750
00751
00752
00753
00754 if (incr < 0) {
00755 --hp->generation;
00756 memmove(hp->gen_array, &hp->gen_array[1],
00757 (hp->generation + 1) * sizeof(hp->gen_array[0]));
00758 } else {
00759 ++hp->generation;
00760 if (hp->generation >= hp->gen_alloc) {
00761 hp->gen_alloc *= 2;
00762 if ((ret = __os_realloc(dbenv, hp->gen_alloc *
00763 sizeof(hp->gen_array[0]), &hp->gen_array)) != 0)
00764 return (ret);
00765 }
00766 memmove(&hp->gen_array[1], &hp->gen_array[0],
00767 hp->generation * sizeof(hp->gen_array[0]));
00768 hp->gen_array[0].generation = hp->generation;
00769 hp->gen_array[0].txn_min = min;
00770 hp->gen_array[0].txn_max = max;
00771 }
00772 return (0);
00773 }
00774
00775
00776
00777
00778
00779
00780
00781 int
00782 __db_txnlist_lsnadd(dbenv, hp, lsnp)
00783 DB_ENV *dbenv;
00784 DB_TXNHEAD *hp;
00785 DB_LSN *lsnp;
00786 {
00787 DB_TXNLIST *elp;
00788 int ret;
00789
00790 if (IS_ZERO_LSN(*lsnp))
00791 return (0);
00792
00793 for (elp = LIST_FIRST(&hp->head[0]);
00794 elp != NULL; elp = LIST_NEXT(elp, links))
00795 if (elp->type == TXNLIST_LSN)
00796 break;
00797
00798 if (elp == NULL) {
00799 if ((ret = __db_txnlist_lsninit(dbenv, hp, lsnp)) != 0)
00800 return (ret);
00801 return (DB_SURPRISE_KID);
00802 }
00803
00804 if (elp->u.l.stack_indx == elp->u.l.stack_size) {
00805 elp->u.l.stack_size <<= 1;
00806 if ((ret = __os_realloc(dbenv, sizeof(DB_LSN) *
00807 elp->u.l.stack_size, &elp->u.l.lsn_stack)) != 0) {
00808 __db_txnlist_end(dbenv, hp);
00809 return (ret);
00810 }
00811 }
00812 elp->u.l.lsn_stack[elp->u.l.stack_indx++] = *lsnp;
00813
00814 return (0);
00815 }
00816
00817
00818
00819
00820
00821
00822
00823
00824 int
00825 __db_txnlist_lsnget(dbenv, hp, lsnp, flags)
00826 DB_ENV *dbenv;
00827 DB_TXNHEAD *hp;
00828 DB_LSN *lsnp;
00829 u_int32_t flags;
00830 {
00831 DB_TXNLIST *elp;
00832
00833 COMPQUIET(dbenv, NULL);
00834 COMPQUIET(flags, 0);
00835
00836 for (elp = LIST_FIRST(&hp->head[0]);
00837 elp != NULL; elp = LIST_NEXT(elp, links))
00838 if (elp->type == TXNLIST_LSN)
00839 break;
00840
00841 if (elp == NULL || elp->u.l.stack_indx == 0) {
00842 ZERO_LSN(*lsnp);
00843 return (0);
00844 }
00845
00846 *lsnp = elp->u.l.lsn_stack[--elp->u.l.stack_indx];
00847
00848 return (0);
00849 }
00850
00851
00852
00853
00854
00855
00856
00857 int
00858 __db_txnlist_lsninit(dbenv, hp, lsnp)
00859 DB_ENV *dbenv;
00860 DB_TXNHEAD *hp;
00861 DB_LSN *lsnp;
00862 {
00863 DB_TXNLIST *elp;
00864 int ret;
00865
00866 elp = NULL;
00867
00868 if ((ret = __os_malloc(dbenv, sizeof(DB_TXNLIST), &elp)) != 0)
00869 goto err;
00870 LIST_INSERT_HEAD(&hp->head[0], elp, links);
00871
00872 elp->type = TXNLIST_LSN;
00873 if ((ret = __os_malloc(dbenv,
00874 sizeof(DB_LSN) * DB_LSN_STACK_SIZE, &elp->u.l.lsn_stack)) != 0)
00875 goto err;
00876 elp->u.l.stack_indx = 1;
00877 elp->u.l.stack_size = DB_LSN_STACK_SIZE;
00878 elp->u.l.lsn_stack[0] = *lsnp;
00879
00880 return (0);
00881
00882 err: __db_txnlist_end(dbenv, hp);
00883 return (ret);
00884 }
00885
00886 #ifndef HAVE_FTRUNCATE
00887
00888
00889
00890
00891
00892
00893
00894
00895
00896 int
00897 __db_add_limbo(dbenv, hp, fileid, pgno, count)
00898 DB_ENV *dbenv;
00899 DB_TXNHEAD *hp;
00900 int32_t fileid;
00901 db_pgno_t pgno;
00902 int32_t count;
00903 {
00904 DB_LOG *dblp;
00905 FNAME *fnp;
00906 int ret;
00907
00908 dblp = dbenv->lg_handle;
00909 if ((ret = __dbreg_id_to_fname(dblp, fileid, 0, &fnp)) != 0)
00910 return (ret);
00911
00912 do {
00913 if ((ret =
00914 __db_txnlist_pgnoadd(dbenv, hp, fileid, fnp->ufid,
00915 R_ADDR(&dblp->reginfo, fnp->name_off), pgno)) != 0)
00916 return (ret);
00917 pgno++;
00918 } while (--count != 0);
00919
00920 return (0);
00921 }
00922
00923
00924
00925
00926
00927
00928
00929
00930
00931
00932
00933
00934
00935
00936
00937
00938
00939
00940
00941
00942
00943
00944
00945
00946
00947
00948
00949
00950
00951
00952
00953
00954
00955
00956
00957
00958
00959
00960 int
00961 __db_do_the_limbo(dbenv, ptxn, txn, hp, state)
00962 DB_ENV *dbenv;
00963 DB_TXN *ptxn, *txn;
00964 DB_TXNHEAD *hp;
00965 db_limbo_state state;
00966 {
00967 DB_TXNLIST *elp;
00968 u_int32_t h;
00969 int ret;
00970
00971 ret = 0;
00972
00973
00974
00975
00976
00977 for (h = 0; h < hp->nslots; h++) {
00978 if ((elp = LIST_FIRST(&hp->head[h])) == NULL)
00979 continue;
00980 if (ptxn != NULL) {
00981 if ((ret =
00982 __db_limbo_move(dbenv, ptxn, txn, elp)) != 0)
00983 goto err;
00984 } else if ((ret =
00985 __db_limbo_bucket(dbenv, txn, elp, state)) != 0)
00986 goto err;
00987 }
00988
00989 err: if (ret != 0) {
00990 __db_err(dbenv, "Fatal error in abort of an allocation");
00991 ret = __db_panic(dbenv, ret);
00992 }
00993
00994 return (ret);
00995 }
00996
00997
00998
00999
01000
01001
01002
01003 static int
01004 __db_lock_move(dbenv, fileid, pgno, mode, ptxn, txn)
01005 DB_ENV *dbenv;
01006 u_int8_t *fileid;
01007 db_pgno_t pgno;
01008 db_lockmode_t mode;
01009 DB_TXN *ptxn, *txn;
01010 {
01011 DBT lock_dbt;
01012 DB_LOCK lock;
01013 DB_LOCK_ILOCK lock_obj;
01014 DB_LOCKREQ req;
01015 int ret;
01016
01017 lock_obj.pgno = pgno;
01018 memcpy(lock_obj.fileid, fileid, DB_FILE_ID_LEN);
01019 lock_obj.type = DB_PAGE_LOCK;
01020
01021 memset(&lock_dbt, 0, sizeof(lock_dbt));
01022 lock_dbt.data = &lock_obj;
01023 lock_dbt.size = sizeof(lock_obj);
01024
01025 if ((ret = __lock_get(dbenv,
01026 txn->txnid, 0, &lock_dbt, mode, &lock)) == 0) {
01027 memset(&req, 0, sizeof(req));
01028 req.lock = lock;
01029 req.op = DB_LOCK_TRADE;
01030 ret = __lock_vec(dbenv, ptxn->txnid, 0, &req, 1, NULL);
01031 }
01032 return (ret);
01033 }
01034
01035
01036
01037
01038
01039 static int
01040 __db_limbo_move(dbenv, ptxn, txn, elp)
01041 DB_ENV *dbenv;
01042 DB_TXN *ptxn, *txn;
01043 DB_TXNLIST *elp;
01044 {
01045 int ret;
01046
01047 for (; elp != NULL; elp = LIST_NEXT(elp, links)) {
01048 if (elp->type != TXNLIST_PGNO || elp->u.p.locked == 1)
01049 continue;
01050 if ((ret = __db_lock_move(dbenv, elp->u.p.uid,
01051 PGNO_BASE_MD, DB_LOCK_WRITE, ptxn, txn)) != 0)
01052 return (ret);
01053 elp->u.p.locked = 1;
01054 }
01055
01056 return (0);
01057 }
01058
01059
01060
01061
01062
01063
01064
01065 #define T_RESTORED(txn) ((txn) != NULL && F_ISSET(txn, TXN_RESTORED))
01066 static int
01067 __db_limbo_bucket(dbenv, txn, elp, state)
01068 DB_ENV *dbenv;
01069 DB_TXN *txn;
01070 DB_TXNLIST *elp;
01071 db_limbo_state state;
01072 {
01073 DB *dbp;
01074 DB_MPOOLFILE *mpf;
01075 DBMETA *meta;
01076 DB_TXN *ctxn, *t;
01077 FNAME *fname;
01078 db_pgno_t last_pgno, pgno;
01079 int dbp_created, in_retry, ret, t_ret;
01080
01081 ctxn = NULL;
01082 in_retry = 0;
01083 meta = NULL;
01084 mpf = NULL;
01085 ret = 0;
01086 for (; elp != NULL; elp = LIST_NEXT(elp, links)) {
01087 if (elp->type != TXNLIST_PGNO)
01088 continue;
01089 retry: dbp_created = 0;
01090
01091
01092
01093
01094
01095 if (state == LIMBO_PREPARE)
01096 ctxn = txn;
01097 else if (!in_retry && state != LIMBO_RECOVER &&
01098 state != LIMBO_TIMESTAMP && !T_RESTORED(txn) &&
01099 (ret = __txn_compensate_begin(dbenv, &ctxn)) != 0)
01100 return (ret);
01101
01102
01103
01104
01105
01106 t = ctxn == NULL ? txn : ctxn;
01107
01108
01109 ret = __dbreg_id_to_db(dbenv, t, &dbp, elp->u.p.fileid, 0);
01110
01111
01112
01113
01114
01115 if (ret == DB_DELETED || ret == ENOENT ||
01116 ((ret == 0 &&
01117 memcmp(elp->u.p.uid, dbp->fileid, DB_FILE_ID_LEN) != 0))) {
01118 if ((ret = __dbreg_fid_to_fname(
01119 dbenv->lg_handle, elp->u.p.uid, 0, &fname)) == 0)
01120 ret = __dbreg_id_to_db(
01121 dbenv, t, &dbp, fname->id, 0);
01122 }
01123
01124
01125
01126
01127 if (ret == DB_DELETED ||
01128 (ret == 0 && F_ISSET(dbp, DB_AM_DISCARD)))
01129 goto next;
01130
01131 if (ret != 0) {
01132 if ((ret = db_create(&dbp, dbenv, 0)) != 0)
01133 goto err;
01134
01135
01136
01137
01138
01139 F_SET(dbp, DB_AM_COMPENSATE);
01140 dbp_created = 1;
01141
01142
01143 ret = __db_open(dbp, t, elp->u.p.fname, NULL,
01144 DB_UNKNOWN, DB_ODDFILESIZE, __db_omode(OWNER_RW),
01145 PGNO_BASE_MD);
01146 if (ret == ENOENT)
01147 goto next;
01148 }
01149
01150
01151
01152
01153
01154 if (memcmp(elp->u.p.uid, dbp->fileid, DB_FILE_ID_LEN) != 0)
01155 goto next;
01156
01157 mpf = dbp->mpf;
01158 last_pgno = PGNO_INVALID;
01159
01160 if (meta == NULL &&
01161 (ctxn == NULL || state == LIMBO_COMPENSATE)) {
01162 pgno = PGNO_BASE_MD;
01163 if ((ret = __memp_fget(mpf, &pgno, 0, &meta)) != 0)
01164 goto err;
01165 last_pgno = meta->free;
01166 }
01167
01168 if (state == LIMBO_PREPARE) {
01169 if ((ret = __db_limbo_prepare(dbp, ctxn, elp)) != 0)
01170 goto err;
01171 } else
01172 ret = __db_limbo_fix(dbp,
01173 ctxn, elp, &last_pgno, meta, state);
01174
01175
01176
01177
01178
01179
01180
01181 if (ret != 0) {
01182 if (ret == DB_RUNRECOVERY || ctxn == NULL)
01183 goto err;
01184 in_retry = 1;
01185 if ((ret = __txn_abort(ctxn)) != 0)
01186 goto err;
01187 ctxn = NULL;
01188 goto retry;
01189 }
01190
01191 if (state == LIMBO_PREPARE)
01192 ctxn = NULL;
01193
01194 else if (ctxn != NULL) {
01195
01196
01197
01198
01199
01200 if (state == LIMBO_COMPENSATE)
01201 F_CLR(
01202 (DB_LOG *)dbenv->lg_handle, DBLOG_RECOVER);
01203 ret = __txn_commit(ctxn, DB_TXN_NOSYNC);
01204 ctxn = NULL;
01205 if (state == LIMBO_COMPENSATE)
01206 F_SET(
01207 (DB_LOG *)dbenv->lg_handle, DBLOG_RECOVER);
01208 if (ret != 0)
01209 goto retry;
01210 }
01211
01212
01213
01214
01215
01216
01217
01218 else if (last_pgno == meta->free) {
01219
01220 if ((ret = __memp_fput(mpf, meta, 0)) != 0)
01221 goto err;
01222 meta = NULL;
01223 } else {
01224
01225
01226
01227
01228
01229
01230
01231 if (!IS_RECOVERING(dbenv) && !T_RESTORED(txn))
01232 __db_err(dbenv, "Flushing free list to disk");
01233 if ((ret = __memp_fput(mpf, meta, 0)) != 0)
01234 goto err;
01235 meta = NULL;
01236
01237
01238
01239
01240
01241
01242
01243
01244
01245 if ((ret = __db_sync(dbp)) == 0) {
01246 pgno = PGNO_BASE_MD;
01247 if ((ret =
01248 __memp_fget(mpf, &pgno, 0, &meta)) != 0)
01249 goto err;
01250 meta->free = last_pgno;
01251 if ((ret = __memp_fput(mpf,
01252 meta, DB_MPOOL_DIRTY)) != 0)
01253 goto err;
01254 meta = NULL;
01255 } else {
01256 __db_err(dbenv,
01257 "%s: %s", dbp->fname, db_strerror(ret));
01258 __db_err(dbenv, "%s: %s %s", dbp->fname,
01259 "allocation flush failed, some free pages",
01260 "may not appear in the free list");
01261 ret = 0;
01262 }
01263 }
01264
01265 next:
01266
01267
01268
01269
01270 if (ctxn != NULL &&
01271 (t_ret = __txn_abort(ctxn)) != 0 && ret == 0)
01272 ret = t_ret;
01273
01274 if (dbp_created &&
01275 (t_ret = __db_close(dbp, txn, DB_NOSYNC)) != 0 && ret == 0)
01276 ret = t_ret;
01277 dbp = NULL;
01278 if (state != LIMBO_PREPARE && state != LIMBO_TIMESTAMP) {
01279 __os_free(dbenv, elp->u.p.fname);
01280 __os_free(dbenv, elp->u.p.pgno_array);
01281 }
01282 if (ret == ENOENT)
01283 ret = 0;
01284 else if (ret != 0)
01285 goto err;
01286 }
01287
01288 err: if (meta != NULL)
01289 (void)__memp_fput(mpf, meta, 0);
01290 return (ret);
01291 }
01292
01293
01294
01295
01296
01297
01298 static int
01299 __db_limbo_fix(dbp, ctxn, elp, lastp, meta, state)
01300 DB *dbp;
01301 DB_TXN *ctxn;
01302 DB_TXNLIST *elp;
01303 db_pgno_t *lastp;
01304 DBMETA *meta;
01305 db_limbo_state state;
01306 {
01307 DBC *dbc;
01308 DBT ldbt;
01309 DB_MPOOLFILE *mpf;
01310 PAGE *freep, *pagep;
01311 db_pgno_t next, pgno;
01312 u_int32_t i;
01313 int ret, t_ret;
01314
01315
01316
01317
01318
01319
01320 dbc = NULL;
01321 mpf = dbp->mpf;
01322 pagep = NULL;
01323 ret = 0;
01324
01325 for (i = 0; i < elp->u.p.nentries; i++) {
01326 pgno = elp->u.p.pgno_array[i];
01327
01328 if (pgno == PGNO_INVALID)
01329 continue;
01330
01331 if ((ret =
01332 __memp_fget(mpf, &pgno, DB_MPOOL_CREATE, &pagep)) != 0) {
01333 if (ret != ENOSPC)
01334 goto err;
01335 continue;
01336 }
01337
01338 if (state == LIMBO_COMPENSATE || IS_ZERO_LSN(LSN(pagep))) {
01339 if (ctxn == NULL) {
01340
01341
01342
01343
01344
01345 for (next = *lastp; next != 0; ) {
01346 if (next == pgno)
01347 break;
01348 if ((ret = __memp_fget(mpf,
01349 &next, 0, &freep)) != 0)
01350 goto err;
01351 next = NEXT_PGNO(freep);
01352 if ((ret =
01353 __memp_fput(mpf, freep, 0)) != 0)
01354 goto err;
01355 }
01356
01357 if (next != pgno) {
01358 P_INIT(pagep, dbp->pgsize, pgno,
01359 PGNO_INVALID, *lastp, 0, P_INVALID);
01360
01361 INIT_LSN(LSN(pagep));
01362 *lastp = pgno;
01363 }
01364 } else if (state == LIMBO_COMPENSATE) {
01365
01366
01367
01368
01369
01370 ZERO_LSN(pagep->lsn);
01371 memset(&ldbt, 0, sizeof(ldbt));
01372 ldbt.data = pagep;
01373 ldbt.size = P_OVERHEAD(dbp);
01374 if ((ret = __db_pg_new_log(dbp, ctxn,
01375 &LSN(meta), 0, pagep->pgno,
01376 &LSN(meta), PGNO_BASE_MD,
01377 &ldbt, pagep->next_pgno)) != 0)
01378 goto err;
01379 } else {
01380 if (dbc == NULL && (ret =
01381 __db_cursor(dbp, ctxn, &dbc, 0)) != 0)
01382 goto err;
01383
01384
01385
01386
01387
01388
01389 F_SET(dbc, DBC_COMPENSATE);
01390
01391
01392
01393
01394
01395
01396
01397 F_CLR(dbc, DBC_RECOVER);
01398
01399 ret = __db_free(dbc, pagep);
01400 pagep = NULL;
01401
01402
01403
01404
01405
01406
01407
01408
01409
01410
01411 if (ret != 0) {
01412
01413 (void)__db_c_close(dbc);
01414 dbc = NULL;
01415 goto err;
01416 }
01417 }
01418 }
01419 else
01420 elp->u.p.pgno_array[i] = PGNO_INVALID;
01421
01422 if (pagep != NULL) {
01423 ret = __memp_fput(mpf, pagep, DB_MPOOL_DIRTY);
01424 pagep = NULL;
01425 }
01426 if (ret != 0)
01427 goto err;
01428 }
01429
01430 err: if (pagep != NULL &&
01431 (t_ret = __memp_fput(mpf, pagep, DB_MPOOL_DIRTY)) != 0 && ret == 0)
01432 ret = t_ret;
01433 if (dbc != NULL && (t_ret = __db_c_close(dbc)) != 0 && ret == 0)
01434 ret = t_ret;
01435 return (ret);
01436 }
01437
01438 static int
01439 __db_limbo_prepare(dbp, txn, elp)
01440 DB *dbp;
01441 DB_TXN *txn;
01442 DB_TXNLIST *elp;
01443 {
01444 DB_LSN lsn;
01445 DB_MPOOLFILE *mpf;
01446 PAGE *pagep;
01447 db_pgno_t pgno;
01448 u_int32_t i;
01449 int ret, t_ret;
01450
01451
01452
01453
01454
01455 pagep = NULL;
01456 ret = 0;
01457 mpf = dbp->mpf;
01458
01459 for (i = 0; i < elp->u.p.nentries; i++) {
01460 pgno = elp->u.p.pgno_array[i];
01461
01462 if ((ret =
01463 __memp_fget(mpf, &pgno, DB_MPOOL_CREATE, &pagep)) != 0) {
01464 if (ret != ENOSPC)
01465 return (ret);
01466 continue;
01467 }
01468
01469 if (IS_ZERO_LSN(LSN(pagep)))
01470 ret = __db_pg_prepare_log(dbp, txn, &lsn, 0, pgno);
01471
01472 if ((t_ret = __memp_fput(mpf, pagep, 0)) != 0 && ret == 0)
01473 ret = t_ret;
01474
01475 if (ret != 0)
01476 return (ret);
01477 }
01478
01479 return (0);
01480 }
01481
01482 #define DB_TXNLIST_MAX_PGNO 8
01483
01484
01485
01486
01487
01488
01489 static int
01490 __db_txnlist_pgnoadd(dbenv, hp, fileid, uid, fname, pgno)
01491 DB_ENV *dbenv;
01492 DB_TXNHEAD *hp;
01493 int32_t fileid;
01494 u_int8_t uid[DB_FILE_ID_LEN];
01495 char *fname;
01496 db_pgno_t pgno;
01497 {
01498 DB_TXNLIST *elp;
01499 size_t len;
01500 u_int32_t hash, status;
01501 int ret;
01502
01503 elp = NULL;
01504
01505 if ((ret = __db_txnlist_find_internal(dbenv, hp,
01506 TXNLIST_PGNO, 0, uid, &elp, 0, &status)) != 0 && ret != DB_NOTFOUND)
01507 goto err;
01508
01509 if (ret == DB_NOTFOUND || status != TXN_OK) {
01510 if ((ret =
01511 __os_malloc(dbenv, sizeof(DB_TXNLIST), &elp)) != 0)
01512 goto err;
01513 memcpy(&hash, uid, sizeof(hash));
01514 LIST_INSERT_HEAD(
01515 &hp->head[DB_TXNLIST_MASK(hp, hash)], elp, links);
01516 memcpy(elp->u.p.uid, uid, DB_FILE_ID_LEN);
01517
01518 len = strlen(fname) + 1;
01519 if ((ret = __os_malloc(dbenv, len, &elp->u.p.fname)) != 0)
01520 goto err;
01521 memcpy(elp->u.p.fname, fname, len);
01522
01523 elp->u.p.maxentry = 0;
01524 elp->u.p.locked = 0;
01525 elp->type = TXNLIST_PGNO;
01526 if ((ret = __os_malloc(dbenv,
01527 8 * sizeof(db_pgno_t), &elp->u.p.pgno_array)) != 0)
01528 goto err;
01529 elp->u.p.maxentry = DB_TXNLIST_MAX_PGNO;
01530 elp->u.p.nentries = 0;
01531 } else if (elp->u.p.nentries == elp->u.p.maxentry) {
01532 elp->u.p.maxentry <<= 1;
01533 if ((ret = __os_realloc(dbenv, elp->u.p.maxentry *
01534 sizeof(db_pgno_t), &elp->u.p.pgno_array)) != 0)
01535 goto err;
01536 }
01537
01538 elp->u.p.pgno_array[elp->u.p.nentries++] = pgno;
01539
01540 elp->u.p.fileid = fileid;
01541
01542 return (0);
01543
01544 err: return (ret);
01545 }
01546 #endif
01547
01548 #ifdef DEBUG
01549
01550
01551
01552
01553
01554
01555 void
01556 __db_txnlist_print(hp)
01557 DB_TXNHEAD *hp;
01558 {
01559 DB_TXNLIST *p;
01560 u_int32_t i;
01561 char *txntype;
01562
01563 printf("Maxid: %lu Generation: %lu\n",
01564 (u_long)hp->maxid, (u_long)hp->generation);
01565 for (i = 0; i < hp->nslots; i++)
01566 for (p = LIST_FIRST(&hp->head[i]);
01567 p != NULL; p = LIST_NEXT(p, links)) {
01568 if (p->type != TXNLIST_TXNID) {
01569 printf("Unrecognized type: %d\n", p->type);
01570 continue;
01571 }
01572 switch (p->u.t.status) {
01573 case TXN_OK:
01574 txntype = "OK";
01575 break;
01576 case TXN_COMMIT:
01577 txntype = "commit";
01578 break;
01579 case TXN_PREPARE:
01580 txntype = "prepare";
01581 break;
01582 case TXN_ABORT:
01583 txntype = "abort";
01584 break;
01585 case TXN_IGNORE:
01586 txntype = "ignore";
01587 break;
01588 case TXN_EXPECTED:
01589 txntype = "expected";
01590 break;
01591 case TXN_UNEXPECTED:
01592 txntype = "unexpected";
01593 break;
01594 default:
01595 txntype = "UNKNOWN";
01596 break;
01597 }
01598 printf("TXNID: %lx(%lu): %s\n",
01599 (u_long)p->u.t.txnid,
01600 (u_long)p->u.t.generation, txntype);
01601 }
01602 }
01603 #endif