00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include "db_config.h"
00011
00012 #ifndef NO_SYSTEM_INCLUDES
00013 #include <stdlib.h>
00014 #include <string.h>
00015 #if TIME_WITH_SYS_TIME
00016 #include <sys/time.h>
00017 #include <time.h>
00018 #else
00019 #if HAVE_SYS_TIME_H
00020 #include <sys/time.h>
00021 #else
00022 #include <time.h>
00023 #endif
00024 #endif
00025 #endif
00026
00027 #include "db_int.h"
00028 #include "dbinc/db_page.h"
00029 #include "dbinc/db_am.h"
00030 #include "dbinc/log.h"
00031
00032 static void __rep_cmp_vote __P((DB_ENV *, REP *, int, DB_LSN *,
00033 int, u_int32_t, u_int32_t));
00034 static int __rep_cmp_vote2 __P((DB_ENV *, REP *, int, u_int32_t));
00035 static int __rep_elect_init
00036 __P((DB_ENV *, DB_LSN *, int, int, int, int *, u_int32_t *));
00037 static int __rep_tally __P((DB_ENV *, REP *, int, int *, u_int32_t, roff_t));
00038 static int __rep_wait __P((DB_ENV *, u_int32_t, int *, u_int32_t));
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048 int
00049 __rep_elect(dbenv, nsites, nvotes, priority, timeout, eidp, flags)
00050 DB_ENV *dbenv;
00051 int nsites, nvotes, priority;
00052 u_int32_t timeout;
00053 int *eidp;
00054 u_int32_t flags;
00055 {
00056 DB_LOG *dblp;
00057 DB_LSN lsn;
00058 DB_REP *db_rep;
00059 REP *rep;
00060 int ack, done, in_progress, ret, send_vote;
00061 u_int32_t egen, orig_tally, tiebreaker, to;
00062 #ifdef DIAGNOSTIC
00063 DB_MSGBUF mb;
00064 #endif
00065
00066 PANIC_CHECK(dbenv);
00067 COMPQUIET(flags, 0);
00068 ENV_REQUIRES_CONFIG(dbenv, dbenv->rep_handle, "rep_elect", DB_INIT_REP);
00069
00070
00071 if (nsites <= 0) {
00072 __db_err(dbenv,
00073 "DB_ENV->rep_elect: nsites must be greater than 0");
00074 return (EINVAL);
00075 }
00076 if (nvotes < 0) {
00077 __db_err(dbenv,
00078 "DB_ENV->rep_elect: nvotes may not be negative");
00079 return (EINVAL);
00080 }
00081 if (priority < 0) {
00082 __db_err(dbenv,
00083 "DB_ENV->rep_elect: priority may not be negative");
00084 return (EINVAL);
00085 }
00086 if (nsites < nvotes) {
00087 __db_err(dbenv,
00088 "DB_ENV->rep_elect: nvotes (%d) is larger than nsites (%d)",
00089 nvotes, nsites);
00090 return (EINVAL);
00091 }
00092
00093 ack = nvotes;
00094
00095 if (nvotes == 0)
00096 ack = (nsites / 2) + 1;
00097
00098
00099
00100
00101
00102
00103
00104
00105 if (nvotes <= (nsites / 2)) {
00106 __db_err(dbenv,
00107 "DB_ENV->rep_elect:WARNING: nvotes (%d) is sub-majority with nsites (%d)",
00108 nvotes, nsites);
00109 }
00110
00111 db_rep = dbenv->rep_handle;
00112 rep = db_rep->region;
00113 dblp = dbenv->lg_handle;
00114
00115 RPRINT(dbenv, rep,
00116 (dbenv, &mb, "Start election nsites %d, ack %d, priority %d",
00117 nsites, ack, priority));
00118
00119 LOG_SYSTEM_LOCK(dbenv);
00120 lsn = ((LOG *)dblp->reginfo.primary)->lsn;
00121 LOG_SYSTEM_UNLOCK(dbenv);
00122
00123 orig_tally = 0;
00124 to = timeout;
00125 if ((ret = __rep_elect_init(dbenv,
00126 &lsn, nsites, ack, priority, &in_progress, &orig_tally)) != 0) {
00127 if (ret == DB_REP_NEWMASTER) {
00128 ret = 0;
00129 *eidp = dbenv->rep_eid;
00130 }
00131 goto err;
00132 }
00133
00134
00135
00136
00137 if (in_progress) {
00138 *eidp = rep->master_id;
00139 return (0);
00140 }
00141 __os_clock(dbenv, &rep->esec, &rep->eusec);
00142 restart:
00143
00144 __os_unique_id(dbenv, &tiebreaker);
00145
00146 REP_SYSTEM_LOCK(dbenv);
00147 F_SET(rep, REP_F_EPHASE1 | REP_F_NOARCHIVE);
00148 F_CLR(rep, REP_F_TALLY);
00149
00150
00151
00152
00153
00154
00155
00156 if ((ret = __rep_write_egen(dbenv, rep->egen + 1)) != 0)
00157 goto lockdone;
00158
00159
00160 if (__rep_tally(dbenv, rep, rep->eid, &rep->sites, rep->egen,
00161 rep->tally_off) != 0) {
00162 ret = EINVAL;
00163 goto lockdone;
00164 }
00165 __rep_cmp_vote(dbenv, rep, rep->eid, &lsn, priority, rep->gen,
00166 tiebreaker);
00167
00168 RPRINT(dbenv, rep, (dbenv, &mb, "Beginning an election"));
00169
00170
00171 send_vote = DB_EID_INVALID;
00172 egen = rep->egen;
00173 REP_SYSTEM_UNLOCK(dbenv);
00174 __rep_send_vote(dbenv, &lsn, nsites, ack, priority, tiebreaker, egen,
00175 DB_EID_BROADCAST, REP_VOTE1);
00176 DB_ENV_TEST_RECOVERY(dbenv, DB_TEST_ELECTVOTE1, ret, NULL);
00177 ret = __rep_wait(dbenv, to, eidp, REP_F_EPHASE1);
00178 switch (ret) {
00179 case 0:
00180
00181 if (*eidp != DB_EID_INVALID) {
00182 RPRINT(dbenv, rep, (dbenv, &mb,
00183 "Ended election phase 1 %d", ret));
00184 goto edone;
00185 }
00186 goto phase2;
00187 case DB_REP_EGENCHG:
00188 if (to > timeout)
00189 to = timeout;
00190 to = (to * 8) / 10;
00191 RPRINT(dbenv, rep, (dbenv, &mb,
00192 "Egen changed while waiting. Now %lu. New timeout %lu, orig timeout %lu",
00193 (u_long)rep->egen, (u_long)to, (u_long)timeout));
00194
00195
00196
00197
00198
00199
00200
00201
00202 goto restart;
00203 case DB_TIMEOUT:
00204 break;
00205 default:
00206 goto err;
00207 }
00208
00209
00210
00211
00212
00213
00214 REP_SYSTEM_LOCK(dbenv);
00215
00216
00217
00218
00219 if (egen != rep->egen) {
00220 REP_SYSTEM_UNLOCK(dbenv);
00221 RPRINT(dbenv, rep, (dbenv, &mb, "Egen changed from %lu to %lu",
00222 (u_long)egen, (u_long)rep->egen));
00223 goto restart;
00224 }
00225 if (rep->sites >= rep->nvotes) {
00226
00227
00228 send_vote = rep->winner;
00229
00230
00231
00232
00233
00234 if (rep->winner == rep->eid) {
00235 (void)__rep_tally(dbenv, rep, rep->eid, &rep->votes,
00236 egen, rep->v2tally_off);
00237 RPRINT(dbenv, rep, (dbenv, &mb,
00238 "Counted my vote %d", rep->votes));
00239 }
00240 F_SET(rep, REP_F_EPHASE2);
00241 F_CLR(rep, REP_F_EPHASE1);
00242 }
00243 REP_SYSTEM_UNLOCK(dbenv);
00244 if (send_vote == DB_EID_INVALID) {
00245
00246 RPRINT(dbenv, rep, (dbenv, &mb,
00247 "Not enough votes to elect: recvd %d of %d from %d sites",
00248 rep->sites, rep->nvotes, rep->nsites));
00249 ret = DB_REP_UNAVAIL;
00250 goto err;
00251
00252 } else {
00253
00254
00255
00256
00257 if (send_vote != rep->eid) {
00258 RPRINT(dbenv, rep, (dbenv, &mb, "Sending vote"));
00259 __rep_send_vote(dbenv, NULL, 0, 0, 0, 0, egen,
00260 send_vote, REP_VOTE2);
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273 to = to * 2;
00274
00275 }
00276
00277 phase2: ret = __rep_wait(dbenv, to, eidp, REP_F_EPHASE2);
00278 RPRINT(dbenv, rep, (dbenv, &mb,
00279 "Ended election phase 2 %d", ret));
00280 switch (ret) {
00281 case 0:
00282 goto edone;
00283 case DB_REP_EGENCHG:
00284 if (to > timeout)
00285 to = timeout;
00286 to = (to * 8) / 10;
00287 RPRINT(dbenv, rep, (dbenv, &mb,
00288 "While waiting egen changed to %lu. Phase 2 New timeout %lu, orig timeout %lu",
00289 (u_long)rep->egen,
00290 (u_long)to, (u_long)timeout));
00291 goto restart;
00292 case DB_TIMEOUT:
00293 ret = DB_REP_UNAVAIL;
00294 break;
00295 default:
00296 goto err;
00297 }
00298 REP_SYSTEM_LOCK(dbenv);
00299 if (egen != rep->egen) {
00300 REP_SYSTEM_UNLOCK(dbenv);
00301 RPRINT(dbenv, rep, (dbenv, &mb,
00302 "Egen ph2 changed from %lu to %lu",
00303 (u_long)egen, (u_long)rep->egen));
00304 goto restart;
00305 }
00306 done = rep->votes >= rep->nvotes;
00307 RPRINT(dbenv, rep, (dbenv, &mb,
00308 "After phase 2: done %d, votes %d, nsites %d",
00309 done, rep->votes, rep->nsites));
00310 if (send_vote == rep->eid && done) {
00311 __rep_elect_master(dbenv, rep, eidp);
00312 ret = 0;
00313 goto lockdone;
00314 }
00315 REP_SYSTEM_UNLOCK(dbenv);
00316 }
00317
00318 err: REP_SYSTEM_LOCK(dbenv);
00319 lockdone:
00320
00321
00322
00323
00324
00325
00326 if (ret == 0 || ret == DB_REP_UNAVAIL)
00327 __rep_elect_done(dbenv, rep);
00328 else if (orig_tally)
00329 F_SET(rep, orig_tally);
00330
00331
00332
00333
00334
00335 if (0) {
00336 edone: REP_SYSTEM_LOCK(dbenv);
00337 }
00338 rep->elect_th = 0;
00339
00340 RPRINT(dbenv, rep, (dbenv, &mb,
00341 "Ended election with %d, sites %d, egen %lu, flags 0x%lx",
00342 ret, rep->sites, (u_long)rep->egen, (u_long)rep->flags));
00343 REP_SYSTEM_UNLOCK(dbenv);
00344
00345 DB_TEST_RECOVERY_LABEL
00346 return (ret);
00347 }
00348
00349
00350
00351
00352
00353
00354
00355 int
00356 __rep_vote1(dbenv, rp, rec, eid)
00357 DB_ENV *dbenv;
00358 REP_CONTROL *rp;
00359 DBT *rec;
00360 int eid;
00361 {
00362 DB_LOG *dblp;
00363 DB_LSN lsn;
00364 DB_REP *db_rep;
00365 DBT data_dbt;
00366 LOG *lp;
00367 REP *rep;
00368 REP_VOTE_INFO *vi;
00369 u_int32_t egen;
00370 int done, master, ret;
00371 #ifdef DIAGNOSTIC
00372 DB_MSGBUF mb;
00373 #endif
00374
00375 ret = 0;
00376 db_rep = dbenv->rep_handle;
00377 rep = db_rep->region;
00378 dblp = dbenv->lg_handle;
00379 lp = dblp->reginfo.primary;
00380
00381 if (F_ISSET(rep, REP_F_MASTER)) {
00382 RPRINT(dbenv, rep,
00383 (dbenv, &mb, "Master received vote"));
00384 LOG_SYSTEM_LOCK(dbenv);
00385 lsn = lp->lsn;
00386 LOG_SYSTEM_UNLOCK(dbenv);
00387 (void)__rep_send_message(dbenv,
00388 DB_EID_BROADCAST, REP_NEWMASTER, &lsn, NULL, 0, 0);
00389 return (ret);
00390 }
00391
00392 vi = (REP_VOTE_INFO *)rec->data;
00393 REP_SYSTEM_LOCK(dbenv);
00394
00395
00396
00397
00398
00399
00400
00401 RPRINT(dbenv, rep, (dbenv, &mb,
00402 "Received vote1 egen %lu, egen %lu",
00403 (u_long)vi->egen, (u_long)rep->egen));
00404 if (vi->egen < rep->egen) {
00405 RPRINT(dbenv, rep, (dbenv, &mb,
00406 "Received old vote %lu, egen %lu, ignoring vote1",
00407 (u_long)vi->egen, (u_long)rep->egen));
00408 egen = rep->egen;
00409 REP_SYSTEM_UNLOCK(dbenv);
00410 data_dbt.data = &egen;
00411 data_dbt.size = sizeof(egen);
00412 (void)__rep_send_message(dbenv,
00413 eid, REP_ALIVE, &rp->lsn, &data_dbt, 0, 0);
00414 return (ret);
00415 }
00416 if (vi->egen > rep->egen) {
00417 RPRINT(dbenv, rep, (dbenv, &mb,
00418 "Received VOTE1 from egen %lu, my egen %lu; reset",
00419 (u_long)vi->egen, (u_long)rep->egen));
00420 __rep_elect_done(dbenv, rep);
00421 rep->egen = vi->egen;
00422 }
00423 if (!IN_ELECTION(rep))
00424 F_SET(rep, REP_F_TALLY);
00425
00426
00427 if (vi->nsites > rep->nsites)
00428 rep->nsites = vi->nsites;
00429
00430
00431 if (vi->nvotes > rep->nvotes)
00432 rep->nvotes = vi->nvotes;
00433
00434
00435
00436
00437
00438 if (rep->sites + 1 > rep->nsites)
00439 rep->nsites = rep->sites + 1;
00440 if (rep->nsites > rep->asites &&
00441 (ret = __rep_grow_sites(dbenv, rep->nsites)) != 0) {
00442 RPRINT(dbenv, rep, (dbenv, &mb,
00443 "Grow sites returned error %d", ret));
00444 goto err;
00445 }
00446
00447
00448
00449
00450 if (F_ISSET(rep, REP_F_EPHASE2)) {
00451 RPRINT(dbenv, rep, (dbenv, &mb,
00452 "In phase 2, ignoring vote1"));
00453 goto err;
00454 }
00455
00456
00457
00458
00459
00460 if ((ret = __rep_tally(dbenv, rep, eid, &rep->sites,
00461 vi->egen, rep->tally_off)) != 0) {
00462 RPRINT(dbenv, rep, (dbenv, &mb,
00463 "Tally returned %d, sites %d",
00464 ret, rep->sites));
00465 ret = 0;
00466 goto err;
00467 }
00468 RPRINT(dbenv, rep, (dbenv, &mb,
00469 "Incoming vote: (eid)%d (pri)%d (gen)%lu (egen)%lu [%lu,%lu]",
00470 eid, vi->priority,
00471 (u_long)rp->gen, (u_long)vi->egen,
00472 (u_long)rp->lsn.file, (u_long)rp->lsn.offset));
00473 #ifdef DIAGNOSTIC
00474 if (rep->sites > 1)
00475 RPRINT(dbenv, rep, (dbenv, &mb,
00476 "Existing vote: (eid)%d (pri)%d (gen)%lu (sites)%d [%lu,%lu]",
00477 rep->winner, rep->w_priority,
00478 (u_long)rep->w_gen, rep->sites,
00479 (u_long)rep->w_lsn.file,
00480 (u_long)rep->w_lsn.offset));
00481 #endif
00482 __rep_cmp_vote(dbenv, rep, eid, &rp->lsn, vi->priority,
00483 rp->gen, vi->tiebreaker);
00484
00485
00486
00487
00488
00489 if (!IN_ELECTION(rep)) {
00490 RPRINT(dbenv, rep, (dbenv, &mb,
00491 "Not in election, but received vote1 0x%x",
00492 rep->flags));
00493 ret = DB_REP_HOLDELECTION;
00494 goto err;
00495 }
00496
00497 master = rep->winner;
00498 lsn = rep->w_lsn;
00499
00500
00501
00502
00503
00504
00505
00506 done = rep->sites >= rep->nsites && rep->w_priority != 0;
00507 if (done) {
00508 RPRINT(dbenv, rep,
00509 (dbenv, &mb, "Phase1 election done"));
00510 RPRINT(dbenv, rep, (dbenv, &mb, "Voting for %d%s",
00511 master, master == rep->eid ? "(self)" : ""));
00512 egen = rep->egen;
00513 F_SET(rep, REP_F_EPHASE2);
00514 F_CLR(rep, REP_F_EPHASE1);
00515 if (master == rep->eid) {
00516 (void)__rep_tally(dbenv, rep, rep->eid,
00517 &rep->votes, egen, rep->v2tally_off);
00518 goto err;
00519 }
00520 REP_SYSTEM_UNLOCK(dbenv);
00521
00522
00523 __rep_send_vote(dbenv, NULL, 0, 0, 0, 0, egen,
00524 master, REP_VOTE2);
00525 } else
00526 err: REP_SYSTEM_UNLOCK(dbenv);
00527 return (ret);
00528 }
00529
00530
00531
00532
00533
00534
00535
00536 int
00537 __rep_vote2(dbenv, rec, eidp)
00538 DB_ENV *dbenv;
00539 DBT *rec;
00540 int *eidp;
00541 {
00542 DB_LOG *dblp;
00543 DB_LSN lsn;
00544 DB_REP *db_rep;
00545 LOG *lp;
00546 REP *rep;
00547 REP_VOTE_INFO *vi;
00548 int done, ret;
00549 #ifdef DIAGNOSTIC
00550 DB_MSGBUF mb;
00551 #endif
00552
00553 ret = 0;
00554 db_rep = dbenv->rep_handle;
00555 rep = db_rep->region;
00556 dblp = dbenv->lg_handle;
00557 lp = dblp->reginfo.primary;
00558
00559 RPRINT(dbenv, rep, (dbenv, &mb, "We received a vote%s",
00560 F_ISSET(rep, REP_F_MASTER) ? " (master)" : ""));
00561 if (F_ISSET(rep, REP_F_MASTER)) {
00562 LOG_SYSTEM_LOCK(dbenv);
00563 lsn = lp->lsn;
00564 LOG_SYSTEM_UNLOCK(dbenv);
00565 rep->stat.st_elections_won++;
00566 (void)__rep_send_message(dbenv,
00567 DB_EID_BROADCAST, REP_NEWMASTER, &lsn, NULL, 0, 0);
00568 return (ret);
00569 }
00570
00571 REP_SYSTEM_LOCK(dbenv);
00572
00573
00574 DB_ASSERT(rep->priority != 0);
00575
00576
00577
00578
00579
00580
00581
00582
00583
00584 vi = (REP_VOTE_INFO *)rec->data;
00585 if (!IN_ELECTION_TALLY(rep) && vi->egen >= rep->egen) {
00586 RPRINT(dbenv, rep, (dbenv, &mb,
00587 "Not in election gen %lu, at %lu, got vote",
00588 (u_long)vi->egen, (u_long)rep->egen));
00589 ret = DB_REP_HOLDELECTION;
00590 goto err;
00591 }
00592
00593
00594
00595
00596
00597
00598
00599
00600
00601
00602
00603
00604
00605
00606
00607
00608
00609
00610
00611 if ((ret = __rep_cmp_vote2(dbenv, rep, *eidp, vi->egen)) != 0) {
00612 ret = 0;
00613 goto err;
00614 }
00615
00616
00617
00618 if ((ret = __rep_tally(dbenv, rep, *eidp, &rep->votes,
00619 vi->egen, rep->v2tally_off)) != 0) {
00620 ret = 0;
00621 goto err;
00622 }
00623 done = rep->votes >= rep->nvotes;
00624 RPRINT(dbenv, rep, (dbenv, &mb, "Counted vote %d of %d",
00625 rep->votes, rep->nvotes));
00626 if (done) {
00627 __rep_elect_master(dbenv, rep, eidp);
00628 ret = DB_REP_NEWMASTER;
00629 }
00630
00631 err: REP_SYSTEM_UNLOCK(dbenv);
00632 return (ret);
00633 }
00634
00635
00636
00637
00638
00639
00640
00641
00642
00643 static int
00644 __rep_tally(dbenv, rep, eid, countp, egen, vtoff)
00645 DB_ENV *dbenv;
00646 REP *rep;
00647 int eid, *countp;
00648 u_int32_t egen;
00649 roff_t vtoff;
00650 {
00651 REP_VTALLY *tally, *vtp;
00652 int i;
00653 #ifdef DIAGNOSTIC
00654 DB_MSGBUF mb;
00655 #else
00656 COMPQUIET(rep, NULL);
00657 #endif
00658
00659 tally = R_ADDR((REGINFO *)dbenv->reginfo, vtoff);
00660 i = 0;
00661 vtp = &tally[i];
00662 while (i < *countp) {
00663
00664
00665
00666
00667
00668
00669
00670
00671
00672
00673 if (vtp->eid == eid) {
00674 RPRINT(dbenv, rep, (dbenv, &mb,
00675 "Tally found[%d] (%d, %lu), this vote (%d, %lu)",
00676 i, vtp->eid, (u_long)vtp->egen,
00677 eid, (u_long)egen));
00678 if (vtp->egen >= egen)
00679 return (1);
00680 else {
00681 vtp->egen = egen;
00682 return (0);
00683 }
00684 }
00685 i++;
00686 vtp = &tally[i];
00687 }
00688
00689
00690
00691
00692 #ifdef DIAGNOSTIC
00693 if (vtoff == rep->tally_off)
00694 RPRINT(dbenv, rep, (dbenv, &mb, "Tallying VOTE1[%d] (%d, %lu)",
00695 i, eid, (u_long)egen));
00696 else
00697 RPRINT(dbenv, rep, (dbenv, &mb, "Tallying VOTE2[%d] (%d, %lu)",
00698 i, eid, (u_long)egen));
00699 #endif
00700 vtp->eid = eid;
00701 vtp->egen = egen;
00702 (*countp)++;
00703 return (0);
00704 }
00705
00706
00707
00708
00709
00710
00711
00712 static void
00713 __rep_cmp_vote(dbenv, rep, eid, lsnp, priority, gen, tiebreaker)
00714 DB_ENV *dbenv;
00715 REP *rep;
00716 int eid;
00717 DB_LSN *lsnp;
00718 int priority;
00719 u_int32_t gen, tiebreaker;
00720 {
00721 int cmp;
00722
00723 #ifdef DIAGNOSTIC
00724 DB_MSGBUF mb;
00725 #else
00726 COMPQUIET(dbenv, NULL);
00727 #endif
00728 cmp = log_compare(lsnp, &rep->w_lsn);
00729
00730
00731
00732
00733 if (rep->sites > 1 && priority != 0) {
00734
00735
00736
00737
00738 if (cmp > 0 ||
00739 (cmp == 0 && (priority > rep->w_priority ||
00740 (priority == rep->w_priority &&
00741 (tiebreaker > rep->w_tiebreaker))))) {
00742 RPRINT(dbenv, rep, (dbenv, &mb, "Accepting new vote"));
00743 rep->winner = eid;
00744 rep->w_priority = priority;
00745 rep->w_lsn = *lsnp;
00746 rep->w_gen = gen;
00747 rep->w_tiebreaker = tiebreaker;
00748 }
00749 } else if (rep->sites == 1) {
00750 if (priority != 0) {
00751
00752 rep->winner = eid;
00753 rep->w_priority = priority;
00754 rep->w_gen = gen;
00755 rep->w_lsn = *lsnp;
00756 rep->w_tiebreaker = tiebreaker;
00757 } else {
00758 rep->winner = DB_EID_INVALID;
00759 rep->w_priority = 0;
00760 rep->w_gen = 0;
00761 ZERO_LSN(rep->w_lsn);
00762 rep->w_tiebreaker = 0;
00763 }
00764 }
00765 return;
00766 }
00767
00768
00769
00770
00771
00772
00773
00774
00775 static int
00776 __rep_cmp_vote2(dbenv, rep, eid, egen)
00777 DB_ENV *dbenv;
00778 REP *rep;
00779 int eid;
00780 u_int32_t egen;
00781 {
00782 int i;
00783 REP_VTALLY *tally, *vtp;
00784 #ifdef DIAGNOSTIC
00785 DB_MSGBUF mb;
00786 #endif
00787
00788 tally = R_ADDR((REGINFO *)dbenv->reginfo, rep->tally_off);
00789 i = 0;
00790 vtp = &tally[i];
00791 for (i = 0; i < rep->sites; i++) {
00792 vtp = &tally[i];
00793 if (vtp->eid == eid && vtp->egen == egen) {
00794 RPRINT(dbenv, rep, (dbenv, &mb,
00795 "Found matching vote1 (%d, %lu), at %d of %d",
00796 eid, (u_long)egen, i, rep->sites));
00797 return (0);
00798 }
00799 }
00800 RPRINT(dbenv, rep,
00801 (dbenv, &mb, "Didn't find vote1 for eid %d, egen %lu",
00802 eid, (u_long)egen));
00803 return (1);
00804 }
00805
00806
00807
00808
00809
00810
00811 static int
00812 __rep_elect_init(dbenv, lsnp, nsites, nvotes, priority, beginp, otally)
00813 DB_ENV *dbenv;
00814 DB_LSN *lsnp;
00815 int nsites, nvotes, priority;
00816 int *beginp;
00817 u_int32_t *otally;
00818 {
00819 DB_REP *db_rep;
00820 REP *rep;
00821 int ret;
00822
00823 db_rep = dbenv->rep_handle;
00824 rep = db_rep->region;
00825
00826 ret = 0;
00827
00828
00829 rep->stat.st_elections++;
00830
00831
00832 if (F_ISSET(rep, REP_F_MASTER)) {
00833 (void)__rep_send_message(dbenv,
00834 DB_EID_BROADCAST, REP_NEWMASTER, lsnp, NULL, 0, 0);
00835 rep->stat.st_elections_won++;
00836 return (DB_REP_NEWMASTER);
00837 }
00838
00839 REP_SYSTEM_LOCK(dbenv);
00840 if (otally != NULL)
00841 *otally = F_ISSET(rep, REP_F_TALLY);
00842 *beginp = IN_ELECTION(rep) || rep->elect_th;
00843 if (!*beginp) {
00844
00845
00846
00847
00848
00849
00850 if (nsites > rep->asites &&
00851 (ret = __rep_grow_sites(dbenv, nsites)) != 0)
00852 goto err;
00853 DB_ENV_TEST_RECOVERY(dbenv, DB_TEST_ELECTINIT, ret, NULL);
00854 rep->elect_th = 1;
00855 rep->nsites = nsites;
00856 rep->nvotes = nvotes;
00857 rep->priority = priority;
00858 rep->master_id = DB_EID_INVALID;
00859 }
00860 DB_TEST_RECOVERY_LABEL
00861 err: REP_SYSTEM_UNLOCK(dbenv);
00862 return (ret);
00863 }
00864
00865
00866
00867
00868
00869
00870
00871
00872 void
00873 __rep_elect_master(dbenv, rep, eidp)
00874 DB_ENV *dbenv;
00875 REP *rep;
00876 int *eidp;
00877 {
00878 #ifdef DIAGNOSTIC
00879 DB_MSGBUF mb;
00880 #else
00881 COMPQUIET(dbenv, NULL);
00882 #endif
00883 rep->master_id = rep->eid;
00884 F_SET(rep, REP_F_MASTERELECT);
00885 if (eidp != NULL)
00886 *eidp = rep->master_id;
00887 rep->stat.st_elections_won++;
00888 RPRINT(dbenv, rep, (dbenv, &mb,
00889 "Got enough votes to win; election done; winner is %d, gen %lu",
00890 rep->master_id, (u_long)rep->gen));
00891 }
00892
00893 static int
00894 __rep_wait(dbenv, timeout, eidp, flags)
00895 DB_ENV *dbenv;
00896 u_int32_t timeout;
00897 int *eidp;
00898 u_int32_t flags;
00899 {
00900 DB_REP *db_rep;
00901 REP *rep;
00902 int done, echg;
00903 u_int32_t egen, sleeptime;
00904
00905 done = echg = 0;
00906 db_rep = dbenv->rep_handle;
00907 rep = db_rep->region;
00908 egen = rep->egen;
00909
00910
00911
00912
00913
00914
00915 sleeptime = (timeout > 5000000) ? 500000 : timeout / 10;
00916 if (sleeptime == 0)
00917 sleeptime++;
00918 while (timeout > 0) {
00919 __os_sleep(dbenv, 0, sleeptime);
00920 REP_SYSTEM_LOCK(dbenv);
00921 echg = egen != rep->egen;
00922 done = !F_ISSET(rep, flags) && rep->master_id != DB_EID_INVALID;
00923
00924 *eidp = rep->master_id;
00925 REP_SYSTEM_UNLOCK(dbenv);
00926
00927 if (done)
00928 return (0);
00929
00930 if (echg)
00931 return (DB_REP_EGENCHG);
00932
00933 if (timeout > sleeptime)
00934 timeout -= sleeptime;
00935 else
00936 timeout = 0;
00937 }
00938 return (DB_TIMEOUT);
00939 }