Header And Logo

PostgreSQL
| The world's most advanced open source database.

sysv_sema.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * sysv_sema.c
00004  *    Implement PGSemaphores using SysV semaphore facilities
00005  *
00006  *
00007  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00008  * Portions Copyright (c) 1994, Regents of the University of California
00009  *
00010  * IDENTIFICATION
00011  *    src/backend/port/sysv_sema.c
00012  *
00013  *-------------------------------------------------------------------------
00014  */
00015 #include "postgres.h"
00016 
00017 #include <signal.h>
00018 #include <unistd.h>
00019 #include <sys/file.h>
00020 #ifdef HAVE_SYS_IPC_H
00021 #include <sys/ipc.h>
00022 #endif
00023 #ifdef HAVE_SYS_SEM_H
00024 #include <sys/sem.h>
00025 #endif
00026 
00027 #include "miscadmin.h"
00028 #include "storage/ipc.h"
00029 #include "storage/pg_sema.h"
00030 
00031 
#ifndef HAVE_UNION_SEMUN
/*
 * Fallback declaration of the semctl() argument union, compiled only when
 * HAVE_UNION_SEMUN is not defined.
 */
union semun
{
    int         val;            /* integer argument; this file uses it with
                                 * SETVAL and as a dummy for other commands */
    struct semid_ds *buf;       /* pointer form; not used in this file */
    unsigned short *array;      /* array form; not used in this file */
};
#endif
00040 
/* Key value handed to semget(2) to name a semaphore set */
typedef key_t IpcSemaphoreKey;

/* Identifier of a semaphore set, as returned by semget(2) */
typedef int IpcSemaphoreId;
00043 
/*
 * SEMAS_PER_SET is how many usable semaphores each allocated set holds.
 * One more semaphore is added to every set for identification purposes, so
 * this value has to be strictly *less than* the kernel's SEMMSL limit
 * (maximum semaphores per set), which is often around 25.
 */
#define SEMAS_PER_SET   16

/* Permission bits for new sets: access/modify by the owning user only */
#define IPCProtection   (0600)

/* Value stored in each set's identification semaphore; must be < SEMVMX */
#define PGSemaMagic     537
00055 
00056 
00057 static IpcSemaphoreId *mySemaSets;      /* IDs of sema sets acquired so far */
00058 static int  numSemaSets;        /* number of sema sets acquired so far */
00059 static int  maxSemaSets;        /* allocated size of mySemaSets array */
00060 static IpcSemaphoreKey nextSemaKey;     /* next key to try using */
00061 static int  nextSemaNumber;     /* next free sem num in last sema set */
00062 
00063 
00064 static IpcSemaphoreId InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey,
00065                            int numSems);
00066 static void IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum,
00067                        int value);
00068 static void IpcSemaphoreKill(IpcSemaphoreId semId);
00069 static int  IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum);
00070 static pid_t IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum);
00071 static IpcSemaphoreId IpcSemaphoreCreate(int numSems);
00072 static void ReleaseSemaphores(int status, Datum arg);
00073 
00074 
00075 /*
00076  * InternalIpcSemaphoreCreate
00077  *
00078  * Attempt to create a new semaphore set with the specified key.
00079  * Will fail (return -1) if such a set already exists.
00080  *
00081  * If we fail with a failure code other than collision-with-existing-set,
00082  * print out an error and abort.  Other types of errors suggest nonrecoverable
00083  * problems.
00084  */
00085 static IpcSemaphoreId
00086 InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, int numSems)
00087 {
00088     int         semId;
00089 
00090     semId = semget(semKey, numSems, IPC_CREAT | IPC_EXCL | IPCProtection);
00091 
00092     if (semId < 0)
00093     {
00094         /*
00095          * Fail quietly if error indicates a collision with existing set. One
00096          * would expect EEXIST, given that we said IPC_EXCL, but perhaps we
00097          * could get a permission violation instead?  Also, EIDRM might occur
00098          * if an old set is slated for destruction but not gone yet.
00099          */
00100         if (errno == EEXIST || errno == EACCES
00101 #ifdef EIDRM
00102             || errno == EIDRM
00103 #endif
00104             )
00105             return -1;
00106 
00107         /*
00108          * Else complain and abort
00109          */
00110         ereport(FATAL,
00111                 (errmsg("could not create semaphores: %m"),
00112                  errdetail("Failed system call was semget(%lu, %d, 0%o).",
00113                            (unsigned long) semKey, numSems,
00114                            IPC_CREAT | IPC_EXCL | IPCProtection),
00115                  (errno == ENOSPC) ?
00116                  errhint("This error does *not* mean that you have run out of disk space.  "
00117           "It occurs when either the system limit for the maximum number of "
00118              "semaphore sets (SEMMNI), or the system wide maximum number of "
00119             "semaphores (SEMMNS), would be exceeded.  You need to raise the "
00120           "respective kernel parameter.  Alternatively, reduce PostgreSQL's "
00121                          "consumption of semaphores by reducing its max_connections parameter.\n"
00122               "The PostgreSQL documentation contains more information about "
00123                          "configuring your system for PostgreSQL.") : 0));
00124     }
00125 
00126     return semId;
00127 }
00128 
00129 /*
00130  * Initialize a semaphore to the specified value.
00131  */
00132 static void
00133 IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum, int value)
00134 {
00135     union semun semun;
00136 
00137     semun.val = value;
00138     if (semctl(semId, semNum, SETVAL, semun) < 0)
00139         ereport(FATAL,
00140                 (errmsg_internal("semctl(%d, %d, SETVAL, %d) failed: %m",
00141                                  semId, semNum, value),
00142                  (errno == ERANGE) ?
00143                  errhint("You possibly need to raise your kernel's SEMVMX value to be at least "
00144                   "%d.  Look into the PostgreSQL documentation for details.",
00145                          value) : 0));
00146 }
00147 
00148 /*
00149  * IpcSemaphoreKill(semId)  - removes a semaphore set
00150  */
00151 static void
00152 IpcSemaphoreKill(IpcSemaphoreId semId)
00153 {
00154     union semun semun;
00155 
00156     semun.val = 0;              /* unused, but keep compiler quiet */
00157 
00158     if (semctl(semId, 0, IPC_RMID, semun) < 0)
00159         elog(LOG, "semctl(%d, 0, IPC_RMID, ...) failed: %m", semId);
00160 }
00161 
00162 /* Get the current value (semval) of the semaphore */
00163 static int
00164 IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum)
00165 {
00166     union semun dummy;          /* for Solaris */
00167 
00168     dummy.val = 0;              /* unused */
00169 
00170     return semctl(semId, semNum, GETVAL, dummy);
00171 }
00172 
00173 /* Get the PID of the last process to do semop() on the semaphore */
00174 static pid_t
00175 IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum)
00176 {
00177     union semun dummy;          /* for Solaris */
00178 
00179     dummy.val = 0;              /* unused */
00180 
00181     return semctl(semId, semNum, GETPID, dummy);
00182 }
00183 
00184 
00185 /*
00186  * Create a semaphore set with the given number of useful semaphores
00187  * (an additional sema is actually allocated to serve as identifier).
00188  * Dead Postgres sema sets are recycled if found, but we do not fail
00189  * upon collision with non-Postgres sema sets.
00190  *
00191  * The idea here is to detect and re-use keys that may have been assigned
00192  * by a crashed postmaster or backend.
00193  */
00194 static IpcSemaphoreId
00195 IpcSemaphoreCreate(int numSems)
00196 {
00197     IpcSemaphoreId semId;
00198     union semun semun;
00199     PGSemaphoreData mysema;
00200 
00201     /* Loop till we find a free IPC key */
00202     for (nextSemaKey++;; nextSemaKey++)
00203     {
00204         pid_t       creatorPID;
00205 
00206         /* Try to create new semaphore set */
00207         semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1);
00208         if (semId >= 0)
00209             break;              /* successful create */
00210 
00211         /* See if it looks to be leftover from a dead Postgres process */
00212         semId = semget(nextSemaKey, numSems + 1, 0);
00213         if (semId < 0)
00214             continue;           /* failed: must be some other app's */
00215         if (IpcSemaphoreGetValue(semId, numSems) != PGSemaMagic)
00216             continue;           /* sema belongs to a non-Postgres app */
00217 
00218         /*
00219          * If the creator PID is my own PID or does not belong to any extant
00220          * process, it's safe to zap it.
00221          */
00222         creatorPID = IpcSemaphoreGetLastPID(semId, numSems);
00223         if (creatorPID <= 0)
00224             continue;           /* oops, GETPID failed */
00225         if (creatorPID != getpid())
00226         {
00227             if (kill(creatorPID, 0) == 0 || errno != ESRCH)
00228                 continue;       /* sema belongs to a live process */
00229         }
00230 
00231         /*
00232          * The sema set appears to be from a dead Postgres process, or from a
00233          * previous cycle of life in this same process.  Zap it, if possible.
00234          * This probably shouldn't fail, but if it does, assume the sema set
00235          * belongs to someone else after all, and continue quietly.
00236          */
00237         semun.val = 0;          /* unused, but keep compiler quiet */
00238         if (semctl(semId, 0, IPC_RMID, semun) < 0)
00239             continue;
00240 
00241         /*
00242          * Now try again to create the sema set.
00243          */
00244         semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1);
00245         if (semId >= 0)
00246             break;              /* successful create */
00247 
00248         /*
00249          * Can only get here if some other process managed to create the same
00250          * sema key before we did.  Let him have that one, loop around to try
00251          * next key.
00252          */
00253     }
00254 
00255     /*
00256      * OK, we created a new sema set.  Mark it as created by this process. We
00257      * do this by setting the spare semaphore to PGSemaMagic-1 and then
00258      * incrementing it with semop().  That leaves it with value PGSemaMagic
00259      * and sempid referencing this process.
00260      */
00261     IpcSemaphoreInitialize(semId, numSems, PGSemaMagic - 1);
00262     mysema.semId = semId;
00263     mysema.semNum = numSems;
00264     PGSemaphoreUnlock(&mysema);
00265 
00266     return semId;
00267 }
00268 
00269 
00270 /*
00271  * PGReserveSemaphores --- initialize semaphore support
00272  *
00273  * This is called during postmaster start or shared memory reinitialization.
00274  * It should do whatever is needed to be able to support up to maxSemas
00275  * subsequent PGSemaphoreCreate calls.  Also, if any system resources
00276  * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit
00277  * callback to release them.
00278  *
00279  * The port number is passed for possible use as a key (for SysV, we use
00280  * it to generate the starting semaphore key).  In a standalone backend,
00281  * zero will be passed.
00282  *
00283  * In the SysV implementation, we acquire semaphore sets on-demand; the
00284  * maxSemas parameter is just used to size the array that keeps track of
00285  * acquired sets for subsequent releasing.
00286  */
00287 void
00288 PGReserveSemaphores(int maxSemas, int port)
00289 {
00290     maxSemaSets = (maxSemas + SEMAS_PER_SET - 1) / SEMAS_PER_SET;
00291     mySemaSets = (IpcSemaphoreId *)
00292         malloc(maxSemaSets * sizeof(IpcSemaphoreId));
00293     if (mySemaSets == NULL)
00294         elog(PANIC, "out of memory");
00295     numSemaSets = 0;
00296     nextSemaKey = port * 1000;
00297     nextSemaNumber = SEMAS_PER_SET;     /* force sema set alloc on 1st call */
00298 
00299     on_shmem_exit(ReleaseSemaphores, 0);
00300 }
00301 
00302 /*
00303  * Release semaphores at shutdown or shmem reinitialization
00304  *
00305  * (called as an on_shmem_exit callback, hence funny argument list)
00306  */
00307 static void
00308 ReleaseSemaphores(int status, Datum arg)
00309 {
00310     int         i;
00311 
00312     for (i = 0; i < numSemaSets; i++)
00313         IpcSemaphoreKill(mySemaSets[i]);
00314     free(mySemaSets);
00315 }
00316 
00317 /*
00318  * PGSemaphoreCreate
00319  *
00320  * Initialize a PGSemaphore structure to represent a sema with count 1
00321  */
00322 void
00323 PGSemaphoreCreate(PGSemaphore sema)
00324 {
00325     /* Can't do this in a backend, because static state is postmaster's */
00326     Assert(!IsUnderPostmaster);
00327 
00328     if (nextSemaNumber >= SEMAS_PER_SET)
00329     {
00330         /* Time to allocate another semaphore set */
00331         if (numSemaSets >= maxSemaSets)
00332             elog(PANIC, "too many semaphores created");
00333         mySemaSets[numSemaSets] = IpcSemaphoreCreate(SEMAS_PER_SET);
00334         numSemaSets++;
00335         nextSemaNumber = 0;
00336     }
00337     /* Assign the next free semaphore in the current set */
00338     sema->semId = mySemaSets[numSemaSets - 1];
00339     sema->semNum = nextSemaNumber++;
00340     /* Initialize it to count 1 */
00341     IpcSemaphoreInitialize(sema->semId, sema->semNum, 1);
00342 }
00343 
00344 /*
00345  * PGSemaphoreReset
00346  *
00347  * Reset a previously-initialized PGSemaphore to have count 0
00348  */
00349 void
00350 PGSemaphoreReset(PGSemaphore sema)
00351 {
00352     IpcSemaphoreInitialize(sema->semId, sema->semNum, 0);
00353 }
00354 
00355 /*
00356  * PGSemaphoreLock
00357  *
00358  * Lock a semaphore (decrement count), blocking if count would be < 0
00359  */
00360 void
00361 PGSemaphoreLock(PGSemaphore sema, bool interruptOK)
00362 {
00363     int         errStatus;
00364     struct sembuf sops;
00365 
00366     sops.sem_op = -1;           /* decrement */
00367     sops.sem_flg = 0;
00368     sops.sem_num = sema->semNum;
00369 
00370     /*
00371      * Note: if errStatus is -1 and errno == EINTR then it means we returned
00372      * from the operation prematurely because we were sent a signal.  So we
00373      * try and lock the semaphore again.
00374      *
00375      * Each time around the loop, we check for a cancel/die interrupt.  On
00376      * some platforms, if such an interrupt comes in while we are waiting, it
00377      * will cause the semop() call to exit with errno == EINTR, allowing us to
00378      * service the interrupt (if not in a critical section already) during the
00379      * next loop iteration.
00380      *
00381      * Once we acquire the lock, we do NOT check for an interrupt before
00382      * returning.  The caller needs to be able to record ownership of the lock
00383      * before any interrupt can be accepted.
00384      *
00385      * There is a window of a few instructions between CHECK_FOR_INTERRUPTS
00386      * and entering the semop() call.  If a cancel/die interrupt occurs in
00387      * that window, we would fail to notice it until after we acquire the lock
00388      * (or get another interrupt to escape the semop()).  We can avoid this
00389      * problem by temporarily setting ImmediateInterruptOK to true before we
00390      * do CHECK_FOR_INTERRUPTS; then, a die() interrupt in this interval will
00391      * execute directly.  However, there is a huge pitfall: there is another
00392      * window of a few instructions after the semop() before we are able to
00393      * reset ImmediateInterruptOK.  If an interrupt occurs then, we'll lose
00394      * control, which means that the lock has been acquired but our caller did
00395      * not get a chance to record the fact. Therefore, we only set
00396      * ImmediateInterruptOK if the caller tells us it's OK to do so, ie, the
00397      * caller does not need to record acquiring the lock.  (This is currently
00398      * true for lockmanager locks, since the process that granted us the lock
00399      * did all the necessary state updates. It's not true for SysV semaphores
00400      * used to implement LW locks or emulate spinlocks --- but the wait time
00401      * for such locks should not be very long, anyway.)
00402      *
00403      * On some platforms, signals marked SA_RESTART (which is most, for us)
00404      * will not interrupt the semop(); it will just keep waiting.  Therefore
00405      * it's necessary for cancel/die interrupts to be serviced directly by the
00406      * signal handler.  On these platforms the behavior is really the same
00407      * whether the signal arrives just before the semop() begins, or while it
00408      * is waiting.  The loop on EINTR is thus important only for other types
00409      * of interrupts.
00410      */
00411     do
00412     {
00413         ImmediateInterruptOK = interruptOK;
00414         CHECK_FOR_INTERRUPTS();
00415         errStatus = semop(sema->semId, &sops, 1);
00416         ImmediateInterruptOK = false;
00417     } while (errStatus < 0 && errno == EINTR);
00418 
00419     if (errStatus < 0)
00420         elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
00421 }
00422 
00423 /*
00424  * PGSemaphoreUnlock
00425  *
00426  * Unlock a semaphore (increment count)
00427  */
00428 void
00429 PGSemaphoreUnlock(PGSemaphore sema)
00430 {
00431     int         errStatus;
00432     struct sembuf sops;
00433 
00434     sops.sem_op = 1;            /* increment */
00435     sops.sem_flg = 0;
00436     sops.sem_num = sema->semNum;
00437 
00438     /*
00439      * Note: if errStatus is -1 and errno == EINTR then it means we returned
00440      * from the operation prematurely because we were sent a signal.  So we
00441      * try and unlock the semaphore again. Not clear this can really happen,
00442      * but might as well cope.
00443      */
00444     do
00445     {
00446         errStatus = semop(sema->semId, &sops, 1);
00447     } while (errStatus < 0 && errno == EINTR);
00448 
00449     if (errStatus < 0)
00450         elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
00451 }
00452 
00453 /*
00454  * PGSemaphoreTryLock
00455  *
00456  * Lock a semaphore only if able to do so without blocking
00457  */
00458 bool
00459 PGSemaphoreTryLock(PGSemaphore sema)
00460 {
00461     int         errStatus;
00462     struct sembuf sops;
00463 
00464     sops.sem_op = -1;           /* decrement */
00465     sops.sem_flg = IPC_NOWAIT;  /* but don't block */
00466     sops.sem_num = sema->semNum;
00467 
00468     /*
00469      * Note: if errStatus is -1 and errno == EINTR then it means we returned
00470      * from the operation prematurely because we were sent a signal.  So we
00471      * try and lock the semaphore again.
00472      */
00473     do
00474     {
00475         errStatus = semop(sema->semId, &sops, 1);
00476     } while (errStatus < 0 && errno == EINTR);
00477 
00478     if (errStatus < 0)
00479     {
00480         /* Expect EAGAIN or EWOULDBLOCK (platform-dependent) */
00481 #ifdef EAGAIN
00482         if (errno == EAGAIN)
00483             return false;       /* failed to lock it */
00484 #endif
00485 #if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
00486         if (errno == EWOULDBLOCK)
00487             return false;       /* failed to lock it */
00488 #endif
00489         /* Otherwise we got trouble */
00490         elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
00491     }
00492 
00493     return true;
00494 }