Header And Logo

PostgreSQL
| The world's most advanced open source database.

uavc.c

Go to the documentation of this file.
00001 /* -------------------------------------------------------------------------
00002  *
00003  * contrib/sepgsql/uavc.c
00004  *
00005  * Implementation of the userspace access vector cache, which caches
00006  * recently used access control decisions and thereby reduces the number
00007  * of kernel invocations, avoiding an unnecessary performance hit.
00008  *
00009  * Copyright (c) 2011-2013, PostgreSQL Global Development Group
00010  *
00011  * -------------------------------------------------------------------------
00012  */
00013 #include "postgres.h"
00014 
00015 #include "access/hash.h"
00016 #include "catalog/pg_proc.h"
00017 #include "commands/seclabel.h"
00018 #include "storage/ipc.h"
00019 #include "utils/guc.h"
00020 #include "utils/memutils.h"
00021 
00022 #include "sepgsql.h"
00023 
00024 /*
00025  * avc_cache
00026  *
00027  * It enables to cache access control decision (and behavior on execution of
00028  * trusted procedure, db_procedure class only) for a particular pair of
00029  * security labels and object class in userspace.
00030  */
00031 typedef struct
00032 {
00033     uint32      hash;           /* hash value of this cache entry */
00034     char       *scontext;       /* security context of the subject */
00035     char       *tcontext;       /* security context of the target */
00036     uint16      tclass;         /* object class of the target */
00037 
00038     uint32      allowed;        /* permissions to be allowed */
00039     uint32      auditallow;     /* permissions to be audited on allowed */
00040     uint32      auditdeny;      /* permissions to be audited on denied */
00041 
00042     bool        permissive;     /* true, if permissive rule */
00043     bool        hot_cache;      /* true, if recently referenced */
00044     bool        tcontext_is_valid;
00045     /* true, if tcontext is valid */
00046     char       *ncontext;       /* temporary scontext on execution of trusted
00047                                  * procedure, or NULL elsewhere */
00048 }   avc_cache;
00049 
00050 /*
00051  * Declaration of static variables
00052  */
00053 #define AVC_NUM_SLOTS       512
00054 #define AVC_NUM_RECLAIM     16
00055 #define AVC_DEF_THRESHOLD   384
00056 
00057 static MemoryContext avc_mem_cxt;
00058 static List *avc_slots[AVC_NUM_SLOTS];  /* avc's hash buckets */
00059 static int  avc_num_caches;     /* number of caches currently used */
00060 static int  avc_lru_hint;       /* index of the buckets to be reclaimed next */
00061 static int  avc_threshold;      /* threshold to launch cache-reclaiming  */
00062 static char *avc_unlabeled;     /* system 'unlabeled' label */
00063 
00064 /*
00065  * Hash function
00066  */
00067 static uint32
00068 sepgsql_avc_hash(const char *scontext, const char *tcontext, uint16 tclass)
00069 {
00070     return hash_any((const unsigned char *) scontext, strlen(scontext))
00071         ^ hash_any((const unsigned char *) tcontext, strlen(tcontext))
00072         ^ tclass;
00073 }
00074 
00075 /*
00076  * Reset all the avc caches
00077  */
00078 static void
00079 sepgsql_avc_reset(void)
00080 {
00081     MemoryContextReset(avc_mem_cxt);
00082 
00083     memset(avc_slots, 0, sizeof(List *) * AVC_NUM_SLOTS);
00084     avc_num_caches = 0;
00085     avc_lru_hint = 0;
00086     avc_unlabeled = NULL;
00087 }
00088 
00089 /*
00090  * Reclaim caches recently unreferenced
00091  */
00092 static void
00093 sepgsql_avc_reclaim(void)
00094 {
00095     ListCell   *cell;
00096     ListCell   *next;
00097     ListCell   *prev;
00098     int         index;
00099 
00100     while (avc_num_caches >= avc_threshold - AVC_NUM_RECLAIM)
00101     {
00102         index = avc_lru_hint;
00103 
00104         prev = NULL;
00105         for (cell = list_head(avc_slots[index]); cell; cell = next)
00106         {
00107             avc_cache  *cache = lfirst(cell);
00108 
00109             next = lnext(cell);
00110             if (!cache->hot_cache)
00111             {
00112                 avc_slots[index]
00113                     = list_delete_cell(avc_slots[index], cell, prev);
00114 
00115                 pfree(cache->scontext);
00116                 pfree(cache->tcontext);
00117                 if (cache->ncontext)
00118                     pfree(cache->ncontext);
00119                 pfree(cache);
00120 
00121                 avc_num_caches--;
00122             }
00123             else
00124             {
00125                 cache->hot_cache = false;
00126                 prev = cell;
00127             }
00128         }
00129         avc_lru_hint = (avc_lru_hint + 1) % AVC_NUM_SLOTS;
00130     }
00131 }
00132 
00133 /* -------------------------------------------------------------------------
00134  *
00135  * sepgsql_avc_check_valid
00136  *
00137  * This function checks whether the cached entries are still valid.  If
00138  * the security policy has been reloaded (or any other events that requires
00139  * resetting userspace caches has occurred) since the last reference to
00140  * the access vector cache, we must flush the cache.
00141  *
00142  * Access control decisions must be atomic, but multiple system calls may
00143  * be required to make a decision; thus, when referencing the access vector
00144  * cache, we must loop until we complete without an intervening cache flush
00145  * event.  In practice, looping even once should be very rare.  Callers should
00146  * do something like this:
00147  *
00148  *   sepgsql_avc_check_valid();
00149  *   do {
00150  *           :
00151  *       <reference to uavc>
00152  *           :
00153  *   } while (!sepgsql_avc_check_valid())
00154  *
00155  * -------------------------------------------------------------------------
00156  */
00157 static bool
00158 sepgsql_avc_check_valid(void)
00159 {
00160     if (selinux_status_updated() > 0)
00161     {
00162         sepgsql_avc_reset();
00163 
00164         return false;
00165     }
00166     return true;
00167 }
00168 
00169 /*
00170  * sepgsql_avc_unlabeled
00171  *
00172  * Returns an alternative label to be applied when no label or an invalid
00173  * label would otherwise be assigned.
00174  */
00175 static char *
00176 sepgsql_avc_unlabeled(void)
00177 {
00178     if (!avc_unlabeled)
00179     {
00180         security_context_t unlabeled;
00181 
00182         if (security_get_initial_context_raw("unlabeled", &unlabeled) < 0)
00183             ereport(ERROR,
00184                     (errcode(ERRCODE_INTERNAL_ERROR),
00185                errmsg("SELinux: failed to get initial security label: %m")));
00186         PG_TRY();
00187         {
00188             avc_unlabeled = MemoryContextStrdup(avc_mem_cxt, unlabeled);
00189         }
00190         PG_CATCH();
00191         {
00192             freecon(unlabeled);
00193             PG_RE_THROW();
00194         }
00195         PG_END_TRY();
00196 
00197         freecon(unlabeled);
00198     }
00199     return avc_unlabeled;
00200 }
00201 
00202 /*
00203  * sepgsql_avc_compute
00204  *
00205  * A fallback path, when cache mishit. It asks SELinux its access control
00206  * decision for the supplied pair of security context and object class.
00207  */
00208 static avc_cache *
00209 sepgsql_avc_compute(const char *scontext, const char *tcontext, uint16 tclass)
00210 {
00211     char       *ucontext = NULL;
00212     char       *ncontext = NULL;
00213     MemoryContext oldctx;
00214     avc_cache  *cache;
00215     uint32      hash;
00216     int         index;
00217     struct av_decision avd;
00218 
00219     hash = sepgsql_avc_hash(scontext, tcontext, tclass);
00220     index = hash % AVC_NUM_SLOTS;
00221 
00222     /*
00223      * Validation check of the supplied security context. Because it always
00224      * invoke system-call, frequent check should be avoided. Unless security
00225      * policy is reloaded, validation status shall be kept, so we also cache
00226      * whether the supplied security context was valid, or not.
00227      */
00228     if (security_check_context_raw((security_context_t) tcontext) != 0)
00229         ucontext = sepgsql_avc_unlabeled();
00230 
00231     /*
00232      * Ask SELinux its access control decision
00233      */
00234     if (!ucontext)
00235         sepgsql_compute_avd(scontext, tcontext, tclass, &avd);
00236     else
00237         sepgsql_compute_avd(scontext, ucontext, tclass, &avd);
00238 
00239     /*
00240      * It also caches a security label to be switched when a client labeled as
00241      * 'scontext' executes a procedure labeled as 'tcontext', not only access
00242      * control decision on the procedure. The security label to be switched
00243      * shall be computed uniquely on a pair of 'scontext' and 'tcontext',
00244      * thus, it is reasonable to cache the new label on avc, and enables to
00245      * reduce unnecessary system calls. It shall be referenced at
00246      * sepgsql_needs_fmgr_hook to check whether the supplied function is a
00247      * trusted procedure, or not.
00248      */
00249     if (tclass == SEPG_CLASS_DB_PROCEDURE)
00250     {
00251         if (!ucontext)
00252             ncontext = sepgsql_compute_create(scontext, tcontext,
00253                                               SEPG_CLASS_PROCESS, NULL);
00254         else
00255             ncontext = sepgsql_compute_create(scontext, ucontext,
00256                                               SEPG_CLASS_PROCESS, NULL);
00257         if (strcmp(scontext, ncontext) == 0)
00258         {
00259             pfree(ncontext);
00260             ncontext = NULL;
00261         }
00262     }
00263 
00264     /*
00265      * Set up an avc_cache object
00266      */
00267     oldctx = MemoryContextSwitchTo(avc_mem_cxt);
00268 
00269     cache = palloc0(sizeof(avc_cache));
00270 
00271     cache->hash = hash;
00272     cache->scontext = pstrdup(scontext);
00273     cache->tcontext = pstrdup(tcontext);
00274     cache->tclass = tclass;
00275 
00276     cache->allowed = avd.allowed;
00277     cache->auditallow = avd.auditallow;
00278     cache->auditdeny = avd.auditdeny;
00279     cache->hot_cache = true;
00280     if (avd.flags & SELINUX_AVD_FLAGS_PERMISSIVE)
00281         cache->permissive = true;
00282     if (!ucontext)
00283         cache->tcontext_is_valid = true;
00284     if (ncontext)
00285         cache->ncontext = pstrdup(ncontext);
00286 
00287     avc_num_caches++;
00288 
00289     if (avc_num_caches > avc_threshold)
00290         sepgsql_avc_reclaim();
00291 
00292     avc_slots[index] = lcons(cache, avc_slots[index]);
00293 
00294     MemoryContextSwitchTo(oldctx);
00295 
00296     return cache;
00297 }
00298 
00299 /*
00300  * sepgsql_avc_lookup
00301  *
00302  * Look up a cache entry that matches the supplied security contexts and
00303  * object class.  If not found, create a new cache entry.
00304  */
00305 static avc_cache *
00306 sepgsql_avc_lookup(const char *scontext, const char *tcontext, uint16 tclass)
00307 {
00308     avc_cache  *cache;
00309     ListCell   *cell;
00310     uint32      hash;
00311     int         index;
00312 
00313     hash = sepgsql_avc_hash(scontext, tcontext, tclass);
00314     index = hash % AVC_NUM_SLOTS;
00315 
00316     foreach(cell, avc_slots[index])
00317     {
00318         cache = lfirst(cell);
00319 
00320         if (cache->hash == hash &&
00321             cache->tclass == tclass &&
00322             strcmp(cache->tcontext, tcontext) == 0 &&
00323             strcmp(cache->scontext, scontext) == 0)
00324         {
00325             cache->hot_cache = true;
00326             return cache;
00327         }
00328     }
00329     /* not found, so insert a new cache */
00330     return sepgsql_avc_compute(scontext, tcontext, tclass);
00331 }
00332 
00333 /*
00334  * sepgsql_avc_check_perms(_label)
00335  *
00336  * It returns 'true', if the security policy suggested to allow the required
00337  * permissions. Otherwise, it returns 'false' or raises an error according
00338  * to the 'abort_on_violation' argument.
00339  * The 'tobject' and 'tclass' identify the target object being referenced,
00340  * and 'required' is a bitmask of permissions (SEPG_*__*) defined for each
00341  * object classes.
00342  * The 'audit_name' is the object name (optional). If SEPGSQL_AVC_NOAUDIT
00343  * was supplied, it means to skip all the audit messages.
00344  */
00345 bool
00346 sepgsql_avc_check_perms_label(const char *tcontext,
00347                               uint16 tclass, uint32 required,
00348                               const char *audit_name,
00349                               bool abort_on_violation)
00350 {
00351     char       *scontext = sepgsql_get_client_label();
00352     avc_cache  *cache;
00353     uint32      denied;
00354     uint32      audited;
00355     bool        result;
00356 
00357     sepgsql_avc_check_valid();
00358     do
00359     {
00360         result = true;
00361 
00362         /*
00363          * If the target object is unlabeled, we perform the check using the
00364          * label supplied by sepgsql_avc_unlabeled().
00365          */
00366         if (tcontext)
00367             cache = sepgsql_avc_lookup(scontext, tcontext, tclass);
00368         else
00369             cache = sepgsql_avc_lookup(scontext,
00370                                        sepgsql_avc_unlabeled(), tclass);
00371 
00372         denied = required & ~cache->allowed;
00373 
00374         /*
00375          * Compute permissions to be audited
00376          */
00377         if (sepgsql_get_debug_audit())
00378             audited = (denied ? (denied & ~0) : (required & ~0));
00379         else
00380             audited = denied ? (denied & cache->auditdeny)
00381                 : (required & cache->auditallow);
00382 
00383         if (denied)
00384         {
00385             /*
00386              * In permissive mode or permissive domain, violated permissions
00387              * shall be audited to the log files at once, and then implicitly
00388              * allowed to avoid a flood of access denied logs, because the
00389              * purpose of permissive mode/domain is to collect a violation log
00390              * that will make it possible to fix up the security policy.
00391              */
00392             if (!sepgsql_getenforce() || cache->permissive)
00393                 cache->allowed |= required;
00394             else
00395                 result = false;
00396         }
00397     } while (!sepgsql_avc_check_valid());
00398 
00399     /*
00400      * In the case when we have something auditable actions here,
00401      * sepgsql_audit_log shall be called with text representation of security
00402      * labels for both of subject and object. It records this access
00403      * violation, so DBA will be able to find out unexpected security problems
00404      * later.
00405      */
00406     if (audited != 0 &&
00407         audit_name != SEPGSQL_AVC_NOAUDIT &&
00408         sepgsql_get_mode() != SEPGSQL_MODE_INTERNAL)
00409     {
00410         sepgsql_audit_log(!!denied,
00411                           cache->scontext,
00412                           cache->tcontext_is_valid ?
00413                           cache->tcontext : sepgsql_avc_unlabeled(),
00414                           cache->tclass,
00415                           audited,
00416                           audit_name);
00417     }
00418 
00419     if (abort_on_violation && !result)
00420         ereport(ERROR,
00421                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
00422                  errmsg("SELinux: security policy violation")));
00423 
00424     return result;
00425 }
00426 
00427 bool
00428 sepgsql_avc_check_perms(const ObjectAddress *tobject,
00429                         uint16 tclass, uint32 required,
00430                         const char *audit_name,
00431                         bool abort_on_violation)
00432 {
00433     char       *tcontext = GetSecurityLabel(tobject, SEPGSQL_LABEL_TAG);
00434     bool        rc;
00435 
00436     rc = sepgsql_avc_check_perms_label(tcontext,
00437                                        tclass, required,
00438                                        audit_name, abort_on_violation);
00439     if (tcontext)
00440         pfree(tcontext);
00441 
00442     return rc;
00443 }
00444 
00445 /*
00446  * sepgsql_avc_trusted_proc
00447  *
00448  * If the supplied function OID is configured as a trusted procedure, this
00449  * function will return a security label to be used during the execution of
00450  * that function.  Otherwise, it returns NULL.
00451  */
00452 char *
00453 sepgsql_avc_trusted_proc(Oid functionId)
00454 {
00455     char       *scontext = sepgsql_get_client_label();
00456     char       *tcontext;
00457     ObjectAddress tobject;
00458     avc_cache  *cache;
00459 
00460     tobject.classId = ProcedureRelationId;
00461     tobject.objectId = functionId;
00462     tobject.objectSubId = 0;
00463     tcontext = GetSecurityLabel(&tobject, SEPGSQL_LABEL_TAG);
00464 
00465     sepgsql_avc_check_valid();
00466     do
00467     {
00468         if (tcontext)
00469             cache = sepgsql_avc_lookup(scontext, tcontext,
00470                                        SEPG_CLASS_DB_PROCEDURE);
00471         else
00472             cache = sepgsql_avc_lookup(scontext, sepgsql_avc_unlabeled(),
00473                                        SEPG_CLASS_DB_PROCEDURE);
00474     } while (!sepgsql_avc_check_valid());
00475 
00476     return cache->ncontext;
00477 }
00478 
00479 /*
00480  * sepgsql_avc_exit
00481  *
00482  * Clean up userspace AVC on process exit.
00483  */
00484 static void
00485 sepgsql_avc_exit(int code, Datum arg)
00486 {
00487     selinux_status_close();
00488 }
00489 
00490 /*
00491  * sepgsql_avc_init
00492  *
00493  * Initialize the userspace AVC.  This should be called from _PG_init.
00494  */
00495 void
00496 sepgsql_avc_init(void)
00497 {
00498     int         rc;
00499 
00500     /*
00501      * All the avc stuff shall be allocated on avc_mem_cxt
00502      */
00503     avc_mem_cxt = AllocSetContextCreate(TopMemoryContext,
00504                                         "userspace access vector cache",
00505                                         ALLOCSET_DEFAULT_MINSIZE,
00506                                         ALLOCSET_DEFAULT_INITSIZE,
00507                                         ALLOCSET_DEFAULT_MAXSIZE);
00508     memset(avc_slots, 0, sizeof(avc_slots));
00509     avc_num_caches = 0;
00510     avc_lru_hint = 0;
00511     avc_threshold = AVC_DEF_THRESHOLD;
00512 
00513     /*
00514      * SELinux allows to mmap(2) its kernel status page in read-only mode to
00515      * inform userspace applications its status updating (such as policy
00516      * reloading) without system-call invocations. This feature is only
00517      * supported in Linux-2.6.38 or later, however, libselinux provides a
00518      * fallback mode to know its status using netlink sockets.
00519      */
00520     rc = selinux_status_open(1);
00521     if (rc < 0)
00522         ereport(ERROR,
00523                 (errcode(ERRCODE_INTERNAL_ERROR),
00524                  errmsg("SELinux: could not open selinux status : %m")));
00525     else if (rc > 0)
00526         ereport(LOG,
00527                 (errmsg("SELinux: kernel status page uses fallback mode")));
00528 
00529     /* Arrange to close selinux status page on process exit. */
00530     on_proc_exit(sepgsql_avc_exit, 0);
00531 }