/*-------------------------------------------------------------------------
 *
 * barrier.h
 *	  Memory barrier operations.
 *
 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/storage/barrier.h
 *
 *-------------------------------------------------------------------------
 */
#ifndef BARRIER_H
#define BARRIER_H

#include "storage/s_lock.h"

extern slock_t dummy_spinlock;

/*
 * A compiler barrier need not (and preferably should not) emit any actual
 * machine code, but must act as an optimization fence: the compiler must not
 * reorder loads or stores to main memory around the barrier.  However, the
 * CPU may still reorder loads or stores at runtime, if the architecture's
 * memory model permits this.
 *
 * A memory barrier must act as a compiler barrier, and in addition must
 * guarantee that all loads and stores issued prior to the barrier are
 * completed before any loads or stores issued after the barrier.  Unless
 * loads and stores are totally ordered (which is not the case on most
 * architectures) this requires issuing some sort of memory fencing
 * instruction.
 *
 * A read barrier must act as a compiler barrier, and in addition must
 * guarantee that any loads issued prior to the barrier are completed before
 * any loads issued after the barrier.  Similarly, a write barrier acts as a
 * compiler barrier, and also orders stores.  Read and write barriers are
 * thus weaker than a full memory barrier, but stronger than a compiler
 * barrier.  In practice, on machines with strong memory ordering, read and
 * write barriers may require nothing more than a compiler barrier.
 *
 * For an introduction to using memory barriers within the PostgreSQL backend,
 * see src/backend/storage/lmgr/README.barrier
 */
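
/*
 * Illustrative sketch (never compiled): the message-passing idiom that read
 * and write barriers pair up for.  The shared variables "msg" and "msg_ready"
 * and the two functions are hypothetical names invented for this example,
 * assuming it lived in a backend .c file that includes postgres.h.
 */
#if 0
static int	msg;					/* hypothetical shared payload */
static volatile bool msg_ready;		/* hypothetical shared flag */

static void
producer(void)
{
	msg = 42;				/* store the payload first */
	pg_write_barrier();		/* keep the payload store ahead of the flag store */
	msg_ready = true;		/* then publish the flag */
}

static void
consumer(void)
{
	while (!msg_ready)		/* wait until the flag is published */
		;
	pg_read_barrier();		/* keep the flag load ahead of the payload load */
	Assert(msg == 42);		/* the payload is now guaranteed to be visible */
}
#endif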

#if defined(DISABLE_BARRIERS)

/*
 * Fall through to the spinlock-based implementation.
 */
#elif defined(__INTEL_COMPILER)

/*
 * icc defines __GNUC__, but doesn't support gcc's inline asm syntax
 */
#define pg_memory_barrier()		_mm_mfence()
#define pg_compiler_barrier()	__memory_barrier()
#elif defined(__GNUC__)

/* This works on any architecture, since it's only talking to GCC itself. */
#define pg_compiler_barrier()	__asm__ __volatile__("" : : : "memory")

#if defined(__i386__)

/*
 * i386 does not allow loads to be reordered with other loads, or stores to be
 * reordered with other stores, but a load can be performed before a subsequent
 * store.
 *
 * "lock; addl" has worked for longer than "mfence".
 */
#define pg_memory_barrier()		\
	__asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory")
#define pg_read_barrier()		pg_compiler_barrier()
#define pg_write_barrier()		pg_compiler_barrier()
#elif defined(__x86_64__)		/* 64 bit x86 */

/*
 * x86_64 has similar ordering characteristics to i386.
 *
 * Technically, some x86-ish chips support uncached memory access and/or
 * special instructions that are weakly ordered.  In those cases we'd need
 * the read and write barriers to be lfence and sfence.  But since we don't
 * do those things, a compiler barrier should be enough.
 */
#define pg_memory_barrier()		\
	__asm__ __volatile__ ("lock; addl $0,0(%%rsp)" : : : "memory")
#define pg_read_barrier()		pg_compiler_barrier()
#define pg_write_barrier()		pg_compiler_barrier()
#elif defined(__ia64__) || defined(__ia64)

/*
 * Itanium is weakly ordered, so read and write barriers require a full
 * fence.
 */
#define pg_memory_barrier()		__asm__ __volatile__ ("mf" : : : "memory")
#elif defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)

/*
 * lwsync orders loads with respect to each other, and similarly with stores.
 * But a load can be performed before a subsequent store, so sync must be used
 * for a full memory barrier.
 */
#define pg_memory_barrier()		__asm__ __volatile__ ("sync" : : : "memory")
#define pg_read_barrier()		__asm__ __volatile__ ("lwsync" : : : "memory")
#define pg_write_barrier()		__asm__ __volatile__ ("lwsync" : : : "memory")
#elif defined(__alpha) || defined(__alpha__)	/* Alpha */

/*
 * Unlike all other known architectures, Alpha allows dependent reads to be
 * reordered, but we don't currently find it necessary to provide a conditional
 * read barrier to cover that case.  We might need to add that later.
 */
#define pg_memory_barrier()		__asm__ __volatile__ ("mb" : : : "memory")
#define pg_read_barrier()		__asm__ __volatile__ ("rmb" : : : "memory")
#define pg_write_barrier()		__asm__ __volatile__ ("wmb" : : : "memory")
#elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)

/*
 * If we're on GCC 4.1.0 or higher, we should be able to get a memory
 * barrier out of this compiler built-in.  But we prefer to rely on our
 * own definitions where possible, and use this only as a fallback.
 */
#define pg_memory_barrier()		__sync_synchronize()
#endif
#elif defined(__ia64__) || defined(__ia64)

#define pg_compiler_barrier()	_Asm_sched_fence()
#define pg_memory_barrier()		_Asm_mf()
#elif defined(WIN32_ONLY_COMPILER)

/* Should work on both MSVC and Borland. */
#include <intrin.h>
#pragma intrinsic(_ReadWriteBarrier)
#define pg_compiler_barrier()	_ReadWriteBarrier()
#define pg_memory_barrier()		MemoryBarrier()
#endif

/*
 * If we have no memory barrier implementation for this architecture, we
 * fall back to acquiring and releasing a spinlock.  This might, in turn,
 * fall back to the semaphore-based spinlock implementation, which will be
 * amazingly slow.
 *
 * It's not self-evident that every possible legal implementation of a
 * spinlock acquire-and-release would be equivalent to a full memory barrier.
 * For example, I'm not sure that Itanium's acq and rel add up to a full
 * fence.  But all of our actual implementations seem OK in this regard.
 */
#if !defined(pg_memory_barrier)
#define pg_memory_barrier() \
	do { S_LOCK(&dummy_spinlock); S_UNLOCK(&dummy_spinlock); } while (0)
#endif
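
/*
 * Illustrative sketch (never compiled): the store-then-load case that needs a
 * full pg_memory_barrier(), because read and write barriers (and the hardware
 * itself on x86) still allow a store to be reordered after a later load.  The
 * array "want_lock" and the function are hypothetical names invented for this
 * Dekker-style example; "me" is assumed to be 0 or 1.
 */
#if 0
static volatile bool want_lock[2];	/* hypothetical per-process flags */

static bool
try_enter(int me)
{
	want_lock[me] = true;	/* announce our intent first */
	pg_memory_barrier();	/* the store above must be visible before the load below */
	if (want_lock[1 - me])
	{
		want_lock[me] = false;	/* back off: the peer announced as well */
		return false;
	}
	return true;			/* at most one caller can get here concurrently */
}
#endif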

/*
 * If read or write barriers are undefined, we upgrade them to full memory
 * barriers.
 *
 * If a compiler barrier is unavailable, you probably don't want a full
 * memory barrier instead, so if you have a use case for a compiler barrier,
 * you'd better use #ifdef.
 */
#if !defined(pg_read_barrier)
#define pg_read_barrier()			pg_memory_barrier()
#endif
#if !defined(pg_write_barrier)
#define pg_write_barrier()			pg_memory_barrier()
#endif

#endif   /* BARRIER_H */