Header And Logo

PostgreSQL
| The world's most advanced open source database.

pg_test_fsync.c

Go to the documentation of this file.
00001 /*
00002  *  pg_test_fsync.c
00003  *      tests all supported fsync() methods
00004  */
00005 
00006 #include "postgres_fe.h"
00007 
00008 #include <sys/stat.h>
00009 #include <sys/time.h>
00010 #include <time.h>
00011 #include <unistd.h>
00012 #include <signal.h>
00013 
00014 #include "getopt_long.h"
00015 #include "access/xlogdefs.h"
00016 
00017 
/*
 * Default path of the scratch file the tests write to.  It lives in the
 * current directory unless the user names a different file.
 */
#define FSYNC_FILENAME  "./pg_test_fsync.out"

/* XLOG_BLCKSZ expressed in kilobytes, used in the report headings */
#define XLOG_BLCKSZ_K   (XLOG_BLCKSZ / 1024)

/* printf formats that lay out one row of the results table */
#define LABEL_FORMAT        "        %-32s"
#define NA_FORMAT           "%18s"
#define OPS_FORMAT          "%9.3f ops/sec  %6.0f usecs/op"

/* microseconds per second */
#define USECS_SEC           1000000
00030 
/*
 * Timing helpers.  These are macros rather than functions to avoid timing
 * the function call overhead.
 *
 * START_TIMER clears alarm_triggered, arranges for it to be set again after
 * secs_per_test seconds, and stores the current time in start_t.
 */
#ifndef WIN32
#define START_TIMER \
do { \
    alarm_triggered = false; \
    alarm(secs_per_test); \
    gettimeofday(&start_t, NULL); \
} while (0)
#else
/*
 * WIN32 doesn't support alarm, so we create a thread and sleep there.
 *
 * CreateThread() reports failure by returning NULL, not
 * INVALID_HANDLE_VALUE.  The handle is closed immediately because nothing
 * ever waits on the thread; closing the handle does not stop the thread, it
 * only avoids leaking one handle per test.
 */
#define START_TIMER \
do { \
    HANDLE      alarm_thread; \
    alarm_triggered = false; \
    alarm_thread = CreateThread(NULL, 0, process_alarm, NULL, 0, NULL); \
    if (alarm_thread == NULL) \
    { \
        fprintf(stderr, "Cannot create thread for alarm\n"); \
        exit(1); \
    } \
    CloseHandle(alarm_thread); \
    gettimeofday(&start_t, NULL); \
} while (0)
#endif

/*
 * STOP_TIMER stores the current time in stop_t and prints the result line.
 * It expects a variable named "ops" (the operation count) to be in scope
 * where the macro is used.
 */
#define STOP_TIMER  \
do { \
    gettimeofday(&stop_t, NULL); \
    print_elapse(start_t, stop_t, ops); \
} while (0)
00059 
00060 
00061 static const char *progname;
00062 
00063 static int  secs_per_test = 5;
00064 static int  needs_unlink = 0;
00065 static char full_buf[XLOG_SEG_SIZE],
00066            *buf,
00067            *filename = FSYNC_FILENAME;
00068 static struct timeval start_t,
00069             stop_t;
00070 static bool alarm_triggered = false;
00071 
00072 
/* command-line processing and buffer setup */
static void handle_args(int argc, char *argv[]);
static void prepare_buf(void);

/* the individual test groups */
static void test_open(void);
static void test_sync(int writes_per_op);
static void test_open_syncs(void);
static void test_open_sync(const char *msg, int writes_size);
static void test_file_descriptor_sync(void);
static void test_non_sync(void);

/* timer expiry: a signal handler, or a thread entry point on WIN32 */
#ifdef WIN32
static DWORD WINAPI process_alarm(LPVOID param);
#else
static void process_alarm(int sig);
#endif

/* handler that removes the test file when the run is interrupted */
static void signal_cleanup(int sig);

#ifdef HAVE_FSYNC_WRITETHROUGH
static int  pg_fsync_writethrough(int fd);
#endif

/* reporting helpers */
static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops);
static void die(const char *str);
00094 
00095 
00096 int
00097 main(int argc, char *argv[])
00098 {
00099     progname = get_progname(argv[0]);
00100 
00101     handle_args(argc, argv);
00102 
00103     /* Prevent leaving behind the test file */
00104     pqsignal(SIGINT, signal_cleanup);
00105     pqsignal(SIGTERM, signal_cleanup);
00106 #ifndef WIN32
00107     pqsignal(SIGALRM, process_alarm);
00108 #endif
00109 #ifdef SIGHUP
00110     /* Not defined on win32 */
00111     pqsignal(SIGHUP, signal_cleanup);
00112 #endif
00113 
00114     prepare_buf();
00115 
00116     test_open();
00117 
00118     /* Test using 1 XLOG_BLCKSZ write */
00119     test_sync(1);
00120 
00121     /* Test using 2 XLOG_BLCKSZ writes */
00122     test_sync(2);
00123 
00124     test_open_syncs();
00125 
00126     test_file_descriptor_sync();
00127 
00128     test_non_sync();
00129 
00130     unlink(filename);
00131 
00132     return 0;
00133 }
00134 
00135 static void
00136 handle_args(int argc, char *argv[])
00137 {
00138     static struct option long_options[] = {
00139         {"filename", required_argument, NULL, 'f'},
00140         {"secs-per-test", required_argument, NULL, 's'},
00141         {NULL, 0, NULL, 0}
00142     };
00143 
00144     int         option;         /* Command line option */
00145     int         optindex = 0;   /* used by getopt_long */
00146 
00147     if (argc > 1)
00148     {
00149         if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0 ||
00150             strcmp(argv[1], "-?") == 0)
00151         {
00152             printf("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n", progname);
00153             exit(0);
00154         }
00155         if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
00156         {
00157             puts("pg_test_fsync (PostgreSQL) " PG_VERSION);
00158             exit(0);
00159         }
00160     }
00161 
00162     while ((option = getopt_long(argc, argv, "f:s:",
00163                                  long_options, &optindex)) != -1)
00164     {
00165         switch (option)
00166         {
00167             case 'f':
00168                 filename = strdup(optarg);
00169                 break;
00170 
00171             case 's':
00172                 secs_per_test = atoi(optarg);
00173                 break;
00174 
00175             default:
00176                 fprintf(stderr, "Try \"%s --help\" for more information.\n",
00177                         progname);
00178                 exit(1);
00179                 break;
00180         }
00181     }
00182 
00183     if (argc > optind)
00184     {
00185         fprintf(stderr,
00186                 "%s: too many command-line arguments (first is \"%s\")\n",
00187                 progname, argv[optind]);
00188         fprintf(stderr, "Try \"%s --help\" for more information.\n",
00189                 progname);
00190         exit(1);
00191     }
00192 
00193     printf("%d seconds per test\n", secs_per_test);
00194 #if PG_O_DIRECT != 0
00195     printf("O_DIRECT supported on this platform for open_datasync and open_sync.\n");
00196 #else
00197     printf("Direct I/O is not supported on this platform.\n");
00198 #endif
00199 }
00200 
00201 static void
00202 prepare_buf(void)
00203 {
00204     int         ops;
00205 
00206     /* write random data into buffer */
00207     for (ops = 0; ops < XLOG_SEG_SIZE; ops++)
00208         full_buf[ops] = random();
00209 
00210     buf = (char *) TYPEALIGN(ALIGNOF_XLOG_BUFFER, full_buf);
00211 }
00212 
00213 static void
00214 test_open(void)
00215 {
00216     int         tmpfile;
00217 
00218     /*
00219      * test if we can open the target file
00220      */
00221     if ((tmpfile = open(filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR)) == -1)
00222         die("could not open output file");
00223     needs_unlink = 1;
00224     if (write(tmpfile, full_buf, XLOG_SEG_SIZE) != XLOG_SEG_SIZE)
00225         die("write failed");
00226 
00227     /* fsync now so that dirty buffers don't skew later tests */
00228     if (fsync(tmpfile) != 0)
00229         die("fsync failed");
00230 
00231     close(tmpfile);
00232 }
00233 
00234 static void
00235 test_sync(int writes_per_op)
00236 {
00237     int         tmpfile,
00238                 ops,
00239                 writes;
00240     bool        fs_warning = false;
00241 
00242     if (writes_per_op == 1)
00243         printf("\nCompare file sync methods using one %dkB write:\n", XLOG_BLCKSZ_K);
00244     else
00245         printf("\nCompare file sync methods using two %dkB writes:\n", XLOG_BLCKSZ_K);
00246     printf("(in wal_sync_method preference order, except fdatasync\n");
00247     printf("is Linux's default)\n");
00248 
00249     /*
00250      * Test open_datasync if available
00251      */
00252     printf(LABEL_FORMAT, "open_datasync");
00253     fflush(stdout);
00254 
00255 #ifdef OPEN_DATASYNC_FLAG
00256     if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT, 0)) == -1)
00257     {
00258         printf(NA_FORMAT, "n/a*\n");
00259         fs_warning = true;
00260     }
00261     else
00262     {
00263         if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT, 0)) == -1)
00264             die("could not open output file");
00265         START_TIMER;
00266         for (ops = 0; alarm_triggered == false; ops++)
00267         {
00268             for (writes = 0; writes < writes_per_op; writes++)
00269                 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
00270                     die("write failed");
00271             if (lseek(tmpfile, 0, SEEK_SET) == -1)
00272                 die("seek failed");
00273         }
00274         STOP_TIMER;
00275         close(tmpfile);
00276     }
00277 #else
00278     printf(NA_FORMAT, "n/a\n");
00279 #endif
00280 
00281 /*
00282  * Test fdatasync if available
00283  */
00284     printf(LABEL_FORMAT, "fdatasync");
00285     fflush(stdout);
00286 
00287 #ifdef HAVE_FDATASYNC
00288     if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
00289         die("could not open output file");
00290     START_TIMER;
00291     for (ops = 0; alarm_triggered == false; ops++)
00292     {
00293         for (writes = 0; writes < writes_per_op; writes++)
00294             if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
00295                 die("write failed");
00296         fdatasync(tmpfile);
00297         if (lseek(tmpfile, 0, SEEK_SET) == -1)
00298             die("seek failed");
00299     }
00300     STOP_TIMER;
00301     close(tmpfile);
00302 #else
00303     printf(NA_FORMAT, "n/a\n");
00304 #endif
00305 
00306 /*
00307  * Test fsync
00308  */
00309     printf(LABEL_FORMAT, "fsync");
00310     fflush(stdout);
00311 
00312     if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
00313         die("could not open output file");
00314     START_TIMER;
00315     for (ops = 0; alarm_triggered == false; ops++)
00316     {
00317         for (writes = 0; writes < writes_per_op; writes++)
00318             if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
00319                 die("write failed");
00320         if (fsync(tmpfile) != 0)
00321             die("fsync failed");
00322         if (lseek(tmpfile, 0, SEEK_SET) == -1)
00323             die("seek failed");
00324     }
00325     STOP_TIMER;
00326     close(tmpfile);
00327 
00328 /*
00329  * If fsync_writethrough is available, test as well
00330  */
00331     printf(LABEL_FORMAT, "fsync_writethrough");
00332     fflush(stdout);
00333 
00334 #ifdef HAVE_FSYNC_WRITETHROUGH
00335     if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
00336         die("could not open output file");
00337     START_TIMER;
00338     for (ops = 0; alarm_triggered == false; ops++)
00339     {
00340         for (writes = 0; writes < writes_per_op; writes++)
00341             if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
00342                 die("write failed");
00343         if (pg_fsync_writethrough(tmpfile) != 0)
00344             die("fsync failed");
00345         if (lseek(tmpfile, 0, SEEK_SET) == -1)
00346             die("seek failed");
00347     }
00348     STOP_TIMER;
00349     close(tmpfile);
00350 #else
00351     printf(NA_FORMAT, "n/a\n");
00352 #endif
00353 
00354 /*
00355  * Test open_sync if available
00356  */
00357     printf(LABEL_FORMAT, "open_sync");
00358     fflush(stdout);
00359 
00360 #ifdef OPEN_SYNC_FLAG
00361     if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1)
00362     {
00363         printf(NA_FORMAT, "n/a*\n");
00364         fs_warning = true;
00365     }
00366     else
00367     {
00368         START_TIMER;
00369         for (ops = 0; alarm_triggered == false; ops++)
00370         {
00371             for (writes = 0; writes < writes_per_op; writes++)
00372                 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
00373                     die("write failed");
00374             if (lseek(tmpfile, 0, SEEK_SET) == -1)
00375                 die("seek failed");
00376         }
00377         STOP_TIMER;
00378         close(tmpfile);
00379     }
00380 #else
00381     printf(NA_FORMAT, "n/a\n");
00382 #endif
00383 
00384     if (fs_warning)
00385     {
00386         printf("* This file system and its mount options do not support direct\n");
00387         printf("I/O, e.g. ext4 in journaled mode.\n");
00388     }
00389 }
00390 
/*
 * Run the open_sync test once for each way of splitting a 16kB write into
 * equally sized pieces, from a single 16kB write down to sixteen 1kB writes.
 */
static void
test_open_syncs(void)
{
    static const struct
    {
        const char *label;      /* row label printed in the results */
        int         size_kb;    /* size of each write, in kB */
    }           cases[] = {
        {" 1 * 16kB open_sync write", 16},
        {" 2 *  8kB open_sync writes", 8},
        {" 4 *  4kB open_sync writes", 4},
        {" 8 *  2kB open_sync writes", 2},
        {"16 *  1kB open_sync writes", 1}
    };
    const int   ncases = (int) (sizeof(cases) / sizeof(cases[0]));
    int         i;

    printf("\nCompare open_sync with different write sizes:\n");
    printf("(This is designed to compare the cost of writing 16kB\n");
    printf("in different write open_sync sizes.)\n");

    for (i = 0; i < ncases; i++)
        test_open_sync(cases[i].label, cases[i].size_kb);
}
00404 
00405 /*
00406  * Test open_sync with different size files
00407  */
00408 static void
00409 test_open_sync(const char *msg, int writes_size)
00410 {
00411 #ifdef OPEN_SYNC_FLAG
00412     int         tmpfile,
00413                 ops,
00414                 writes;
00415 #endif
00416 
00417     printf(LABEL_FORMAT, msg);
00418     fflush(stdout);
00419 
00420 #ifdef OPEN_SYNC_FLAG
00421     if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1)
00422         printf(NA_FORMAT, "n/a*\n");
00423     else
00424     {
00425         START_TIMER;
00426         for (ops = 0; alarm_triggered == false; ops++)
00427         {
00428             for (writes = 0; writes < 16 / writes_size; writes++)
00429                 if (write(tmpfile, buf, writes_size * 1024) !=
00430                     writes_size * 1024)
00431                     die("write failed");
00432             if (lseek(tmpfile, 0, SEEK_SET) == -1)
00433                 die("seek failed");
00434         }
00435         STOP_TIMER;
00436         close(tmpfile);
00437     }
00438 #else
00439     printf(NA_FORMAT, "n/a\n");
00440 #endif
00441 }
00442 
00443 static void
00444 test_file_descriptor_sync(void)
00445 {
00446     int         tmpfile,
00447                 ops;
00448 
00449     /*
00450      * Test whether fsync can sync data written on a different descriptor for
00451      * the same file.  This checks the efficiency of multi-process fsyncs
00452      * against the same file. Possibly this should be done with writethrough
00453      * on platforms which support it.
00454      */
00455     printf("\nTest if fsync on non-write file descriptor is honored:\n");
00456     printf("(If the times are similar, fsync() can sync data written\n");
00457     printf("on a different descriptor.)\n");
00458 
00459     /*
00460      * first write, fsync and close, which is the normal behavior without
00461      * multiple descriptors
00462      */
00463     printf(LABEL_FORMAT, "write, fsync, close");
00464     fflush(stdout);
00465 
00466     START_TIMER;
00467     for (ops = 0; alarm_triggered == false; ops++)
00468     {
00469         if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
00470             die("could not open output file");
00471         if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
00472             die("write failed");
00473         if (fsync(tmpfile) != 0)
00474             die("fsync failed");
00475         close(tmpfile);
00476 
00477         /*
00478          * open and close the file again to be consistent with the following
00479          * test
00480          */
00481         if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
00482             die("could not open output file");
00483         close(tmpfile);
00484     }
00485     STOP_TIMER;
00486 
00487     /*
00488      * Now open, write, close, open again and fsync This simulates processes
00489      * fsyncing each other's writes.
00490      */
00491     printf(LABEL_FORMAT, "write, close, fsync");
00492     fflush(stdout);
00493 
00494     START_TIMER;
00495     for (ops = 0; alarm_triggered == false; ops++)
00496     {
00497         if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
00498             die("could not open output file");
00499         if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
00500             die("write failed");
00501         close(tmpfile);
00502         /* reopen file */
00503         if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
00504             die("could not open output file");
00505         if (fsync(tmpfile) != 0)
00506             die("fsync failed");
00507         close(tmpfile);
00508     }
00509     STOP_TIMER;
00510 }
00511 
00512 static void
00513 test_non_sync(void)
00514 {
00515     int         tmpfile,
00516                 ops;
00517 
00518     /*
00519      * Test a simple write without fsync
00520      */
00521     printf("\nNon-Sync'ed %dkB writes:\n", XLOG_BLCKSZ_K);
00522     printf(LABEL_FORMAT, "write");
00523     fflush(stdout);
00524 
00525     START_TIMER;
00526     for (ops = 0; alarm_triggered == false; ops++)
00527     {
00528         if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
00529             die("could not open output file");
00530         if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
00531             die("write failed");
00532         close(tmpfile);
00533     }
00534     STOP_TIMER;
00535 }
00536 
00537 static void
00538 signal_cleanup(int signum)
00539 {
00540     /* Delete the file if it exists. Ignore errors */
00541     if (needs_unlink)
00542         unlink(filename);
00543     /* Finish incomplete line on stdout */
00544     puts("");
00545     exit(signum);
00546 }
00547 
#ifdef HAVE_FSYNC_WRITETHROUGH

/*
 * Sync fd using the platform's write-through mechanism: _commit() on WIN32,
 * fcntl(F_FULLFSYNC) where that is defined.  Returns 0 on success and -1 on
 * failure; where neither mechanism exists, fails with errno set to ENOSYS.
 */
static int
pg_fsync_writethrough(int fd)
{
    int         rc;

#ifdef WIN32
    rc = _commit(fd);
#elif defined(F_FULLFSYNC)
    if (fcntl(fd, F_FULLFSYNC, 0) == -1)
        rc = -1;
    else
        rc = 0;
#else
    errno = ENOSYS;
    rc = -1;
#endif

    return rc;
}
#endif
00563 
00564 /*
00565  * print out the writes per second for tests
00566  */
00567 static void
00568 print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
00569 {
00570     double      total_time = (stop_t.tv_sec - start_t.tv_sec) +
00571     (stop_t.tv_usec - start_t.tv_usec) * 0.000001;
00572     double      per_second = ops / total_time;
00573     double      avg_op_time_us = (total_time / ops) * USECS_SEC;
00574 
00575     printf(OPS_FORMAT "\n", per_second, avg_op_time_us);
00576 }
00577 
00578 #ifndef WIN32
00579 static void
00580 process_alarm(int sig)
00581 {
00582     alarm_triggered = true;
00583 }
00584 #else
00585 static DWORD WINAPI
00586 process_alarm(LPVOID param)
00587 {
00588     /* WIN32 doesn't support alarm, so we create a thread and sleep here */
00589     Sleep(secs_per_test * 1000);
00590     alarm_triggered = true;
00591     ExitThread(0);
00592 }
00593 #endif
00594 
/*
 * Report a fatal error and exit with status 1.  The message on stderr
 * consists of str followed by the description of the current errno value.
 */
static void
die(const char *str)
{
    const char *reason = strerror(errno);

    fprintf(stderr, "%s: %s\n", str, reason);
    exit(1);
}