Header And Logo

PostgreSQL
| The world's most advanced open source database.

socket.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * socket.c
00004  *    Microsoft Windows Win32 Socket Functions
00005  *
00006  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00007  *
00008  * IDENTIFICATION
00009  *    src/backend/port/win32/socket.c
00010  *
00011  *-------------------------------------------------------------------------
00012  */
00013 
00014 #include "postgres.h"
00015 
00016 /*
00017  * Indicate if pgwin32_recv() and pgwin32_send() should operate
00018  * in non-blocking mode.
00019  *
00020  * Since the socket emulation layer always sets the actual socket to
00021  * non-blocking mode in order to be able to deliver signals, we must
00022  * specify this in a separate flag if we actually need non-blocking
00023  * operation.
00024  *
00025  * This flag changes the behaviour *globally* for all socket operations,
00026  * so it should only be set for very short periods of time.
00027  */
00028 int         pgwin32_noblock = 0;
00029 
00030 #undef socket
00031 #undef accept
00032 #undef connect
00033 #undef select
00034 #undef recv
00035 #undef send
00036 
00037 /*
00038  * Blocking socket functions implemented so they listen on both
00039  * the socket and the signal event, required for signal handling.
00040  */
00041 
/*
 * Map the most recent Winsock error (as reported by WSAGetLastError())
 * onto errno.
 *
 * Several Winsock codes share one errno value.  A code with no mapping is
 * reported at NOTICE level and stored as EINVAL.
 */
static void
TranslateSocketError(void)
{
    int         wsaerr = WSAGetLastError();

    switch (wsaerr)
    {
            /* Errors that have a directly corresponding errno value */
        case WSAEAFNOSUPPORT:
            errno = EAFNOSUPPORT;
            break;
        case WSAEMFILE:
            errno = EMFILE;
            break;
        case WSAENOBUFS:
            errno = ENOBUFS;
            break;
        case WSAEINTR:
            errno = EINTR;
            break;
        case WSAENOTSOCK:
            errno = EBADFD;
            break;
        case WSAEOPNOTSUPP:
            errno = EOPNOTSUPP;
            break;
        case WSAEWOULDBLOCK:
            errno = EWOULDBLOCK;
            break;
        case WSAEACCES:
            errno = EACCES;
            break;

            /* Both protocol complaints are reported the same way */
        case WSAEPROTONOSUPPORT:
        case WSAEPROTOTYPE:
            errno = EPROTONOSUPPORT;
            break;

            /* Refused connections ... */
        case WSAECONNREFUSED:
            errno = ECONNREFUSED;
            break;

            /* ... and lost or missing connections get the same errno */
        case WSAENOTCONN:
        case WSAENETRESET:
        case WSAECONNRESET:
        case WSAESHUTDOWN:
        case WSAECONNABORTED:
        case WSAEDISCON:
            errno = ECONNREFUSED;       /* ENOTCONN? */
            break;

            /* Everything in this group is lumped together as EINVAL */
        case WSANOTINITIALISED:
        case WSAENETDOWN:
        case WSAEINPROGRESS:
        case WSAEINVAL:
        case WSAESOCKTNOSUPPORT:
        case WSAEFAULT:
        case WSAEINVALIDPROVIDER:
        case WSAEINVALIDPROCTABLE:
        case WSAEMSGSIZE:
            errno = EINVAL;
            break;

        default:
            ereport(NOTICE,
                    (errmsg_internal("unrecognized win32 socket error code: %d", wsaerr)));
            errno = EINVAL;
            break;
    }
}
00106 
/*
 * Dispatch queued signals if UNBLOCKED_SIGNAL_QUEUE() reports any.
 *
 * Returns 1 with errno set to EINTR when signals were dispatched, and 0
 * (errno untouched) when there was nothing to do.
 */
static int
pgwin32_poll_signals(void)
{
    if (!UNBLOCKED_SIGNAL_QUEUE())
        return 0;

    pgwin32_dispatch_queued_signals();
    errno = EINTR;
    return 1;
}
00118 
00119 static int
00120 isDataGram(SOCKET s)
00121 {
00122     int         type;
00123     int         typelen = sizeof(type);
00124 
00125     if (getsockopt(s, SOL_SOCKET, SO_TYPE, (char *) &type, &typelen))
00126         return 1;
00127 
00128     return (type == SOCK_DGRAM) ? 1 : 0;
00129 }
00130 
00131 int
00132 pgwin32_waitforsinglesocket(SOCKET s, int what, int timeout)
00133 {
00134     static HANDLE waitevent = INVALID_HANDLE_VALUE;
00135     static SOCKET current_socket = -1;
00136     static int  isUDP = 0;
00137     HANDLE      events[2];
00138     int         r;
00139 
00140     /* Create an event object just once and use it on all future calls */
00141     if (waitevent == INVALID_HANDLE_VALUE)
00142     {
00143         waitevent = CreateEvent(NULL, TRUE, FALSE, NULL);
00144 
00145         if (waitevent == INVALID_HANDLE_VALUE)
00146             ereport(ERROR,
00147                     (errmsg_internal("could not create socket waiting event: error code %lu", GetLastError())));
00148     }
00149     else if (!ResetEvent(waitevent))
00150         ereport(ERROR,
00151                 (errmsg_internal("could not reset socket waiting event: error code %lu", GetLastError())));
00152 
00153     /*
00154      * Track whether socket is UDP or not.  (NB: most likely, this is both
00155      * useless and wrong; there is no reason to think that the behavior of
00156      * WSAEventSelect is different for TCP and UDP.)
00157      */
00158     if (current_socket != s)
00159         isUDP = isDataGram(s);
00160     current_socket = s;
00161 
00162     /*
00163      * Attach event to socket.  NOTE: we must detach it again before
00164      * returning, since other bits of code may try to attach other events to
00165      * the socket.
00166      */
00167     if (WSAEventSelect(s, waitevent, what) != 0)
00168     {
00169         TranslateSocketError();
00170         return 0;
00171     }
00172 
00173     events[0] = pgwin32_signal_event;
00174     events[1] = waitevent;
00175 
00176     /*
00177      * Just a workaround of unknown locking problem with writing in UDP socket
00178      * under high load: Client's pgsql backend sleeps infinitely in
00179      * WaitForMultipleObjectsEx, pgstat process sleeps in pgwin32_select().
00180      * So, we will wait with small timeout(0.1 sec) and if sockect is still
00181      * blocked, try WSASend (see comments in pgwin32_select) and wait again.
00182      */
00183     if ((what & FD_WRITE) && isUDP)
00184     {
00185         for (;;)
00186         {
00187             r = WaitForMultipleObjectsEx(2, events, FALSE, 100, TRUE);
00188 
00189             if (r == WAIT_TIMEOUT)
00190             {
00191                 char        c;
00192                 WSABUF      buf;
00193                 DWORD       sent;
00194 
00195                 buf.buf = &c;
00196                 buf.len = 0;
00197 
00198                 r = WSASend(s, &buf, 1, &sent, 0, NULL, NULL);
00199                 if (r == 0)     /* Completed - means things are fine! */
00200                 {
00201                     WSAEventSelect(s, NULL, 0);
00202                     return 1;
00203                 }
00204                 else if (WSAGetLastError() != WSAEWOULDBLOCK)
00205                 {
00206                     TranslateSocketError();
00207                     WSAEventSelect(s, NULL, 0);
00208                     return 0;
00209                 }
00210             }
00211             else
00212                 break;
00213         }
00214     }
00215     else
00216         r = WaitForMultipleObjectsEx(2, events, FALSE, timeout, TRUE);
00217 
00218     WSAEventSelect(s, NULL, 0);
00219 
00220     if (r == WAIT_OBJECT_0 || r == WAIT_IO_COMPLETION)
00221     {
00222         pgwin32_dispatch_queued_signals();
00223         errno = EINTR;
00224         return 0;
00225     }
00226     if (r == WAIT_OBJECT_0 + 1)
00227         return 1;
00228     if (r == WAIT_TIMEOUT)
00229     {
00230         errno = EWOULDBLOCK;
00231         return 0;
00232     }
00233     ereport(ERROR,
00234             (errmsg_internal("unrecognized return value from WaitForMultipleObjects: %d (error code %lu)", r, GetLastError())));
00235     return 0;
00236 }
00237 
00238 /*
00239  * Create a socket, setting it to overlapped and non-blocking
00240  */
00241 SOCKET
00242 pgwin32_socket(int af, int type, int protocol)
00243 {
00244     SOCKET      s;
00245     unsigned long on = 1;
00246 
00247     s = WSASocket(af, type, protocol, NULL, 0, WSA_FLAG_OVERLAPPED);
00248     if (s == INVALID_SOCKET)
00249     {
00250         TranslateSocketError();
00251         return INVALID_SOCKET;
00252     }
00253 
00254     if (ioctlsocket(s, FIONBIO, &on))
00255     {
00256         TranslateSocketError();
00257         return INVALID_SOCKET;
00258     }
00259     errno = 0;
00260 
00261     return s;
00262 }
00263 
00264 
00265 SOCKET
00266 pgwin32_accept(SOCKET s, struct sockaddr * addr, int *addrlen)
00267 {
00268     SOCKET      rs;
00269 
00270     /*
00271      * Poll for signals, but don't return with EINTR, since we don't handle
00272      * that in pqcomm.c
00273      */
00274     pgwin32_poll_signals();
00275 
00276     rs = WSAAccept(s, addr, addrlen, NULL, 0);
00277     if (rs == INVALID_SOCKET)
00278     {
00279         TranslateSocketError();
00280         return INVALID_SOCKET;
00281     }
00282     return rs;
00283 }
00284 
00285 
00286 /* No signal delivery during connect. */
00287 int
00288 pgwin32_connect(SOCKET s, const struct sockaddr * addr, int addrlen)
00289 {
00290     int         r;
00291 
00292     r = WSAConnect(s, addr, addrlen, NULL, NULL, NULL, NULL);
00293     if (r == 0)
00294         return 0;
00295 
00296     if (WSAGetLastError() != WSAEWOULDBLOCK)
00297     {
00298         TranslateSocketError();
00299         return -1;
00300     }
00301 
00302     while (pgwin32_waitforsinglesocket(s, FD_CONNECT, INFINITE) == 0)
00303     {
00304         /* Loop endlessly as long as we are just delivering signals */
00305     }
00306 
00307     return 0;
00308 }
00309 
00310 int
00311 pgwin32_recv(SOCKET s, char *buf, int len, int f)
00312 {
00313     WSABUF      wbuf;
00314     int         r;
00315     DWORD       b;
00316     DWORD       flags = f;
00317     int         n;
00318 
00319     if (pgwin32_poll_signals())
00320         return -1;
00321 
00322     wbuf.len = len;
00323     wbuf.buf = buf;
00324 
00325     r = WSARecv(s, &wbuf, 1, &b, &flags, NULL, NULL);
00326     if (r != SOCKET_ERROR && b > 0)
00327         /* Read succeeded right away */
00328         return b;
00329 
00330     if (r == SOCKET_ERROR &&
00331         WSAGetLastError() != WSAEWOULDBLOCK)
00332     {
00333         TranslateSocketError();
00334         return -1;
00335     }
00336 
00337     if (pgwin32_noblock)
00338     {
00339         /*
00340          * No data received, and we are in "emulated non-blocking mode", so
00341          * return indicating that we'd block if we were to continue.
00342          */
00343         errno = EWOULDBLOCK;
00344         return -1;
00345     }
00346 
00347     /* No error, zero bytes (win2000+) or error+WSAEWOULDBLOCK (<=nt4) */
00348 
00349     for (n = 0; n < 5; n++)
00350     {
00351         if (pgwin32_waitforsinglesocket(s, FD_READ | FD_CLOSE | FD_ACCEPT,
00352                                         INFINITE) == 0)
00353             return -1;          /* errno already set */
00354 
00355         r = WSARecv(s, &wbuf, 1, &b, &flags, NULL, NULL);
00356         if (r == SOCKET_ERROR)
00357         {
00358             if (WSAGetLastError() == WSAEWOULDBLOCK)
00359             {
00360                 /*
00361                  * There seem to be cases on win2k (at least) where WSARecv
00362                  * can return WSAEWOULDBLOCK even when
00363                  * pgwin32_waitforsinglesocket claims the socket is readable.
00364                  * In this case, just sleep for a moment and try again. We try
00365                  * up to 5 times - if it fails more than that it's not likely
00366                  * to ever come back.
00367                  */
00368                 pg_usleep(10000);
00369                 continue;
00370             }
00371             TranslateSocketError();
00372             return -1;
00373         }
00374         return b;
00375     }
00376     ereport(NOTICE,
00377       (errmsg_internal("could not read from ready socket (after retries)")));
00378     errno = EWOULDBLOCK;
00379     return -1;
00380 }
00381 
00382 /*
00383  * The second argument to send() is defined by SUS to be a "const void *"
00384  * and so we use the same signature here to keep compilers happy when
00385  * handling callers.
00386  *
00387  * But the buf member of a WSABUF struct is defined as "char *", so we cast
00388  * the second argument to that here when assigning it, also to keep compilers
00389  * happy.
00390  */
00391 
00392 int
00393 pgwin32_send(SOCKET s, const void *buf, int len, int flags)
00394 {
00395     WSABUF      wbuf;
00396     int         r;
00397     DWORD       b;
00398 
00399     if (pgwin32_poll_signals())
00400         return -1;
00401 
00402     wbuf.len = len;
00403     wbuf.buf = (char *) buf;
00404 
00405     /*
00406      * Readiness of socket to send data to UDP socket may be not true: socket
00407      * can become busy again! So loop until send or error occurs.
00408      */
00409     for (;;)
00410     {
00411         r = WSASend(s, &wbuf, 1, &b, flags, NULL, NULL);
00412         if (r != SOCKET_ERROR && b > 0)
00413             /* Write succeeded right away */
00414             return b;
00415 
00416         if (r == SOCKET_ERROR &&
00417             WSAGetLastError() != WSAEWOULDBLOCK)
00418         {
00419             TranslateSocketError();
00420             return -1;
00421         }
00422 
00423         if (pgwin32_noblock)
00424         {
00425             /*
00426              * No data sent, and we are in "emulated non-blocking mode", so
00427              * return indicating that we'd block if we were to continue.
00428              */
00429             errno = EWOULDBLOCK;
00430             return -1;
00431         }
00432 
00433         /* No error, zero bytes (win2000+) or error+WSAEWOULDBLOCK (<=nt4) */
00434 
00435         if (pgwin32_waitforsinglesocket(s, FD_WRITE | FD_CLOSE, INFINITE) == 0)
00436             return -1;
00437     }
00438 
00439     return -1;
00440 }
00441 
00442 
00443 /*
00444  * Wait for activity on one or more sockets.
00445  * While waiting, allow signals to run
00446  *
00447  * NOTE! Currently does not implement exceptfds check,
00448  * since it is not used in postgresql!
00449  */
00450 int
00451 pgwin32_select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, const struct timeval * timeout)
00452 {
00453     WSAEVENT    events[FD_SETSIZE * 2]; /* worst case is readfds totally
00454                                          * different from writefds, so
00455                                          * 2*FD_SETSIZE sockets */
00456     SOCKET      sockets[FD_SETSIZE * 2];
00457     int         numevents = 0;
00458     int         i;
00459     int         r;
00460     DWORD       timeoutval = WSA_INFINITE;
00461     FD_SET      outreadfds;
00462     FD_SET      outwritefds;
00463     int         nummatches = 0;
00464 
00465     Assert(exceptfds == NULL);
00466 
00467     if (pgwin32_poll_signals())
00468         return -1;
00469 
00470     FD_ZERO(&outreadfds);
00471     FD_ZERO(&outwritefds);
00472 
00473     /*
00474      * Write FDs are different in the way that it is only flagged by
00475      * WSASelectEvent() if we have tried to write to them first. So try an
00476      * empty write
00477      */
00478     if (writefds)
00479     {
00480         for (i = 0; i < writefds->fd_count; i++)
00481         {
00482             char        c;
00483             WSABUF      buf;
00484             DWORD       sent;
00485 
00486             buf.buf = &c;
00487             buf.len = 0;
00488 
00489             r = WSASend(writefds->fd_array[i], &buf, 1, &sent, 0, NULL, NULL);
00490             if (r == 0)         /* Completed - means things are fine! */
00491                 FD_SET(writefds->fd_array[i], &outwritefds);
00492 
00493             else
00494             {                   /* Not completed */
00495                 if (WSAGetLastError() != WSAEWOULDBLOCK)
00496 
00497                     /*
00498                      * Not completed, and not just "would block", so an error
00499                      * occurred
00500                      */
00501                     FD_SET(writefds->fd_array[i], &outwritefds);
00502             }
00503         }
00504         if (outwritefds.fd_count > 0)
00505         {
00506             memcpy(writefds, &outwritefds, sizeof(fd_set));
00507             if (readfds)
00508                 FD_ZERO(readfds);
00509             return outwritefds.fd_count;
00510         }
00511     }
00512 
00513 
00514     /* Now set up for an actual select */
00515 
00516     if (timeout != NULL)
00517     {
00518         /* timeoutval is in milliseconds */
00519         timeoutval = timeout->tv_sec * 1000 + timeout->tv_usec / 1000;
00520     }
00521 
00522     if (readfds != NULL)
00523     {
00524         for (i = 0; i < readfds->fd_count; i++)
00525         {
00526             events[numevents] = WSACreateEvent();
00527             sockets[numevents] = readfds->fd_array[i];
00528             numevents++;
00529         }
00530     }
00531     if (writefds != NULL)
00532     {
00533         for (i = 0; i < writefds->fd_count; i++)
00534         {
00535             if (!readfds ||
00536                 !FD_ISSET(writefds->fd_array[i], readfds))
00537             {
00538                 /* If the socket is not in the read list */
00539                 events[numevents] = WSACreateEvent();
00540                 sockets[numevents] = writefds->fd_array[i];
00541                 numevents++;
00542             }
00543         }
00544     }
00545 
00546     for (i = 0; i < numevents; i++)
00547     {
00548         int         flags = 0;
00549 
00550         if (readfds && FD_ISSET(sockets[i], readfds))
00551             flags |= FD_READ | FD_ACCEPT | FD_CLOSE;
00552 
00553         if (writefds && FD_ISSET(sockets[i], writefds))
00554             flags |= FD_WRITE | FD_CLOSE;
00555 
00556         if (WSAEventSelect(sockets[i], events[i], flags) != 0)
00557         {
00558             TranslateSocketError();
00559             /* release already-assigned event objects */
00560             while (--i >= 0)
00561                 WSAEventSelect(sockets[i], NULL, 0);
00562             for (i = 0; i < numevents; i++)
00563                 WSACloseEvent(events[i]);
00564             return -1;
00565         }
00566     }
00567 
00568     events[numevents] = pgwin32_signal_event;
00569     r = WaitForMultipleObjectsEx(numevents + 1, events, FALSE, timeoutval, TRUE);
00570     if (r != WAIT_TIMEOUT && r != WAIT_IO_COMPLETION && r != (WAIT_OBJECT_0 + numevents))
00571     {
00572         /*
00573          * We scan all events, even those not signalled, in case more than one
00574          * event has been tagged but Wait.. can only return one.
00575          */
00576         WSANETWORKEVENTS resEvents;
00577 
00578         for (i = 0; i < numevents; i++)
00579         {
00580             ZeroMemory(&resEvents, sizeof(resEvents));
00581             if (WSAEnumNetworkEvents(sockets[i], events[i], &resEvents) != 0)
00582                 elog(ERROR, "failed to enumerate network events: error code %u",
00583                      WSAGetLastError());
00584             /* Read activity? */
00585             if (readfds && FD_ISSET(sockets[i], readfds))
00586             {
00587                 if ((resEvents.lNetworkEvents & FD_READ) ||
00588                     (resEvents.lNetworkEvents & FD_ACCEPT) ||
00589                     (resEvents.lNetworkEvents & FD_CLOSE))
00590                 {
00591                     FD_SET(sockets[i], &outreadfds);
00592 
00593                     nummatches++;
00594                 }
00595             }
00596             /* Write activity? */
00597             if (writefds && FD_ISSET(sockets[i], writefds))
00598             {
00599                 if ((resEvents.lNetworkEvents & FD_WRITE) ||
00600                     (resEvents.lNetworkEvents & FD_CLOSE))
00601                 {
00602                     FD_SET(sockets[i], &outwritefds);
00603 
00604                     nummatches++;
00605                 }
00606             }
00607         }
00608     }
00609 
00610     /* Clean up all the event objects */
00611     for (i = 0; i < numevents; i++)
00612     {
00613         WSAEventSelect(sockets[i], NULL, 0);
00614         WSACloseEvent(events[i]);
00615     }
00616 
00617     if (r == WSA_WAIT_TIMEOUT)
00618     {
00619         if (readfds)
00620             FD_ZERO(readfds);
00621         if (writefds)
00622             FD_ZERO(writefds);
00623         return 0;
00624     }
00625 
00626     if (r == WAIT_OBJECT_0 + numevents)
00627     {
00628         pgwin32_dispatch_queued_signals();
00629         errno = EINTR;
00630         if (readfds)
00631             FD_ZERO(readfds);
00632         if (writefds)
00633             FD_ZERO(writefds);
00634         return -1;
00635     }
00636 
00637     /* Overwrite socket sets with our resulting values */
00638     if (readfds)
00639         memcpy(readfds, &outreadfds, sizeof(fd_set));
00640     if (writefds)
00641         memcpy(writefds, &outwritefds, sizeof(fd_set));
00642     return nummatches;
00643 }
00644 
00645 
00646 /*
00647  * Return win32 error string, since strerror can't
00648  * handle winsock codes
00649  */
00650 static char wserrbuf[256];
00651 const char *
00652 pgwin32_socket_strerror(int err)
00653 {
00654     static HANDLE handleDLL = INVALID_HANDLE_VALUE;
00655 
00656     if (handleDLL == INVALID_HANDLE_VALUE)
00657     {
00658         handleDLL = LoadLibraryEx("netmsg.dll", NULL, DONT_RESOLVE_DLL_REFERENCES | LOAD_LIBRARY_AS_DATAFILE);
00659         if (handleDLL == NULL)
00660             ereport(FATAL,
00661                     (errmsg_internal("could not load netmsg.dll: error code %lu", GetLastError())));
00662     }
00663 
00664     ZeroMemory(&wserrbuf, sizeof(wserrbuf));
00665     if (FormatMessage(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_FROM_HMODULE,
00666                       handleDLL,
00667                       err,
00668                       MAKELANGID(LANG_ENGLISH, SUBLANG_DEFAULT),
00669                       wserrbuf,
00670                       sizeof(wserrbuf) - 1,
00671                       NULL) == 0)
00672     {
00673         /* Failed to get id */
00674         sprintf(wserrbuf, "unrecognized winsock error %d", err);
00675     }
00676     return wserrbuf;
00677 }