From 611217c96ec684799882cf330f40a0936131b6b5 Mon Sep 17 00:00:00 2001 From: Etienne Dechamps Date: Fri, 27 Jun 2014 21:58:35 +0100 Subject: [PATCH 1/4] Use native Windows events for the event loop. This commit changes the event loop to use WSAEventSelect() and WSAWaitForMultipleEvents() on Windows. This paves the way for making the event loop more flexible on Windows by introducing the required infrastructure to make the event loop wait on any Win32 event. This commit only affects the internal implementation of the event module. Externally visible behavior remains strictly unchanged (for now). --- src/event.c | 150 +++++++++++++++++++++++++++++++++++++++++++--------- src/event.h | 3 ++ 2 files changed, 129 insertions(+), 24 deletions(-) diff --git a/src/event.c b/src/event.c index 27b884c4..0e4e2bdf 100644 --- a/src/event.c +++ b/src/event.c @@ -23,15 +23,26 @@ #include "event.h" #include "net.h" #include "utils.h" +#include "xalloc.h" struct timeval now; +#ifndef HAVE_MINGW static fd_set readfds; static fd_set writefds; +#else +static const long READ_EVENTS = FD_READ | FD_ACCEPT | FD_CLOSE; +static const long WRITE_EVENTS = FD_WRITE | FD_CONNECT; +static DWORD event_count = 0; +#endif static volatile bool running; static int io_compare(const io_t *a, const io_t *b) { +#ifndef HAVE_MINGW return a->fd - b->fd; +#else + return a->event - b->event; +#endif } static int timeout_compare(const timeout_t *a, const timeout_t *b) { @@ -60,6 +71,12 @@ void io_add(io_t *io, io_cb_t cb, void *data, int fd, int flags) { return; io->fd = fd; +#ifdef HAVE_MINGW + io->event = WSACreateEvent(); + if (io->event == WSA_INVALID_EVENT) + abort(); + event_count++; +#endif io->cb = cb; io->data = data; io->node.data = io; @@ -71,8 +88,11 @@ void io_add(io_t *io, io_cb_t cb, void *data, int fd, int flags) { } void io_set(io_t *io, int flags) { + if (flags == io->flags) + return; io->flags = flags; +#ifndef HAVE_MINGW if(flags & IO_READ) FD_SET(io->fd, &readfds); else @@ -82,6 +102,15 @@ void io_set(io_t *io, int flags) { FD_SET(io->fd, &writefds); else FD_CLR(io->fd, &writefds); +#else + long events = 0; + if (flags & IO_WRITE) + events |= WRITE_EVENTS; + if (flags & IO_READ) + events |= READ_EVENTS; + if (WSAEventSelect(io->fd, io->event, events) != 0) + abort(); +#endif } void io_del(io_t *io) { @@ -89,6 +118,11 @@ void io_del(io_t *io) { return; io_set(io, 0); +#ifdef HAVE_MINGW + if (WSACloseEvent(io->event) == FALSE) + abort(); + event_count--; +#endif splay_unlink_node(&io_tree, &io->node); io->cb = NULL; @@ -182,32 +216,39 @@ void signal_del(signal_t *sig) { } #endif +static struct timeval * get_time_remaining(struct timeval *diff) { + gettimeofday(&now, NULL); + struct timeval *tv = NULL; + + while(timeout_tree.head) { + timeout_t *timeout = timeout_tree.head->data; + timersub(&timeout->tv, &now, diff); + + if(diff->tv_sec < 0) { + timeout->cb(timeout->data); + if(timercmp(&timeout->tv, &now, <)) + timeout_del(timeout); + } else { + tv = diff; + break; + } + } + + return tv; +} + bool event_loop(void) { running = true; +#ifndef HAVE_MINGW fd_set readable; fd_set writable; while(running) { - gettimeofday(&now, NULL); - struct timeval diff, *tv = NULL; - - while(timeout_tree.head) { - timeout_t *timeout = timeout_tree.head->data; - timersub(&timeout->tv, &now, &diff); - - if(diff.tv_sec < 0) { - timeout->cb(timeout->data); - if(timercmp(&timeout->tv, &now, <)) - timeout_del(timeout); - } else { - tv = &diff; - break; - } - } - memcpy(&readable, &readfds, sizeof readable); memcpy(&writable, &writefds, sizeof writable); + struct timeval diff; + struct timeval *tv = get_time_remaining(&diff); int fds = 0; @@ -216,13 +257,7 @@ bool event_loop(void) { fds = last->fd + 1; } -#ifdef HAVE_MINGW - LeaveCriticalSection(&mutex); -#endif int n = select(fds, &readable, &writable, NULL, tv); -#ifdef HAVE_MINGW - EnterCriticalSection(&mutex); -#endif if(n < 0) { if(sockwouldblock(sockerrno)) @@ -241,13 +276,80 @@ bool event_loop(void) { io->cb(io->data, IO_READ); } } +#else + while (running) { + struct timeval diff; + struct timeval *tv = get_time_remaining(&diff); + DWORD timeout_ms = tv ? (tv->tv_sec * 1000 + tv->tv_usec / 1000 + 1) : WSA_INFINITE; + + if (!event_count) { + LeaveCriticalSection(&mutex); + Sleep(timeout_ms); + EnterCriticalSection(&mutex); + continue; + } + + /* + For some reason, Microsoft decided to make the FD_WRITE event edge-triggered instead of level-triggered, + which is the opposite of what select() does. In practice, that means that if a FD_WRITE event triggers, + it will never trigger again until a send() returns EWOULDBLOCK. Since the semantics of this event loop + is that write events are level-triggered (i.e. they continue firing until the socket is full), we need + to emulate these semantics by making sure we fire each IO_WRITE that is still writeable. + + Note that technically FD_CLOSE has the same problem, but it's okay because user code does not rely on + this event being fired again if ignored. + */ + io_t* writeable_io = NULL; + for splay_each(io_t, io, &io_tree) + if (io->flags & IO_WRITE && send(io->fd, NULL, 0, 0) == 0) { + writeable_io = io; + break; + } + if (writeable_io) { + writeable_io->cb(writeable_io->data, IO_WRITE); + continue; + } + + WSAEVENT* events = xmalloc(event_count * sizeof(*events)); + DWORD event_index = 0; + for splay_each(io_t, io, &io_tree) { + events[event_index] = io->event; + event_index++; + } + + LeaveCriticalSection(&mutex); + DWORD result = WSAWaitForMultipleEvents(event_count, events, FALSE, timeout_ms, FALSE); + EnterCriticalSection(&mutex); + + WSAEVENT event; + if (result >= WSA_WAIT_EVENT_0 && result < WSA_WAIT_EVENT_0 + event_count) + event = events[result - WSA_WAIT_EVENT_0]; + free(events); + if (result == WSA_WAIT_TIMEOUT) + continue; + if (result < WSA_WAIT_EVENT_0 || result >= WSA_WAIT_EVENT_0 + event_count) + return false; + + io_t *io = splay_search(&io_tree, &((io_t){.event = event})); + if (!io) + abort(); + + WSANETWORKEVENTS network_events; + if (WSAEnumNetworkEvents(io->fd, io->event, &network_events) != 0) + return false; + if (network_events.lNetworkEvents & WRITE_EVENTS) + io->cb(io->data, IO_WRITE); + if (network_events.lNetworkEvents & READ_EVENTS) + io->cb(io->data, IO_READ); + } +#endif return true; } void event_flush_output(void) { for splay_each(io_t, io, &io_tree) - if(FD_ISSET(io->fd, &writefds)) + if(io->flags & IO_WRITE) io->cb(io->data, IO_WRITE); } diff --git a/src/event.h b/src/event.h index c6522c0e..bebb520d 100644 --- a/src/event.h +++ b/src/event.h @@ -32,6 +32,9 @@ typedef void (*signal_cb_t)(void *data); typedef struct io_t { int fd; int flags; +#ifdef HAVE_MINGW + WSAEVENT event; +#endif io_cb_t cb; void *data; splay_node_t node; From 2f9a1d4ab5ff51b05a5e8cc41a1528fdeb36c723 Mon Sep 17 00:00:00 2001 From: Etienne Dechamps Date: Sat, 28 Jun 2014 15:15:41 +0100 Subject: [PATCH 2/4] Make the event loop expose a Windows event interface. This allows event loop users to specify Win32 events to wait on, thus making the event loop more flexible. --- src/event.c | 37 ++++++++++++++++++++++++++----------- src/event.h | 3 +++ 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/src/event.c b/src/event.c index 0e4e2bdf..5e6908f5 100644 --- a/src/event.c +++ b/src/event.c @@ -72,9 +72,11 @@ void io_add(io_t *io, io_cb_t cb, void *data, int fd, int flags) { io->fd = fd; #ifdef HAVE_MINGW - io->event = WSACreateEvent(); - if (io->event == WSA_INVALID_EVENT) - abort(); + if (io->fd != -1) { + io->event = WSACreateEvent(); + if (io->event == WSA_INVALID_EVENT) + abort(); + } event_count++; #endif io->cb = cb; @@ -87,10 +89,19 @@ void io_add(io_t *io, io_cb_t cb, void *data, int fd, int flags) { abort(); } +#ifdef HAVE_MINGW +void io_add_event(io_t *io, io_cb_t cb, void *data, WSAEVENT event) { + io_add(io, cb, data, -1, 0); + io->event = event; +} +#endif + void io_set(io_t *io, int flags) { if (flags == io->flags) return; io->flags = flags; + if (io->fd == -1) + return; #ifndef HAVE_MINGW if(flags & IO_READ) @@ -119,7 +130,7 @@ void io_del(io_t *io) { io_set(io, 0); #ifdef HAVE_MINGW - if (WSACloseEvent(io->event) == FALSE) + if (io->fd != -1 && WSACloseEvent(io->event) == FALSE) abort(); event_count--; #endif @@ -334,13 +345,17 @@ bool event_loop(void) { if (!io) abort(); - WSANETWORKEVENTS network_events; - if (WSAEnumNetworkEvents(io->fd, io->event, &network_events) != 0) - return false; - if (network_events.lNetworkEvents & WRITE_EVENTS) - io->cb(io->data, IO_WRITE); - if (network_events.lNetworkEvents & READ_EVENTS) - io->cb(io->data, IO_READ); + if (io->fd == -1) { + io->cb(io->data, 0); + } else { + WSANETWORKEVENTS network_events; + if (WSAEnumNetworkEvents(io->fd, io->event, &network_events) != 0) + return false; + if (network_events.lNetworkEvents & WRITE_EVENTS) + io->cb(io->data, IO_WRITE); + if (network_events.lNetworkEvents & READ_EVENTS) + io->cb(io->data, IO_READ); + } } #endif diff --git a/src/event.h b/src/event.h index bebb520d..e4f5cee8 100644 --- a/src/event.h +++ b/src/event.h @@ -57,6 +57,9 @@ typedef struct signal_t { extern struct timeval now; extern void io_add(io_t *io, io_cb_t cb, void *data, int fd, int flags); +#ifdef HAVE_MINGW +extern void io_add_event(io_t *io, io_cb_t cb, void* data, WSAEVENT event); +#endif extern void io_del(io_t *io); extern void io_set(io_t *io, int flags); From ffbc99558cae4dff876645fe205349d8c4cd7acb Mon Sep 17 00:00:00 2001 From: Etienne Dechamps Date: Sat, 28 Jun 2014 15:19:11 +0100 Subject: [PATCH 3/4] Use a Windows event to stop tinc when running as a service. Currently, when the tinc service handler callback (which runs in a separate thread) receives a service shutdown request, it calls event_exit() to request the event loop to exit. This approach has a few issues: - The event loop will only notice the exit request when the next event fires. This slows down tinc service shutdown. In some extreme cases (DeviceStandby enabled, long PingTimeout and no connections), shutdown can take ages. - Strictly speaking, because of the absence of memory barriers, there is no guarantee that the event loop will even notice an exit request coming from another thread. I suppose marking the "running" variable as "volatile" is supposed to alleviate that, but it's unclear whether that provides any guarantees with modern systems and compilers. This commit fixes the issue by leveraging the new event loop Windows interface, using a custom Windows event that is manually set when shutdown is requested. --- src/event.c | 2 +- src/process.c | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/event.c b/src/event.c index 5e6908f5..8f15ebea 100644 --- a/src/event.c +++ b/src/event.c @@ -35,7 +35,7 @@ static const long READ_EVENTS = FD_READ | FD_ACCEPT | FD_CLOSE; static const long WRITE_EVENTS = FD_WRITE | FD_CONNECT; static DWORD event_count = 0; #endif -static volatile bool running; +static bool running; static int io_compare(const io_t *a, const io_t *b) { #ifndef HAVE_MINGW diff --git a/src/process.c b/src/process.c index c1038bcd..98f4d33a 100644 --- a/src/process.c +++ b/src/process.c @@ -108,6 +108,8 @@ static bool install_service(void) { return true; } +static io_t stop_io; + DWORD WINAPI controlhandler(DWORD request, DWORD type, LPVOID boe, LPVOID bah) { switch(request) { case SERVICE_CONTROL_INTERROGATE: @@ -124,16 +126,25 @@ DWORD WINAPI controlhandler(DWORD request, DWORD type, LPVOID boe, LPVOID bah) { return ERROR_CALL_NOT_IMPLEMENTED; } - event_exit(); - status.dwWaitHint = 30000; + status.dwWaitHint = 1000; status.dwCurrentState = SERVICE_STOP_PENDING; SetServiceStatus(statushandle, &status); + if (WSASetEvent(stop_io.event) == FALSE) + abort(); return NO_ERROR; } +static void stop_handler(void *data, int flags) { + event_exit(); +} + VOID WINAPI run_service(DWORD argc, LPTSTR* argv) { extern int main2(int argc, char **argv); + io_add_event(&stop_io, stop_handler, NULL, WSACreateEvent()); + if (stop_io.event == FALSE) + abort(); + status.dwServiceType = SERVICE_WIN32; status.dwControlsAccepted = SERVICE_ACCEPT_STOP | SERVICE_ACCEPT_SHUTDOWN; status.dwWin32ExitCode = 0; @@ -160,6 +171,9 @@ VOID WINAPI run_service(DWORD argc, LPTSTR* argv) { SetServiceStatus(statushandle, &status); } + if (WSACloseEvent(stop_io.event) == FALSE) + abort(); + io_del(&stop_io); return; } From 313a752cb5fbf27450d34c15b0085d2d8a4147af Mon Sep 17 00:00:00 2001 From: Etienne Dechamps Date: Sat, 28 Jun 2014 18:39:00 +0100 Subject: [PATCH 4/4] Remove the TAP-Win32 reader thread. tinc is using a separate thread to read from the TAP device on Windows. The rationale was that the notification mechanism for packets arriving on the virtual network device is based on Win32 events, and the event loop did not support listening to these events. Thanks to recent improvements, this event loop limitation has been lifted. Therefore we can get rid of the separate thread and simply add the Win32 "incoming packet" event to the event loop, just like a socket. The result is cleaner code that's easier to reason about. --- src/event.c | 10 ------- src/event.h | 1 - src/mingw/device.c | 73 ++++++++++++++++++++-------------------------- src/net.h | 2 -- src/tincd.c | 3 -- 5 files changed, 32 insertions(+), 57 deletions(-) diff --git a/src/event.c b/src/event.c index 8f15ebea..f3497420 100644 --- a/src/event.c +++ b/src/event.c @@ -294,9 +294,7 @@ bool event_loop(void) { DWORD timeout_ms = tv ? (tv->tv_sec * 1000 + tv->tv_usec / 1000 + 1) : WSA_INFINITE; if (!event_count) { - LeaveCriticalSection(&mutex); Sleep(timeout_ms); - EnterCriticalSection(&mutex); continue; } @@ -328,9 +326,7 @@ bool event_loop(void) { event_index++; } - LeaveCriticalSection(&mutex); DWORD result = WSAWaitForMultipleEvents(event_count, events, FALSE, timeout_ms, FALSE); - EnterCriticalSection(&mutex); WSAEVENT event; if (result >= WSA_WAIT_EVENT_0 && result < WSA_WAIT_EVENT_0 + event_count) @@ -362,12 +358,6 @@ bool event_loop(void) { return true; } -void event_flush_output(void) { - for splay_each(io_t, io, &io_tree) - if(io->flags & IO_WRITE) - io->cb(io->data, IO_WRITE); -} - void event_exit(void) { running = false; } diff --git a/src/event.h b/src/event.h index e4f5cee8..0ff8e01c 100644 --- a/src/event.h +++ b/src/event.h @@ -71,7 +71,6 @@ extern void signal_add(signal_t *sig, signal_cb_t cb, void *data, int signum); extern void signal_del(signal_t *sig); extern bool event_loop(void); -extern void event_flush_output(void); extern void event_exit(void); #endif diff --git a/src/mingw/device.c b/src/mingw/device.c index a765ce3b..33b13da6 100644 --- a/src/mingw/device.c +++ b/src/mingw/device.c @@ -36,6 +36,9 @@ int device_fd = -1; static HANDLE device_handle = INVALID_HANDLE_VALUE; +static io_t device_read_io; +static OVERLAPPED device_read_overlapped; +static vpn_packet_t device_read_packet; char *device = NULL; char *iface = NULL; static char *device_info = NULL; @@ -45,46 +48,34 @@ static uint64_t device_total_out = 0; extern char *myport; -static DWORD WINAPI tapreader(void *bla) { - int status; - DWORD len; - OVERLAPPED overlapped; - vpn_packet_t packet; +static void device_issue_read() { + device_read_overlapped.Offset = 0; + device_read_overlapped.OffsetHigh = 0; - logger(DEBUG_ALWAYS, LOG_DEBUG, "Tap reader running"); + int status = ReadFile(device_handle, (void *)device_read_packet.data, MTU, NULL, &device_read_overlapped); - /* Read from tap device and send to parent */ - - overlapped.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL); - - for(;;) { - overlapped.Offset = 0; - overlapped.OffsetHigh = 0; - ResetEvent(overlapped.hEvent); - - status = ReadFile(device_handle, (void *)packet.data, MTU, &len, &overlapped); - - if(!status) { - if(GetLastError() == ERROR_IO_PENDING) { - WaitForSingleObject(overlapped.hEvent, INFINITE); - if(!GetOverlappedResult(device_handle, &overlapped, &len, FALSE)) - continue; - } else { - logger(DEBUG_ALWAYS, LOG_ERR, "Error while reading from %s %s: %s", device_info, - device, strerror(errno)); - return -1; - } - } - - EnterCriticalSection(&mutex); - packet.len = len; - packet.priority = 0; - route(myself, &packet); - event_flush_output(); - LeaveCriticalSection(&mutex); + if(!status && GetLastError() != ERROR_IO_PENDING) { + logger(DEBUG_ALWAYS, LOG_ERR, "Error while reading from %s %s: %s", device_info, + device, strerror(errno)); } } +static void device_handle_read(void *data) { + ResetEvent(device_read_overlapped.hEvent); + + DWORD len; + if (!GetOverlappedResult(device_handle, &device_read_overlapped, &len, FALSE)) { + logger(DEBUG_ALWAYS, LOG_ERR, "Error getting read result from %s %s: %s", device_info, + device, strerror(errno)); + return; + } + + device_read_packet.len = len; + device_read_packet.priority = 0; + route(myself, &device_read_packet); + device_issue_read(); +} + static bool setup_device(void) { HKEY key, key2; int i; @@ -192,12 +183,9 @@ static bool setup_device(void) { /* Start the tap reader */ - thread = CreateThread(NULL, 0, tapreader, NULL, 0, NULL); - - if(!thread) { - logger(DEBUG_ALWAYS, LOG_ERR, "System call `%s' failed: %s", "CreateThread", winerror(GetLastError())); - return false; - } + io_add_event(&device_read_io, device_handle_read, NULL, CreateEvent(NULL, TRUE, FALSE, NULL)); + device_read_overlapped.hEvent = device_read_io.event; + device_issue_read(); device_info = "Windows tap device"; @@ -221,6 +209,9 @@ static void disable_device(void) { } static void close_device(void) { + io_del(&device_read_io); + CancelIo(device_handle); + CloseHandle(device_read_overlapped.hEvent); CloseHandle(device_handle); device_handle = INVALID_HANDLE_VALUE; free(device); device = NULL; diff --git a/src/net.h b/src/net.h index f0ece896..3b2cdf05 100644 --- a/src/net.h +++ b/src/net.h @@ -203,8 +203,6 @@ extern void load_all_nodes(void); #ifndef HAVE_MINGW #define closesocket(s) close(s) -#else -extern CRITICAL_SECTION mutex; #endif #endif /* __TINC_NET_H__ */ diff --git a/src/tincd.c b/src/tincd.c index 56ed2f0c..87c0b7d1 100644 --- a/src/tincd.c +++ b/src/tincd.c @@ -106,7 +106,6 @@ static struct option const long_options[] = { #ifdef HAVE_MINGW static struct WSAData wsa_state; -CRITICAL_SECTION mutex; int main2(int argc, char **argv); #endif @@ -378,8 +377,6 @@ int main(int argc, char **argv) { } int main2(int argc, char **argv) { - InitializeCriticalSection(&mutex); - EnterCriticalSection(&mutex); #endif char *priority = NULL;