7aca0be0f9
The definition of the splay_each() macro is somewhat complicated for
syntactic reasons. Here's what it does in a more readable way:
for (splay_node_t* node = tree->head; node;) {
type* item = node->data;
splay_node_t* next = node->next;
// RUN USER BLOCK with (item)
node = next;
}
list_each() works in the same way. Since node->next is saved before the
user block runs, this construct supports removing the current item from
within the user block. However, what it does *not* support is removing
*other items* from within the user block, especially the next item.
Indeed, that will invalidate the saved next pointer in the above loop and
therefore result in an invalid pointer dereference.
Unfortunately, there is at least one code path where that unsupported
operation happens. It is located in ack_h(), where the authentication
protocol code detects a double connection (i.e. being connected to
another node twice). Running in the context of a socket read event, this
code will happily terminate the *other* metaconnection, resulting in its
socket being removed from the io tree. If, by misfortune, this other
metaconnection happened to have the next socket FD number (which is
quite possible due to FD reuse - albeit unlikely), and was part of the
io tree (which is quite likely because if that connection is stuck, it
will most likely have pending writes) then this will result in the next
pending io item being destroyed. Invalid pointer dereference ensues.
I did a quick audit of other uses of splay_each() and list_each() and
I believe this is the only scenario in which this "next pointer
invalidation" problem can occur in practice. While this bug has been
there since at least 6bc5d626a8
(November
2012), if not sooner, it happens quite rarely due to the very specific
set of conditions required to trigger it. Nevertheless, it does manage
to crash my central production nodes every other week or so.
373 lines
8.6 KiB
C
373 lines
8.6 KiB
C
/*
|
|
event.c -- I/O, timeout and signal event handling
|
|
Copyright (C) 2012-2013 Guus Sliepen <guus@tinc-vpn.org>
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License along
|
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
*/
|
|
|
|
#include "system.h"

#include <errno.h>

#include "dropin.h"
#include "event.h"
#include "net.h"
#include "utils.h"
#include "xalloc.h"
|
|
|
|
/* Cached current time; refreshed by get_time_remaining() and lazily
   initialised by timeout_set() when it is still zero. */
struct timeval now;

#ifndef HAVE_MINGW
/* Descriptor sets handed (as copies) to select() by event_loop();
   maintained by io_set(). */
static fd_set readfds;
static fd_set writefds;
#else
/* Winsock network events that are reported to callbacks as IO_READ. */
static const long READ_EVENTS = FD_READ | FD_ACCEPT | FD_CLOSE;
/* Winsock network events that are reported to callbacks as IO_WRITE. */
static const long WRITE_EVENTS = FD_WRITE | FD_CONNECT;
/* Number of registered io_t handlers; incremented by io_add() and
   decremented by io_del(). */
static DWORD event_count = 0;
#endif

/* True while event_loop() should keep iterating; cleared by event_exit(). */
static bool running;
|
|
|
|
static int io_compare(const io_t *a, const io_t *b) {
|
|
#ifndef HAVE_MINGW
|
|
return a->fd - b->fd;
|
|
#else
|
|
return a->event - b->event;
|
|
#endif
|
|
}
|
|
|
|
static int timeout_compare(const timeout_t *a, const timeout_t *b) {
|
|
struct timeval diff;
|
|
timersub(&a->tv, &b->tv, &diff);
|
|
if(diff.tv_sec < 0)
|
|
return -1;
|
|
if(diff.tv_sec > 0)
|
|
return 1;
|
|
if(diff.tv_usec < 0)
|
|
return -1;
|
|
if(diff.tv_usec > 0)
|
|
return 1;
|
|
if(a < b)
|
|
return -1;
|
|
if(a > b)
|
|
return 1;
|
|
return 0;
|
|
}
|
|
|
|
static splay_tree_t io_tree = {.compare = (splay_compare_t)io_compare};
|
|
static splay_tree_t timeout_tree = {.compare = (splay_compare_t)timeout_compare};
|
|
|
|
/*
 * Register an I/O handler with the event loop.
 *
 * io    - caller-owned handler storage. If io->cb is already set the handler
 *         is considered registered and the call is a no-op.
 * cb    - callback the event loop invokes as cb(data, flags).
 * data  - opaque pointer passed back to cb.
 * fd    - descriptor/socket to watch, or -1 if there is none.
 * flags - initial interest mask (IO_READ and/or IO_WRITE), applied via io_set().
 *
 * Aborts the process if the node cannot be inserted into io_tree or, on
 * Windows, if no event object can be created for the socket.
 */
void io_add(io_t *io, io_cb_t cb, void *data, int fd, int flags) {
	if(io->cb) {
		return;
	}

	io->cb = cb;
	io->data = data;
	io->node.data = io;
	io->fd = fd;

#ifdef HAVE_MINGW
	/* Handlers without a socket (fd == -1) keep whatever event the caller
	   stored in io->event beforehand; see io_add_event(). */
	if(io->fd != -1) {
		io->event = WSACreateEvent();

		if(io->event == WSA_INVALID_EVENT) {
			abort();
		}
	}

	event_count++;
#endif

	io_set(io, flags);

	if(!splay_insert_node(&io_tree, &io->node)) {
		abort();
	}
}
|
|
|
|
#ifdef HAVE_MINGW
/*
 * Register a handler that is driven by a caller-supplied Windows event
 * object rather than by a socket.
 *
 * The handler is registered through io_add() with fd -1 and an empty
 * interest mask. Note that io->event is overwritten even when io_add()
 * turns out to be a no-op because io is already registered.
 */
void io_add_event(io_t *io, io_cb_t cb, void *data, WSAEVENT event) {
	const int no_fd = -1;
	const int no_flags = 0;

	io->event = event;
	io_add(io, cb, data, no_fd, no_flags);
}
#endif
|
|
|
|
/*
 * Change the set of events a handler is interested in.
 *
 * io    - the handler to update.
 * flags - new interest mask (IO_READ and/or IO_WRITE, or 0 for none).
 *
 * Does nothing if the mask is unchanged. Handlers without a descriptor
 * (fd == -1) only have the mask recorded. On Windows the process aborts if
 * WSAEventSelect() fails.
 */
void io_set(io_t *io, int flags) {
	if(io->flags == flags) {
		return;
	}

	io->flags = flags;

	if(io->fd == -1) {
		return;
	}

#ifndef HAVE_MINGW
	const bool want_read = (flags & IO_READ) != 0;
	const bool want_write = (flags & IO_WRITE) != 0;

	if(want_read) {
		FD_SET(io->fd, &readfds);
	} else {
		FD_CLR(io->fd, &readfds);
	}

	if(want_write) {
		FD_SET(io->fd, &writefds);
	} else {
		FD_CLR(io->fd, &writefds);
	}

#else
	/* Translate the interest mask into the matching Winsock network events. */
	long events = 0;

	if(flags & IO_WRITE) {
		events |= WRITE_EVENTS;
	}

	if(flags & IO_READ) {
		events |= READ_EVENTS;
	}

	if(WSAEventSelect(io->fd, io->event, events) != 0) {
		abort();
	}

#endif
}
|
|
|
|
/*
 * Unregister an I/O handler.
 *
 * A handler whose cb is NULL is treated as not registered and left alone.
 * Otherwise its interest mask is cleared, it is unlinked from io_tree and
 * io->cb is reset to NULL so that it can be registered again later. On
 * Windows the event object created by io_add() is closed as well (aborting
 * on failure); events supplied through io_add_event() are not closed.
 */
void io_del(io_t *io) {
	if(!io->cb) {
		return;
	}

	io_set(io, 0);

#ifdef HAVE_MINGW

	if(io->fd != -1) {
		if(WSACloseEvent(io->event) == FALSE) {
			abort();
		}
	}

	event_count--;
#endif

	splay_unlink_node(&io_tree, &io->node);
	io->cb = NULL;
}
|
|
|
|
/*
 * Initialise a timeout and arm it.
 *
 * timeout - caller-owned timeout storage.
 * cb      - callback invoked as cb(data) once the timeout has expired.
 * data    - opaque pointer passed back to cb.
 * tv      - delay relative to the current time; see timeout_set().
 */
void timeout_add(timeout_t *timeout, timeout_cb_t cb, void *data, struct timeval *tv) {
	timeout->node.data = timeout;
	timeout->data = data;
	timeout->cb = cb;

	timeout_set(timeout, tv);
}
|
|
|
|
/*
 * (Re-)arm a timeout so that it expires tv from now.
 *
 * A timeout whose expiry time is non-zero is treated as currently linked in
 * timeout_tree and is unlinked first. The new expiry time is computed from
 * the cached `now`, which is sampled here if it has not been set yet.
 * Aborts if the node cannot be inserted into timeout_tree.
 */
void timeout_set(timeout_t *timeout, struct timeval *tv) {
	if(now.tv_sec == 0) {
		gettimeofday(&now, NULL);
	}

	if(timerisset(&timeout->tv)) {
		splay_unlink_node(&timeout_tree, &timeout->node);
	}

	timeradd(&now, tv, &timeout->tv);

	if(!splay_insert_node(&timeout_tree, &timeout->node)) {
		abort();
	}
}
|
|
|
|
/*
 * Disarm a timeout.
 *
 * A timeout whose cb is NULL is treated as not armed and left alone.
 * Otherwise it is unlinked from timeout_tree, and both its callback and its
 * expiry time are cleared; timeout_set() relies on the zeroed expiry time to
 * know that the node is no longer linked.
 */
void timeout_del(timeout_t *timeout) {
	if(!timeout->cb) {
		return;
	}

	splay_unlink_node(&timeout_tree, &timeout->node);

	timeout->cb = NULL;
	timeout->tv.tv_sec = 0;
	timeout->tv.tv_usec = 0;
}
|
|
|
|
#ifndef HAVE_MINGW
|
|
static int signal_compare(const signal_t *a, const signal_t *b) {
|
|
return a->signum - b->signum;
|
|
}
|
|
|
|
static io_t signalio;
|
|
static int pipefd[2] = {-1, -1};
|
|
static splay_tree_t signal_tree = {.compare = (splay_compare_t)signal_compare};
|
|
|
|
static void signal_handler(int signum) {
|
|
unsigned char num = signum;
|
|
write(pipefd[1], &num, 1);
|
|
}
|
|
|
|
static void signalio_handler(void *data, int flags) {
|
|
unsigned char signum;
|
|
if(read(pipefd[0], &signum, 1) != 1)
|
|
return;
|
|
|
|
signal_t *sig = splay_search(&signal_tree, &((signal_t){.signum = signum}));
|
|
if(sig)
|
|
sig->cb(sig->data);
|
|
}
|
|
|
|
static void pipe_init(void) {
|
|
if(!pipe(pipefd))
|
|
io_add(&signalio, signalio_handler, NULL, pipefd[0], IO_READ);
|
|
}
|
|
|
|
/*
 * Register a callback for a signal.
 *
 * sig    - caller-owned handler storage. If sig->cb is already set the
 *          handler is considered registered and the call is a no-op.
 * cb     - callback invoked from the event loop as cb(data).
 * data   - opaque pointer passed back to cb.
 * signum - signal number to handle.
 *
 * The signal pipe is created on first use. Aborts if the handler cannot be
 * inserted into signal_tree.
 */
void signal_add(signal_t *sig, signal_cb_t cb, void *data, int signum) {
	if(sig->cb) {
		return;
	}

	sig->signum = signum;
	sig->data = data;
	sig->cb = cb;
	sig->node.data = sig;

	/* pipefd[0] stays -1 until the pipe has been created successfully. */
	if(pipefd[0] == -1) {
		pipe_init();
	}

	signal(sig->signum, signal_handler);

	if(!splay_insert_node(&signal_tree, &sig->node)) {
		abort();
	}
}
|
|
|
|
/*
 * Unregister a signal callback.
 *
 * A handler whose cb is NULL is treated as not registered and left alone.
 * Otherwise the signal's disposition is reset to SIG_DFL, the handler is
 * unlinked from signal_tree and sig->cb is cleared.
 */
void signal_del(signal_t *sig) {
	if(!sig->cb) {
		return;
	}

	signal(sig->signum, SIG_DFL);
	splay_unlink_node(&signal_tree, &sig->node);

	sig->cb = NULL;
}
|
|
#endif
|
|
|
|
static struct timeval * get_time_remaining(struct timeval *diff) {
|
|
gettimeofday(&now, NULL);
|
|
struct timeval *tv = NULL;
|
|
|
|
while(timeout_tree.head) {
|
|
timeout_t *timeout = timeout_tree.head->data;
|
|
timersub(&timeout->tv, &now, diff);
|
|
|
|
if(diff->tv_sec < 0) {
|
|
timeout->cb(timeout->data);
|
|
if(timercmp(&timeout->tv, &now, <))
|
|
timeout_del(timeout);
|
|
} else {
|
|
tv = diff;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return tv;
|
|
}
|
|
|
|
bool event_loop(void) {
|
|
running = true;
|
|
|
|
#ifndef HAVE_MINGW
|
|
fd_set readable;
|
|
fd_set writable;
|
|
|
|
while(running) {
|
|
struct timeval diff;
|
|
struct timeval *tv = get_time_remaining(&diff);
|
|
memcpy(&readable, &readfds, sizeof readable);
|
|
memcpy(&writable, &writefds, sizeof writable);
|
|
|
|
int fds = 0;
|
|
|
|
if(io_tree.tail) {
|
|
io_t *last = io_tree.tail->data;
|
|
fds = last->fd + 1;
|
|
}
|
|
|
|
int n = select(fds, &readable, &writable, NULL, tv);
|
|
|
|
if(n < 0) {
|
|
if(sockwouldblock(sockerrno))
|
|
continue;
|
|
else
|
|
return false;
|
|
}
|
|
|
|
if(!n)
|
|
continue;
|
|
|
|
for splay_each(io_t, io, &io_tree) {
|
|
if(FD_ISSET(io->fd, &writable))
|
|
io->cb(io->data, IO_WRITE);
|
|
else if(FD_ISSET(io->fd, &readable))
|
|
io->cb(io->data, IO_READ);
|
|
else
|
|
continue;
|
|
|
|
/*
|
|
There are scenarios in which the callback will remove another io_t from the tree
|
|
(e.g. closing a double connection). Since splay_each does not support that, we
|
|
need to exit the loop now. That's okay, since any remaining events will get picked
|
|
up by the next select() call.
|
|
*/
|
|
break;
|
|
}
|
|
}
|
|
#else
|
|
while (running) {
|
|
struct timeval diff;
|
|
struct timeval *tv = get_time_remaining(&diff);
|
|
DWORD timeout_ms = tv ? (tv->tv_sec * 1000 + tv->tv_usec / 1000 + 1) : WSA_INFINITE;
|
|
|
|
if (!event_count) {
|
|
Sleep(timeout_ms);
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
For some reason, Microsoft decided to make the FD_WRITE event edge-triggered instead of level-triggered,
|
|
which is the opposite of what select() does. In practice, that means that if a FD_WRITE event triggers,
|
|
it will never trigger again until a send() returns EWOULDBLOCK. Since the semantics of this event loop
|
|
is that write events are level-triggered (i.e. they continue firing until the socket is full), we need
|
|
to emulate these semantics by making sure we fire each IO_WRITE that is still writeable.
|
|
|
|
Note that technically FD_CLOSE has the same problem, but it's okay because user code does not rely on
|
|
this event being fired again if ignored.
|
|
*/
|
|
io_t* writeable_io = NULL;
|
|
for splay_each(io_t, io, &io_tree)
|
|
if (io->flags & IO_WRITE && send(io->fd, NULL, 0, 0) == 0) {
|
|
writeable_io = io;
|
|
break;
|
|
}
|
|
if (writeable_io) {
|
|
writeable_io->cb(writeable_io->data, IO_WRITE);
|
|
continue;
|
|
}
|
|
|
|
WSAEVENT* events = xmalloc(event_count * sizeof(*events));
|
|
DWORD event_index = 0;
|
|
for splay_each(io_t, io, &io_tree) {
|
|
events[event_index] = io->event;
|
|
event_index++;
|
|
}
|
|
|
|
DWORD result = WSAWaitForMultipleEvents(event_count, events, FALSE, timeout_ms, FALSE);
|
|
|
|
WSAEVENT event;
|
|
if (result >= WSA_WAIT_EVENT_0 && result < WSA_WAIT_EVENT_0 + event_count)
|
|
event = events[result - WSA_WAIT_EVENT_0];
|
|
free(events);
|
|
if (result == WSA_WAIT_TIMEOUT)
|
|
continue;
|
|
if (result < WSA_WAIT_EVENT_0 || result >= WSA_WAIT_EVENT_0 + event_count)
|
|
return false;
|
|
|
|
io_t *io = splay_search(&io_tree, &((io_t){.event = event}));
|
|
if (!io)
|
|
abort();
|
|
|
|
if (io->fd == -1) {
|
|
io->cb(io->data, 0);
|
|
} else {
|
|
WSANETWORKEVENTS network_events;
|
|
if (WSAEnumNetworkEvents(io->fd, io->event, &network_events) != 0)
|
|
return false;
|
|
if (network_events.lNetworkEvents & WRITE_EVENTS)
|
|
io->cb(io->data, IO_WRITE);
|
|
if (network_events.lNetworkEvents & READ_EVENTS)
|
|
io->cb(io->data, IO_READ);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
return true;
|
|
}
|
|
|
|
void event_exit(void) {
|
|
running = false;
|
|
}
|