From eeebff55c07c09c5bc5e62a7b2a21f68ecd1c802 Mon Sep 17 00:00:00 2001 From: Etienne Dechamps Date: Wed, 4 Nov 2015 19:07:14 +0000 Subject: [PATCH 1/3] Use a splay tree for node UDP addresses in order to avoid collisions. This commit replaces the node UDP address hash table "cache" with a full-blown splay tree, aligning it with node_tree (name-indexed) and node_id_tree (ID-indexed). I'm doing this for two reasons. The first reason is to make sure we don't suddenly degrade to O(n) performance when two "hot" nodes end up in the same hash table bucket (collision). The second, and most important, reason, has to do with the fact that the hash table that was being used overrides elements that collide. Indeed, it turns out that there is one scenario in which the contents of node_udp_cache has *correctness* implications, not just performance implications. This has to do with the way handle_incoming_vpn_data() is implemented. Assume the following topology: A <-> B <-> C Now let's consider the perspective of tincd running on B, and let's assume the following is true: - All nodes are using the 1.1 protocol with node IDs and relaying support. - Nodes A and C have UDP addresses that hash to the same value. - Node C "wins" in the node_udp_cache (i.e. it overwrites A in the cache). - Node A has a "dynamic" UDP address (i.e. an UDP address that has been detected dynamically and cannot be deduced from edge addresses). Then, before this commit, A would be unable to relay packets through B. This is because handle_incoming_vpn_data() will fall back to try_harder(), which won't be able to match any edge addresses, doesn't check the dynamic UDP addresses, and won't be able to match any keys because this is a relayed packet which is encrypted with C's key, not B's. As a result, tinc will fail to match the source of the packet and will drop the packet with a "Received UDP packet from unknown source" message. I have seen this happen in the wild; it is actually quite likely to occur when there are more than a handful of nodes because node_udp_cache only has 256 buckets, making collisions quite likely. This problem is quite severe because it can completely prevent all packet communication between nodes - indeed, if node A tries to initiate some communication with C, it will use relaying at first, until C responds and helps A establish direct communication with it (e.g. hole punching). If relaying is broken, C will not help establish direct communication, and as a result no packets can make it through at all. The bug can be reproduced fairly easily by reproducing the topology above while changing the (hardcoded) node_udp_cache size to 1 to force a collision. One will quickly observe various issues when trying to make A talk to C. Setting IndirectData on B will make the issue even more severe and prevent all communication. Arguably, another way to fix this problem is to make try_harder() compare the packet's source address to each node's dynamic UDP addresses. However, I do not like this solution because if two "hot" nodes are contending on the same hash bucket, try_harder() will be called very often and packet routing performance will degrade closer to O(N) (where N is the total number of nodes in the graph). Using a more appropriate data structure fixes the bug without introducing this performance problem. --- src/node.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/node.c b/src/node.c index fb4b7eb4..a571ae07 100644 --- a/src/node.c +++ b/src/node.c @@ -34,7 +34,7 @@ splay_tree_t *node_tree; static splay_tree_t *node_id_tree; -static hash_t *node_udp_cache; +static splay_tree_t *node_udp_tree; static hash_t *node_id_cache; node_t *myself; @@ -47,16 +47,23 @@ static int node_id_compare(const node_t *a, const node_t *b) { return memcmp(&a->id, &b->id, sizeof(node_id_t)); } +static int node_udp_compare(const node_t *a, const node_t *b) { + int result = sockaddrcmp(&a->address, &b->address); + if (result) + return result; + return (a->name && b->name) ? strcmp(a->name, b->name) : 0; +} + void init_nodes(void) { node_tree = splay_alloc_tree((splay_compare_t) node_compare, (splay_action_t) free_node); node_id_tree = splay_alloc_tree((splay_compare_t) node_id_compare, NULL); - node_udp_cache = hash_alloc(0x100, sizeof(sockaddr_t)); + node_udp_tree = splay_alloc_tree((splay_compare_t) node_udp_compare, NULL); node_id_cache = hash_alloc(0x100, sizeof(node_id_t)); } void exit_nodes(void) { hash_free(node_id_cache); - hash_free(node_udp_cache); + splay_delete_tree(node_udp_tree); splay_delete_tree(node_id_tree); splay_delete_tree(node_tree); } @@ -116,7 +123,7 @@ void node_add(node_t *n) { } void node_del(node_t *n) { - hash_delete(node_udp_cache, &n->address); + splay_delete(node_udp_tree, n); hash_delete(node_id_cache, &n->id); for splay_each(subnet_t, s, n->subnet_tree) @@ -150,7 +157,8 @@ node_t *lookup_node_id(const node_id_t *id) { } node_t *lookup_node_udp(const sockaddr_t *sa) { - return hash_search(node_udp_cache, sa); + node_t tmp = {.address = *sa}; + return splay_search(node_udp_tree, &tmp); } void update_node_udp(node_t *n, const sockaddr_t *sa) { @@ -159,7 +167,7 @@ void update_node_udp(node_t *n, const sockaddr_t *sa) { return; } - hash_delete(node_udp_cache, &n->address); + splay_delete(node_udp_tree, n); if(sa) { n->address = *sa; @@ -170,7 +178,7 @@ void update_node_udp(node_t *n, const sockaddr_t *sa) { break; } } - hash_insert(node_udp_cache, sa, n); + splay_insert(node_udp_tree, n); free(n->hostname); n->hostname = sockaddr2hostname(&n->address); logger(DEBUG_PROTOCOL, LOG_DEBUG, "UDP address of %s set to %s", n->name, n->hostname); From 684bd659ae0c6ca623422851c245188037658698 Mon Sep 17 00:00:00 2001 From: Etienne Dechamps Date: Wed, 4 Nov 2015 19:18:12 +0000 Subject: [PATCH 2/3] Revert "Cache node IDs in a hash table for faster lookups." This reverts commit c2319e90b16962fe899bc60abc8af0e2542bb176. As a general principle, I do not believe it is worthwhile to cache nodes. Sure, it brings lookup time down from O(log n) to O(1), but considering that the scalability target of tinc is around 1000 nodes and log2(1000) is 10, that looks like premature optimization; tree lookups should already be very fast. Therefore, I believe it makes sense to remove the cache as a code cleanup initiative. --- src/node.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/src/node.c b/src/node.c index a571ae07..bd94ed0b 100644 --- a/src/node.c +++ b/src/node.c @@ -35,7 +35,6 @@ splay_tree_t *node_tree; static splay_tree_t *node_id_tree; static splay_tree_t *node_udp_tree; -static hash_t *node_id_cache; node_t *myself; @@ -58,11 +57,9 @@ void init_nodes(void) { node_tree = splay_alloc_tree((splay_compare_t) node_compare, (splay_action_t) free_node); node_id_tree = splay_alloc_tree((splay_compare_t) node_id_compare, NULL); node_udp_tree = splay_alloc_tree((splay_compare_t) node_udp_compare, NULL); - node_id_cache = hash_alloc(0x100, sizeof(node_id_t)); } void exit_nodes(void) { - hash_free(node_id_cache); splay_delete_tree(node_udp_tree); splay_delete_tree(node_id_tree); splay_delete_tree(node_tree); @@ -124,7 +121,6 @@ void node_add(node_t *n) { void node_del(node_t *n) { splay_delete(node_udp_tree, n); - hash_delete(node_id_cache, &n->id); for splay_each(subnet_t, s, n->subnet_tree) subnet_del(n, s); @@ -145,15 +141,8 @@ node_t *lookup_node(char *name) { } node_t *lookup_node_id(const node_id_t *id) { - node_t *n = hash_search(node_id_cache, id); - if(!n) { - node_t tmp = {.id = *id}; - n = splay_search(node_id_tree, &tmp); - if(n) - hash_insert(node_id_cache, id, n); - } - - return n; + node_t n = {.id = *id}; + return splay_search(node_id_tree, &n); } node_t *lookup_node_udp(const sockaddr_t *sa) { From bdd84660c756437cf3bc8f64adf612055acc84ea Mon Sep 17 00:00:00 2001 From: Etienne Dechamps Date: Sat, 7 Nov 2015 11:04:13 +0000 Subject: [PATCH 3/3] Make sure the packet source MAC address is always set. When tinc is used in router mode with a TAP device, Ethernet (MAC) headers are not present in packets flowing over the VPN; it is the node's responsibility to fill out this header before handing the packet over to the TAP interface (which expects such headers). Currently, tinc fills out the destination MAC address of the packet (otherwise the host would not recognize the packets, and nothing would work), but it does not fill out the source MAC address. In practice this doesn't seem to cause any real issues (the host doesn't care about the source address), but it does look weird when looking at the packets with a sniffer, and it also result in the following valgrind warning: ==13651== Syscall param write(buf) points to uninitialised byte(s) ==13651== at 0x5C4B620: __write_nocancel (syscall-template.S:81) ==13651== by 0x1445AA: write_packet (device.c:183) ==13651== by 0x118C7C: send_packet (net_packet.c:1259) ==13651== by 0x12B70A: route_ipv4 (route.c:443) ==13651== by 0x12D5F8: route (route.c:971) ==13651== by 0x1152BC: receive_packet (net_packet.c:250) ==13651== by 0x117E1B: receive_sptps_record (net_packet.c:904) ==13651== by 0x1309A8: sptps_receive_data_datagram (sptps.c:488) ==13651== by 0x130A90: sptps_receive_data (sptps.c:508) ==13651== by 0x115569: receive_udppacket (net_packet.c:286) ==13651== by 0x119856: handle_incoming_vpn_data (net_packet.c:1499) ==13651== by 0x10F3DA: event_loop (event.c:287) ==13651== Address 0xffeffea3a is on thread 1's stack ==13651== in frame #6, created by receive_sptps_record (net_packet.c:821) ==13651== This commit fixes the issue by filling out the source MAC address. It is generated by negating the last byte of the device MAC address, which is consistent with what route_arp() does. In addition, this commit stops route_arp() from filling out the Ethernet header of the packet - this is the responsibility of send_packet(), not route(). --- src/net_packet.c | 6 +++++- src/route.c | 6 ++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/net_packet.c b/src/net_packet.c index fc5720a2..6f72e05b 100644 --- a/src/net_packet.c +++ b/src/net_packet.c @@ -1252,8 +1252,12 @@ void send_packet(node_t *n, vpn_packet_t *packet) { // If it's for myself, write it to the tun/tap device. if(n == myself) { - if(overwrite_mac) + if(overwrite_mac) { memcpy(DATA(packet), mymac.x, ETH_ALEN); + // Use an arbitrary fake source address. + memcpy(DATA(packet) + ETH_ALEN, DATA(packet), ETH_ALEN); + DATA(packet)[ETH_ALEN * 2 - 1] ^= 0xFF; + } n->out_packets++; n->out_bytes += packet->len; devops.write(packet); diff --git a/src/route.c b/src/route.c index 70c5806b..68a66492 100644 --- a/src/route.c +++ b/src/route.c @@ -784,15 +784,13 @@ static void route_arp(node_t *source, vpn_packet_t *packet) { if(subnet->owner == myself) return; /* silently ignore */ - memcpy(DATA(packet), DATA(packet) + ETH_ALEN, ETH_ALEN); /* copy destination address */ - DATA(packet)[ETH_ALEN * 2 - 1] ^= 0xFF; /* mangle source address so it looks like it's not from us */ - memcpy(&addr, arp.arp_tpa, sizeof addr); /* save protocol addr */ memcpy(arp.arp_tpa, arp.arp_spa, sizeof addr); /* swap destination and source protocol address */ memcpy(arp.arp_spa, &addr, sizeof addr); /* ... */ memcpy(arp.arp_tha, arp.arp_sha, ETH_ALEN); /* set target hard/proto addr */ - memcpy(arp.arp_sha, DATA(packet) + ETH_ALEN, ETH_ALEN); /* add fake source hard addr */ + memcpy(arp.arp_sha, DATA(packet) + ETH_ALEN, ETH_ALEN); /* set source hard/proto addr */ + arp.arp_sha[ETH_ALEN - 1] ^= 0xFF; /* for consistency with route_packet() */ arp.arp_op = htons(ARPOP_REPLY); /* Copy structs on stack back to packet */