Allow weight to be assigned to Subnets.

Tinc allows multiple nodes to own the same Subnet, but did not have a sensible
way to decide which one to send packets to. Tinc also did not check the
reachability of nodes when deciding where to route packets to, so it would not
automatically fail over to a reachable node.

Tinc now assigns a weight to each Subnet. The default weight is 10, with lower
weights having higher priority.  The Subnets are now internally sorted in the
same way as the kernel's routing table, and the Subnets are searched linearly,
skipping those of unreachable nodes. A small cache of recently used addresses
is used to speed up the lookup functions.
This commit is contained in:
Guus Sliepen 2009-03-05 13:34:13 +01:00
parent 76a1bcaffc
commit 5674bba5c5
5 changed files with 148 additions and 67 deletions

View file

@ -394,7 +394,7 @@ Either the PEM format is used, or exactly one of the above two options must be s
in each host configuration file, in each host configuration file,
if you want to be able to establish a connection with that host. if you want to be able to establish a connection with that host.
.It Va Subnet Li = Ar address Ns Op Li / Ns Ar prefixlength .It Va Subnet Li = Ar address Ns Op Li / Ns Ar prefixlength Ns Op Li # Ns Ar weight
The subnet which this tinc daemon will serve. The subnet which this tinc daemon will serve.
.Nm tinc .Nm tinc
tries to look up which other daemon it should send a packet to by searching the appropriate subnet. tries to look up which other daemon it should send a packet to by searching the appropriate subnet.
@ -416,6 +416,13 @@ Read a networking HOWTO/FAQ/guide if you don't understand this.
IPv6 subnets are notated like fec0:0:0:1:0:0:0:0/64. IPv6 subnets are notated like fec0:0:0:1:0:0:0:0/64.
MAC addresses are notated like 0:1a:2b:3c:4d:5e. MAC addresses are notated like 0:1a:2b:3c:4d:5e.
.Pp
A Subnet can be given a weight to indicate its priority over identical Subnets
owned by different nodes. The default weight is 10. Lower values indicate
higher priority. Packets will be sent to the node with the highest priority,
unless that node is not reachable, in which case the node with the next highest
priority will be tried, and so on.
.It Va TCPOnly Li = yes | no Pq no .It Va TCPOnly Li = yes | no Pq no
If this variable is set to yes, If this variable is set to yes,
then the packets are tunnelled over the TCP connection instead of a UDP connection. then the packets are tunnelled over the TCP connection instead of a UDP connection.

View file

@ -1015,7 +1015,7 @@ in each host configuration file, if you want to be able to establish a
connection with that host. connection with that host.
@cindex Subnet @cindex Subnet
@item Subnet = <@var{address}[/@var{prefixlength}]> @item Subnet = <@var{address}[/@var{prefixlength}][#@var{weight}]>
The subnet which this tinc daemon will serve. The subnet which this tinc daemon will serve.
Tinc tries to look up which other daemon it should send a packet to by searching the appropriate subnet. Tinc tries to look up which other daemon it should send a packet to by searching the appropriate subnet.
If the packet matches a subnet, If the packet matches a subnet,
@ -1039,6 +1039,12 @@ example: netmask 255.255.255.0 would become /24, 255.255.252.0 becomes
/22. This conforms to standard CIDR notation as described in /22. This conforms to standard CIDR notation as described in
@uref{ftp://ftp.isi.edu/in-notes/rfc1519.txt, RFC1519} @uref{ftp://ftp.isi.edu/in-notes/rfc1519.txt, RFC1519}
A Subnet can be given a weight to indicate its priority over identical Subnets
owned by different nodes. The default weight is 10. Lower values indicate
higher priority. Packets will be sent to the node with the highest priority,
unless that node is not reachable, in which case the node with the next highest
priority will be tried, and so on.
@cindex TCPonly @cindex TCPonly
@item TCPonly = <yes|no> (no) @item TCPonly = <yes|no> (no)
If this variable is set to yes, then the packets are tunnelled over a If this variable is set to yes, then the packets are tunnelled over a

View file

@ -313,6 +313,7 @@ void sssp_bfs(void)
void graph(void) void graph(void)
{ {
subnet_cache_flush();
sssp_bfs(); sssp_bfs();
mst_kruskal(); mst_kruskal();
graph_changed = true; graph_changed = true;

View file

@ -37,6 +37,23 @@
avl_tree_t *subnet_tree; avl_tree_t *subnet_tree;
/* Subnet lookup cache
 *
 * A two-entry cache per address family that remembers the most recent results
 * of lookup_subnet_ipv4() and lookup_subnet_ipv6(). All entries are
 * invalidated by subnet_cache_flush().
 */
/* Address that was looked up, one per slot */
static ipv4_t cache_ipv4_address[2];
/* Result of that lookup; NULL when no subnet matched the address */
static subnet_t *cache_ipv4_subnet[2];
/* Whether the slot currently holds a usable entry */
static bool cache_ipv4_valid[2];
/* Most recently written slot; toggled between 0 and 1 before each store */
static int cache_ipv4_slot;
/* Same layout as above, for IPv6 addresses */
static ipv6_t cache_ipv6_address[2];
static subnet_t *cache_ipv6_subnet[2];
static bool cache_ipv6_valid[2];
static int cache_ipv6_slot;
/* Invalidate every entry of the IPv4 and IPv6 subnet lookup caches, so that
 * the next lookup of any address searches the subnet tree again.
 *
 * Takes no arguments and returns nothing; only the per-slot valid flags are
 * cleared, the cached addresses and subnet pointers are left untouched.
 */
void subnet_cache_flush(void) {
	cache_ipv4_valid[0] = cache_ipv4_valid[1] = false;
	cache_ipv6_valid[0] = cache_ipv6_valid[1] = false;
}
/* Subnet comparison */ /* Subnet comparison */
static int subnet_compare_mac(const subnet_t *a, const subnet_t *b) static int subnet_compare_mac(const subnet_t *a, const subnet_t *b)
@ -45,6 +62,11 @@ static int subnet_compare_mac(const subnet_t *a, const subnet_t *b)
result = memcmp(&a->net.mac.address, &b->net.mac.address, sizeof(mac_t)); result = memcmp(&a->net.mac.address, &b->net.mac.address, sizeof(mac_t));
if(result)
return result;
result = a->weight - b->weight;
if(result || !a->owner || !b->owner) if(result || !a->owner || !b->owner)
return result; return result;
@ -55,12 +77,17 @@ static int subnet_compare_ipv4(const subnet_t *a, const subnet_t *b)
{ {
int result; int result;
result = memcmp(&a->net.ipv4.address, &b->net.ipv4.address, sizeof(ipv4_t)); result = b->net.ipv4.prefixlength - a->net.ipv4.prefixlength;
if(result) if(result)
return result; return result;
result = a->net.ipv4.prefixlength - b->net.ipv4.prefixlength; result = memcmp(&a->net.ipv4.address, &b->net.ipv4.address, sizeof(ipv4_t));
if(result)
return result;
result = a->weight - b->weight;
if(result || !a->owner || !b->owner) if(result || !a->owner || !b->owner)
return result; return result;
@ -72,12 +99,17 @@ static int subnet_compare_ipv6(const subnet_t *a, const subnet_t *b)
{ {
int result; int result;
result = b->net.ipv6.prefixlength - a->net.ipv6.prefixlength;
if(result)
return result;
result = memcmp(&a->net.ipv6.address, &b->net.ipv6.address, sizeof(ipv6_t)); result = memcmp(&a->net.ipv6.address, &b->net.ipv6.address, sizeof(ipv6_t));
if(result) if(result)
return result; return result;
result = a->net.ipv6.prefixlength - b->net.ipv6.prefixlength; result = a->weight - b->weight;
if(result || !a->owner || !b->owner) if(result || !a->owner || !b->owner)
return result; return result;
@ -118,6 +150,8 @@ void init_subnets(void)
cp(); cp();
subnet_tree = avl_alloc_tree((avl_compare_t) subnet_compare, (avl_action_t) free_subnet); subnet_tree = avl_alloc_tree((avl_compare_t) subnet_compare, (avl_action_t) free_subnet);
subnet_cache_flush();
} }
void exit_subnets(void) void exit_subnets(void)
@ -167,6 +201,8 @@ void subnet_add(node_t *n, subnet_t *subnet)
avl_insert(subnet_tree, subnet); avl_insert(subnet_tree, subnet);
avl_insert(n->subnet_tree, subnet); avl_insert(n->subnet_tree, subnet);
subnet_cache_flush();
} }
void subnet_del(node_t *n, subnet_t *subnet) void subnet_del(node_t *n, subnet_t *subnet)
@ -175,6 +211,8 @@ void subnet_del(node_t *n, subnet_t *subnet)
avl_delete(n->subnet_tree, subnet); avl_delete(n->subnet_tree, subnet);
avl_delete(subnet_tree, subnet); avl_delete(subnet_tree, subnet);
subnet_cache_flush();
} }
/* Ascii representation of subnets */ /* Ascii representation of subnets */
@ -183,16 +221,18 @@ bool str2net(subnet_t *subnet, const char *subnetstr)
{ {
int i, l; int i, l;
uint16_t x[8]; uint16_t x[8];
int weight = 10;
cp(); cp();
if(sscanf(subnetstr, "%hu.%hu.%hu.%hu/%d", if(sscanf(subnetstr, "%hu.%hu.%hu.%hu/%d#%d",
&x[0], &x[1], &x[2], &x[3], &l) == 5) { &x[0], &x[1], &x[2], &x[3], &l, &weight) >= 5) {
if(l < 0 || l > 32) if(l < 0 || l > 32)
return false; return false;
subnet->type = SUBNET_IPV4; subnet->type = SUBNET_IPV4;
subnet->net.ipv4.prefixlength = l; subnet->net.ipv4.prefixlength = l;
subnet->weight = weight;
for(i = 0; i < 4; i++) { for(i = 0; i < 4; i++) {
if(x[i] > 255) if(x[i] > 255)
@ -203,14 +243,15 @@ bool str2net(subnet_t *subnet, const char *subnetstr)
return true; return true;
} }
if(sscanf(subnetstr, "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx/%d", if(sscanf(subnetstr, "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx/%d#%d",
&x[0], &x[1], &x[2], &x[3], &x[4], &x[5], &x[6], &x[7], &x[0], &x[1], &x[2], &x[3], &x[4], &x[5], &x[6], &x[7],
&l) == 9) { &l, &weight) >= 9) {
if(l < 0 || l > 128) if(l < 0 || l > 128)
return false; return false;
subnet->type = SUBNET_IPV6; subnet->type = SUBNET_IPV6;
subnet->net.ipv6.prefixlength = l; subnet->net.ipv6.prefixlength = l;
subnet->weight = weight;
for(i = 0; i < 8; i++) for(i = 0; i < 8; i++)
subnet->net.ipv6.address.x[i] = htons(x[i]); subnet->net.ipv6.address.x[i] = htons(x[i]);
@ -218,9 +259,10 @@ bool str2net(subnet_t *subnet, const char *subnetstr)
return true; return true;
} }
if(sscanf(subnetstr, "%hu.%hu.%hu.%hu", &x[0], &x[1], &x[2], &x[3]) == 4) { if(sscanf(subnetstr, "%hu.%hu.%hu.%hu#%d", &x[0], &x[1], &x[2], &x[3], &weight) >= 4) {
subnet->type = SUBNET_IPV4; subnet->type = SUBNET_IPV4;
subnet->net.ipv4.prefixlength = 32; subnet->net.ipv4.prefixlength = 32;
subnet->weight = weight;
for(i = 0; i < 4; i++) { for(i = 0; i < 4; i++) {
if(x[i] > 255) if(x[i] > 255)
@ -231,10 +273,11 @@ bool str2net(subnet_t *subnet, const char *subnetstr)
return true; return true;
} }
if(sscanf(subnetstr, "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx", if(sscanf(subnetstr, "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx#%d",
&x[0], &x[1], &x[2], &x[3], &x[4], &x[5], &x[6], &x[7]) == 8) { &x[0], &x[1], &x[2], &x[3], &x[4], &x[5], &x[6], &x[7], &weight) >= 8) {
subnet->type = SUBNET_IPV6; subnet->type = SUBNET_IPV6;
subnet->net.ipv6.prefixlength = 128; subnet->net.ipv6.prefixlength = 128;
subnet->weight = weight;
for(i = 0; i < 8; i++) for(i = 0; i < 8; i++)
subnet->net.ipv6.address.x[i] = htons(x[i]); subnet->net.ipv6.address.x[i] = htons(x[i]);
@ -242,9 +285,10 @@ bool str2net(subnet_t *subnet, const char *subnetstr)
return true; return true;
} }
if(sscanf(subnetstr, "%hx:%hx:%hx:%hx:%hx:%hx", if(sscanf(subnetstr, "%hx:%hx:%hx:%hx:%hx:%hx#%d",
&x[0], &x[1], &x[2], &x[3], &x[4], &x[5]) == 6) { &x[0], &x[1], &x[2], &x[3], &x[4], &x[5], &weight) >= 6) {
subnet->type = SUBNET_MAC; subnet->type = SUBNET_MAC;
subnet->weight = weight;
for(i = 0; i < 6; i++) for(i = 0; i < 6; i++)
subnet->net.mac.address.x[i] = x[i]; subnet->net.mac.address.x[i] = x[i];
@ -266,24 +310,28 @@ bool net2str(char *netstr, int len, const subnet_t *subnet)
switch (subnet->type) { switch (subnet->type) {
case SUBNET_MAC: case SUBNET_MAC:
snprintf(netstr, len, "%hx:%hx:%hx:%hx:%hx:%hx", snprintf(netstr, len, "%hx:%hx:%hx:%hx:%hx:%hx#%d",
subnet->net.mac.address.x[0], subnet->net.mac.address.x[0],
subnet->net.mac.address.x[1], subnet->net.mac.address.x[1],
subnet->net.mac.address.x[2], subnet->net.mac.address.x[2],
subnet->net.mac.address.x[3], subnet->net.mac.address.x[3],
subnet->net.mac.address.x[4], subnet->net.mac.address.x[5]); subnet->net.mac.address.x[4],
subnet->net.mac.address.x[5],
subnet->weight);
break; break;
case SUBNET_IPV4: case SUBNET_IPV4:
snprintf(netstr, len, "%hu.%hu.%hu.%hu/%d", snprintf(netstr, len, "%hu.%hu.%hu.%hu/%d#%d",
subnet->net.ipv4.address.x[0], subnet->net.ipv4.address.x[0],
subnet->net.ipv4.address.x[1], subnet->net.ipv4.address.x[1],
subnet->net.ipv4.address.x[2], subnet->net.ipv4.address.x[2],
subnet->net.ipv4.address.x[3], subnet->net.ipv4.prefixlength); subnet->net.ipv4.address.x[3],
subnet->net.ipv4.prefixlength,
subnet->weight);
break; break;
case SUBNET_IPV6: case SUBNET_IPV6:
snprintf(netstr, len, "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx/%d", snprintf(netstr, len, "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx/%d#%d",
ntohs(subnet->net.ipv6.address.x[0]), ntohs(subnet->net.ipv6.address.x[0]),
ntohs(subnet->net.ipv6.address.x[1]), ntohs(subnet->net.ipv6.address.x[1]),
ntohs(subnet->net.ipv6.address.x[2]), ntohs(subnet->net.ipv6.address.x[2]),
@ -292,7 +340,8 @@ bool net2str(char *netstr, int len, const subnet_t *subnet)
ntohs(subnet->net.ipv6.address.x[5]), ntohs(subnet->net.ipv6.address.x[5]),
ntohs(subnet->net.ipv6.address.x[6]), ntohs(subnet->net.ipv6.address.x[6]),
ntohs(subnet->net.ipv6.address.x[7]), ntohs(subnet->net.ipv6.address.x[7]),
subnet->net.ipv6.prefixlength); subnet->net.ipv6.prefixlength,
subnet->weight);
break; break;
default: default:
@ -332,80 +381,96 @@ subnet_t *lookup_subnet_mac(const mac_t *address)
subnet_t *lookup_subnet_ipv4(const ipv4_t *address) subnet_t *lookup_subnet_ipv4(const ipv4_t *address)
{ {
subnet_t *p, subnet = {0}; subnet_t *p, *r = NULL, subnet = {0};
avl_node_t *n;
int i;
cp(); cp();
// Check if this address is cached
for(i = 0; i < 2; i++) {
if(!cache_ipv4_valid[i])
continue;
if(!memcmp(address, &cache_ipv4_address[i], sizeof *address))
return cache_ipv4_subnet[i];
}
// Search all subnets for a matching one
subnet.type = SUBNET_IPV4; subnet.type = SUBNET_IPV4;
subnet.net.ipv4.address = *address; subnet.net.ipv4.address = *address;
subnet.net.ipv4.prefixlength = 32; subnet.net.ipv4.prefixlength = 32;
subnet.owner = NULL; subnet.owner = NULL;
do { for(n = subnet_tree->head; n; n = n->next) {
/* Go find subnet */ p = n->data;
if(!p || p->type != subnet.type)
continue;
p = avl_search_closest_smaller(subnet_tree, &subnet); if(!maskcmp(address, &p->net.ipv4.address, p->net.ipv4.prefixlength)) {
r = p;
/* Check if the found subnet REALLY matches */ if(p->owner->status.reachable)
if(p) {
if(p->type != SUBNET_IPV4) {
p = NULL;
break; break;
}
if(!maskcmp(address, &p->net.ipv4.address, p->net.ipv4.prefixlength))
break;
else {
/* Otherwise, see if there is a bigger enclosing subnet */
subnet.net.ipv4.prefixlength = p->net.ipv4.prefixlength - 1;
if(subnet.net.ipv4.prefixlength < 0 || subnet.net.ipv4.prefixlength > 32)
return NULL;
maskcpy(&subnet.net.ipv4.address, &p->net.ipv4.address, subnet.net.ipv4.prefixlength, sizeof(ipv4_t));
}
} }
} while(p); }
return p; // Cache the result
cache_ipv4_slot = !cache_ipv4_slot;
memcpy(&cache_ipv4_address[cache_ipv4_slot], address, sizeof *address);
cache_ipv4_subnet[cache_ipv4_slot] = r;
cache_ipv4_valid[cache_ipv4_slot] = true;
return r;
} }
subnet_t *lookup_subnet_ipv6(const ipv6_t *address) subnet_t *lookup_subnet_ipv6(const ipv6_t *address)
{ {
subnet_t *p, subnet = {0}; subnet_t *p, *r = NULL, subnet = {0};
avl_node_t *n;
int i;
cp(); cp();
// Check if this address is cached
for(i = 0; i < 2; i++) {
if(!cache_ipv6_valid[i])
continue;
if(!memcmp(address, &cache_ipv6_address[i], sizeof *address))
return cache_ipv6_subnet[i];
}
// Search all subnets for a matching one
subnet.type = SUBNET_IPV6; subnet.type = SUBNET_IPV6;
subnet.net.ipv6.address = *address; subnet.net.ipv6.address = *address;
subnet.net.ipv6.prefixlength = 128; subnet.net.ipv6.prefixlength = 128;
subnet.owner = NULL; subnet.owner = NULL;
do { for(n = subnet_tree->head; n; n = n->next) {
/* Go find subnet */ p = n->data;
if(!p || p->type != subnet.type)
continue;
p = avl_search_closest_smaller(subnet_tree, &subnet); if(!maskcmp(address, &p->net.ipv6.address, p->net.ipv6.prefixlength)) {
r = p;
/* Check if the found subnet REALLY matches */ if(p->owner->status.reachable)
if(p) {
if(p->type != SUBNET_IPV6)
return NULL;
if(!maskcmp(address, &p->net.ipv6.address, p->net.ipv6.prefixlength))
break; break;
else {
/* Otherwise, see if there is a bigger enclosing subnet */
subnet.net.ipv6.prefixlength = p->net.ipv6.prefixlength - 1;
if(subnet.net.ipv6.prefixlength < 0 || subnet.net.ipv6.prefixlength > 128)
return NULL;
maskcpy(&subnet.net.ipv6.address, &p->net.ipv6.address, subnet.net.ipv6.prefixlength, sizeof(ipv6_t));
}
} }
} while(p); }
return p; // Cache the result
cache_ipv6_slot = !cache_ipv6_slot;
memcpy(&cache_ipv6_address[cache_ipv6_slot], address, sizeof *address);
cache_ipv6_subnet[cache_ipv6_slot] = r;
cache_ipv6_valid[cache_ipv6_slot] = true;
return r;
} }
void subnet_update(node_t *owner, subnet_t *subnet, bool up) { void subnet_update(node_t *owner, subnet_t *subnet, bool up) {

View file

@ -53,6 +53,7 @@ typedef struct subnet_t {
subnet_type_t type; /* subnet type (IPv4? IPv6? MAC? something even weirder?) */ subnet_type_t type; /* subnet type (IPv4? IPv6? MAC? something even weirder?) */
time_t expires; /* expiry time */ time_t expires; /* expiry time */
int weight; /* weight (lower value is higher priority) */
/* And now for the actual subnet: */ /* And now for the actual subnet: */
@ -82,5 +83,6 @@ extern subnet_t *lookup_subnet_mac(const mac_t *);
extern subnet_t *lookup_subnet_ipv4(const ipv4_t *); extern subnet_t *lookup_subnet_ipv4(const ipv4_t *);
extern subnet_t *lookup_subnet_ipv6(const ipv6_t *); extern subnet_t *lookup_subnet_ipv6(const ipv6_t *);
extern void dump_subnets(void); extern void dump_subnets(void);
extern void subnet_cache_flush(void);
#endif /* __TINC_SUBNET_H__ */ #endif /* __TINC_SUBNET_H__ */