From b1421b919090351e885ed3d06df67fb2eb69e765 Mon Sep 17 00:00:00 2001 From: Etienne Dechamps Date: Sun, 8 Mar 2015 18:54:50 +0000 Subject: [PATCH] Add MTU_INFO protocol message. In this commit, nodes use MTU_INFO messages to provide MTU information. The issue this code is meant to address is the non-trivial problem of finding the proper MTU when UDP SPTPS relays are involved. Currently, tinc has no idea what the MTU looks like beyond the first relay, and will arbitrarily use the first relay's MTU as the limit. This will fail miserably if the MTU decreases after the first relay, forcing relays to fall back to TCP. More generally, one should keep in mind that relay paths can be arbitrarily complex, resulting in packets taking "epic journeys" through the graph, switching back and forth between UDP (with variable MTUs) and TCP multiple times along the path. A solution that was considered consists in sending standard MTU probes through the relays. This is inefficient (if there are 3 nodes on one side of relay and 3 nodes on the other side, we end up with 3*3=9 MTU discoveries taking place at the same time, while technically only 3+3=6 are needed) and would involve eyebrow-raising behaviors such as probes being sent over TCP. This commit implements an alternative solution, which consists in the packet receiver sending MTU_INFO messages to the packet sender. The message contains an MTU value which is set to maximum when the message is originally sent. The message gets altered as it travels through the metagraph, such that when the message arrives to the destination, the MTU value contained in the message can be used to send packets while making sure no relays will be forced to fall back to TCP to deliver them. The operating principles behind such a protocol message are similar to how the UDP_INFO message works, but there is a key difference that prevents us from simply reusing the same message: the UDP_INFO message only cares about relay-to-relay links (i.e. it is sent between static relays and the information it contains only makes sense between two adjacent static relays), while the MTU_INFO cares about the end-to-end MTU, including the entire relay path. Therefore, UDP_INFO messages stop when they encounter static relays, while MTU_INFO messages don't stop until they get to the original packet sender. Note that, technically, the MTU that is obtained through this mechanism can be slightly pessimistic, because it can be lowered by an intermediate node that is not being used as a relay. Since nodes have no way of knowing whether they'll be used as dynamic relays or not (and have no say in the matter), this is not a trivial problem. That said, this is highly unlikely to result in noticeable issues in realistic scenarios. --- src/net_packet.c | 10 +++-- src/protocol.c | 4 +- src/protocol.h | 6 ++- src/protocol_key.c | 4 ++ src/protocol_misc.c | 89 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 106 insertions(+), 7 deletions(-) diff --git a/src/net_packet.c b/src/net_packet.c index 40cb2bac..2e47df40 100644 --- a/src/net_packet.c +++ b/src/net_packet.c @@ -687,9 +687,7 @@ static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const void bool relay_supported = (relay->options >> 24) >= 4; bool tcponly = (myself->options | relay->options) & OPTION_TCPONLY; - /* Send it via TCP if it is a handshake packet, TCPOnly is in use, this is a relay packet that the other node cannot understand, or this packet is larger than the MTU. - TODO: When relaying, the original sender does not know the end-to-end PMTU (it only knows the PMTU of the first hop). - This can lead to scenarios where large packets are sent over UDP to relay, but then relay has no choice but fall back to TCP. */ + /* Send it via TCP if it is a handshake packet, TCPOnly is in use, this is a relay packet that the other node cannot understand, or this packet is larger than the MTU. */ if(type == SPTPS_HANDSHAKE || tcponly || (!direct && !relay_supported) || (type != PKT_PROBE && (len - SPTPS_DATAGRAM_OVERHEAD) > relay->minmtu)) { char buf[len * 4 / 3 + 5]; @@ -1422,6 +1420,12 @@ skip_harder: n->sock = ls - listen_socket; if(direct && sockaddrcmp(&addr, &n->address)) update_node_udp(n, &addr); + + /* If the packet went through a relay, help the sender find the appropriate MTU + through the relay path. */ + + if(!direct) + send_mtu_info(myself, n, MTU); } void handle_device_data(void *data, int flags) { diff --git a/src/protocol.c b/src/protocol.c index 0ba5c0a4..4f7e6697 100644 --- a/src/protocol.c +++ b/src/protocol.c @@ -42,7 +42,7 @@ static bool (*request_handlers[])(connection_t *, const char *) = { add_edge_h, del_edge_h, key_changed_h, req_key_h, ans_key_h, tcppacket_h, control_h, NULL, NULL, NULL, /* Not "real" requests (yet) */ - udp_info_h, + udp_info_h, mtu_info_h, }; /* Request names */ @@ -53,7 +53,7 @@ static char (*request_name[]) = { "PING", "PONG", "ADD_SUBNET", "DEL_SUBNET", "ADD_EDGE", "DEL_EDGE", "KEY_CHANGED", "REQ_KEY", "ANS_KEY", "PACKET", "CONTROL", - "REQ_PUBKEY", "ANS_PUBKEY", "REQ_SPTPS", "UDP_INFO", + "REQ_PUBKEY", "ANS_PUBKEY", "REQ_SPTPS", "UDP_INFO", "MTU_INFO", }; static splay_tree_t *past_request_tree; diff --git a/src/protocol.h b/src/protocol.h index e4978f4e..c6a1485e 100644 --- a/src/protocol.h +++ b/src/protocol.h @@ -26,7 +26,7 @@ /* Protocol version. Different major versions are incompatible. */ #define PROT_MAJOR 17 -#define PROT_MINOR 5 /* Should not exceed 255! */ +#define PROT_MINOR 6 /* Should not exceed 255! */ /* Silly Windows */ @@ -49,7 +49,7 @@ typedef enum request_t { CONTROL, REQ_PUBKEY, ANS_PUBKEY, REQ_SPTPS, - UDP_INFO, + UDP_INFO, MTU_INFO, LAST /* Guardian for the highest request number */ } request_t; @@ -109,6 +109,7 @@ extern bool send_req_key(struct node_t *); extern bool send_ans_key(struct node_t *); extern bool send_tcppacket(struct connection_t *, const struct vpn_packet_t *); extern bool send_udp_info(struct node_t *, struct node_t *); +extern bool send_mtu_info(struct node_t *, struct node_t *, int); /* Request handlers */ @@ -132,5 +133,6 @@ extern bool ans_key_h(struct connection_t *, const char *); extern bool tcppacket_h(struct connection_t *, const char *); extern bool control_h(struct connection_t *, const char *); extern bool udp_info_h(struct connection_t *, const char *); +extern bool mtu_info_h(struct connection_t *, const char *); #endif /* __TINC_PROTOCOL_H__ */ diff --git a/src/protocol_key.c b/src/protocol_key.c index c46c14cf..8a12b568 100644 --- a/src/protocol_key.c +++ b/src/protocol_key.c @@ -178,6 +178,7 @@ static bool req_key_ext_h(connection_t *c, const char *request, node_t *from, in from->last_req_key = now.tv_sec; sptps_start(&from->sptps, from, false, true, myself->connection->ecdsa, from->ecdsa, label, sizeof label, send_sptps_data, receive_sptps_record); sptps_receive_data(&from->sptps, buf, len); + send_mtu_info(myself, from, MTU); return true; } @@ -194,6 +195,7 @@ static bool req_key_ext_h(connection_t *c, const char *request, node_t *from, in return true; } sptps_receive_data(&from->sptps, buf, len); + send_mtu_info(myself, from, MTU); return true; } @@ -415,6 +417,8 @@ bool ans_key_h(connection_t *c, const char *request) { } } + send_mtu_info(myself, from, MTU); + return true; } diff --git a/src/protocol_misc.c b/src/protocol_misc.c index b2bc40ee..5fde8332 100644 --- a/src/protocol_misc.c +++ b/src/protocol_misc.c @@ -237,3 +237,92 @@ bool udp_info_h(connection_t *c, const char* request) { return send_udp_info(from, to); } + +/* Transmitting MTU information */ + +bool send_mtu_info(node_t *from, node_t *to, int mtu) { + /* Skip cases where sending MTU info messages doesn't make sense. + This is done here in order to avoid repeating the same logic in multiple callsites. */ + + if(to == myself) + return true; + + if(!to->status.reachable) + return true; + + if(from == myself && to->connection) + return true; + + if((to->nexthop->options >> 24) < 6) + return true; + + /* We will send the passed-in MTU value, unless we believe ours is better. */ + + node_t *via = (from->via == myself) ? from->nexthop : from->via; + if(from->minmtu == from->maxmtu && from->via == myself) { + /* We have a direct measurement. Override the value entirely. + Note that we only do that if we are sitting as a static relay in the path; + otherwise, we can't guarantee packets will flow through us, and increasing + MTU could therefore end up being too optimistic. */ + mtu = from->minmtu; + } else if(via->minmtu == via->maxmtu) { + /* Static relay. Ensure packets will make it through the entire relay path. */ + mtu = MIN(mtu, via->minmtu); + } else if(via->nexthop->minmtu == via->nexthop->maxmtu) { + /* Dynamic relay. Ensure packets will make it through the entire relay path. */ + mtu = MIN(mtu, via->nexthop->minmtu); + } + + /* If none of the conditions above match in the steady state, it means we're using TCP, + so the MTU is irrelevant. That said, it is still important to honor the MTU that was passed in, + because other parts of the relay path might be able to use UDP, which means they care about the MTU. */ + + return send_request(to->nexthop->connection, "%d %s %s %d", MTU_INFO, from->name, to->name, mtu); +} + +bool mtu_info_h(connection_t *c, const char* request) { + char from_name[MAX_STRING_SIZE]; + char to_name[MAX_STRING_SIZE]; + int mtu; + + if(sscanf(request, "%*d "MAX_STRING" "MAX_STRING" %d", from_name, to_name, &mtu) != 3) { + logger(DEBUG_ALWAYS, LOG_ERR, "Got bad %s from %s (%s)", "MTU_INFO", c->name, c->hostname); + return false; + } + + if(mtu < 512) { + logger(DEBUG_ALWAYS, LOG_ERR, "Got bad %s from %s (%s): %s", "MTU_INFO", c->name, c->hostname, "invalid MTU"); + return false; + } + + mtu = MIN(mtu, MTU); + + if(!check_id(from_name) || !check_id(to_name)) { + logger(DEBUG_ALWAYS, LOG_ERR, "Got bad %s from %s (%s): %s", "MTU_INFO", c->name, c->hostname, "invalid name"); + return false; + } + + node_t *from = lookup_node(from_name); + if(!from) { + logger(DEBUG_ALWAYS, LOG_ERR, "Got %s from %s (%s) origin %s which does not exist in our connection list", "MTU_INFO", c->name, c->hostname, from_name); + return true; + } + + /* If we don't know the current MTU for that node, use the one we received. + Even if we're about to make our own measurements, the value we got from downstream nodes should be pretty close + so it's a good idea to use it in the mean time. */ + if(from->mtu != mtu && from->minmtu != from->maxmtu) { + logger(DEBUG_TRAFFIC, LOG_INFO, "Using provisional MTU %d for node %s (%s)", mtu, from->name, from->hostname); + from->mtu = mtu; + } + + node_t *to = lookup_node(to_name); + if(!to) { + logger(DEBUG_ALWAYS, LOG_ERR, "Got %s from %s (%s) destination %s which does not exist in our connection list", "MTU_INFO", c->name, c->hostname, to_name); + return true; + } + + /* Continue passing the MTU value (or a better one if we have it) up the chain. */ + + return send_mtu_info(from, to, mtu); +}