f26efb65f2
The conn.Bind UDP sockets' send and receive buffers are now being sized to 7MB, whereas they were previously inheriting the system defaults. The system defaults are considerably small and can result in dropped packets on high speed links. By increasing the size of these buffers we are able to achieve higher throughput in the aforementioned case. The iperf3 results below demonstrate the effect of this commit between two Linux computers with 32-core Xeon Platinum CPUs @ 2.9Ghz. There is roughly ~125us of round trip latency between them. The first result is from commit 792b49c which uses the system defaults, e.g. net.core.{r,w}mem_max = 212992. The TCP retransmits are correlated with buffer full drops on both sides. Starting Test: protocol: TCP, 1 streams, 131072 byte blocks [ ID] Interval Transfer Bitrate Retr Cwnd [ 5] 0.00-10.00 sec 4.74 GBytes 4.08 Gbits/sec 2742 285 KBytes - - - - - - - - - - - - - - - - - - - - - - - - - Test Complete. Summary Results: [ ID] Interval Transfer Bitrate Retr [ 5] 0.00-10.00 sec 4.74 GBytes 4.08 Gbits/sec 2742 sender [ 5] 0.00-10.04 sec 4.74 GBytes 4.06 Gbits/sec receiver The second result is after increasing SO_{SND,RCV}BUF to 7MB, i.e. applying this commit. Starting Test: protocol: TCP, 1 streams, 131072 byte blocks [ ID] Interval Transfer Bitrate Retr Cwnd [ 5] 0.00-10.00 sec 6.14 GBytes 5.27 Gbits/sec 0 3.15 MBytes - - - - - - - - - - - - - - - - - - - - - - - - - Test Complete. Summary Results: [ ID] Interval Transfer Bitrate Retr [ 5] 0.00-10.00 sec 6.14 GBytes 5.27 Gbits/sec 0 sender [ 5] 0.00-10.04 sec 6.14 GBytes 5.25 Gbits/sec receiver The specific value of 7MB is chosen as it is the max supported by a default configuration of macOS. A value greater than 7MB may further benefit throughput for environments with higher network latency and lower CPU clocks, but will also increase latency under load (bufferbloat). Some platforms will silently clamp the value to other maximums. On Linux, we use SO_{SND,RCV}BUFFORCE in case 7MB is beyond net.core.{r,w}mem_max. Co-authored-by: James Tucker <james@tailscale.com> Signed-off-by: James Tucker <james@tailscale.com> Signed-off-by: Jordan Whited <jordan@tailscale.com> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
57 lines
1.7 KiB
Go
57 lines
1.7 KiB
Go
/* SPDX-License-Identifier: MIT
|
|
*
|
|
* Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
|
|
*/
|
|
|
|
package conn
|
|
|
|
import (
|
|
"fmt"
|
|
"syscall"
|
|
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
func init() {
|
|
controlFns = append(controlFns,
|
|
|
|
// Attempt to set the socket buffer size beyond net.core.{r,w}mem_max by
|
|
// using SO_*BUFFORCE. This requires CAP_NET_ADMIN, and is allowed here to
|
|
// fail silently - the result of failure is lower performance on very fast
|
|
// links or high latency links.
|
|
func(network, address string, c syscall.RawConn) error {
|
|
return c.Control(func(fd uintptr) {
|
|
// Set up to *mem_max
|
|
_ = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_RCVBUF, socketBufferSize)
|
|
_ = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_SNDBUF, socketBufferSize)
|
|
// Set beyond *mem_max if CAP_NET_ADMIN
|
|
_ = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_RCVBUFFORCE, socketBufferSize)
|
|
_ = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_SNDBUFFORCE, socketBufferSize)
|
|
})
|
|
},
|
|
|
|
// Enable receiving of the packet information (IP_PKTINFO for IPv4,
|
|
// IPV6_PKTINFO for IPv6) that is used to implement sticky socket support.
|
|
func(network, address string, c syscall.RawConn) error {
|
|
var err error
|
|
switch network {
|
|
case "udp4":
|
|
c.Control(func(fd uintptr) {
|
|
err = unix.SetsockoptInt(int(fd), unix.IPPROTO_IP, unix.IP_PKTINFO, 1)
|
|
})
|
|
case "udp6":
|
|
c.Control(func(fd uintptr) {
|
|
err = unix.SetsockoptInt(int(fd), unix.IPPROTO_IPV6, unix.IPV6_RECVPKTINFO, 1)
|
|
if err != nil {
|
|
return
|
|
}
|
|
err = unix.SetsockoptInt(int(fd), unix.IPPROTO_IPV6, unix.IPV6_V6ONLY, 1)
|
|
})
|
|
default:
|
|
err = fmt.Errorf("unhandled network: %s: %w", network, unix.EINVAL)
|
|
}
|
|
return err
|
|
},
|
|
)
|
|
}
|