4201e08f1d
After reducing UDP stack traversal overhead via GSO and GRO, runtime.chanrecv() began to account for a high percentage (20% in one environment) of perf samples during a throughput benchmark. The individual packet channel ops with the crypto goroutines was the primary contributor to this overhead. Updating these channels to pass vectors, which the device package already handles at its ends, reduced this overhead substantially, and improved throughput. The iperf3 results below demonstrate the effect of this commit between two Linux computers with i5-12400 CPUs. There is roughly ~13us of round trip latency between them. The first result is with UDP GSO and GRO, and with single element channels. Starting Test: protocol: TCP, 1 streams, 131072 byte blocks [ ID] Interval Transfer Bitrate Retr Cwnd [ 5] 0.00-10.00 sec 12.3 GBytes 10.6 Gbits/sec 232 3.15 MBytes - - - - - - - - - - - - - - - - - - - - - - - - - Test Complete. Summary Results: [ ID] Interval Transfer Bitrate Retr [ 5] 0.00-10.00 sec 12.3 GBytes 10.6 Gbits/sec 232 sender [ 5] 0.00-10.04 sec 12.3 GBytes 10.6 Gbits/sec receiver The second result is with channels updated to pass a slice of elements. Starting Test: protocol: TCP, 1 streams, 131072 byte blocks [ ID] Interval Transfer Bitrate Retr Cwnd [ 5] 0.00-10.00 sec 13.2 GBytes 11.3 Gbits/sec 182 3.15 MBytes - - - - - - - - - - - - - - - - - - - - - - - - - Test Complete. Summary Results: [ ID] Interval Transfer Bitrate Retr [ 5] 0.00-10.00 sec 13.2 GBytes 11.3 Gbits/sec 182 sender [ 5] 0.00-10.04 sec 13.2 GBytes 11.3 Gbits/sec receiver Reviewed-by: Adrian Dewhurst <adrian@tailscale.com> Signed-off-by: Jordan Whited <jordan@tailscale.com> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
138 lines
3.5 KiB
Go
138 lines
3.5 KiB
Go
/* SPDX-License-Identifier: MIT
|
|
*
|
|
* Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
|
|
*/
|
|
|
|
package device
|
|
|
|
import (
|
|
"runtime"
|
|
"sync"
|
|
)
|
|
|
|
// An outboundQueue is a channel of QueueOutboundElements awaiting encryption.
|
|
// An outboundQueue is ref-counted using its wg field.
|
|
// An outboundQueue created with newOutboundQueue has one reference.
|
|
// Every additional writer must call wg.Add(1).
|
|
// Every completed writer must call wg.Done().
|
|
// When no further writers will be added,
|
|
// call wg.Done to remove the initial reference.
|
|
// When the refcount hits 0, the queue's channel is closed.
|
|
type outboundQueue struct {
|
|
c chan *[]*QueueOutboundElement
|
|
wg sync.WaitGroup
|
|
}
|
|
|
|
func newOutboundQueue() *outboundQueue {
|
|
q := &outboundQueue{
|
|
c: make(chan *[]*QueueOutboundElement, QueueOutboundSize),
|
|
}
|
|
q.wg.Add(1)
|
|
go func() {
|
|
q.wg.Wait()
|
|
close(q.c)
|
|
}()
|
|
return q
|
|
}
|
|
|
|
// A inboundQueue is similar to an outboundQueue; see those docs.
|
|
type inboundQueue struct {
|
|
c chan *[]*QueueInboundElement
|
|
wg sync.WaitGroup
|
|
}
|
|
|
|
func newInboundQueue() *inboundQueue {
|
|
q := &inboundQueue{
|
|
c: make(chan *[]*QueueInboundElement, QueueInboundSize),
|
|
}
|
|
q.wg.Add(1)
|
|
go func() {
|
|
q.wg.Wait()
|
|
close(q.c)
|
|
}()
|
|
return q
|
|
}
|
|
|
|
// A handshakeQueue is similar to an outboundQueue; see those docs.
|
|
type handshakeQueue struct {
|
|
c chan QueueHandshakeElement
|
|
wg sync.WaitGroup
|
|
}
|
|
|
|
func newHandshakeQueue() *handshakeQueue {
|
|
q := &handshakeQueue{
|
|
c: make(chan QueueHandshakeElement, QueueHandshakeSize),
|
|
}
|
|
q.wg.Add(1)
|
|
go func() {
|
|
q.wg.Wait()
|
|
close(q.c)
|
|
}()
|
|
return q
|
|
}
|
|
|
|
type autodrainingInboundQueue struct {
|
|
c chan *[]*QueueInboundElement
|
|
}
|
|
|
|
// newAutodrainingInboundQueue returns a channel that will be drained when it gets GC'd.
|
|
// It is useful in cases in which is it hard to manage the lifetime of the channel.
|
|
// The returned channel must not be closed. Senders should signal shutdown using
|
|
// some other means, such as sending a sentinel nil values.
|
|
func newAutodrainingInboundQueue(device *Device) *autodrainingInboundQueue {
|
|
q := &autodrainingInboundQueue{
|
|
c: make(chan *[]*QueueInboundElement, QueueInboundSize),
|
|
}
|
|
runtime.SetFinalizer(q, device.flushInboundQueue)
|
|
return q
|
|
}
|
|
|
|
func (device *Device) flushInboundQueue(q *autodrainingInboundQueue) {
|
|
for {
|
|
select {
|
|
case elems := <-q.c:
|
|
for _, elem := range *elems {
|
|
elem.Lock()
|
|
device.PutMessageBuffer(elem.buffer)
|
|
device.PutInboundElement(elem)
|
|
}
|
|
device.PutInboundElementsSlice(elems)
|
|
default:
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
type autodrainingOutboundQueue struct {
|
|
c chan *[]*QueueOutboundElement
|
|
}
|
|
|
|
// newAutodrainingOutboundQueue returns a channel that will be drained when it gets GC'd.
|
|
// It is useful in cases in which is it hard to manage the lifetime of the channel.
|
|
// The returned channel must not be closed. Senders should signal shutdown using
|
|
// some other means, such as sending a sentinel nil values.
|
|
// All sends to the channel must be best-effort, because there may be no receivers.
|
|
func newAutodrainingOutboundQueue(device *Device) *autodrainingOutboundQueue {
|
|
q := &autodrainingOutboundQueue{
|
|
c: make(chan *[]*QueueOutboundElement, QueueOutboundSize),
|
|
}
|
|
runtime.SetFinalizer(q, device.flushOutboundQueue)
|
|
return q
|
|
}
|
|
|
|
func (device *Device) flushOutboundQueue(q *autodrainingOutboundQueue) {
|
|
for {
|
|
select {
|
|
case elems := <-q.c:
|
|
for _, elem := range *elems {
|
|
elem.Lock()
|
|
device.PutMessageBuffer(elem.buffer)
|
|
device.PutOutboundElement(elem)
|
|
}
|
|
device.PutOutboundElementsSlice(elems)
|
|
default:
|
|
return
|
|
}
|
|
}
|
|
}
|