wireguard-go/device/peer.go

297 lines
7.3 KiB
Go
Raw Normal View History

2019-01-02 01:55:51 +01:00
/* SPDX-License-Identifier: MIT
*
* Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
*/
2019-03-03 04:04:41 +01:00
package device
import (
"container/list"
2017-06-26 13:14:02 +02:00
"errors"
"sync"
"sync/atomic"
"time"
2024-01-07 20:03:11 +01:00
"gitea.hbanafa.com/hesham/wireguard-go/conn"
)
type Peer struct {
isRunning atomic.Bool
keypairs Keypairs
handshake Handshake
device *Device
stopping sync.WaitGroup // routines pending stop
txBytes atomic.Uint64 // bytes send to peer (endpoint)
rxBytes atomic.Uint64 // bytes received from peer
lastHandshakeNano atomic.Int64 // nano seconds since epoch
endpoint struct {
sync.Mutex
val conn.Endpoint
clearSrcOnTx bool // signal to val.ClearSrc() prior to next packet transmission
disableRoaming bool
}
timers struct {
retransmitHandshake *Timer
sendKeepalive *Timer
newHandshake *Timer
zeroKeyMaterial *Timer
persistentKeepalive *Timer
handshakeAttempts atomic.Uint32
needAnotherKeepalive atomic.Bool
sentLastMinuteHandshake atomic.Bool
}
device: remove mutex from Peer send/receive The immediate motivation for this change is an observed deadlock. 1. A goroutine calls peer.Stop. That calls peer.queue.Lock(). 2. Another goroutine is in RoutineSequentialReceiver. It receives an elem from peer.queue.inbound. 3. The peer.Stop goroutine calls close(peer.queue.inbound), close(peer.queue.outbound), and peer.stopping.Wait(). It blocks waiting for RoutineSequentialReceiver and RoutineSequentialSender to exit. 4. The RoutineSequentialReceiver goroutine calls peer.SendStagedPackets(). SendStagedPackets attempts peer.queue.RLock(). That blocks forever because the peer.Stop goroutine holds a write lock on that mutex. A background motivation for this change is that it can be expensive to have a mutex in the hot code path of RoutineSequential*. The mutex was necessary to avoid attempting to send elems on a closed channel. This commit removes that danger by never closing the channel. Instead, we send a sentinel nil value on the channel to indicate to the receiver that it should exit. The only problem with this is that if the receiver exits, we could write an elem into the channel which would never get received. If it never gets received, it cannot get returned to the device pools. To work around this, we use a finalizer. When the channel can be GC'd, the finalizer drains any remaining elements from the channel and restores them to the device pool. After that change, peer.queue.RWMutex no longer makes sense where it is. It is only used to prevent concurrent calls to Start and Stop. Move it to a more sensible location and make it a plain sync.Mutex. Signed-off-by: Josh Bleecher Snyder <josh@tailscale.com>
2021-02-08 22:02:52 +01:00
state struct {
sync.Mutex // protects against concurrent Start/Stop
device: remove mutex from Peer send/receive The immediate motivation for this change is an observed deadlock. 1. A goroutine calls peer.Stop. That calls peer.queue.Lock(). 2. Another goroutine is in RoutineSequentialReceiver. It receives an elem from peer.queue.inbound. 3. The peer.Stop goroutine calls close(peer.queue.inbound), close(peer.queue.outbound), and peer.stopping.Wait(). It blocks waiting for RoutineSequentialReceiver and RoutineSequentialSender to exit. 4. The RoutineSequentialReceiver goroutine calls peer.SendStagedPackets(). SendStagedPackets attempts peer.queue.RLock(). That blocks forever because the peer.Stop goroutine holds a write lock on that mutex. A background motivation for this change is that it can be expensive to have a mutex in the hot code path of RoutineSequential*. The mutex was necessary to avoid attempting to send elems on a closed channel. This commit removes that danger by never closing the channel. Instead, we send a sentinel nil value on the channel to indicate to the receiver that it should exit. The only problem with this is that if the receiver exits, we could write an elem into the channel which would never get received. If it never gets received, it cannot get returned to the device pools. To work around this, we use a finalizer. When the channel can be GC'd, the finalizer drains any remaining elements from the channel and restores them to the device pool. After that change, peer.queue.RWMutex no longer makes sense where it is. It is only used to prevent concurrent calls to Start and Stop. Move it to a more sensible location and make it a plain sync.Mutex. Signed-off-by: Josh Bleecher Snyder <josh@tailscale.com>
2021-02-08 22:02:52 +01:00
}
queue struct {
staged chan *QueueOutboundElementsContainer // staged packets before a handshake is available
outbound *autodrainingOutboundQueue // sequential ordering of udp transmission
inbound *autodrainingInboundQueue // sequential ordering of tun writing
}
cookieGenerator CookieGenerator
trieEntries list.List
persistentKeepaliveInterval atomic.Uint32
2017-06-24 15:34:17 +02:00
}
func (device *Device) NewPeer(pk NoisePublicKey) (*Peer, error) {
if device.isClosed() {
2018-05-13 19:33:41 +02:00
return nil, errors.New("device closed")
}
// lock resources
device.staticIdentity.RLock()
defer device.staticIdentity.RUnlock()
device.peers.Lock()
defer device.peers.Unlock()
// check if over limit
if len(device.peers.keyMap) >= MaxPeers {
2018-05-13 19:33:41 +02:00
return nil, errors.New("too many peers")
}
2017-06-26 13:14:02 +02:00
// create peer
peer := new(Peer)
2017-07-01 23:29:22 +02:00
2018-05-13 23:14:43 +02:00
peer.cookieGenerator.Init(pk)
2017-07-01 23:29:22 +02:00
peer.device = device
peer.queue.outbound = newAutodrainingOutboundQueue(device)
peer.queue.inbound = newAutodrainingInboundQueue(device)
peer.queue.staged = make(chan *QueueOutboundElementsContainer, QueueStagedSize)
2017-07-08 23:51:26 +02:00
// map public key
_, ok := device.peers.keyMap[pk]
2017-06-26 13:14:02 +02:00
if ok {
2018-05-13 19:33:41 +02:00
return nil, errors.New("adding existing peer")
2017-06-26 13:14:02 +02:00
}
2017-06-24 15:34:17 +02:00
// pre-compute DH
2017-06-26 13:14:02 +02:00
handshake := &peer.handshake
handshake.mutex.Lock()
handshake.precomputedStaticStatic, _ = device.staticIdentity.privateKey.sharedSecret(pk)
2019-08-05 16:57:41 +02:00
handshake.remoteStatic = pk
2017-06-26 13:14:02 +02:00
handshake.mutex.Unlock()
2017-06-24 15:34:17 +02:00
// reset endpoint
peer.endpoint.Lock()
peer.endpoint.val = nil
peer.endpoint.disableRoaming = false
peer.endpoint.clearSrcOnTx = false
peer.endpoint.Unlock()
// init timers
peer.timersInit()
2020-03-18 06:06:56 +01:00
// add
device.peers.keyMap[pk] = peer
2019-08-05 16:57:41 +02:00
return peer, nil
}
func (peer *Peer) SendBuffers(buffers [][]byte) error {
peer.device.net.RLock()
defer peer.device.net.RUnlock()
2017-12-29 17:42:09 +01:00
if peer.device.isClosed() {
return nil
2018-02-02 20:45:25 +01:00
}
peer.endpoint.Lock()
endpoint := peer.endpoint.val
if endpoint == nil {
peer.endpoint.Unlock()
2018-05-13 19:33:41 +02:00
return errors.New("no known endpoint for peer")
}
if peer.endpoint.clearSrcOnTx {
endpoint.ClearSrc()
peer.endpoint.clearSrcOnTx = false
}
peer.endpoint.Unlock()
2017-12-29 17:42:09 +01:00
err := peer.device.net.bind.Send(buffers, endpoint)
if err == nil {
var totalLen uint64
for _, b := range buffers {
totalLen += uint64(len(b))
}
peer.txBytes.Add(totalLen)
}
return err
}
func (peer *Peer) String() string {
// The awful goo that follows is identical to:
//
// base64Key := base64.StdEncoding.EncodeToString(peer.handshake.remoteStatic[:])
// abbreviatedKey := base64Key[0:4] + "…" + base64Key[39:43]
// return fmt.Sprintf("peer(%s)", abbreviatedKey)
//
// except that it is considerably more efficient.
src := peer.handshake.remoteStatic
b64 := func(input byte) byte {
return input + 'A' + byte(((25-int(input))>>8)&6) - byte(((51-int(input))>>8)&75) - byte(((61-int(input))>>8)&15) + byte(((62-int(input))>>8)&3)
}
b := []byte("peer(____…____)")
const first = len("peer(")
const second = len("peer(____…")
b[first+0] = b64((src[0] >> 2) & 63)
b[first+1] = b64(((src[0] << 4) | (src[1] >> 4)) & 63)
b[first+2] = b64(((src[1] << 2) | (src[2] >> 6)) & 63)
b[first+3] = b64(src[2] & 63)
b[second+0] = b64(src[29] & 63)
b[second+1] = b64((src[30] >> 2) & 63)
b[second+2] = b64(((src[30] << 4) | (src[31] >> 4)) & 63)
b[second+3] = b64((src[31] << 2) & 63)
return string(b)
}
func (peer *Peer) Start() {
// should never start a peer on a closed device
if peer.device.isClosed() {
return
}
// prevent simultaneous start/stop operations
peer.state.Lock()
defer peer.state.Unlock()
2018-02-02 20:45:25 +01:00
if peer.isRunning.Load() {
2018-02-02 20:45:25 +01:00
return
}
2018-02-04 19:18:44 +01:00
device := peer.device
device.log.Verbosef("%v - Starting", peer)
// reset routine state
peer.stopping.Wait()
peer.stopping.Add(2)
2017-12-29 17:42:09 +01:00
peer.handshake.mutex.Lock()
peer.handshake.lastSentHandshake = time.Now().Add(-(RekeyTimeout + time.Second))
peer.handshake.mutex.Unlock()
peer.device.queue.encryption.wg.Add(1) // keep encryption queue open for our writes
peer.timersStart()
2018-02-02 20:45:25 +01:00
device.flushInboundQueue(peer.queue.inbound)
device.flushOutboundQueue(peer.queue.outbound)
// Use the device batch size, not the bind batch size, as the device size is
// the size of the batch pools.
batchSize := peer.device.BatchSize()
go peer.RoutineSequentialSender(batchSize)
go peer.RoutineSequentialReceiver(batchSize)
2017-12-29 17:42:09 +01:00
peer.isRunning.Store(true)
2017-12-29 17:42:09 +01:00
}
2018-05-13 23:14:43 +02:00
func (peer *Peer) ZeroAndFlushAll() {
device := peer.device
// clear key pairs
keypairs := &peer.keypairs
keypairs.Lock()
2018-05-13 23:14:43 +02:00
device.DeleteKeypair(keypairs.previous)
device.DeleteKeypair(keypairs.current)
device.DeleteKeypair(keypairs.next.Load())
2018-05-13 23:14:43 +02:00
keypairs.previous = nil
keypairs.current = nil
keypairs.next.Store(nil)
keypairs.Unlock()
2018-05-13 23:14:43 +02:00
// clear handshake state
handshake := &peer.handshake
handshake.mutex.Lock()
device.indexTable.Delete(handshake.localIndex)
handshake.Clear()
handshake.mutex.Unlock()
peer.FlushStagedPackets()
2018-05-13 23:14:43 +02:00
}
func (peer *Peer) ExpireCurrentKeypairs() {
handshake := &peer.handshake
handshake.mutex.Lock()
peer.device.indexTable.Delete(handshake.localIndex)
handshake.Clear()
peer.handshake.lastSentHandshake = time.Now().Add(-(RekeyTimeout + time.Second))
handshake.mutex.Unlock()
keypairs := &peer.keypairs
keypairs.Lock()
if keypairs.current != nil {
keypairs.current.sendNonce.Store(RejectAfterMessages)
}
if next := keypairs.next.Load(); next != nil {
next.sendNonce.Store(RejectAfterMessages)
}
keypairs.Unlock()
}
2017-12-29 17:42:09 +01:00
func (peer *Peer) Stop() {
peer.state.Lock()
defer peer.state.Unlock()
2018-02-02 20:45:25 +01:00
if !peer.isRunning.Swap(false) {
return
}
peer.device.log.Verbosef("%v - Stopping", peer)
peer.timersStop()
device: remove mutex from Peer send/receive The immediate motivation for this change is an observed deadlock. 1. A goroutine calls peer.Stop. That calls peer.queue.Lock(). 2. Another goroutine is in RoutineSequentialReceiver. It receives an elem from peer.queue.inbound. 3. The peer.Stop goroutine calls close(peer.queue.inbound), close(peer.queue.outbound), and peer.stopping.Wait(). It blocks waiting for RoutineSequentialReceiver and RoutineSequentialSender to exit. 4. The RoutineSequentialReceiver goroutine calls peer.SendStagedPackets(). SendStagedPackets attempts peer.queue.RLock(). That blocks forever because the peer.Stop goroutine holds a write lock on that mutex. A background motivation for this change is that it can be expensive to have a mutex in the hot code path of RoutineSequential*. The mutex was necessary to avoid attempting to send elems on a closed channel. This commit removes that danger by never closing the channel. Instead, we send a sentinel nil value on the channel to indicate to the receiver that it should exit. The only problem with this is that if the receiver exits, we could write an elem into the channel which would never get received. If it never gets received, it cannot get returned to the device pools. To work around this, we use a finalizer. When the channel can be GC'd, the finalizer drains any remaining elements from the channel and restores them to the device pool. After that change, peer.queue.RWMutex no longer makes sense where it is. It is only used to prevent concurrent calls to Start and Stop. Move it to a more sensible location and make it a plain sync.Mutex. Signed-off-by: Josh Bleecher Snyder <josh@tailscale.com>
2021-02-08 22:02:52 +01:00
// Signal that RoutineSequentialSender and RoutineSequentialReceiver should exit.
peer.queue.inbound.c <- nil
peer.queue.outbound.c <- nil
peer.stopping.Wait()
peer.device.queue.encryption.wg.Done() // no more writes to encryption queue from us
2018-05-13 23:14:43 +02:00
peer.ZeroAndFlushAll()
}
2018-05-26 02:59:26 +02:00
func (peer *Peer) SetEndpointFromPacket(endpoint conn.Endpoint) {
peer.endpoint.Lock()
defer peer.endpoint.Unlock()
if peer.endpoint.disableRoaming {
return
}
peer.endpoint.clearSrcOnTx = false
peer.endpoint.val = endpoint
}
func (peer *Peer) markEndpointSrcForClearing() {
peer.endpoint.Lock()
defer peer.endpoint.Unlock()
if peer.endpoint.val == nil {
2018-05-26 02:59:26 +02:00
return
}
peer.endpoint.clearSrcOnTx = true
2018-05-26 02:59:26 +02:00
}