tun: windows: implement ring buffers

Signed-off-by: Simon Rozman <simon@rozman.si>
This commit is contained in:
Simon Rozman 2019-07-11 10:35:47 +02:00 committed by Jason A. Donenfeld
parent a961aacc9f
commit 2e24e7dcae

View File

@ -8,9 +8,9 @@ package tun
import ( import (
"errors" "errors"
"fmt" "fmt"
"io"
"os" "os"
"sync" "sync"
"sync/atomic"
"time" "time"
"unsafe" "unsafe"
@ -20,39 +20,54 @@ import (
) )
const ( const (
packetExchangeAlignment uint32 = 4 // Number of bytes packets are aligned to in exchange buffers packetAlignment uint32 = 4 // Number of bytes packets are aligned to in rings
packetSizeMax uint32 = 0xf000 - packetExchangeAlignment // Maximum packet size packetSizeMax uint32 = 0xffff // Maximum packet size
packetExchangeSize uint32 = 0x100000 // Exchange buffer size (defaults to 1MiB) packetCapacity uint32 = 0x100000 // Ring capacity (defaults to 1MiB, must be a power of 2)
retryRate = 4 // Number of retries per second to reopen device pipe packetTrailingSize uint32 = uint32(unsafe.Sizeof(packetHeader{})) + ((packetSizeMax + (packetAlignment - 1)) &^ (packetAlignment - 1)) - packetAlignment
retryTimeout = 30 // Number of seconds to tolerate adapter unavailable
ioctlRegisterRings uint32 = (0x22 /*FILE_DEVICE_UNKNOWN*/ << 16) | (0x800 << 2) | 0 /*METHOD_BUFFERED*/ | (0x3 /*FILE_READ_DATA | FILE_WRITE_DATA*/ << 14)
retryRate = 4 // Number of retries per second to reopen device pipe
retryTimeout = 30 // Number of seconds to tolerate adapter unavailable
) )
type exchgBufRead struct { type packetHeader struct {
data [packetExchangeSize]byte size uint32
offset uint32
avail uint32
} }
type exchgBufWrite struct { type packet struct {
data [packetExchangeSize]byte packetHeader
offset uint32 data [packetSizeMax]byte
}
type ring struct {
head uint32
tail uint32
alertable int32
data [packetCapacity + packetTrailingSize]byte
}
type ringDescriptor struct {
send, receive struct {
size uint32
ring *ring
tailMoved windows.Handle
}
} }
type NativeTun struct { type NativeTun struct {
wt *wintun.Wintun wt *wintun.Wintun
tunFileRead *os.File tunDev windows.Handle
tunFileWrite *os.File tunLock sync.Mutex
tunLock sync.Mutex close bool
close bool rings ringDescriptor
rdBuff *exchgBufRead events chan Event
wrBuff *exchgBufWrite errors chan error
events chan Event forcedMTU int
errors chan error
forcedMTU int
} }
func packetAlign(size uint32) uint32 { func packetAlign(size uint32) uint32 {
return (size + (packetExchangeAlignment - 1)) &^ (packetExchangeAlignment - 1) return (size + (packetAlignment - 1)) &^ (packetAlignment - 1)
} }
var shouldRetryOpen = windows.RtlGetVersion().MajorVersion < 10 var shouldRetryOpen = windows.RtlGetVersion().MajorVersion < 10
@ -102,14 +117,32 @@ func CreateTUNWithRequestedGUID(ifname string, requestedGUID *windows.GUID) (Dev
return nil, fmt.Errorf("Unable to set name of Wintun interface: %v", err) return nil, fmt.Errorf("Unable to set name of Wintun interface: %v", err)
} }
return &NativeTun{ tun := &NativeTun{
wt: wt, wt: wt,
rdBuff: &exchgBufRead{}, tunDev: windows.InvalidHandle,
wrBuff: &exchgBufWrite{},
events: make(chan Event, 10), events: make(chan Event, 10),
errors: make(chan error, 1), errors: make(chan error, 1),
forcedMTU: 1500, forcedMTU: 1500,
}, nil }
tun.rings.send.size = uint32(unsafe.Sizeof(ring{}))
tun.rings.send.ring = &ring{}
tun.rings.send.tailMoved, err = windows.CreateEvent(nil, 0, 0, nil)
if err != nil {
wt.DeleteInterface()
return nil, fmt.Errorf("Error creating event: %v", err)
}
tun.rings.receive.size = uint32(unsafe.Sizeof(ring{}))
tun.rings.receive.ring = &ring{}
tun.rings.receive.tailMoved, err = windows.CreateEvent(nil, 0, 0, nil)
if err != nil {
windows.CloseHandle(tun.rings.send.tailMoved)
wt.DeleteInterface()
return nil, fmt.Errorf("Error creating event: %v", err)
}
return tun, nil
} }
func (tun *NativeTun) openTUN() error { func (tun *NativeTun) openTUN() error {
@ -119,9 +152,12 @@ func (tun *NativeTun) openTUN() error {
} }
var err error var err error
name := tun.wt.DataFileName() name, err := windows.UTF16PtrFromString(tun.wt.DataFileName())
for tun.tunFileRead == nil { if err != nil {
tun.tunFileRead, err = os.OpenFile(name, os.O_RDONLY, 0) return err
}
for tun.tunDev == windows.InvalidHandle {
tun.tunDev, err = windows.CreateFile(name, windows.GENERIC_READ|windows.GENERIC_WRITE, 0, nil, windows.OPEN_EXISTING, 0, 0)
if err != nil { if err != nil {
if retries > 0 && !tun.close { if retries > 0 && !tun.close {
time.Sleep(time.Second / retryRate) time.Sleep(time.Second / retryRate)
@ -130,72 +166,51 @@ func (tun *NativeTun) openTUN() error {
} }
return err return err
} }
}
for tun.tunFileWrite == nil { atomic.StoreUint32(&tun.rings.send.ring.head, 0)
tun.tunFileWrite, err = os.OpenFile(name, os.O_WRONLY, 0) atomic.StoreUint32(&tun.rings.send.ring.tail, 0)
atomic.StoreInt32(&tun.rings.send.ring.alertable, 0)
atomic.StoreUint32(&tun.rings.receive.ring.head, 0)
atomic.StoreUint32(&tun.rings.receive.ring.tail, 0)
atomic.StoreInt32(&tun.rings.receive.ring.alertable, 0)
var bytesReturned uint32
err = windows.DeviceIoControl(tun.tunDev, ioctlRegisterRings, (*byte)(unsafe.Pointer(&tun.rings)), uint32(unsafe.Sizeof(tun.rings)), nil, 0, &bytesReturned, nil)
if err != nil { if err != nil {
if retries > 0 && !tun.close { return fmt.Errorf("Error registering rings: %v", err)
time.Sleep(time.Second / retryRate)
retries--
continue
}
return err
} }
} }
return nil return nil
} }
func (tun *NativeTun) closeTUN() (err error) { func (tun *NativeTun) closeTUN() (err error) {
for tun.tunFileRead != nil { for tun.tunDev != windows.InvalidHandle {
tun.tunLock.Lock() tun.tunLock.Lock()
if tun.tunFileRead == nil { if tun.tunDev == windows.InvalidHandle {
tun.tunLock.Unlock() tun.tunLock.Unlock()
break break
} }
t := tun.tunFileRead t := tun.tunDev
tun.tunFileRead = nil tun.tunDev = windows.InvalidHandle
windows.CancelIoEx(windows.Handle(t.Fd()), nil) err = windows.CloseHandle(t)
err = t.Close()
tun.tunLock.Unlock() tun.tunLock.Unlock()
break break
} }
for tun.tunFileWrite != nil {
tun.tunLock.Lock()
if tun.tunFileWrite == nil {
tun.tunLock.Unlock()
break
}
t := tun.tunFileWrite
tun.tunFileWrite = nil
windows.CancelIoEx(windows.Handle(t.Fd()), nil)
err2 := t.Close()
tun.tunLock.Unlock()
if err == nil {
err = err2
}
break
}
return return
} }
func (tun *NativeTun) getTUN() (read *os.File, write *os.File, err error) { func (tun *NativeTun) getTUN() (handle windows.Handle, err error) {
read, write = tun.tunFileRead, tun.tunFileWrite handle = tun.tunDev
if read == nil || write == nil { if handle == windows.InvalidHandle {
read, write = nil, nil
tun.tunLock.Lock() tun.tunLock.Lock()
if tun.tunFileRead != nil && tun.tunFileWrite != nil { if tun.tunDev != windows.InvalidHandle {
read, write = tun.tunFileRead, tun.tunFileWrite handle = tun.tunDev
tun.tunLock.Unlock()
return
}
err = tun.closeTUN()
if err != nil {
tun.tunLock.Unlock() tun.tunLock.Unlock()
return return
} }
err = tun.openTUN() err = tun.openTUN()
if err == nil { if err == nil {
read, write = tun.tunFileRead, tun.tunFileWrite handle = tun.tunDev
} }
tun.tunLock.Unlock() tun.tunLock.Unlock()
return return
@ -217,18 +232,30 @@ func (tun *NativeTun) Events() chan Event {
func (tun *NativeTun) Close() error { func (tun *NativeTun) Close() error {
tun.close = true tun.close = true
err1 := tun.closeTUN() windows.SetEvent(tun.rings.send.tailMoved) // wake the reader if it's sleeping
var err, err2 error
err = tun.closeTUN()
if tun.events != nil { if tun.events != nil {
close(tun.events) close(tun.events)
} }
_, err2 := tun.wt.DeleteInterface() err2 = windows.CloseHandle(tun.rings.receive.tailMoved)
if err1 == nil { if err == nil {
err1 = err2 err = err2
} }
return err1 err2 = windows.CloseHandle(tun.rings.send.tailMoved)
if err == nil {
err = err2
}
_, err2 = tun.wt.DeleteInterface()
if err == nil {
err = err2
}
return err
} }
func (tun *NativeTun) MTU() (int, error) { func (tun *NativeTun) MTU() (int, error) {
@ -240,6 +267,8 @@ func (tun *NativeTun) ForceMTU(mtu int) {
tun.forcedMTU = mtu tun.forcedMTU = mtu
} }
// Note: Read() and Write() assume the caller comes only from a single thread; there's no locking.
func (tun *NativeTun) Read(buff []byte, offset int) (int, error) { func (tun *NativeTun) Read(buff []byte, offset int) (int, error) {
select { select {
case err := <-tun.errors: case err := <-tun.errors:
@ -248,142 +277,111 @@ func (tun *NativeTun) Read(buff []byte, offset int) (int, error) {
} }
retries := maybeRetry(1000) retries := maybeRetry(1000)
for { for !tun.close {
if tun.rdBuff.offset+packetExchangeAlignment <= tun.rdBuff.avail { _, err := tun.getTUN()
// Get packet from the exchange buffer.
packet := tun.rdBuff.data[tun.rdBuff.offset:]
size := *(*uint32)(unsafe.Pointer(&packet[0]))
pSize := packetAlign(size) + packetExchangeAlignment
if packetSizeMax < size || tun.rdBuff.avail < tun.rdBuff.offset+pSize {
// Invalid packet size.
tun.rdBuff.avail = 0
continue
}
packet = packet[packetExchangeAlignment : packetExchangeAlignment+size]
// Copy data.
copy(buff[offset:], packet)
tun.rdBuff.offset += pSize
return int(size), nil
}
// Get TUN data pipe.
file, _, err := tun.getTUN()
if err != nil { if err != nil {
return 0, err return 0, err
} }
n, err := file.Read(tun.rdBuff.data[:]) buffHead := atomic.LoadUint32(&tun.rings.send.ring.head)
if err != nil { if buffHead >= packetCapacity {
tun.rdBuff.offset = 0 return 0, errors.New("send ring head out of bounds")
tun.rdBuff.avail = 0
pe, ok := err.(*os.PathError)
if tun.close {
return 0, os.ErrClosed
}
if retries > 0 && ok && (pe.Err == windows.ERROR_HANDLE_EOF || pe.Err == windows.ERROR_OPERATION_ABORTED) {
retries--
tun.closeTUN()
time.Sleep(time.Millisecond * 2)
continue
}
return 0, err
} }
if n == 0 {
if retries == 0 { buffTail := atomic.LoadUint32(&tun.rings.send.ring.tail)
return 0, io.ErrShortBuffer if buffHead == buffTail {
} windows.WaitForSingleObject(tun.rings.send.tailMoved, windows.INFINITE)
retries--
continue continue
} }
tun.rdBuff.offset = 0 if buffTail >= packetCapacity {
tun.rdBuff.avail = uint32(n) if retries > 0 {
} tun.closeTUN()
} time.Sleep(time.Millisecond * 2)
retries--
continue
}
return 0, errors.New("send ring tail out of bounds")
}
retries = maybeRetry(1000)
// Note: flush() and putTunPacket() assume the caller comes only from a single thread; there's no locking. buffContent := tun.rings.send.ring.wrap(buffTail - buffHead)
if buffContent < uint32(unsafe.Sizeof(packetHeader{})) {
return 0, errors.New("incomplete packet header in send ring")
}
packet := (*packet)(unsafe.Pointer(&tun.rings.send.ring.data[buffHead]))
if packet.size > packetSizeMax {
return 0, errors.New("packet too big in send ring")
}
alignedPacketSize := packetAlign(uint32(unsafe.Sizeof(packetHeader{})) + packet.size)
if alignedPacketSize > buffContent {
return 0, errors.New("incomplete packet in send ring")
}
copy(buff[offset:], packet.data[:packet.size])
buffHead = tun.rings.send.ring.wrap(buffHead + alignedPacketSize)
atomic.StoreUint32(&tun.rings.send.ring.head, buffHead)
return int(packet.size), nil
}
return 0, os.ErrClosed
}
func (tun *NativeTun) Flush() error { func (tun *NativeTun) Flush() error {
if tun.wrBuff.offset == 0 {
return nil
}
defer func() {
tun.wrBuff.offset = 0
}()
retries := maybeRetry(1000)
for {
// Get TUN data pipe.
_, file, err := tun.getTUN()
if err != nil {
return err
}
for {
_, err = file.Write(tun.wrBuff.data[:tun.wrBuff.offset])
if err != nil {
pe, ok := err.(*os.PathError)
if tun.close {
return os.ErrClosed
}
if retries > 0 && ok && pe.Err == windows.ERROR_OPERATION_ABORTED { // Adapter is paused or in low-power state.
retries--
time.Sleep(time.Millisecond * 2)
continue
}
if retries > 0 && ok && pe.Err == windows.ERROR_HANDLE_EOF { // Adapter is going down.
retries--
tun.closeTUN()
time.Sleep(time.Millisecond * 2)
break
}
return err
}
return nil
}
}
}
func (tun *NativeTun) putTunPacket(buff []byte) error {
size := uint32(len(buff))
if size == 0 {
return errors.New("Empty packet")
}
if size > packetSizeMax {
return errors.New("Packet too big")
}
pSize := packetAlign(size) + packetExchangeAlignment
if tun.wrBuff.offset+pSize >= packetExchangeSize {
// Exchange buffer is full -> flush first.
err := tun.Flush()
if err != nil {
return err
}
}
// Write packet to the exchange buffer.
packet := tun.wrBuff.data[tun.wrBuff.offset : tun.wrBuff.offset+pSize]
*(*uint32)(unsafe.Pointer(&packet[0])) = size
packet = packet[packetExchangeAlignment : packetExchangeAlignment+size]
copy(packet, buff)
tun.wrBuff.offset += pSize
return nil return nil
} }
func (tun *NativeTun) Write(buff []byte, offset int) (int, error) { func (tun *NativeTun) Write(buff []byte, offset int) (int, error) {
err := tun.putTunPacket(buff[offset:]) retries := maybeRetry(1000)
if err != nil { for {
return 0, err _, err := tun.getTUN()
if err != nil {
return 0, err
}
packetSize := uint32(len(buff) - offset)
alignedPacketSize := packetAlign(uint32(unsafe.Sizeof(packetHeader{})) + packetSize)
buffHead := atomic.LoadUint32(&tun.rings.receive.ring.head)
if buffHead >= packetCapacity {
if retries > 0 {
tun.closeTUN()
time.Sleep(time.Millisecond * 2)
retries--
continue
}
return 0, errors.New("receive ring head out of bounds")
}
retries = maybeRetry(1000)
buffTail := atomic.LoadUint32(&tun.rings.receive.ring.tail)
if buffTail >= packetCapacity {
return 0, errors.New("receive ring tail out of bounds")
}
buffSpace := tun.rings.receive.ring.wrap(buffHead - buffTail - packetAlignment)
if alignedPacketSize > buffSpace {
return 0, errors.New("receive ring full")
}
packet := (*packet)(unsafe.Pointer(&tun.rings.receive.ring.data[buffTail]))
packet.size = packetSize
copy(packet.data[:packetSize], buff[offset:])
atomic.StoreUint32(&tun.rings.receive.ring.tail, tun.rings.receive.ring.wrap(buffTail+alignedPacketSize))
if atomic.LoadInt32(&tun.rings.receive.ring.alertable) != 0 {
windows.SetEvent(tun.rings.receive.tailMoved)
}
return int(packetSize), nil
} }
return len(buff) - offset, nil
} }
//
// LUID returns Windows adapter instance ID. // LUID returns Windows adapter instance ID.
//
func (tun *NativeTun) LUID() uint64 { func (tun *NativeTun) LUID() uint64 {
return tun.wt.LUID() return tun.wt.LUID()
} }
// wrap returns value modulo ring capacity
func (rb *ring) wrap(value uint32) uint32 {
return value & (packetCapacity - 1)
}