2019-01-02 01:55:51 +01:00
|
|
|
/* SPDX-License-Identifier: MIT
|
2018-05-03 15:04:00 +02:00
|
|
|
*
|
2022-09-20 17:21:32 +02:00
|
|
|
* Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
|
2018-05-03 15:04:00 +02:00
|
|
|
*/
|
|
|
|
|
2018-05-23 02:10:54 +02:00
|
|
|
package tun
|
2017-06-04 21:48:15 +02:00
|
|
|
|
2017-07-20 15:06:24 +02:00
|
|
|
/* Implementation of the TUN device interface for Linux.
 */
|
|
|
|
|
2017-06-04 21:48:15 +02:00
|
|
|
import (
|
2021-05-20 18:26:01 +02:00
|
|
|
"errors"
|
2017-08-17 00:25:39 +02:00
|
|
|
"fmt"
|
2017-06-04 21:48:15 +02:00
|
|
|
"os"
|
2018-05-21 03:31:44 +02:00
|
|
|
"sync"
|
2019-03-07 01:51:41 +01:00
|
|
|
"syscall"
|
2017-11-29 18:46:31 +01:00
|
|
|
"time"
|
2017-06-04 21:48:15 +02:00
|
|
|
"unsafe"
|
2019-05-14 09:09:52 +02:00
|
|
|
|
|
|
|
"golang.org/x/sys/unix"
|
2024-01-07 20:03:11 +01:00
|
|
|
"gitea.hbanafa.com/hesham/wireguard-go/conn"
|
|
|
|
"gitea.hbanafa.com/hesham/wireguard-go/rwcancel"
|
2017-06-04 21:48:15 +02:00
|
|
|
)
|
|
|
|
|
2017-08-17 00:25:39 +02:00
|
|
|
const (
|
2018-02-13 16:43:07 +01:00
|
|
|
cloneDevicePath = "/dev/net/tun"
|
|
|
|
ifReqSize = unix.IFNAMSIZ + 64
|
2017-08-17 00:25:39 +02:00
|
|
|
)
|
2017-06-04 21:48:15 +02:00
|
|
|
|
2019-03-01 00:05:57 +01:00
|
|
|
type NativeTun struct {
|
2018-10-17 21:26:53 +02:00
|
|
|
tunFile *os.File
|
2019-06-10 23:33:40 +02:00
|
|
|
index int32 // if index
|
|
|
|
errors chan error // async error handling
|
|
|
|
events chan Event // device related events
|
2018-05-21 03:31:44 +02:00
|
|
|
netlinkSock int
|
|
|
|
netlinkCancel *rwcancel.RWCancel
|
|
|
|
hackListenerClosed sync.Mutex
|
2018-05-14 03:43:56 +02:00
|
|
|
statusListenersShutdown chan struct{}
|
2023-03-03 00:08:28 +01:00
|
|
|
batchSize int
|
|
|
|
vnetHdr bool
|
2023-11-01 03:53:35 +01:00
|
|
|
udpGSO bool
|
2020-02-28 18:10:16 +01:00
|
|
|
|
2021-02-18 23:53:22 +01:00
|
|
|
closeOnce sync.Once
|
|
|
|
|
2020-02-28 18:10:16 +01:00
|
|
|
nameOnce sync.Once // guards calling initNameCache, which sets following fields
|
|
|
|
nameCache string // name of interface
|
|
|
|
nameErr error
|
2023-03-03 00:08:28 +01:00
|
|
|
|
|
|
|
readOpMu sync.Mutex // readOpMu guards readBuff
|
|
|
|
readBuff [virtioNetHdrLen + 65535]byte // if vnetHdr every read() is prefixed by virtioNetHdr
|
|
|
|
|
2023-11-01 03:53:35 +01:00
|
|
|
writeOpMu sync.Mutex // writeOpMu guards toWrite, tcpGROTable
|
|
|
|
toWrite []int
|
|
|
|
tcpGROTable *tcpGROTable
|
|
|
|
udpGROTable *udpGROTable
|
2017-08-17 00:25:39 +02:00
|
|
|
}
|
|
|
|
|
2019-03-01 00:05:57 +01:00
|
|
|
func (tun *NativeTun) File() *os.File {
|
2018-10-17 21:26:53 +02:00
|
|
|
return tun.tunFile
|
2017-11-14 18:26:28 +01:00
|
|
|
}
|
|
|
|
|
2019-03-01 00:05:57 +01:00
|
|
|
func (tun *NativeTun) routineHackListener() {
|
2018-05-21 03:31:44 +02:00
|
|
|
defer tun.hackListenerClosed.Unlock()
|
2017-11-29 21:12:09 +01:00
|
|
|
/* This is needed for the detection to work across network namespaces
|
2017-11-29 18:46:31 +01:00
|
|
|
* If you are reading this and know a better method, please get in touch.
|
|
|
|
*/
|
2021-03-11 17:23:11 +01:00
|
|
|
last := 0
|
|
|
|
const (
|
|
|
|
up = 1
|
|
|
|
down = 2
|
|
|
|
)
|
2017-11-29 18:46:31 +01:00
|
|
|
for {
|
2019-03-07 01:51:41 +01:00
|
|
|
sysconn, err := tun.tunFile.SyscallConn()
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
err2 := sysconn.Control(func(fd uintptr) {
|
|
|
|
_, err = unix.Write(int(fd), nil)
|
|
|
|
})
|
|
|
|
if err2 != nil {
|
|
|
|
return
|
|
|
|
}
|
2017-11-29 18:46:31 +01:00
|
|
|
switch err {
|
|
|
|
case unix.EINVAL:
|
2021-03-11 17:23:11 +01:00
|
|
|
if last != up {
|
|
|
|
// If the tunnel is up, it reports that write() is
|
|
|
|
// allowed but we provided invalid data.
|
|
|
|
tun.events <- EventUp
|
|
|
|
last = up
|
|
|
|
}
|
2017-11-29 18:46:31 +01:00
|
|
|
case unix.EIO:
|
2021-03-11 17:23:11 +01:00
|
|
|
if last != down {
|
|
|
|
// If the tunnel is down, it reports that no I/O
|
|
|
|
// is possible, without checking our provided data.
|
|
|
|
tun.events <- EventDown
|
|
|
|
last = down
|
|
|
|
}
|
2017-11-29 18:46:31 +01:00
|
|
|
default:
|
2018-05-14 02:14:33 +02:00
|
|
|
return
|
|
|
|
}
|
|
|
|
select {
|
2018-05-20 04:03:11 +02:00
|
|
|
case <-time.After(time.Second):
|
2019-11-06 09:28:02 +01:00
|
|
|
// nothing
|
2018-05-14 03:43:56 +02:00
|
|
|
case <-tun.statusListenersShutdown:
|
2018-05-14 02:14:33 +02:00
|
|
|
return
|
2017-11-29 18:46:31 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-05-14 02:14:33 +02:00
|
|
|
func createNetlinkSocket() (int, error) {
|
2022-07-02 06:28:52 +02:00
|
|
|
sock, err := unix.Socket(unix.AF_NETLINK, unix.SOCK_RAW|unix.SOCK_CLOEXEC, unix.NETLINK_ROUTE)
|
2018-02-13 16:43:07 +01:00
|
|
|
if err != nil {
|
2018-05-14 02:14:33 +02:00
|
|
|
return -1, err
|
2017-08-17 00:25:39 +02:00
|
|
|
}
|
2018-04-27 02:23:48 +02:00
|
|
|
saddr := &unix.SockaddrNetlink{
|
|
|
|
Family: unix.AF_NETLINK,
|
2020-03-04 17:21:54 +01:00
|
|
|
Groups: unix.RTMGRP_LINK | unix.RTMGRP_IPV4_IFADDR | unix.RTMGRP_IPV6_IFADDR,
|
2018-04-27 02:23:48 +02:00
|
|
|
}
|
|
|
|
err = unix.Bind(sock, saddr)
|
|
|
|
if err != nil {
|
2018-05-14 02:14:33 +02:00
|
|
|
return -1, err
|
2018-04-27 02:23:48 +02:00
|
|
|
}
|
2018-05-14 02:14:33 +02:00
|
|
|
return sock, nil
|
|
|
|
}
|
2018-04-27 02:23:48 +02:00
|
|
|
|
2019-03-01 00:05:57 +01:00
|
|
|
func (tun *NativeTun) routineNetlinkListener() {
|
2018-05-20 06:38:39 +02:00
|
|
|
defer func() {
|
|
|
|
unix.Close(tun.netlinkSock)
|
2018-05-21 03:31:44 +02:00
|
|
|
tun.hackListenerClosed.Lock()
|
2018-05-20 06:38:39 +02:00
|
|
|
close(tun.events)
|
2021-02-09 20:18:21 +01:00
|
|
|
tun.netlinkCancel.Close()
|
2018-05-20 06:38:39 +02:00
|
|
|
}()
|
2018-05-14 14:08:03 +02:00
|
|
|
|
2017-08-17 00:25:39 +02:00
|
|
|
for msg := make([]byte, 1<<16); ; {
|
2018-05-14 14:08:03 +02:00
|
|
|
var err error
|
|
|
|
var msgn int
|
|
|
|
for {
|
|
|
|
msgn, _, _, _, err = unix.Recvmsg(tun.netlinkSock, msg[:], nil, 0)
|
2018-05-24 15:29:16 +02:00
|
|
|
if err == nil || !rwcancel.RetryAfterError(err) {
|
2018-05-14 14:08:03 +02:00
|
|
|
break
|
|
|
|
}
|
|
|
|
if !tun.netlinkCancel.ReadyRead() {
|
2021-01-27 15:56:49 +01:00
|
|
|
tun.errors <- fmt.Errorf("netlink socket closed: %w", err)
|
2018-05-14 14:08:03 +02:00
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
2017-08-17 00:25:39 +02:00
|
|
|
if err != nil {
|
2021-01-27 15:56:49 +01:00
|
|
|
tun.errors <- fmt.Errorf("failed to receive netlink message: %w", err)
|
2017-08-17 00:25:39 +02:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2018-05-14 03:43:56 +02:00
|
|
|
select {
|
|
|
|
case <-tun.statusListenersShutdown:
|
|
|
|
return
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
|
2019-11-06 09:28:02 +01:00
|
|
|
wasEverUp := false
|
2017-08-17 00:25:39 +02:00
|
|
|
for remain := msg[:msgn]; len(remain) >= unix.SizeofNlMsghdr; {
|
|
|
|
|
|
|
|
hdr := *(*unix.NlMsghdr)(unsafe.Pointer(&remain[0]))
|
|
|
|
|
|
|
|
if int(hdr.Len) > len(remain) {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
switch hdr.Type {
|
|
|
|
case unix.NLMSG_DONE:
|
|
|
|
remain = []byte{}
|
|
|
|
|
|
|
|
case unix.RTM_NEWLINK:
|
|
|
|
info := *(*unix.IfInfomsg)(unsafe.Pointer(&remain[unix.SizeofNlMsghdr]))
|
2017-08-22 14:57:32 +02:00
|
|
|
remain = remain[hdr.Len:]
|
2017-08-17 00:25:39 +02:00
|
|
|
|
2017-08-17 12:58:18 +02:00
|
|
|
if info.Index != tun.index {
|
|
|
|
// not our interface
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2017-08-17 00:25:39 +02:00
|
|
|
if info.Flags&unix.IFF_RUNNING != 0 {
|
2019-06-10 23:33:40 +02:00
|
|
|
tun.events <- EventUp
|
2019-11-06 09:28:02 +01:00
|
|
|
wasEverUp = true
|
2017-08-17 00:25:39 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if info.Flags&unix.IFF_RUNNING == 0 {
|
2019-11-06 09:28:02 +01:00
|
|
|
// Don't emit EventDown before we've ever emitted EventUp.
|
|
|
|
// This avoids a startup race with HackListener, which
|
|
|
|
// might detect Up before we have finished reporting Down.
|
|
|
|
if wasEverUp {
|
|
|
|
tun.events <- EventDown
|
|
|
|
}
|
2017-08-17 00:25:39 +02:00
|
|
|
}
|
|
|
|
|
2019-06-10 23:33:40 +02:00
|
|
|
tun.events <- EventMTUUpdate
|
2017-08-17 00:25:39 +02:00
|
|
|
|
|
|
|
default:
|
|
|
|
remain = remain[hdr.Len:]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2017-06-04 21:48:15 +02:00
|
|
|
}
|
|
|
|
|
2019-02-27 01:06:43 +01:00
|
|
|
func getIFIndex(name string) (int32, error) {
|
|
|
|
fd, err := unix.Socket(
|
2017-08-17 00:25:39 +02:00
|
|
|
unix.AF_INET,
|
2022-07-02 06:28:52 +02:00
|
|
|
unix.SOCK_DGRAM|unix.SOCK_CLOEXEC,
|
2017-08-17 00:25:39 +02:00
|
|
|
0,
|
|
|
|
)
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
|
|
|
defer unix.Close(fd)
|
|
|
|
|
2018-02-13 16:43:07 +01:00
|
|
|
var ifr [ifReqSize]byte
|
2017-08-17 00:25:39 +02:00
|
|
|
copy(ifr[:], name)
|
|
|
|
_, _, errno := unix.Syscall(
|
|
|
|
unix.SYS_IOCTL,
|
|
|
|
uintptr(fd),
|
|
|
|
uintptr(unix.SIOCGIFINDEX),
|
|
|
|
uintptr(unsafe.Pointer(&ifr[0])),
|
|
|
|
)
|
|
|
|
|
|
|
|
if errno != 0 {
|
|
|
|
return 0, errno
|
|
|
|
}
|
|
|
|
|
2018-05-23 02:10:54 +02:00
|
|
|
return *(*int32)(unsafe.Pointer(&ifr[unix.IFNAMSIZ])), nil
|
2017-08-17 00:25:39 +02:00
|
|
|
}
|
|
|
|
|
2019-03-01 00:05:57 +01:00
|
|
|
func (tun *NativeTun) setMTU(n int) error {
|
2020-02-28 18:10:16 +01:00
|
|
|
name, err := tun.Name()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2017-07-15 16:27:59 +02:00
|
|
|
// open datagram socket
|
2017-07-20 15:06:24 +02:00
|
|
|
fd, err := unix.Socket(
|
|
|
|
unix.AF_INET,
|
2022-07-02 06:28:52 +02:00
|
|
|
unix.SOCK_DGRAM|unix.SOCK_CLOEXEC,
|
2017-07-15 16:27:59 +02:00
|
|
|
0,
|
|
|
|
)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2017-07-20 15:06:24 +02:00
|
|
|
defer unix.Close(fd)
|
2017-07-18 14:15:29 +02:00
|
|
|
|
2017-07-15 16:27:59 +02:00
|
|
|
// do ioctl call
|
2018-02-13 16:43:07 +01:00
|
|
|
var ifr [ifReqSize]byte
|
2020-02-28 18:10:16 +01:00
|
|
|
copy(ifr[:], name)
|
2018-05-23 02:10:54 +02:00
|
|
|
*(*uint32)(unsafe.Pointer(&ifr[unix.IFNAMSIZ])) = uint32(n)
|
2017-07-20 15:06:24 +02:00
|
|
|
_, _, errno := unix.Syscall(
|
|
|
|
unix.SYS_IOCTL,
|
2017-07-15 16:27:59 +02:00
|
|
|
uintptr(fd),
|
2017-07-20 15:06:24 +02:00
|
|
|
uintptr(unix.SIOCSIFMTU),
|
2017-07-15 16:27:59 +02:00
|
|
|
uintptr(unsafe.Pointer(&ifr[0])),
|
|
|
|
)
|
|
|
|
|
|
|
|
if errno != 0 {
|
2021-01-27 15:56:49 +01:00
|
|
|
return fmt.Errorf("failed to set MTU of TUN device: %w", errno)
|
2017-07-15 16:27:59 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2019-03-01 00:05:57 +01:00
|
|
|
func (tun *NativeTun) MTU() (int, error) {
|
2020-02-28 18:10:16 +01:00
|
|
|
name, err := tun.Name()
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
2017-07-11 22:48:58 +02:00
|
|
|
// open datagram socket
|
2017-07-20 15:06:24 +02:00
|
|
|
fd, err := unix.Socket(
|
|
|
|
unix.AF_INET,
|
2022-07-02 06:28:52 +02:00
|
|
|
unix.SOCK_DGRAM|unix.SOCK_CLOEXEC,
|
2017-07-11 22:48:58 +02:00
|
|
|
0,
|
|
|
|
)
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
2017-07-20 15:06:24 +02:00
|
|
|
defer unix.Close(fd)
|
2017-07-18 14:15:29 +02:00
|
|
|
|
2017-07-11 22:48:58 +02:00
|
|
|
// do ioctl call
|
|
|
|
|
2018-02-13 16:43:07 +01:00
|
|
|
var ifr [ifReqSize]byte
|
2020-02-28 18:10:16 +01:00
|
|
|
copy(ifr[:], name)
|
2017-07-20 15:06:24 +02:00
|
|
|
_, _, errno := unix.Syscall(
|
|
|
|
unix.SYS_IOCTL,
|
2017-07-11 22:48:58 +02:00
|
|
|
uintptr(fd),
|
2017-07-20 15:06:24 +02:00
|
|
|
uintptr(unix.SIOCGIFMTU),
|
2017-07-11 22:48:58 +02:00
|
|
|
uintptr(unsafe.Pointer(&ifr[0])),
|
|
|
|
)
|
|
|
|
if errno != 0 {
|
2021-01-27 15:56:49 +01:00
|
|
|
return 0, fmt.Errorf("failed to get MTU of TUN device: %w", errno)
|
2017-07-11 22:48:58 +02:00
|
|
|
}
|
|
|
|
|
2018-05-23 02:10:54 +02:00
|
|
|
return int(*(*int32)(unsafe.Pointer(&ifr[unix.IFNAMSIZ]))), nil
|
2017-06-04 21:48:15 +02:00
|
|
|
}
|
|
|
|
|
2019-03-01 00:05:57 +01:00
|
|
|
func (tun *NativeTun) Name() (string, error) {
|
2020-02-28 18:10:16 +01:00
|
|
|
tun.nameOnce.Do(tun.initNameCache)
|
|
|
|
return tun.nameCache, tun.nameErr
|
|
|
|
}
|
|
|
|
|
|
|
|
func (tun *NativeTun) initNameCache() {
|
|
|
|
tun.nameCache, tun.nameErr = tun.nameSlow()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (tun *NativeTun) nameSlow() (string, error) {
|
2019-03-07 01:51:41 +01:00
|
|
|
sysconn, err := tun.tunFile.SyscallConn()
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
2018-04-18 16:39:14 +02:00
|
|
|
var ifr [ifReqSize]byte
|
2019-03-07 01:51:41 +01:00
|
|
|
var errno syscall.Errno
|
|
|
|
err = sysconn.Control(func(fd uintptr) {
|
|
|
|
_, _, errno = unix.Syscall(
|
|
|
|
unix.SYS_IOCTL,
|
|
|
|
fd,
|
|
|
|
uintptr(unix.TUNGETIFF),
|
|
|
|
uintptr(unsafe.Pointer(&ifr[0])),
|
|
|
|
)
|
|
|
|
})
|
|
|
|
if err != nil {
|
2021-01-27 15:56:49 +01:00
|
|
|
return "", fmt.Errorf("failed to get name of TUN device: %w", err)
|
2019-03-07 01:51:41 +01:00
|
|
|
}
|
2018-04-18 16:39:14 +02:00
|
|
|
if errno != 0 {
|
2021-01-27 15:56:49 +01:00
|
|
|
return "", fmt.Errorf("failed to get name of TUN device: %w", errno)
|
2018-04-18 16:39:14 +02:00
|
|
|
}
|
2022-06-01 11:33:54 +02:00
|
|
|
return unix.ByteSliceToString(ifr[:]), nil
|
2018-04-18 16:39:14 +02:00
|
|
|
}
|
|
|
|
|
2023-03-13 17:55:05 +01:00
|
|
|
func (tun *NativeTun) Write(bufs [][]byte, offset int) (int, error) {
|
2023-03-03 00:08:28 +01:00
|
|
|
tun.writeOpMu.Lock()
|
|
|
|
defer func() {
|
2023-11-01 03:53:35 +01:00
|
|
|
tun.tcpGROTable.reset()
|
|
|
|
tun.udpGROTable.reset()
|
2023-03-03 00:08:28 +01:00
|
|
|
tun.writeOpMu.Unlock()
|
|
|
|
}()
|
|
|
|
var (
|
2023-03-16 21:27:51 +01:00
|
|
|
errs error
|
2023-03-03 00:08:28 +01:00
|
|
|
total int
|
|
|
|
)
|
|
|
|
tun.toWrite = tun.toWrite[:0]
|
|
|
|
if tun.vnetHdr {
|
2023-11-01 03:53:35 +01:00
|
|
|
err := handleGRO(bufs, offset, tun.tcpGROTable, tun.udpGROTable, tun.udpGSO, &tun.toWrite)
|
2023-03-03 00:08:28 +01:00
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
offset -= virtioNetHdrLen
|
2018-02-28 12:40:56 +01:00
|
|
|
} else {
|
2023-03-13 17:55:05 +01:00
|
|
|
for i := range bufs {
|
2023-03-03 00:08:28 +01:00
|
|
|
tun.toWrite = append(tun.toWrite, i)
|
|
|
|
}
|
|
|
|
}
|
2023-03-13 17:55:05 +01:00
|
|
|
for _, bufsI := range tun.toWrite {
|
|
|
|
n, err := tun.tunFile.Write(bufs[bufsI][offset:])
|
2023-03-03 00:08:28 +01:00
|
|
|
if errors.Is(err, syscall.EBADFD) {
|
|
|
|
return total, os.ErrClosed
|
|
|
|
}
|
|
|
|
if err != nil {
|
2023-03-16 21:27:51 +01:00
|
|
|
errs = errors.Join(errs, err)
|
2018-02-28 12:40:56 +01:00
|
|
|
} else {
|
2023-03-03 00:08:28 +01:00
|
|
|
total += n
|
|
|
|
}
|
|
|
|
}
|
2023-03-16 21:27:51 +01:00
|
|
|
return total, errs
|
2023-03-03 00:08:28 +01:00
|
|
|
}
|
|
|
|
|
2023-03-13 17:55:05 +01:00
|
|
|
// handleVirtioRead splits in into bufs, leaving offset bytes at the front of
|
|
|
|
// each buffer. It mutates sizes to reflect the size of each element of bufs,
|
2023-03-03 00:08:28 +01:00
|
|
|
// and returns the number of packets read.
|
2023-03-13 17:55:05 +01:00
|
|
|
func handleVirtioRead(in []byte, bufs [][]byte, sizes []int, offset int) (int, error) {
|
2023-03-03 00:08:28 +01:00
|
|
|
var hdr virtioNetHdr
|
|
|
|
err := hdr.decode(in)
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
in = in[virtioNetHdrLen:]
|
|
|
|
if hdr.gsoType == unix.VIRTIO_NET_HDR_GSO_NONE {
|
|
|
|
if hdr.flags&unix.VIRTIO_NET_HDR_F_NEEDS_CSUM != 0 {
|
|
|
|
// This means CHECKSUM_PARTIAL in skb context. We are responsible
|
|
|
|
// for computing the checksum starting at hdr.csumStart and placing
|
|
|
|
// at hdr.csumOffset.
|
|
|
|
err = gsoNoneChecksum(in, hdr.csumStart, hdr.csumOffset)
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
}
|
2023-03-13 17:55:05 +01:00
|
|
|
if len(in) > len(bufs[0][offset:]) {
|
|
|
|
return 0, fmt.Errorf("read len %d overflows bufs element len %d", len(in), len(bufs[0][offset:]))
|
2018-02-28 12:40:56 +01:00
|
|
|
}
|
2023-03-13 17:55:05 +01:00
|
|
|
n := copy(bufs[0][offset:], in)
|
2023-03-03 00:08:28 +01:00
|
|
|
sizes[0] = n
|
|
|
|
return 1, nil
|
|
|
|
}
|
2023-11-01 03:53:35 +01:00
|
|
|
if hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV4 && hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV6 && hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_UDP_L4 {
|
2023-03-03 00:08:28 +01:00
|
|
|
return 0, fmt.Errorf("unsupported virtio GSO type: %d", hdr.gsoType)
|
|
|
|
}
|
|
|
|
|
|
|
|
ipVersion := in[0] >> 4
|
|
|
|
switch ipVersion {
|
|
|
|
case 4:
|
2023-11-01 03:53:35 +01:00
|
|
|
if hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV4 && hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_UDP_L4 {
|
2023-03-03 00:08:28 +01:00
|
|
|
return 0, fmt.Errorf("ip header version: %d, GSO type: %d", ipVersion, hdr.gsoType)
|
|
|
|
}
|
|
|
|
case 6:
|
2023-11-01 03:53:35 +01:00
|
|
|
if hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV6 && hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_UDP_L4 {
|
2023-03-03 00:08:28 +01:00
|
|
|
return 0, fmt.Errorf("ip header version: %d, GSO type: %d", ipVersion, hdr.gsoType)
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
return 0, fmt.Errorf("invalid ip header version: %d", ipVersion)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Don't trust hdr.hdrLen from the kernel as it can be equal to the length
|
|
|
|
// of the entire first packet when the kernel is handling it as part of a
|
2023-11-01 03:53:35 +01:00
|
|
|
// FORWARD path. Instead, parse the transport header length and add it onto
|
2023-03-03 00:08:28 +01:00
|
|
|
// csumStart, which is synonymous for IP header length.
|
2023-11-01 03:53:35 +01:00
|
|
|
if hdr.gsoType == unix.VIRTIO_NET_HDR_GSO_UDP_L4 {
|
|
|
|
hdr.hdrLen = hdr.csumStart + 8
|
|
|
|
} else {
|
|
|
|
if len(in) <= int(hdr.csumStart+12) {
|
|
|
|
return 0, errors.New("packet is too short")
|
|
|
|
}
|
|
|
|
|
|
|
|
tcpHLen := uint16(in[hdr.csumStart+12] >> 4 * 4)
|
|
|
|
if tcpHLen < 20 || tcpHLen > 60 {
|
|
|
|
// A TCP header must be between 20 and 60 bytes in length.
|
|
|
|
return 0, fmt.Errorf("tcp header len is invalid: %d", tcpHLen)
|
|
|
|
}
|
|
|
|
hdr.hdrLen = hdr.csumStart + tcpHLen
|
2023-03-03 00:08:28 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if len(in) < int(hdr.hdrLen) {
|
|
|
|
return 0, fmt.Errorf("length of packet (%d) < virtioNetHdr.hdrLen (%d)", len(in), hdr.hdrLen)
|
2017-12-04 21:39:06 +01:00
|
|
|
}
|
|
|
|
|
2023-03-03 00:08:28 +01:00
|
|
|
if hdr.hdrLen < hdr.csumStart {
|
|
|
|
return 0, fmt.Errorf("virtioNetHdr.hdrLen (%d) < virtioNetHdr.csumStart (%d)", hdr.hdrLen, hdr.csumStart)
|
2021-05-20 18:26:01 +02:00
|
|
|
}
|
2023-03-03 00:08:28 +01:00
|
|
|
cSumAt := int(hdr.csumStart + hdr.csumOffset)
|
|
|
|
if cSumAt+1 >= len(in) {
|
|
|
|
return 0, fmt.Errorf("end of checksum offset (%d) exceeds packet length (%d)", cSumAt+1, len(in))
|
|
|
|
}
|
|
|
|
|
2023-11-01 03:53:35 +01:00
|
|
|
return gsoSplit(in, hdr, bufs, sizes, offset, ipVersion == 6)
|
2017-06-04 21:48:15 +02:00
|
|
|
}
|
|
|
|
|
2023-03-13 17:55:05 +01:00
|
|
|
func (tun *NativeTun) Read(bufs [][]byte, sizes []int, offset int) (int, error) {
|
2023-03-03 00:08:28 +01:00
|
|
|
tun.readOpMu.Lock()
|
|
|
|
defer tun.readOpMu.Unlock()
|
2017-08-17 00:25:39 +02:00
|
|
|
select {
|
2023-03-03 00:08:28 +01:00
|
|
|
case err := <-tun.errors:
|
|
|
|
return 0, err
|
2017-08-17 00:25:39 +02:00
|
|
|
default:
|
2023-03-13 17:55:05 +01:00
|
|
|
readInto := bufs[0][offset:]
|
2023-03-03 00:08:28 +01:00
|
|
|
if tun.vnetHdr {
|
|
|
|
readInto = tun.readBuff[:]
|
|
|
|
}
|
|
|
|
n, err := tun.tunFile.Read(readInto)
|
|
|
|
if errors.Is(err, syscall.EBADFD) {
|
|
|
|
err = os.ErrClosed
|
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
if tun.vnetHdr {
|
2023-03-13 17:55:05 +01:00
|
|
|
return handleVirtioRead(readInto[:n], bufs, sizes, offset)
|
2018-02-28 12:40:56 +01:00
|
|
|
} else {
|
2023-03-03 00:08:28 +01:00
|
|
|
sizes[0] = n
|
|
|
|
return 1, nil
|
2017-12-04 21:39:06 +01:00
|
|
|
}
|
2017-08-17 00:25:39 +02:00
|
|
|
}
|
2017-06-04 21:48:15 +02:00
|
|
|
}
|
|
|
|
|
2023-02-08 19:42:07 +01:00
|
|
|
func (tun *NativeTun) Events() <-chan Event {
|
2017-08-07 15:25:04 +02:00
|
|
|
return tun.events
|
|
|
|
}
|
|
|
|
|
2019-03-01 00:05:57 +01:00
|
|
|
func (tun *NativeTun) Close() error {
|
2021-02-18 23:53:22 +01:00
|
|
|
var err1, err2 error
|
|
|
|
tun.closeOnce.Do(func() {
|
|
|
|
if tun.statusListenersShutdown != nil {
|
|
|
|
close(tun.statusListenersShutdown)
|
|
|
|
if tun.netlinkCancel != nil {
|
|
|
|
err1 = tun.netlinkCancel.Cancel()
|
|
|
|
}
|
|
|
|
} else if tun.events != nil {
|
|
|
|
close(tun.events)
|
2018-05-21 14:16:46 +02:00
|
|
|
}
|
2021-02-18 23:53:22 +01:00
|
|
|
err2 = tun.tunFile.Close()
|
|
|
|
})
|
2018-05-14 02:14:33 +02:00
|
|
|
if err1 != nil {
|
|
|
|
return err1
|
|
|
|
}
|
2019-03-07 01:51:41 +01:00
|
|
|
return err2
|
2017-08-07 15:25:04 +02:00
|
|
|
}
|
|
|
|
|
2023-03-02 23:48:02 +01:00
|
|
|
func (tun *NativeTun) BatchSize() int {
|
2023-03-03 00:08:28 +01:00
|
|
|
return tun.batchSize
|
2023-03-02 23:48:02 +01:00
|
|
|
}
|
|
|
|
|
2023-03-03 00:08:28 +01:00
|
|
|
const (
|
|
|
|
// TODO: support TSO with ECN bits
|
2023-11-01 03:53:35 +01:00
|
|
|
tunTCPOffloads = unix.TUN_F_CSUM | unix.TUN_F_TSO4 | unix.TUN_F_TSO6
|
|
|
|
tunUDPOffloads = unix.TUN_F_USO4 | unix.TUN_F_USO6
|
2023-03-03 00:08:28 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
func (tun *NativeTun) initFromFlags(name string) error {
|
|
|
|
sc, err := tun.tunFile.SyscallConn()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if e := sc.Control(func(fd uintptr) {
|
|
|
|
var (
|
|
|
|
ifr *unix.Ifreq
|
|
|
|
)
|
|
|
|
ifr, err = unix.NewIfreq(name)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
err = unix.IoctlIfreq(int(fd), unix.TUNGETIFF, ifr)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
got := ifr.Uint16()
|
|
|
|
if got&unix.IFF_VNET_HDR != 0 {
|
2023-11-01 03:53:35 +01:00
|
|
|
// tunTCPOffloads were added in Linux v2.6. We require their support
|
|
|
|
// if IFF_VNET_HDR is set.
|
|
|
|
err = unix.IoctlSetInt(int(fd), unix.TUNSETOFFLOAD, tunTCPOffloads)
|
2023-03-03 00:08:28 +01:00
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
tun.vnetHdr = true
|
2023-03-04 15:25:46 +01:00
|
|
|
tun.batchSize = conn.IdealBatchSize
|
2023-11-01 03:53:35 +01:00
|
|
|
// tunUDPOffloads were added in Linux v6.2. We do not return an
|
|
|
|
// error if they are unsupported at runtime.
|
|
|
|
tun.udpGSO = unix.IoctlSetInt(int(fd), unix.TUNSETOFFLOAD, tunTCPOffloads|tunUDPOffloads) == nil
|
2023-03-03 00:08:28 +01:00
|
|
|
} else {
|
|
|
|
tun.batchSize = 1
|
|
|
|
}
|
|
|
|
}); e != nil {
|
|
|
|
return e
|
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// CreateTUN creates a Device with the provided name and MTU.
|
2019-06-10 23:33:40 +02:00
|
|
|
func CreateTUN(name string, mtu int) (Device, error) {
|
2022-07-02 06:28:52 +02:00
|
|
|
nfd, err := unix.Open(cloneDevicePath, unix.O_RDWR|unix.O_CLOEXEC, 0)
|
2019-02-27 04:10:01 +01:00
|
|
|
if err != nil {
|
2020-03-18 21:23:00 +01:00
|
|
|
if os.IsNotExist(err) {
|
|
|
|
return nil, fmt.Errorf("CreateTUN(%q) failed; %s does not exist", name, cloneDevicePath)
|
|
|
|
}
|
2019-02-27 04:10:01 +01:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2023-03-03 00:08:28 +01:00
|
|
|
ifr, err := unix.NewIfreq(name)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2017-06-04 21:48:15 +02:00
|
|
|
}
|
2023-03-03 00:08:28 +01:00
|
|
|
// IFF_VNET_HDR enables the "tun status hack" via routineHackListener()
|
|
|
|
// where a null write will return EINVAL indicating the TUN is up.
|
|
|
|
ifr.SetUint16(unix.IFF_TUN | unix.IFF_NO_PI | unix.IFF_VNET_HDR)
|
|
|
|
err = unix.IoctlIfreq(nfd, unix.TUNSETIFF, ifr)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2017-06-04 21:48:15 +02:00
|
|
|
}
|
2019-03-07 01:51:41 +01:00
|
|
|
|
2021-09-23 12:05:13 +02:00
|
|
|
err = unix.SetNonblock(nfd, true)
|
2019-03-07 01:51:41 +01:00
|
|
|
if err != nil {
|
2021-09-23 12:05:13 +02:00
|
|
|
unix.Close(nfd)
|
2019-03-07 01:51:41 +01:00
|
|
|
return nil, err
|
|
|
|
}
|
2017-06-04 21:48:15 +02:00
|
|
|
|
2021-09-23 12:05:13 +02:00
|
|
|
// Note that the above -- open,ioctl,nonblock -- must happen prior to handing it to netpoll as below this line.
|
|
|
|
|
|
|
|
fd := os.NewFile(uintptr(nfd), cloneDevicePath)
|
2019-02-27 04:10:01 +01:00
|
|
|
return CreateTUNFromFile(fd, mtu)
|
2018-05-05 03:36:09 +02:00
|
|
|
}
|
2017-07-11 22:48:58 +02:00
|
|
|
|
2023-03-03 00:08:28 +01:00
|
|
|
// CreateTUNFromFile creates a Device from an os.File with the provided MTU.
|
2019-06-10 23:33:40 +02:00
|
|
|
func CreateTUNFromFile(file *os.File, mtu int) (Device, error) {
|
2019-03-01 00:05:57 +01:00
|
|
|
tun := &NativeTun{
|
2018-10-17 21:26:53 +02:00
|
|
|
tunFile: file,
|
2019-06-10 23:33:40 +02:00
|
|
|
events: make(chan Event, 5),
|
2018-05-14 03:43:56 +02:00
|
|
|
errors: make(chan error, 5),
|
2018-05-14 04:19:25 +02:00
|
|
|
statusListenersShutdown: make(chan struct{}),
|
2023-11-01 03:53:35 +01:00
|
|
|
tcpGROTable: newTCPGROTable(),
|
|
|
|
udpGROTable: newUDPGROTable(),
|
2023-03-04 15:25:46 +01:00
|
|
|
toWrite: make([]int, 0, conn.IdealBatchSize),
|
2017-07-15 16:27:59 +02:00
|
|
|
}
|
2018-05-05 03:36:09 +02:00
|
|
|
|
2020-02-28 18:10:16 +01:00
|
|
|
name, err := tun.Name()
|
2018-05-05 03:36:09 +02:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2017-07-15 16:27:59 +02:00
|
|
|
|
2023-03-03 00:08:28 +01:00
|
|
|
err = tun.initFromFlags(name)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2017-08-17 00:25:39 +02:00
|
|
|
|
2023-03-03 00:08:28 +01:00
|
|
|
// start event listener
|
2020-02-28 18:10:16 +01:00
|
|
|
tun.index, err = getIFIndex(name)
|
2017-08-17 00:25:39 +02:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2018-05-14 12:27:29 +02:00
|
|
|
tun.netlinkSock, err = createNetlinkSocket()
|
2018-05-14 02:14:33 +02:00
|
|
|
if err != nil {
|
2018-05-14 14:08:03 +02:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
tun.netlinkCancel, err = rwcancel.NewRWCancel(tun.netlinkSock)
|
|
|
|
if err != nil {
|
2019-02-27 02:20:17 +01:00
|
|
|
unix.Close(tun.netlinkSock)
|
2018-05-14 02:14:33 +02:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2018-05-21 03:31:44 +02:00
|
|
|
tun.hackListenerClosed.Lock()
|
2018-05-23 02:10:54 +02:00
|
|
|
go tun.routineNetlinkListener()
|
|
|
|
go tun.routineHackListener() // cross namespace
|
2018-05-14 12:27:29 +02:00
|
|
|
|
2018-05-23 02:10:54 +02:00
|
|
|
err = tun.setMTU(mtu)
|
2018-05-14 02:14:33 +02:00
|
|
|
if err != nil {
|
2019-02-27 02:20:17 +01:00
|
|
|
unix.Close(tun.netlinkSock)
|
2018-05-14 02:14:33 +02:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2018-05-14 12:27:29 +02:00
|
|
|
return tun, nil
|
2017-06-04 21:48:15 +02:00
|
|
|
}
|
2019-03-03 05:20:13 +01:00
|
|
|
|
2023-03-03 00:08:28 +01:00
|
|
|
// CreateUnmonitoredTUNFromFD creates a Device from the provided file
|
|
|
|
// descriptor.
|
2019-06-10 23:33:40 +02:00
|
|
|
func CreateUnmonitoredTUNFromFD(fd int) (Device, string, error) {
|
2019-03-07 01:51:41 +01:00
|
|
|
err := unix.SetNonblock(fd, true)
|
|
|
|
if err != nil {
|
|
|
|
return nil, "", err
|
|
|
|
}
|
|
|
|
file := os.NewFile(uintptr(fd), "/dev/tun")
|
2019-03-03 05:20:13 +01:00
|
|
|
tun := &NativeTun{
|
2023-11-01 03:53:35 +01:00
|
|
|
tunFile: file,
|
|
|
|
events: make(chan Event, 5),
|
|
|
|
errors: make(chan error, 5),
|
|
|
|
tcpGROTable: newTCPGROTable(),
|
|
|
|
udpGROTable: newUDPGROTable(),
|
|
|
|
toWrite: make([]int, 0, conn.IdealBatchSize),
|
2019-03-03 05:20:13 +01:00
|
|
|
}
|
|
|
|
name, err := tun.Name()
|
|
|
|
if err != nil {
|
|
|
|
return nil, "", err
|
|
|
|
}
|
2023-03-03 00:08:28 +01:00
|
|
|
err = tun.initFromFlags(name)
|
|
|
|
if err != nil {
|
|
|
|
return nil, "", err
|
|
|
|
}
|
|
|
|
return tun, name, err
|
2019-03-03 05:20:13 +01:00
|
|
|
}
|