tun: implement UDP GSO/GRO for Linux
Implement UDP GSO and GRO for the Linux tun.Device, which is made
possible by virtio extensions in the kernel's TUN driver starting in
v6.2.
secnetperf, a QUIC benchmark utility from microsoft/msquic@8e1eb1a, is
used to demonstrate the effect of this commit between two Linux
computers with i5-12400 CPUs. There is roughly 13us of round trip
latency between them. secnetperf was invoked with the following command
line options:
-stats:1 -exec:maxtput -test:tput -download:10000 -timed:1 -encrypt:0
The first result is from commit 2e0774f
without UDP GSO/GRO on the TUN.
[conn][0x55739a144980] STATS: EcnCapable=0 RTT=3973 us
SendTotalPackets=55859 SendSuspectedLostPackets=61
SendSpuriousLostPackets=59 SendCongestionCount=27
SendEcnCongestionCount=0 RecvTotalPackets=2779122
RecvReorderedPackets=0 RecvDroppedPackets=0
RecvDuplicatePackets=0 RecvDecryptionFailures=0
Result: 3654977571 bytes @ 2922821 kbps (10003.972 ms).
The second result is with UDP GSO/GRO on the TUN.
[conn][0x56493dfd09a0] STATS: EcnCapable=0 RTT=1216 us
SendTotalPackets=165033 SendSuspectedLostPackets=64
SendSpuriousLostPackets=61 SendCongestionCount=53
SendEcnCongestionCount=0 RecvTotalPackets=11845268
RecvReorderedPackets=25267 RecvDroppedPackets=0
RecvDuplicatePackets=0 RecvDecryptionFailures=0
Result: 15574671184 bytes @ 12458214 kbps (10001.222 ms).
Signed-off-by: Jordan Whited <jordan@tailscale.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
This commit is contained in:
parent
1cf89f5339
commit
d0bc03c707
@ -57,22 +57,23 @@ const (
|
|||||||
virtioNetHdrLen = int(unsafe.Sizeof(virtioNetHdr{}))
|
virtioNetHdrLen = int(unsafe.Sizeof(virtioNetHdr{}))
|
||||||
)
|
)
|
||||||
|
|
||||||
// flowKey represents the key for a flow.
|
// tcpFlowKey represents the key for a TCP flow.
|
||||||
type flowKey struct {
|
type tcpFlowKey struct {
|
||||||
srcAddr, dstAddr [16]byte
|
srcAddr, dstAddr [16]byte
|
||||||
srcPort, dstPort uint16
|
srcPort, dstPort uint16
|
||||||
rxAck uint32 // varying ack values should not be coalesced. Treat them as separate flows.
|
rxAck uint32 // varying ack values should not be coalesced. Treat them as separate flows.
|
||||||
|
isV6 bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// tcpGROTable holds flow and coalescing information for the purposes of GRO.
|
// tcpGROTable holds flow and coalescing information for the purposes of TCP GRO.
|
||||||
type tcpGROTable struct {
|
type tcpGROTable struct {
|
||||||
itemsByFlow map[flowKey][]tcpGROItem
|
itemsByFlow map[tcpFlowKey][]tcpGROItem
|
||||||
itemsPool [][]tcpGROItem
|
itemsPool [][]tcpGROItem
|
||||||
}
|
}
|
||||||
|
|
||||||
func newTCPGROTable() *tcpGROTable {
|
func newTCPGROTable() *tcpGROTable {
|
||||||
t := &tcpGROTable{
|
t := &tcpGROTable{
|
||||||
itemsByFlow: make(map[flowKey][]tcpGROItem, conn.IdealBatchSize),
|
itemsByFlow: make(map[tcpFlowKey][]tcpGROItem, conn.IdealBatchSize),
|
||||||
itemsPool: make([][]tcpGROItem, conn.IdealBatchSize),
|
itemsPool: make([][]tcpGROItem, conn.IdealBatchSize),
|
||||||
}
|
}
|
||||||
for i := range t.itemsPool {
|
for i := range t.itemsPool {
|
||||||
@ -81,14 +82,15 @@ func newTCPGROTable() *tcpGROTable {
|
|||||||
return t
|
return t
|
||||||
}
|
}
|
||||||
|
|
||||||
func newFlowKey(pkt []byte, srcAddr, dstAddr, tcphOffset int) flowKey {
|
func newTCPFlowKey(pkt []byte, srcAddrOffset, dstAddrOffset, tcphOffset int) tcpFlowKey {
|
||||||
key := flowKey{}
|
key := tcpFlowKey{}
|
||||||
addrSize := dstAddr - srcAddr
|
addrSize := dstAddrOffset - srcAddrOffset
|
||||||
copy(key.srcAddr[:], pkt[srcAddr:dstAddr])
|
copy(key.srcAddr[:], pkt[srcAddrOffset:dstAddrOffset])
|
||||||
copy(key.dstAddr[:], pkt[dstAddr:dstAddr+addrSize])
|
copy(key.dstAddr[:], pkt[dstAddrOffset:dstAddrOffset+addrSize])
|
||||||
key.srcPort = binary.BigEndian.Uint16(pkt[tcphOffset:])
|
key.srcPort = binary.BigEndian.Uint16(pkt[tcphOffset:])
|
||||||
key.dstPort = binary.BigEndian.Uint16(pkt[tcphOffset+2:])
|
key.dstPort = binary.BigEndian.Uint16(pkt[tcphOffset+2:])
|
||||||
key.rxAck = binary.BigEndian.Uint32(pkt[tcphOffset+8:])
|
key.rxAck = binary.BigEndian.Uint32(pkt[tcphOffset+8:])
|
||||||
|
key.isV6 = addrSize == 16
|
||||||
return key
|
return key
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -96,7 +98,7 @@ func newFlowKey(pkt []byte, srcAddr, dstAddr, tcphOffset int) flowKey {
|
|||||||
// returning the packets found for the flow, or inserting a new one if none
|
// returning the packets found for the flow, or inserting a new one if none
|
||||||
// is found.
|
// is found.
|
||||||
func (t *tcpGROTable) lookupOrInsert(pkt []byte, srcAddrOffset, dstAddrOffset, tcphOffset, tcphLen, bufsIndex int) ([]tcpGROItem, bool) {
|
func (t *tcpGROTable) lookupOrInsert(pkt []byte, srcAddrOffset, dstAddrOffset, tcphOffset, tcphLen, bufsIndex int) ([]tcpGROItem, bool) {
|
||||||
key := newFlowKey(pkt, srcAddrOffset, dstAddrOffset, tcphOffset)
|
key := newTCPFlowKey(pkt, srcAddrOffset, dstAddrOffset, tcphOffset)
|
||||||
items, ok := t.itemsByFlow[key]
|
items, ok := t.itemsByFlow[key]
|
||||||
if ok {
|
if ok {
|
||||||
return items, ok
|
return items, ok
|
||||||
@ -108,7 +110,7 @@ func (t *tcpGROTable) lookupOrInsert(pkt []byte, srcAddrOffset, dstAddrOffset, t
|
|||||||
|
|
||||||
// insert an item in the table for the provided packet and packet metadata.
|
// insert an item in the table for the provided packet and packet metadata.
|
||||||
func (t *tcpGROTable) insert(pkt []byte, srcAddrOffset, dstAddrOffset, tcphOffset, tcphLen, bufsIndex int) {
|
func (t *tcpGROTable) insert(pkt []byte, srcAddrOffset, dstAddrOffset, tcphOffset, tcphLen, bufsIndex int) {
|
||||||
key := newFlowKey(pkt, srcAddrOffset, dstAddrOffset, tcphOffset)
|
key := newTCPFlowKey(pkt, srcAddrOffset, dstAddrOffset, tcphOffset)
|
||||||
item := tcpGROItem{
|
item := tcpGROItem{
|
||||||
key: key,
|
key: key,
|
||||||
bufsIndex: uint16(bufsIndex),
|
bufsIndex: uint16(bufsIndex),
|
||||||
@ -131,7 +133,7 @@ func (t *tcpGROTable) updateAt(item tcpGROItem, i int) {
|
|||||||
items[i] = item
|
items[i] = item
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tcpGROTable) deleteAt(key flowKey, i int) {
|
func (t *tcpGROTable) deleteAt(key tcpFlowKey, i int) {
|
||||||
items, _ := t.itemsByFlow[key]
|
items, _ := t.itemsByFlow[key]
|
||||||
items = append(items[:i], items[i+1:]...)
|
items = append(items[:i], items[i+1:]...)
|
||||||
t.itemsByFlow[key] = items
|
t.itemsByFlow[key] = items
|
||||||
@ -140,7 +142,7 @@ func (t *tcpGROTable) deleteAt(key flowKey, i int) {
|
|||||||
// tcpGROItem represents bookkeeping data for a TCP packet during the lifetime
|
// tcpGROItem represents bookkeeping data for a TCP packet during the lifetime
|
||||||
// of a GRO evaluation across a vector of packets.
|
// of a GRO evaluation across a vector of packets.
|
||||||
type tcpGROItem struct {
|
type tcpGROItem struct {
|
||||||
key flowKey
|
key tcpFlowKey
|
||||||
sentSeq uint32 // the sequence number
|
sentSeq uint32 // the sequence number
|
||||||
bufsIndex uint16 // the index into the original bufs slice
|
bufsIndex uint16 // the index into the original bufs slice
|
||||||
numMerged uint16 // the number of packets merged into this item
|
numMerged uint16 // the number of packets merged into this item
|
||||||
@ -164,6 +166,103 @@ func (t *tcpGROTable) reset() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// udpFlowKey represents the key for a UDP flow.
|
||||||
|
type udpFlowKey struct {
|
||||||
|
srcAddr, dstAddr [16]byte
|
||||||
|
srcPort, dstPort uint16
|
||||||
|
isV6 bool
|
||||||
|
}
|
||||||
|
|
||||||
|
// udpGROTable holds flow and coalescing information for the purposes of UDP GRO.
|
||||||
|
type udpGROTable struct {
|
||||||
|
itemsByFlow map[udpFlowKey][]udpGROItem
|
||||||
|
itemsPool [][]udpGROItem
|
||||||
|
}
|
||||||
|
|
||||||
|
func newUDPGROTable() *udpGROTable {
|
||||||
|
u := &udpGROTable{
|
||||||
|
itemsByFlow: make(map[udpFlowKey][]udpGROItem, conn.IdealBatchSize),
|
||||||
|
itemsPool: make([][]udpGROItem, conn.IdealBatchSize),
|
||||||
|
}
|
||||||
|
for i := range u.itemsPool {
|
||||||
|
u.itemsPool[i] = make([]udpGROItem, 0, conn.IdealBatchSize)
|
||||||
|
}
|
||||||
|
return u
|
||||||
|
}
|
||||||
|
|
||||||
|
func newUDPFlowKey(pkt []byte, srcAddrOffset, dstAddrOffset, udphOffset int) udpFlowKey {
|
||||||
|
key := udpFlowKey{}
|
||||||
|
addrSize := dstAddrOffset - srcAddrOffset
|
||||||
|
copy(key.srcAddr[:], pkt[srcAddrOffset:dstAddrOffset])
|
||||||
|
copy(key.dstAddr[:], pkt[dstAddrOffset:dstAddrOffset+addrSize])
|
||||||
|
key.srcPort = binary.BigEndian.Uint16(pkt[udphOffset:])
|
||||||
|
key.dstPort = binary.BigEndian.Uint16(pkt[udphOffset+2:])
|
||||||
|
key.isV6 = addrSize == 16
|
||||||
|
return key
|
||||||
|
}
|
||||||
|
|
||||||
|
// lookupOrInsert looks up a flow for the provided packet and metadata,
|
||||||
|
// returning the packets found for the flow, or inserting a new one if none
|
||||||
|
// is found.
|
||||||
|
func (u *udpGROTable) lookupOrInsert(pkt []byte, srcAddrOffset, dstAddrOffset, udphOffset, bufsIndex int) ([]udpGROItem, bool) {
|
||||||
|
key := newUDPFlowKey(pkt, srcAddrOffset, dstAddrOffset, udphOffset)
|
||||||
|
items, ok := u.itemsByFlow[key]
|
||||||
|
if ok {
|
||||||
|
return items, ok
|
||||||
|
}
|
||||||
|
// TODO: insert() performs another map lookup. This could be rearranged to avoid.
|
||||||
|
u.insert(pkt, srcAddrOffset, dstAddrOffset, udphOffset, bufsIndex, false)
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
|
||||||
|
// insert an item in the table for the provided packet and packet metadata.
|
||||||
|
func (u *udpGROTable) insert(pkt []byte, srcAddrOffset, dstAddrOffset, udphOffset, bufsIndex int, cSumKnownInvalid bool) {
|
||||||
|
key := newUDPFlowKey(pkt, srcAddrOffset, dstAddrOffset, udphOffset)
|
||||||
|
item := udpGROItem{
|
||||||
|
key: key,
|
||||||
|
bufsIndex: uint16(bufsIndex),
|
||||||
|
gsoSize: uint16(len(pkt[udphOffset+udphLen:])),
|
||||||
|
iphLen: uint8(udphOffset),
|
||||||
|
cSumKnownInvalid: cSumKnownInvalid,
|
||||||
|
}
|
||||||
|
items, ok := u.itemsByFlow[key]
|
||||||
|
if !ok {
|
||||||
|
items = u.newItems()
|
||||||
|
}
|
||||||
|
items = append(items, item)
|
||||||
|
u.itemsByFlow[key] = items
|
||||||
|
}
|
||||||
|
|
||||||
|
func (u *udpGROTable) updateAt(item udpGROItem, i int) {
|
||||||
|
items, _ := u.itemsByFlow[item.key]
|
||||||
|
items[i] = item
|
||||||
|
}
|
||||||
|
|
||||||
|
// udpGROItem represents bookkeeping data for a UDP packet during the lifetime
|
||||||
|
// of a GRO evaluation across a vector of packets.
|
||||||
|
type udpGROItem struct {
|
||||||
|
key udpFlowKey
|
||||||
|
bufsIndex uint16 // the index into the original bufs slice
|
||||||
|
numMerged uint16 // the number of packets merged into this item
|
||||||
|
gsoSize uint16 // payload size
|
||||||
|
iphLen uint8 // ip header len
|
||||||
|
cSumKnownInvalid bool // UDP header checksum validity; a false value DOES NOT imply valid, just unknown.
|
||||||
|
}
|
||||||
|
|
||||||
|
func (u *udpGROTable) newItems() []udpGROItem {
|
||||||
|
var items []udpGROItem
|
||||||
|
items, u.itemsPool = u.itemsPool[len(u.itemsPool)-1], u.itemsPool[:len(u.itemsPool)-1]
|
||||||
|
return items
|
||||||
|
}
|
||||||
|
|
||||||
|
func (u *udpGROTable) reset() {
|
||||||
|
for k, items := range u.itemsByFlow {
|
||||||
|
items = items[:0]
|
||||||
|
u.itemsPool = append(u.itemsPool, items)
|
||||||
|
delete(u.itemsByFlow, k)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// canCoalesce represents the outcome of checking if two TCP packets are
|
// canCoalesce represents the outcome of checking if two TCP packets are
|
||||||
// candidates for coalescing.
|
// candidates for coalescing.
|
||||||
type canCoalesce int
|
type canCoalesce int
|
||||||
@ -174,6 +273,61 @@ const (
|
|||||||
coalesceAppend canCoalesce = 1
|
coalesceAppend canCoalesce = 1
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// ipHeadersCanCoalesce returns true if the IP headers found in pktA and pktB
|
||||||
|
// meet all requirements to be merged as part of a GRO operation, otherwise it
|
||||||
|
// returns false.
|
||||||
|
func ipHeadersCanCoalesce(pktA, pktB []byte) bool {
|
||||||
|
if len(pktA) < 9 || len(pktB) < 9 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if pktA[0]>>4 == 6 {
|
||||||
|
if pktA[0] != pktB[0] || pktA[1]>>4 != pktB[1]>>4 {
|
||||||
|
// cannot coalesce with unequal Traffic class values
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if pktA[7] != pktB[7] {
|
||||||
|
// cannot coalesce with unequal Hop limit values
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if pktA[1] != pktB[1] {
|
||||||
|
// cannot coalesce with unequal ToS values
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if pktA[6]>>5 != pktB[6]>>5 {
|
||||||
|
// cannot coalesce with unequal DF or reserved bits. MF is checked
|
||||||
|
// further up the stack.
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if pktA[8] != pktB[8] {
|
||||||
|
// cannot coalesce with unequal TTL values
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// udpPacketsCanCoalesce evaluates if pkt can be coalesced with the packet
|
||||||
|
// described by item. iphLen and gsoSize describe pkt. bufs is the vector of
|
||||||
|
// packets involved in the current GRO evaluation. bufsOffset is the offset at
|
||||||
|
// which packet data begins within bufs.
|
||||||
|
func udpPacketsCanCoalesce(pkt []byte, iphLen uint8, gsoSize uint16, item udpGROItem, bufs [][]byte, bufsOffset int) canCoalesce {
|
||||||
|
pktTarget := bufs[item.bufsIndex][bufsOffset:]
|
||||||
|
if !ipHeadersCanCoalesce(pkt, pktTarget) {
|
||||||
|
return coalesceUnavailable
|
||||||
|
}
|
||||||
|
if len(pktTarget[iphLen+udphLen:])%int(item.gsoSize) != 0 {
|
||||||
|
// A smaller than gsoSize packet has been appended previously.
|
||||||
|
// Nothing can come after a smaller packet on the end.
|
||||||
|
return coalesceUnavailable
|
||||||
|
}
|
||||||
|
if gsoSize > item.gsoSize {
|
||||||
|
// We cannot have a larger packet following a smaller one.
|
||||||
|
return coalesceUnavailable
|
||||||
|
}
|
||||||
|
return coalesceAppend
|
||||||
|
}
|
||||||
|
|
||||||
// tcpPacketsCanCoalesce evaluates if pkt can be coalesced with the packet
|
// tcpPacketsCanCoalesce evaluates if pkt can be coalesced with the packet
|
||||||
// described by item. This function makes considerations that match the kernel's
|
// described by item. This function makes considerations that match the kernel's
|
||||||
// GRO self tests, which can be found in tools/testing/selftests/net/gro.c.
|
// GRO self tests, which can be found in tools/testing/selftests/net/gro.c.
|
||||||
@ -189,29 +343,8 @@ func tcpPacketsCanCoalesce(pkt []byte, iphLen, tcphLen uint8, seq uint32, pshSet
|
|||||||
return coalesceUnavailable
|
return coalesceUnavailable
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if pkt[0]>>4 == 6 {
|
if !ipHeadersCanCoalesce(pkt, pktTarget) {
|
||||||
if pkt[0] != pktTarget[0] || pkt[1]>>4 != pktTarget[1]>>4 {
|
return coalesceUnavailable
|
||||||
// cannot coalesce with unequal Traffic class values
|
|
||||||
return coalesceUnavailable
|
|
||||||
}
|
|
||||||
if pkt[7] != pktTarget[7] {
|
|
||||||
// cannot coalesce with unequal Hop limit values
|
|
||||||
return coalesceUnavailable
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if pkt[1] != pktTarget[1] {
|
|
||||||
// cannot coalesce with unequal ToS values
|
|
||||||
return coalesceUnavailable
|
|
||||||
}
|
|
||||||
if pkt[6]>>5 != pktTarget[6]>>5 {
|
|
||||||
// cannot coalesce with unequal DF or reserved bits. MF is checked
|
|
||||||
// further up the stack.
|
|
||||||
return coalesceUnavailable
|
|
||||||
}
|
|
||||||
if pkt[8] != pktTarget[8] {
|
|
||||||
// cannot coalesce with unequal TTL values
|
|
||||||
return coalesceUnavailable
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// seq adjacency
|
// seq adjacency
|
||||||
lhsLen := item.gsoSize
|
lhsLen := item.gsoSize
|
||||||
@ -252,16 +385,16 @@ func tcpPacketsCanCoalesce(pkt []byte, iphLen, tcphLen uint8, seq uint32, pshSet
|
|||||||
return coalesceUnavailable
|
return coalesceUnavailable
|
||||||
}
|
}
|
||||||
|
|
||||||
func tcpChecksumValid(pkt []byte, iphLen uint8, isV6 bool) bool {
|
func checksumValid(pkt []byte, iphLen, proto uint8, isV6 bool) bool {
|
||||||
srcAddrAt := ipv4SrcAddrOffset
|
srcAddrAt := ipv4SrcAddrOffset
|
||||||
addrSize := 4
|
addrSize := 4
|
||||||
if isV6 {
|
if isV6 {
|
||||||
srcAddrAt = ipv6SrcAddrOffset
|
srcAddrAt = ipv6SrcAddrOffset
|
||||||
addrSize = 16
|
addrSize = 16
|
||||||
}
|
}
|
||||||
tcpTotalLen := uint16(len(pkt) - int(iphLen))
|
lenForPseudo := uint16(len(pkt) - int(iphLen))
|
||||||
tcpCSumNoFold := pseudoHeaderChecksumNoFold(unix.IPPROTO_TCP, pkt[srcAddrAt:srcAddrAt+addrSize], pkt[srcAddrAt+addrSize:srcAddrAt+addrSize*2], tcpTotalLen)
|
cSum := pseudoHeaderChecksumNoFold(proto, pkt[srcAddrAt:srcAddrAt+addrSize], pkt[srcAddrAt+addrSize:srcAddrAt+addrSize*2], lenForPseudo)
|
||||||
return ^checksum(pkt[iphLen:], tcpCSumNoFold) == 0
|
return ^checksum(pkt[iphLen:], cSum) == 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// coalesceResult represents the result of attempting to coalesce two TCP
|
// coalesceResult represents the result of attempting to coalesce two TCP
|
||||||
@ -276,8 +409,36 @@ const (
|
|||||||
coalesceSuccess
|
coalesceSuccess
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// coalesceUDPPackets attempts to coalesce pkt with the packet described by
|
||||||
|
// item, and returns the outcome.
|
||||||
|
func coalesceUDPPackets(pkt []byte, item *udpGROItem, bufs [][]byte, bufsOffset int, isV6 bool) coalesceResult {
|
||||||
|
pktHead := bufs[item.bufsIndex][bufsOffset:] // the packet that will end up at the front
|
||||||
|
headersLen := item.iphLen + udphLen
|
||||||
|
coalescedLen := len(bufs[item.bufsIndex][bufsOffset:]) + len(pkt) - int(headersLen)
|
||||||
|
|
||||||
|
if cap(pktHead)-bufsOffset < coalescedLen {
|
||||||
|
// We don't want to allocate a new underlying array if capacity is
|
||||||
|
// too small.
|
||||||
|
return coalesceInsufficientCap
|
||||||
|
}
|
||||||
|
if item.numMerged == 0 {
|
||||||
|
if item.cSumKnownInvalid || !checksumValid(bufs[item.bufsIndex][bufsOffset:], item.iphLen, unix.IPPROTO_UDP, isV6) {
|
||||||
|
return coalesceItemInvalidCSum
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !checksumValid(pkt, item.iphLen, unix.IPPROTO_UDP, isV6) {
|
||||||
|
return coalescePktInvalidCSum
|
||||||
|
}
|
||||||
|
extendBy := len(pkt) - int(headersLen)
|
||||||
|
bufs[item.bufsIndex] = append(bufs[item.bufsIndex], make([]byte, extendBy)...)
|
||||||
|
copy(bufs[item.bufsIndex][bufsOffset+len(pktHead):], pkt[headersLen:])
|
||||||
|
|
||||||
|
item.numMerged++
|
||||||
|
return coalesceSuccess
|
||||||
|
}
|
||||||
|
|
||||||
// coalesceTCPPackets attempts to coalesce pkt with the packet described by
|
// coalesceTCPPackets attempts to coalesce pkt with the packet described by
|
||||||
// item, returning the outcome. This function may swap bufs elements in the
|
// item, and returns the outcome. This function may swap bufs elements in the
|
||||||
// event of a prepend as item's bufs index is already being tracked for writing
|
// event of a prepend as item's bufs index is already being tracked for writing
|
||||||
// to a Device.
|
// to a Device.
|
||||||
func coalesceTCPPackets(mode canCoalesce, pkt []byte, pktBuffsIndex int, gsoSize uint16, seq uint32, pshSet bool, item *tcpGROItem, bufs [][]byte, bufsOffset int, isV6 bool) coalesceResult {
|
func coalesceTCPPackets(mode canCoalesce, pkt []byte, pktBuffsIndex int, gsoSize uint16, seq uint32, pshSet bool, item *tcpGROItem, bufs [][]byte, bufsOffset int, isV6 bool) coalesceResult {
|
||||||
@ -297,11 +458,11 @@ func coalesceTCPPackets(mode canCoalesce, pkt []byte, pktBuffsIndex int, gsoSize
|
|||||||
return coalescePSHEnding
|
return coalescePSHEnding
|
||||||
}
|
}
|
||||||
if item.numMerged == 0 {
|
if item.numMerged == 0 {
|
||||||
if !tcpChecksumValid(bufs[item.bufsIndex][bufsOffset:], item.iphLen, isV6) {
|
if !checksumValid(bufs[item.bufsIndex][bufsOffset:], item.iphLen, unix.IPPROTO_TCP, isV6) {
|
||||||
return coalesceItemInvalidCSum
|
return coalesceItemInvalidCSum
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !tcpChecksumValid(pkt, item.iphLen, isV6) {
|
if !checksumValid(pkt, item.iphLen, unix.IPPROTO_TCP, isV6) {
|
||||||
return coalescePktInvalidCSum
|
return coalescePktInvalidCSum
|
||||||
}
|
}
|
||||||
item.sentSeq = seq
|
item.sentSeq = seq
|
||||||
@ -319,11 +480,11 @@ func coalesceTCPPackets(mode canCoalesce, pkt []byte, pktBuffsIndex int, gsoSize
|
|||||||
return coalesceInsufficientCap
|
return coalesceInsufficientCap
|
||||||
}
|
}
|
||||||
if item.numMerged == 0 {
|
if item.numMerged == 0 {
|
||||||
if !tcpChecksumValid(bufs[item.bufsIndex][bufsOffset:], item.iphLen, isV6) {
|
if !checksumValid(bufs[item.bufsIndex][bufsOffset:], item.iphLen, unix.IPPROTO_TCP, isV6) {
|
||||||
return coalesceItemInvalidCSum
|
return coalesceItemInvalidCSum
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !tcpChecksumValid(pkt, item.iphLen, isV6) {
|
if !checksumValid(pkt, item.iphLen, unix.IPPROTO_TCP, isV6) {
|
||||||
return coalescePktInvalidCSum
|
return coalescePktInvalidCSum
|
||||||
}
|
}
|
||||||
if pshSet {
|
if pshSet {
|
||||||
@ -354,52 +515,52 @@ const (
|
|||||||
maxUint16 = 1<<16 - 1
|
maxUint16 = 1<<16 - 1
|
||||||
)
|
)
|
||||||
|
|
||||||
type tcpGROResult int
|
type groResult int
|
||||||
|
|
||||||
const (
|
const (
|
||||||
tcpGROResultNoop tcpGROResult = iota
|
groResultNoop groResult = iota
|
||||||
tcpGROResultTableInsert
|
groResultTableInsert
|
||||||
tcpGROResultCoalesced
|
groResultCoalesced
|
||||||
)
|
)
|
||||||
|
|
||||||
// tcpGRO evaluates the TCP packet at pktI in bufs for coalescing with
|
// tcpGRO evaluates the TCP packet at pktI in bufs for coalescing with
|
||||||
// existing packets tracked in table. It returns a tcpGROResultNoop when no
|
// existing packets tracked in table. It returns a groResultNoop when no
|
||||||
// action was taken, tcpGROResultTableInsert when the evaluated packet was
|
// action was taken, groResultTableInsert when the evaluated packet was
|
||||||
// inserted into table, and tcpGROResultCoalesced when the evaluated packet was
|
// inserted into table, and groResultCoalesced when the evaluated packet was
|
||||||
// coalesced with another packet in table.
|
// coalesced with another packet in table.
|
||||||
func tcpGRO(bufs [][]byte, offset int, pktI int, table *tcpGROTable, isV6 bool) tcpGROResult {
|
func tcpGRO(bufs [][]byte, offset int, pktI int, table *tcpGROTable, isV6 bool) groResult {
|
||||||
pkt := bufs[pktI][offset:]
|
pkt := bufs[pktI][offset:]
|
||||||
if len(pkt) > maxUint16 {
|
if len(pkt) > maxUint16 {
|
||||||
// A valid IPv4 or IPv6 packet will never exceed this.
|
// A valid IPv4 or IPv6 packet will never exceed this.
|
||||||
return tcpGROResultNoop
|
return groResultNoop
|
||||||
}
|
}
|
||||||
iphLen := int((pkt[0] & 0x0F) * 4)
|
iphLen := int((pkt[0] & 0x0F) * 4)
|
||||||
if isV6 {
|
if isV6 {
|
||||||
iphLen = 40
|
iphLen = 40
|
||||||
ipv6HPayloadLen := int(binary.BigEndian.Uint16(pkt[4:]))
|
ipv6HPayloadLen := int(binary.BigEndian.Uint16(pkt[4:]))
|
||||||
if ipv6HPayloadLen != len(pkt)-iphLen {
|
if ipv6HPayloadLen != len(pkt)-iphLen {
|
||||||
return tcpGROResultNoop
|
return groResultNoop
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
totalLen := int(binary.BigEndian.Uint16(pkt[2:]))
|
totalLen := int(binary.BigEndian.Uint16(pkt[2:]))
|
||||||
if totalLen != len(pkt) {
|
if totalLen != len(pkt) {
|
||||||
return tcpGROResultNoop
|
return groResultNoop
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if len(pkt) < iphLen {
|
if len(pkt) < iphLen {
|
||||||
return tcpGROResultNoop
|
return groResultNoop
|
||||||
}
|
}
|
||||||
tcphLen := int((pkt[iphLen+12] >> 4) * 4)
|
tcphLen := int((pkt[iphLen+12] >> 4) * 4)
|
||||||
if tcphLen < 20 || tcphLen > 60 {
|
if tcphLen < 20 || tcphLen > 60 {
|
||||||
return tcpGROResultNoop
|
return groResultNoop
|
||||||
}
|
}
|
||||||
if len(pkt) < iphLen+tcphLen {
|
if len(pkt) < iphLen+tcphLen {
|
||||||
return tcpGROResultNoop
|
return groResultNoop
|
||||||
}
|
}
|
||||||
if !isV6 {
|
if !isV6 {
|
||||||
if pkt[6]&ipv4FlagMoreFragments != 0 || pkt[6]<<3 != 0 || pkt[7] != 0 {
|
if pkt[6]&ipv4FlagMoreFragments != 0 || pkt[6]<<3 != 0 || pkt[7] != 0 {
|
||||||
// no GRO support for fragmented segments for now
|
// no GRO support for fragmented segments for now
|
||||||
return tcpGROResultNoop
|
return groResultNoop
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tcpFlags := pkt[iphLen+tcpFlagsOffset]
|
tcpFlags := pkt[iphLen+tcpFlagsOffset]
|
||||||
@ -407,14 +568,14 @@ func tcpGRO(bufs [][]byte, offset int, pktI int, table *tcpGROTable, isV6 bool)
|
|||||||
// not a candidate if any non-ACK flags (except PSH+ACK) are set
|
// not a candidate if any non-ACK flags (except PSH+ACK) are set
|
||||||
if tcpFlags != tcpFlagACK {
|
if tcpFlags != tcpFlagACK {
|
||||||
if pkt[iphLen+tcpFlagsOffset] != tcpFlagACK|tcpFlagPSH {
|
if pkt[iphLen+tcpFlagsOffset] != tcpFlagACK|tcpFlagPSH {
|
||||||
return tcpGROResultNoop
|
return groResultNoop
|
||||||
}
|
}
|
||||||
pshSet = true
|
pshSet = true
|
||||||
}
|
}
|
||||||
gsoSize := uint16(len(pkt) - tcphLen - iphLen)
|
gsoSize := uint16(len(pkt) - tcphLen - iphLen)
|
||||||
// not a candidate if payload len is 0
|
// not a candidate if payload len is 0
|
||||||
if gsoSize < 1 {
|
if gsoSize < 1 {
|
||||||
return tcpGROResultNoop
|
return groResultNoop
|
||||||
}
|
}
|
||||||
seq := binary.BigEndian.Uint32(pkt[iphLen+4:])
|
seq := binary.BigEndian.Uint32(pkt[iphLen+4:])
|
||||||
srcAddrOffset := ipv4SrcAddrOffset
|
srcAddrOffset := ipv4SrcAddrOffset
|
||||||
@ -425,7 +586,7 @@ func tcpGRO(bufs [][]byte, offset int, pktI int, table *tcpGROTable, isV6 bool)
|
|||||||
}
|
}
|
||||||
items, existing := table.lookupOrInsert(pkt, srcAddrOffset, srcAddrOffset+addrLen, iphLen, tcphLen, pktI)
|
items, existing := table.lookupOrInsert(pkt, srcAddrOffset, srcAddrOffset+addrLen, iphLen, tcphLen, pktI)
|
||||||
if !existing {
|
if !existing {
|
||||||
return tcpGROResultNoop
|
return groResultTableInsert
|
||||||
}
|
}
|
||||||
for i := len(items) - 1; i >= 0; i-- {
|
for i := len(items) - 1; i >= 0; i-- {
|
||||||
// In the best case of packets arriving in order iterating in reverse is
|
// In the best case of packets arriving in order iterating in reverse is
|
||||||
@ -443,54 +604,25 @@ func tcpGRO(bufs [][]byte, offset int, pktI int, table *tcpGROTable, isV6 bool)
|
|||||||
switch result {
|
switch result {
|
||||||
case coalesceSuccess:
|
case coalesceSuccess:
|
||||||
table.updateAt(item, i)
|
table.updateAt(item, i)
|
||||||
return tcpGROResultCoalesced
|
return groResultCoalesced
|
||||||
case coalesceItemInvalidCSum:
|
case coalesceItemInvalidCSum:
|
||||||
// delete the item with an invalid csum
|
// delete the item with an invalid csum
|
||||||
table.deleteAt(item.key, i)
|
table.deleteAt(item.key, i)
|
||||||
case coalescePktInvalidCSum:
|
case coalescePktInvalidCSum:
|
||||||
// no point in inserting an item that we can't coalesce
|
// no point in inserting an item that we can't coalesce
|
||||||
return tcpGROResultNoop
|
return groResultNoop
|
||||||
default:
|
default:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// failed to coalesce with any other packets; store the item in the flow
|
// failed to coalesce with any other packets; store the item in the flow
|
||||||
table.insert(pkt, srcAddrOffset, srcAddrOffset+addrLen, iphLen, tcphLen, pktI)
|
table.insert(pkt, srcAddrOffset, srcAddrOffset+addrLen, iphLen, tcphLen, pktI)
|
||||||
return tcpGROResultTableInsert
|
return groResultTableInsert
|
||||||
}
|
}
|
||||||
|
|
||||||
func isTCP4NoIPOptions(b []byte) bool {
|
// applyTCPCoalesceAccounting updates bufs to account for coalescing based on the
|
||||||
if len(b) < 40 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if b[0]>>4 != 4 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if b[0]&0x0F != 5 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if b[9] != unix.IPPROTO_TCP {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
func isTCP6NoEH(b []byte) bool {
|
|
||||||
if len(b) < 60 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if b[0]>>4 != 6 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if b[6] != unix.IPPROTO_TCP {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// applyCoalesceAccounting updates bufs to account for coalescing based on the
|
|
||||||
// metadata found in table.
|
// metadata found in table.
|
||||||
func applyCoalesceAccounting(bufs [][]byte, offset int, table *tcpGROTable, isV6 bool) error {
|
func applyTCPCoalesceAccounting(bufs [][]byte, offset int, table *tcpGROTable) error {
|
||||||
for _, items := range table.itemsByFlow {
|
for _, items := range table.itemsByFlow {
|
||||||
for _, item := range items {
|
for _, item := range items {
|
||||||
if item.numMerged > 0 {
|
if item.numMerged > 0 {
|
||||||
@ -505,7 +637,7 @@ func applyCoalesceAccounting(bufs [][]byte, offset int, table *tcpGROTable, isV6
|
|||||||
|
|
||||||
// Recalculate the total len (IPv4) or payload len (IPv6).
|
// Recalculate the total len (IPv4) or payload len (IPv6).
|
||||||
// Recalculate the (IPv4) header checksum.
|
// Recalculate the (IPv4) header checksum.
|
||||||
if isV6 {
|
if item.key.isV6 {
|
||||||
hdr.gsoType = unix.VIRTIO_NET_HDR_GSO_TCPV6
|
hdr.gsoType = unix.VIRTIO_NET_HDR_GSO_TCPV6
|
||||||
binary.BigEndian.PutUint16(pkt[4:], uint16(len(pkt))-uint16(item.iphLen)) // set new IPv6 header payload len
|
binary.BigEndian.PutUint16(pkt[4:], uint16(len(pkt))-uint16(item.iphLen)) // set new IPv6 header payload len
|
||||||
} else {
|
} else {
|
||||||
@ -525,7 +657,7 @@ func applyCoalesceAccounting(bufs [][]byte, offset int, table *tcpGROTable, isV6
|
|||||||
// this with computation of the tcp header and payload checksum.
|
// this with computation of the tcp header and payload checksum.
|
||||||
addrLen := 4
|
addrLen := 4
|
||||||
addrOffset := ipv4SrcAddrOffset
|
addrOffset := ipv4SrcAddrOffset
|
||||||
if isV6 {
|
if item.key.isV6 {
|
||||||
addrLen = 16
|
addrLen = 16
|
||||||
addrOffset = ipv6SrcAddrOffset
|
addrOffset = ipv6SrcAddrOffset
|
||||||
}
|
}
|
||||||
@ -546,54 +678,245 @@ func applyCoalesceAccounting(bufs [][]byte, offset int, table *tcpGROTable, isV6
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// applyUDPCoalesceAccounting updates bufs to account for coalescing based on the
|
||||||
|
// metadata found in table.
|
||||||
|
func applyUDPCoalesceAccounting(bufs [][]byte, offset int, table *udpGROTable) error {
|
||||||
|
for _, items := range table.itemsByFlow {
|
||||||
|
for _, item := range items {
|
||||||
|
if item.numMerged > 0 {
|
||||||
|
hdr := virtioNetHdr{
|
||||||
|
flags: unix.VIRTIO_NET_HDR_F_NEEDS_CSUM, // this turns into CHECKSUM_PARTIAL in the skb
|
||||||
|
hdrLen: uint16(item.iphLen + udphLen),
|
||||||
|
gsoSize: item.gsoSize,
|
||||||
|
csumStart: uint16(item.iphLen),
|
||||||
|
csumOffset: 6,
|
||||||
|
}
|
||||||
|
pkt := bufs[item.bufsIndex][offset:]
|
||||||
|
|
||||||
|
// Recalculate the total len (IPv4) or payload len (IPv6).
|
||||||
|
// Recalculate the (IPv4) header checksum.
|
||||||
|
hdr.gsoType = unix.VIRTIO_NET_HDR_GSO_UDP_L4
|
||||||
|
if item.key.isV6 {
|
||||||
|
binary.BigEndian.PutUint16(pkt[4:], uint16(len(pkt))-uint16(item.iphLen)) // set new IPv6 header payload len
|
||||||
|
} else {
|
||||||
|
pkt[10], pkt[11] = 0, 0
|
||||||
|
binary.BigEndian.PutUint16(pkt[2:], uint16(len(pkt))) // set new total length
|
||||||
|
iphCSum := ^checksum(pkt[:item.iphLen], 0) // compute IPv4 header checksum
|
||||||
|
binary.BigEndian.PutUint16(pkt[10:], iphCSum) // set IPv4 header checksum field
|
||||||
|
}
|
||||||
|
err := hdr.encode(bufs[item.bufsIndex][offset-virtioNetHdrLen:])
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recalculate the UDP len field value
|
||||||
|
binary.BigEndian.PutUint16(pkt[item.iphLen+4:], uint16(len(pkt[item.iphLen:])))
|
||||||
|
|
||||||
|
// Calculate the pseudo header checksum and place it at the UDP
|
||||||
|
// checksum offset. Downstream checksum offloading will combine
|
||||||
|
// this with computation of the udp header and payload checksum.
|
||||||
|
addrLen := 4
|
||||||
|
addrOffset := ipv4SrcAddrOffset
|
||||||
|
if item.key.isV6 {
|
||||||
|
addrLen = 16
|
||||||
|
addrOffset = ipv6SrcAddrOffset
|
||||||
|
}
|
||||||
|
srcAddrAt := offset + addrOffset
|
||||||
|
srcAddr := bufs[item.bufsIndex][srcAddrAt : srcAddrAt+addrLen]
|
||||||
|
dstAddr := bufs[item.bufsIndex][srcAddrAt+addrLen : srcAddrAt+addrLen*2]
|
||||||
|
psum := pseudoHeaderChecksumNoFold(unix.IPPROTO_UDP, srcAddr, dstAddr, uint16(len(pkt)-int(item.iphLen)))
|
||||||
|
binary.BigEndian.PutUint16(pkt[hdr.csumStart+hdr.csumOffset:], checksum([]byte{}, psum))
|
||||||
|
} else {
|
||||||
|
hdr := virtioNetHdr{}
|
||||||
|
err := hdr.encode(bufs[item.bufsIndex][offset-virtioNetHdrLen:])
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// groCandidateType is the classification returned by packetIsGROCandidate. It
// identifies which GRO path, if any, a packet should be evaluated by.
type groCandidateType uint8

const (
	// notGROCandidate is the zero value: the packet is not evaluated for GRO.
	notGROCandidate groCandidateType = iota
	// tcp4GROCandidate marks a TCP-over-IPv4 packet.
	tcp4GROCandidate
	// tcp6GROCandidate marks a TCP-over-IPv6 packet.
	tcp6GROCandidate
	// udp4GROCandidate marks a UDP-over-IPv4 packet.
	udp4GROCandidate
	// udp6GROCandidate marks a UDP-over-IPv6 packet.
	udp6GROCandidate
)
|
||||||
|
|
||||||
|
func packetIsGROCandidate(b []byte, canUDPGRO bool) groCandidateType {
|
||||||
|
if len(b) < 28 {
|
||||||
|
return notGROCandidate
|
||||||
|
}
|
||||||
|
if b[0]>>4 == 4 {
|
||||||
|
if b[0]&0x0F != 5 {
|
||||||
|
// IPv4 packets w/IP options do not coalesce
|
||||||
|
return notGROCandidate
|
||||||
|
}
|
||||||
|
if b[9] == unix.IPPROTO_TCP && len(b) >= 40 {
|
||||||
|
return tcp4GROCandidate
|
||||||
|
}
|
||||||
|
if b[9] == unix.IPPROTO_UDP && canUDPGRO {
|
||||||
|
return udp4GROCandidate
|
||||||
|
}
|
||||||
|
} else if b[0]>>4 == 6 {
|
||||||
|
if b[6] == unix.IPPROTO_TCP && len(b) >= 60 {
|
||||||
|
return tcp6GROCandidate
|
||||||
|
}
|
||||||
|
if b[6] == unix.IPPROTO_UDP && len(b) >= 48 && canUDPGRO {
|
||||||
|
return udp6GROCandidate
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return notGROCandidate
|
||||||
|
}
|
||||||
|
|
||||||
|
const (
	// udphLen is the length of a UDP header in bytes.
	udphLen = 8
)
|
||||||
|
|
||||||
|
// udpGRO evaluates the UDP packet at pktI in bufs for coalescing with
|
||||||
|
// existing packets tracked in table. It returns a groResultNoop when no
|
||||||
|
// action was taken, groResultTableInsert when the evaluated packet was
|
||||||
|
// inserted into table, and groResultCoalesced when the evaluated packet was
|
||||||
|
// coalesced with another packet in table.
|
||||||
|
func udpGRO(bufs [][]byte, offset int, pktI int, table *udpGROTable, isV6 bool) groResult {
|
||||||
|
pkt := bufs[pktI][offset:]
|
||||||
|
if len(pkt) > maxUint16 {
|
||||||
|
// A valid IPv4 or IPv6 packet will never exceed this.
|
||||||
|
return groResultNoop
|
||||||
|
}
|
||||||
|
iphLen := int((pkt[0] & 0x0F) * 4)
|
||||||
|
if isV6 {
|
||||||
|
iphLen = 40
|
||||||
|
ipv6HPayloadLen := int(binary.BigEndian.Uint16(pkt[4:]))
|
||||||
|
if ipv6HPayloadLen != len(pkt)-iphLen {
|
||||||
|
return groResultNoop
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
totalLen := int(binary.BigEndian.Uint16(pkt[2:]))
|
||||||
|
if totalLen != len(pkt) {
|
||||||
|
return groResultNoop
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(pkt) < iphLen {
|
||||||
|
return groResultNoop
|
||||||
|
}
|
||||||
|
if len(pkt) < iphLen+udphLen {
|
||||||
|
return groResultNoop
|
||||||
|
}
|
||||||
|
if !isV6 {
|
||||||
|
if pkt[6]&ipv4FlagMoreFragments != 0 || pkt[6]<<3 != 0 || pkt[7] != 0 {
|
||||||
|
// no GRO support for fragmented segments for now
|
||||||
|
return groResultNoop
|
||||||
|
}
|
||||||
|
}
|
||||||
|
gsoSize := uint16(len(pkt) - udphLen - iphLen)
|
||||||
|
// not a candidate if payload len is 0
|
||||||
|
if gsoSize < 1 {
|
||||||
|
return groResultNoop
|
||||||
|
}
|
||||||
|
srcAddrOffset := ipv4SrcAddrOffset
|
||||||
|
addrLen := 4
|
||||||
|
if isV6 {
|
||||||
|
srcAddrOffset = ipv6SrcAddrOffset
|
||||||
|
addrLen = 16
|
||||||
|
}
|
||||||
|
items, existing := table.lookupOrInsert(pkt, srcAddrOffset, srcAddrOffset+addrLen, iphLen, pktI)
|
||||||
|
if !existing {
|
||||||
|
return groResultTableInsert
|
||||||
|
}
|
||||||
|
// With UDP we only check the last item, otherwise we could reorder packets
|
||||||
|
// for a given flow. We must also always insert a new item, or successfully
|
||||||
|
// coalesce with an existing item, for the same reason.
|
||||||
|
item := items[len(items)-1]
|
||||||
|
can := udpPacketsCanCoalesce(pkt, uint8(iphLen), gsoSize, item, bufs, offset)
|
||||||
|
var pktCSumKnownInvalid bool
|
||||||
|
if can == coalesceAppend {
|
||||||
|
result := coalesceUDPPackets(pkt, &item, bufs, offset, isV6)
|
||||||
|
switch result {
|
||||||
|
case coalesceSuccess:
|
||||||
|
table.updateAt(item, len(items)-1)
|
||||||
|
return groResultCoalesced
|
||||||
|
case coalesceItemInvalidCSum:
|
||||||
|
// If the existing item has an invalid csum we take no action. A new
|
||||||
|
// item will be stored after it, and the existing item will never be
|
||||||
|
// revisited as part of future coalescing candidacy checks.
|
||||||
|
case coalescePktInvalidCSum:
|
||||||
|
// We must insert a new item, but we also mark it as invalid csum
|
||||||
|
// to prevent a repeat checksum validation.
|
||||||
|
pktCSumKnownInvalid = true
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// failed to coalesce with any other packets; store the item in the flow
|
||||||
|
table.insert(pkt, srcAddrOffset, srcAddrOffset+addrLen, iphLen, pktI, pktCSumKnownInvalid)
|
||||||
|
return groResultTableInsert
|
||||||
|
}
|
||||||
|
|
||||||
// handleGRO evaluates bufs for GRO, and writes the indices of the resulting
|
// handleGRO evaluates bufs for GRO, and writes the indices of the resulting
|
||||||
// packets into toWrite. toWrite, tcp4Table, and tcp6Table should initially be
|
// packets into toWrite. toWrite, tcpTable, and udpTable should initially be
|
||||||
// empty (but non-nil), and are passed in to save allocs as the caller may reset
|
// empty (but non-nil), and are passed in to save allocs as the caller may reset
|
||||||
// and recycle them across vectors of packets.
|
// and recycle them across vectors of packets. canUDPGRO indicates if UDP GRO is
|
||||||
func handleGRO(bufs [][]byte, offset int, tcp4Table, tcp6Table *tcpGROTable, toWrite *[]int) error {
|
// supported.
|
||||||
|
func handleGRO(bufs [][]byte, offset int, tcpTable *tcpGROTable, udpTable *udpGROTable, canUDPGRO bool, toWrite *[]int) error {
|
||||||
for i := range bufs {
|
for i := range bufs {
|
||||||
if offset < virtioNetHdrLen || offset > len(bufs[i])-1 {
|
if offset < virtioNetHdrLen || offset > len(bufs[i])-1 {
|
||||||
return errors.New("invalid offset")
|
return errors.New("invalid offset")
|
||||||
}
|
}
|
||||||
var result tcpGROResult
|
var result groResult
|
||||||
switch {
|
switch packetIsGROCandidate(bufs[i][offset:], canUDPGRO) {
|
||||||
case isTCP4NoIPOptions(bufs[i][offset:]): // ipv4 packets w/IP options do not coalesce
|
case tcp4GROCandidate:
|
||||||
result = tcpGRO(bufs, offset, i, tcp4Table, false)
|
result = tcpGRO(bufs, offset, i, tcpTable, false)
|
||||||
case isTCP6NoEH(bufs[i][offset:]): // ipv6 packets w/extension headers do not coalesce
|
case tcp6GROCandidate:
|
||||||
result = tcpGRO(bufs, offset, i, tcp6Table, true)
|
result = tcpGRO(bufs, offset, i, tcpTable, true)
|
||||||
|
case udp4GROCandidate:
|
||||||
|
result = udpGRO(bufs, offset, i, udpTable, false)
|
||||||
|
case udp6GROCandidate:
|
||||||
|
result = udpGRO(bufs, offset, i, udpTable, true)
|
||||||
}
|
}
|
||||||
switch result {
|
switch result {
|
||||||
case tcpGROResultNoop:
|
case groResultNoop:
|
||||||
hdr := virtioNetHdr{}
|
hdr := virtioNetHdr{}
|
||||||
err := hdr.encode(bufs[i][offset-virtioNetHdrLen:])
|
err := hdr.encode(bufs[i][offset-virtioNetHdrLen:])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
fallthrough
|
fallthrough
|
||||||
case tcpGROResultTableInsert:
|
case groResultTableInsert:
|
||||||
*toWrite = append(*toWrite, i)
|
*toWrite = append(*toWrite, i)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
err4 := applyCoalesceAccounting(bufs, offset, tcp4Table, false)
|
errTCP := applyTCPCoalesceAccounting(bufs, offset, tcpTable)
|
||||||
err6 := applyCoalesceAccounting(bufs, offset, tcp6Table, true)
|
errUDP := applyUDPCoalesceAccounting(bufs, offset, udpTable)
|
||||||
return errors.Join(err4, err6)
|
return errors.Join(errTCP, errUDP)
|
||||||
}
|
}
|
||||||
|
|
||||||
// tcpTSO splits packets from in into outBuffs, writing the size of each
|
// gsoSplit splits packets from in into outBuffs, writing the size of each
|
||||||
// element into sizes. It returns the number of buffers populated, and/or an
|
// element into sizes. It returns the number of buffers populated, and/or an
|
||||||
// error.
|
// error.
|
||||||
func tcpTSO(in []byte, hdr virtioNetHdr, outBuffs [][]byte, sizes []int, outOffset int) (int, error) {
|
func gsoSplit(in []byte, hdr virtioNetHdr, outBuffs [][]byte, sizes []int, outOffset int, isV6 bool) (int, error) {
|
||||||
iphLen := int(hdr.csumStart)
|
iphLen := int(hdr.csumStart)
|
||||||
srcAddrOffset := ipv6SrcAddrOffset
|
srcAddrOffset := ipv6SrcAddrOffset
|
||||||
addrLen := 16
|
addrLen := 16
|
||||||
if hdr.gsoType == unix.VIRTIO_NET_HDR_GSO_TCPV4 {
|
if !isV6 {
|
||||||
in[10], in[11] = 0, 0 // clear ipv4 header checksum
|
in[10], in[11] = 0, 0 // clear ipv4 header checksum
|
||||||
srcAddrOffset = ipv4SrcAddrOffset
|
srcAddrOffset = ipv4SrcAddrOffset
|
||||||
addrLen = 4
|
addrLen = 4
|
||||||
}
|
}
|
||||||
tcpCSumAt := int(hdr.csumStart + hdr.csumOffset)
|
transportCsumAt := int(hdr.csumStart + hdr.csumOffset)
|
||||||
in[tcpCSumAt], in[tcpCSumAt+1] = 0, 0 // clear tcp checksum
|
in[transportCsumAt], in[transportCsumAt+1] = 0, 0 // clear tcp/udp checksum
|
||||||
firstTCPSeqNum := binary.BigEndian.Uint32(in[hdr.csumStart+4:])
|
var firstTCPSeqNum uint32
|
||||||
|
var protocol uint8
|
||||||
|
if hdr.gsoType == unix.VIRTIO_NET_HDR_GSO_TCPV4 || hdr.gsoType == unix.VIRTIO_NET_HDR_GSO_TCPV6 {
|
||||||
|
protocol = unix.IPPROTO_TCP
|
||||||
|
firstTCPSeqNum = binary.BigEndian.Uint32(in[hdr.csumStart+4:])
|
||||||
|
} else {
|
||||||
|
protocol = unix.IPPROTO_UDP
|
||||||
|
}
|
||||||
nextSegmentDataAt := int(hdr.hdrLen)
|
nextSegmentDataAt := int(hdr.hdrLen)
|
||||||
i := 0
|
i := 0
|
||||||
for ; nextSegmentDataAt < len(in); i++ {
|
for ; nextSegmentDataAt < len(in); i++ {
|
||||||
@ -610,7 +933,7 @@ func tcpTSO(in []byte, hdr virtioNetHdr, outBuffs [][]byte, sizes []int, outOffs
|
|||||||
out := outBuffs[i][outOffset:]
|
out := outBuffs[i][outOffset:]
|
||||||
|
|
||||||
copy(out, in[:iphLen])
|
copy(out, in[:iphLen])
|
||||||
if hdr.gsoType == unix.VIRTIO_NET_HDR_GSO_TCPV4 {
|
if !isV6 {
|
||||||
// For IPv4 we are responsible for incrementing the ID field,
|
// For IPv4 we are responsible for incrementing the ID field,
|
||||||
// updating the total len field, and recalculating the header
|
// updating the total len field, and recalculating the header
|
||||||
// checksum.
|
// checksum.
|
||||||
@ -627,25 +950,32 @@ func tcpTSO(in []byte, hdr virtioNetHdr, outBuffs [][]byte, sizes []int, outOffs
|
|||||||
binary.BigEndian.PutUint16(out[4:], uint16(totalLen-iphLen))
|
binary.BigEndian.PutUint16(out[4:], uint16(totalLen-iphLen))
|
||||||
}
|
}
|
||||||
|
|
||||||
// TCP header
|
// copy transport header
|
||||||
copy(out[hdr.csumStart:hdr.hdrLen], in[hdr.csumStart:hdr.hdrLen])
|
copy(out[hdr.csumStart:hdr.hdrLen], in[hdr.csumStart:hdr.hdrLen])
|
||||||
tcpSeq := firstTCPSeqNum + uint32(hdr.gsoSize*uint16(i))
|
|
||||||
binary.BigEndian.PutUint32(out[hdr.csumStart+4:], tcpSeq)
|
if protocol == unix.IPPROTO_TCP {
|
||||||
if nextSegmentEnd != len(in) {
|
// set TCP seq and adjust TCP flags
|
||||||
// FIN and PSH should only be set on last segment
|
tcpSeq := firstTCPSeqNum + uint32(hdr.gsoSize*uint16(i))
|
||||||
clearFlags := tcpFlagFIN | tcpFlagPSH
|
binary.BigEndian.PutUint32(out[hdr.csumStart+4:], tcpSeq)
|
||||||
out[hdr.csumStart+tcpFlagsOffset] &^= clearFlags
|
if nextSegmentEnd != len(in) {
|
||||||
|
// FIN and PSH should only be set on last segment
|
||||||
|
clearFlags := tcpFlagFIN | tcpFlagPSH
|
||||||
|
out[hdr.csumStart+tcpFlagsOffset] &^= clearFlags
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// set UDP header len
|
||||||
|
binary.BigEndian.PutUint16(out[hdr.csumStart+4:], uint16(segmentDataLen)+(hdr.hdrLen-hdr.csumStart))
|
||||||
}
|
}
|
||||||
|
|
||||||
// payload
|
// payload
|
||||||
copy(out[hdr.hdrLen:], in[nextSegmentDataAt:nextSegmentEnd])
|
copy(out[hdr.hdrLen:], in[nextSegmentDataAt:nextSegmentEnd])
|
||||||
|
|
||||||
// TCP checksum
|
// transport checksum
|
||||||
tcpHLen := int(hdr.hdrLen - hdr.csumStart)
|
transportHeaderLen := int(hdr.hdrLen - hdr.csumStart)
|
||||||
tcpLenForPseudo := uint16(tcpHLen + segmentDataLen)
|
lenForPseudo := uint16(transportHeaderLen + segmentDataLen)
|
||||||
tcpCSumNoFold := pseudoHeaderChecksumNoFold(unix.IPPROTO_TCP, in[srcAddrOffset:srcAddrOffset+addrLen], in[srcAddrOffset+addrLen:srcAddrOffset+addrLen*2], tcpLenForPseudo)
|
transportCSumNoFold := pseudoHeaderChecksumNoFold(protocol, in[srcAddrOffset:srcAddrOffset+addrLen], in[srcAddrOffset+addrLen:srcAddrOffset+addrLen*2], lenForPseudo)
|
||||||
tcpCSum := ^checksum(out[hdr.csumStart:totalLen], tcpCSumNoFold)
|
transportCSum := ^checksum(out[hdr.csumStart:totalLen], transportCSumNoFold)
|
||||||
binary.BigEndian.PutUint16(out[hdr.csumStart+hdr.csumOffset:], tcpCSum)
|
binary.BigEndian.PutUint16(out[hdr.csumStart+hdr.csumOffset:], transportCSum)
|
||||||
|
|
||||||
nextSegmentDataAt += int(hdr.gsoSize)
|
nextSegmentDataAt += int(hdr.gsoSize)
|
||||||
}
|
}
|
752
tun/offload_linux_test.go
Normal file
752
tun/offload_linux_test.go
Normal file
@ -0,0 +1,752 @@
|
|||||||
|
/* SPDX-License-Identifier: MIT
|
||||||
|
*
|
||||||
|
* Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package tun
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/netip"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
"golang.zx2c4.com/wireguard/conn"
|
||||||
|
"gvisor.dev/gvisor/pkg/tcpip"
|
||||||
|
"gvisor.dev/gvisor/pkg/tcpip/header"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
	// offset is the position in each test buffer at which the IP packet
	// begins; the virtioNetHdrLen bytes before it are headroom for the virtio
	// header.
	offset = virtioNetHdrLen
)
|
||||||
|
|
||||||
|
// Fixed endpoints used to build test packets. A, B and C differ only in their
// address; all use port 1.
var (
	ip4PortA = netip.MustParseAddrPort("192.0.2.1:1")
	ip4PortB = netip.MustParseAddrPort("192.0.2.2:1")
	ip4PortC = netip.MustParseAddrPort("192.0.2.3:1")
	ip6PortA = netip.MustParseAddrPort("[2001:db8::1]:1")
	ip6PortB = netip.MustParseAddrPort("[2001:db8::2]:1")
	ip6PortC = netip.MustParseAddrPort("[2001:db8::3]:1")
)
|
||||||
|
|
||||||
|
func udp4PacketMutateIPFields(srcIPPort, dstIPPort netip.AddrPort, payloadLen int, ipFn func(*header.IPv4Fields)) []byte {
|
||||||
|
totalLen := 28 + payloadLen
|
||||||
|
b := make([]byte, offset+int(totalLen), 65535)
|
||||||
|
ipv4H := header.IPv4(b[offset:])
|
||||||
|
srcAs4 := srcIPPort.Addr().As4()
|
||||||
|
dstAs4 := dstIPPort.Addr().As4()
|
||||||
|
ipFields := &header.IPv4Fields{
|
||||||
|
SrcAddr: tcpip.AddrFromSlice(srcAs4[:]),
|
||||||
|
DstAddr: tcpip.AddrFromSlice(dstAs4[:]),
|
||||||
|
Protocol: unix.IPPROTO_UDP,
|
||||||
|
TTL: 64,
|
||||||
|
TotalLength: uint16(totalLen),
|
||||||
|
}
|
||||||
|
if ipFn != nil {
|
||||||
|
ipFn(ipFields)
|
||||||
|
}
|
||||||
|
ipv4H.Encode(ipFields)
|
||||||
|
udpH := header.UDP(b[offset+20:])
|
||||||
|
udpH.Encode(&header.UDPFields{
|
||||||
|
SrcPort: srcIPPort.Port(),
|
||||||
|
DstPort: dstIPPort.Port(),
|
||||||
|
Length: uint16(payloadLen + udphLen),
|
||||||
|
})
|
||||||
|
ipv4H.SetChecksum(^ipv4H.CalculateChecksum())
|
||||||
|
pseudoCsum := header.PseudoHeaderChecksum(unix.IPPROTO_UDP, ipv4H.SourceAddress(), ipv4H.DestinationAddress(), uint16(udphLen+payloadLen))
|
||||||
|
udpH.SetChecksum(^udpH.CalculateChecksum(pseudoCsum))
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
// udp6Packet returns the result of udp6PacketMutateIPFields for the given
// endpoints and payload length, with no mutation of the IPv6 header fields.
func udp6Packet(srcIPPort, dstIPPort netip.AddrPort, payloadLen int) []byte {
	return udp6PacketMutateIPFields(srcIPPort, dstIPPort, payloadLen, nil)
}
|
||||||
|
|
||||||
|
func udp6PacketMutateIPFields(srcIPPort, dstIPPort netip.AddrPort, payloadLen int, ipFn func(*header.IPv6Fields)) []byte {
|
||||||
|
totalLen := 48 + payloadLen
|
||||||
|
b := make([]byte, offset+int(totalLen), 65535)
|
||||||
|
ipv6H := header.IPv6(b[offset:])
|
||||||
|
srcAs16 := srcIPPort.Addr().As16()
|
||||||
|
dstAs16 := dstIPPort.Addr().As16()
|
||||||
|
ipFields := &header.IPv6Fields{
|
||||||
|
SrcAddr: tcpip.AddrFromSlice(srcAs16[:]),
|
||||||
|
DstAddr: tcpip.AddrFromSlice(dstAs16[:]),
|
||||||
|
TransportProtocol: unix.IPPROTO_UDP,
|
||||||
|
HopLimit: 64,
|
||||||
|
PayloadLength: uint16(payloadLen + udphLen),
|
||||||
|
}
|
||||||
|
if ipFn != nil {
|
||||||
|
ipFn(ipFields)
|
||||||
|
}
|
||||||
|
ipv6H.Encode(ipFields)
|
||||||
|
udpH := header.UDP(b[offset+40:])
|
||||||
|
udpH.Encode(&header.UDPFields{
|
||||||
|
SrcPort: srcIPPort.Port(),
|
||||||
|
DstPort: dstIPPort.Port(),
|
||||||
|
Length: uint16(payloadLen + udphLen),
|
||||||
|
})
|
||||||
|
pseudoCsum := header.PseudoHeaderChecksum(unix.IPPROTO_UDP, ipv6H.SourceAddress(), ipv6H.DestinationAddress(), uint16(udphLen+payloadLen))
|
||||||
|
udpH.SetChecksum(^udpH.CalculateChecksum(pseudoCsum))
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
// udp4Packet returns the result of udp4PacketMutateIPFields for the given
// endpoints and payload length, with no mutation of the IPv4 header fields.
func udp4Packet(srcIPPort, dstIPPort netip.AddrPort, payloadLen int) []byte {
	return udp4PacketMutateIPFields(srcIPPort, dstIPPort, payloadLen, nil)
}
|
||||||
|
|
||||||
|
func tcp4PacketMutateIPFields(srcIPPort, dstIPPort netip.AddrPort, flags header.TCPFlags, segmentSize, seq uint32, ipFn func(*header.IPv4Fields)) []byte {
|
||||||
|
totalLen := 40 + segmentSize
|
||||||
|
b := make([]byte, offset+int(totalLen), 65535)
|
||||||
|
ipv4H := header.IPv4(b[offset:])
|
||||||
|
srcAs4 := srcIPPort.Addr().As4()
|
||||||
|
dstAs4 := dstIPPort.Addr().As4()
|
||||||
|
ipFields := &header.IPv4Fields{
|
||||||
|
SrcAddr: tcpip.AddrFromSlice(srcAs4[:]),
|
||||||
|
DstAddr: tcpip.AddrFromSlice(dstAs4[:]),
|
||||||
|
Protocol: unix.IPPROTO_TCP,
|
||||||
|
TTL: 64,
|
||||||
|
TotalLength: uint16(totalLen),
|
||||||
|
}
|
||||||
|
if ipFn != nil {
|
||||||
|
ipFn(ipFields)
|
||||||
|
}
|
||||||
|
ipv4H.Encode(ipFields)
|
||||||
|
tcpH := header.TCP(b[offset+20:])
|
||||||
|
tcpH.Encode(&header.TCPFields{
|
||||||
|
SrcPort: srcIPPort.Port(),
|
||||||
|
DstPort: dstIPPort.Port(),
|
||||||
|
SeqNum: seq,
|
||||||
|
AckNum: 1,
|
||||||
|
DataOffset: 20,
|
||||||
|
Flags: flags,
|
||||||
|
WindowSize: 3000,
|
||||||
|
})
|
||||||
|
ipv4H.SetChecksum(^ipv4H.CalculateChecksum())
|
||||||
|
pseudoCsum := header.PseudoHeaderChecksum(unix.IPPROTO_TCP, ipv4H.SourceAddress(), ipv4H.DestinationAddress(), uint16(20+segmentSize))
|
||||||
|
tcpH.SetChecksum(^tcpH.CalculateChecksum(pseudoCsum))
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
// tcp4Packet returns the result of tcp4PacketMutateIPFields for the given
// endpoints, flags, segment size and sequence number, with no mutation of the
// IPv4 header fields.
func tcp4Packet(srcIPPort, dstIPPort netip.AddrPort, flags header.TCPFlags, segmentSize, seq uint32) []byte {
	return tcp4PacketMutateIPFields(srcIPPort, dstIPPort, flags, segmentSize, seq, nil)
}
|
||||||
|
|
||||||
|
func tcp6PacketMutateIPFields(srcIPPort, dstIPPort netip.AddrPort, flags header.TCPFlags, segmentSize, seq uint32, ipFn func(*header.IPv6Fields)) []byte {
|
||||||
|
totalLen := 60 + segmentSize
|
||||||
|
b := make([]byte, offset+int(totalLen), 65535)
|
||||||
|
ipv6H := header.IPv6(b[offset:])
|
||||||
|
srcAs16 := srcIPPort.Addr().As16()
|
||||||
|
dstAs16 := dstIPPort.Addr().As16()
|
||||||
|
ipFields := &header.IPv6Fields{
|
||||||
|
SrcAddr: tcpip.AddrFromSlice(srcAs16[:]),
|
||||||
|
DstAddr: tcpip.AddrFromSlice(dstAs16[:]),
|
||||||
|
TransportProtocol: unix.IPPROTO_TCP,
|
||||||
|
HopLimit: 64,
|
||||||
|
PayloadLength: uint16(segmentSize + 20),
|
||||||
|
}
|
||||||
|
if ipFn != nil {
|
||||||
|
ipFn(ipFields)
|
||||||
|
}
|
||||||
|
ipv6H.Encode(ipFields)
|
||||||
|
tcpH := header.TCP(b[offset+40:])
|
||||||
|
tcpH.Encode(&header.TCPFields{
|
||||||
|
SrcPort: srcIPPort.Port(),
|
||||||
|
DstPort: dstIPPort.Port(),
|
||||||
|
SeqNum: seq,
|
||||||
|
AckNum: 1,
|
||||||
|
DataOffset: 20,
|
||||||
|
Flags: flags,
|
||||||
|
WindowSize: 3000,
|
||||||
|
})
|
||||||
|
pseudoCsum := header.PseudoHeaderChecksum(unix.IPPROTO_TCP, ipv6H.SourceAddress(), ipv6H.DestinationAddress(), uint16(20+segmentSize))
|
||||||
|
tcpH.SetChecksum(^tcpH.CalculateChecksum(pseudoCsum))
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
// tcp6Packet returns the result of tcp6PacketMutateIPFields for the given
// endpoints, flags, segment size and sequence number, with no mutation of the
// IPv6 header fields.
func tcp6Packet(srcIPPort, dstIPPort netip.AddrPort, flags header.TCPFlags, segmentSize, seq uint32) []byte {
	return tcp6PacketMutateIPFields(srcIPPort, dstIPPort, flags, segmentSize, seq, nil)
}
|
||||||
|
|
||||||
|
func Test_handleVirtioRead(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
hdr virtioNetHdr
|
||||||
|
pktIn []byte
|
||||||
|
wantLens []int
|
||||||
|
wantErr bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
"tcp4",
|
||||||
|
virtioNetHdr{
|
||||||
|
flags: unix.VIRTIO_NET_HDR_F_NEEDS_CSUM,
|
||||||
|
gsoType: unix.VIRTIO_NET_HDR_GSO_TCPV4,
|
||||||
|
gsoSize: 100,
|
||||||
|
hdrLen: 40,
|
||||||
|
csumStart: 20,
|
||||||
|
csumOffset: 16,
|
||||||
|
},
|
||||||
|
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck|header.TCPFlagPsh, 200, 1),
|
||||||
|
[]int{140, 140},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"tcp6",
|
||||||
|
virtioNetHdr{
|
||||||
|
flags: unix.VIRTIO_NET_HDR_F_NEEDS_CSUM,
|
||||||
|
gsoType: unix.VIRTIO_NET_HDR_GSO_TCPV6,
|
||||||
|
gsoSize: 100,
|
||||||
|
hdrLen: 60,
|
||||||
|
csumStart: 40,
|
||||||
|
csumOffset: 16,
|
||||||
|
},
|
||||||
|
tcp6Packet(ip6PortA, ip6PortB, header.TCPFlagAck|header.TCPFlagPsh, 200, 1),
|
||||||
|
[]int{160, 160},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"udp4",
|
||||||
|
virtioNetHdr{
|
||||||
|
flags: unix.VIRTIO_NET_HDR_F_NEEDS_CSUM,
|
||||||
|
gsoType: unix.VIRTIO_NET_HDR_GSO_UDP_L4,
|
||||||
|
gsoSize: 100,
|
||||||
|
hdrLen: 28,
|
||||||
|
csumStart: 20,
|
||||||
|
csumOffset: 6,
|
||||||
|
},
|
||||||
|
udp4Packet(ip4PortA, ip4PortB, 200),
|
||||||
|
[]int{128, 128},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"udp6",
|
||||||
|
virtioNetHdr{
|
||||||
|
flags: unix.VIRTIO_NET_HDR_F_NEEDS_CSUM,
|
||||||
|
gsoType: unix.VIRTIO_NET_HDR_GSO_UDP_L4,
|
||||||
|
gsoSize: 100,
|
||||||
|
hdrLen: 48,
|
||||||
|
csumStart: 40,
|
||||||
|
csumOffset: 6,
|
||||||
|
},
|
||||||
|
udp6Packet(ip6PortA, ip6PortB, 200),
|
||||||
|
[]int{148, 148},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
out := make([][]byte, conn.IdealBatchSize)
|
||||||
|
sizes := make([]int, conn.IdealBatchSize)
|
||||||
|
for i := range out {
|
||||||
|
out[i] = make([]byte, 65535)
|
||||||
|
}
|
||||||
|
tt.hdr.encode(tt.pktIn)
|
||||||
|
n, err := handleVirtioRead(tt.pktIn, out, sizes, offset)
|
||||||
|
if err != nil {
|
||||||
|
if tt.wantErr {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
t.Fatalf("got err: %v", err)
|
||||||
|
}
|
||||||
|
if n != len(tt.wantLens) {
|
||||||
|
t.Fatalf("got %d packets, wanted %d", n, len(tt.wantLens))
|
||||||
|
}
|
||||||
|
for i := range tt.wantLens {
|
||||||
|
if tt.wantLens[i] != sizes[i] {
|
||||||
|
t.Fatalf("wantLens[%d]: %d != outSizes: %d", i, tt.wantLens[i], sizes[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func flipTCP4Checksum(b []byte) []byte {
|
||||||
|
at := virtioNetHdrLen + 20 + 16 // 20 byte ipv4 header; tcp csum offset is 16
|
||||||
|
b[at] ^= 0xFF
|
||||||
|
b[at+1] ^= 0xFF
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
func flipUDP4Checksum(b []byte) []byte {
|
||||||
|
at := virtioNetHdrLen + 20 + 6 // 20 byte ipv4 header; udp csum offset is 6
|
||||||
|
b[at] ^= 0xFF
|
||||||
|
b[at+1] ^= 0xFF
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
func Fuzz_handleGRO(f *testing.F) {
|
||||||
|
pkt0 := tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 1)
|
||||||
|
pkt1 := tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 101)
|
||||||
|
pkt2 := tcp4Packet(ip4PortA, ip4PortC, header.TCPFlagAck, 100, 201)
|
||||||
|
pkt3 := tcp6Packet(ip6PortA, ip6PortB, header.TCPFlagAck, 100, 1)
|
||||||
|
pkt4 := tcp6Packet(ip6PortA, ip6PortB, header.TCPFlagAck, 100, 101)
|
||||||
|
pkt5 := tcp6Packet(ip6PortA, ip6PortC, header.TCPFlagAck, 100, 201)
|
||||||
|
pkt6 := udp4Packet(ip4PortA, ip4PortB, 100)
|
||||||
|
pkt7 := udp4Packet(ip4PortA, ip4PortB, 100)
|
||||||
|
pkt8 := udp4Packet(ip4PortA, ip4PortC, 100)
|
||||||
|
pkt9 := udp6Packet(ip6PortA, ip6PortB, 100)
|
||||||
|
pkt10 := udp6Packet(ip6PortA, ip6PortB, 100)
|
||||||
|
pkt11 := udp6Packet(ip6PortA, ip6PortC, 100)
|
||||||
|
f.Add(pkt0, pkt1, pkt2, pkt3, pkt4, pkt5, pkt6, pkt7, pkt8, pkt9, pkt10, pkt11, true, offset)
|
||||||
|
f.Fuzz(func(t *testing.T, pkt0, pkt1, pkt2, pkt3, pkt4, pkt5, pkt6, pkt7, pkt8, pkt9, pkt10, pkt11 []byte, canUDPGRO bool, offset int) {
|
||||||
|
pkts := [][]byte{pkt0, pkt1, pkt2, pkt3, pkt4, pkt5, pkt6, pkt7, pkt8, pkt9, pkt10, pkt11}
|
||||||
|
toWrite := make([]int, 0, len(pkts))
|
||||||
|
handleGRO(pkts, offset, newTCPGROTable(), newUDPGROTable(), canUDPGRO, &toWrite)
|
||||||
|
if len(toWrite) > len(pkts) {
|
||||||
|
t.Errorf("len(toWrite): %d > len(pkts): %d", len(toWrite), len(pkts))
|
||||||
|
}
|
||||||
|
seenWriteI := make(map[int]bool)
|
||||||
|
for _, writeI := range toWrite {
|
||||||
|
if writeI < 0 || writeI > len(pkts)-1 {
|
||||||
|
t.Errorf("toWrite value (%d) outside bounds of len(pkts): %d", writeI, len(pkts))
|
||||||
|
}
|
||||||
|
if seenWriteI[writeI] {
|
||||||
|
t.Errorf("duplicate toWrite value: %d", writeI)
|
||||||
|
}
|
||||||
|
seenWriteI[writeI] = true
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func Test_handleGRO(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
pktsIn [][]byte
|
||||||
|
canUDPGRO bool
|
||||||
|
wantToWrite []int
|
||||||
|
wantLens []int
|
||||||
|
wantErr bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
"multiple protocols and flows",
|
||||||
|
[][]byte{
|
||||||
|
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 1), // tcp4 flow 1
|
||||||
|
udp4Packet(ip4PortA, ip4PortB, 100), // udp4 flow 1
|
||||||
|
udp4Packet(ip4PortA, ip4PortC, 100), // udp4 flow 2
|
||||||
|
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 101), // tcp4 flow 1
|
||||||
|
tcp4Packet(ip4PortA, ip4PortC, header.TCPFlagAck, 100, 201), // tcp4 flow 2
|
||||||
|
tcp6Packet(ip6PortA, ip6PortB, header.TCPFlagAck, 100, 1), // tcp6 flow 1
|
||||||
|
tcp6Packet(ip6PortA, ip6PortB, header.TCPFlagAck, 100, 101), // tcp6 flow 1
|
||||||
|
tcp6Packet(ip6PortA, ip6PortC, header.TCPFlagAck, 100, 201), // tcp6 flow 2
|
||||||
|
udp4Packet(ip4PortA, ip4PortB, 100), // udp4 flow 1
|
||||||
|
udp6Packet(ip6PortA, ip6PortB, 100), // udp6 flow 1
|
||||||
|
udp6Packet(ip6PortA, ip6PortB, 100), // udp6 flow 1
|
||||||
|
},
|
||||||
|
true,
|
||||||
|
[]int{0, 1, 2, 4, 5, 7, 9},
|
||||||
|
[]int{240, 228, 128, 140, 260, 160, 248},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"multiple protocols and flows no UDP GRO",
|
||||||
|
[][]byte{
|
||||||
|
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 1), // tcp4 flow 1
|
||||||
|
udp4Packet(ip4PortA, ip4PortB, 100), // udp4 flow 1
|
||||||
|
udp4Packet(ip4PortA, ip4PortC, 100), // udp4 flow 2
|
||||||
|
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 101), // tcp4 flow 1
|
||||||
|
tcp4Packet(ip4PortA, ip4PortC, header.TCPFlagAck, 100, 201), // tcp4 flow 2
|
||||||
|
tcp6Packet(ip6PortA, ip6PortB, header.TCPFlagAck, 100, 1), // tcp6 flow 1
|
||||||
|
tcp6Packet(ip6PortA, ip6PortB, header.TCPFlagAck, 100, 101), // tcp6 flow 1
|
||||||
|
tcp6Packet(ip6PortA, ip6PortC, header.TCPFlagAck, 100, 201), // tcp6 flow 2
|
||||||
|
udp4Packet(ip4PortA, ip4PortB, 100), // udp4 flow 1
|
||||||
|
udp6Packet(ip6PortA, ip6PortB, 100), // udp6 flow 1
|
||||||
|
udp6Packet(ip6PortA, ip6PortB, 100), // udp6 flow 1
|
||||||
|
},
|
||||||
|
false,
|
||||||
|
[]int{0, 1, 2, 4, 5, 7, 8, 9, 10},
|
||||||
|
[]int{240, 128, 128, 140, 260, 160, 128, 148, 148},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"PSH interleaved",
|
||||||
|
[][]byte{
|
||||||
|
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 1), // v4 flow 1
|
||||||
|
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck|header.TCPFlagPsh, 100, 101), // v4 flow 1
|
||||||
|
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 201), // v4 flow 1
|
||||||
|
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 301), // v4 flow 1
|
||||||
|
tcp6Packet(ip6PortA, ip6PortB, header.TCPFlagAck, 100, 1), // v6 flow 1
|
||||||
|
tcp6Packet(ip6PortA, ip6PortB, header.TCPFlagAck|header.TCPFlagPsh, 100, 101), // v6 flow 1
|
||||||
|
tcp6Packet(ip6PortA, ip6PortB, header.TCPFlagAck, 100, 201), // v6 flow 1
|
||||||
|
tcp6Packet(ip6PortA, ip6PortB, header.TCPFlagAck, 100, 301), // v6 flow 1
|
||||||
|
},
|
||||||
|
true,
|
||||||
|
[]int{0, 2, 4, 6},
|
||||||
|
[]int{240, 240, 260, 260},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"coalesceItemInvalidCSum",
|
||||||
|
[][]byte{
|
||||||
|
flipTCP4Checksum(tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 1)), // v4 flow 1 seq 1 len 100
|
||||||
|
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 101), // v4 flow 1 seq 101 len 100
|
||||||
|
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 201), // v4 flow 1 seq 201 len 100
|
||||||
|
flipUDP4Checksum(udp4Packet(ip4PortA, ip4PortB, 100)),
|
||||||
|
udp4Packet(ip4PortA, ip4PortB, 100),
|
||||||
|
udp4Packet(ip4PortA, ip4PortB, 100),
|
||||||
|
},
|
||||||
|
true,
|
||||||
|
[]int{0, 1, 3, 4},
|
||||||
|
[]int{140, 240, 128, 228},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"out of order",
|
||||||
|
[][]byte{
|
||||||
|
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 101), // v4 flow 1 seq 101 len 100
|
||||||
|
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 1), // v4 flow 1 seq 1 len 100
|
||||||
|
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 201), // v4 flow 1 seq 201 len 100
|
||||||
|
},
|
||||||
|
true,
|
||||||
|
[]int{0},
|
||||||
|
[]int{340},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"unequal TTL",
|
||||||
|
[][]byte{
|
||||||
|
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 1),
|
||||||
|
tcp4PacketMutateIPFields(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 101, func(fields *header.IPv4Fields) {
|
||||||
|
fields.TTL++
|
||||||
|
}),
|
||||||
|
udp4Packet(ip4PortA, ip4PortB, 100),
|
||||||
|
udp4PacketMutateIPFields(ip4PortA, ip4PortB, 100, func(fields *header.IPv4Fields) {
|
||||||
|
fields.TTL++
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
true,
|
||||||
|
[]int{0, 1, 2, 3},
|
||||||
|
[]int{140, 140, 128, 128},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"unequal ToS",
|
||||||
|
[][]byte{
|
||||||
|
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 1),
|
||||||
|
tcp4PacketMutateIPFields(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 101, func(fields *header.IPv4Fields) {
|
||||||
|
fields.TOS++
|
||||||
|
}),
|
||||||
|
udp4Packet(ip4PortA, ip4PortB, 100),
|
||||||
|
udp4PacketMutateIPFields(ip4PortA, ip4PortB, 100, func(fields *header.IPv4Fields) {
|
||||||
|
fields.TOS++
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
true,
|
||||||
|
[]int{0, 1, 2, 3},
|
||||||
|
[]int{140, 140, 128, 128},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"unequal flags more fragments set",
|
||||||
|
[][]byte{
|
||||||
|
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 1),
|
||||||
|
tcp4PacketMutateIPFields(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 101, func(fields *header.IPv4Fields) {
|
||||||
|
fields.Flags = 1
|
||||||
|
}),
|
||||||
|
udp4Packet(ip4PortA, ip4PortB, 100),
|
||||||
|
udp4PacketMutateIPFields(ip4PortA, ip4PortB, 100, func(fields *header.IPv4Fields) {
|
||||||
|
fields.Flags = 1
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
true,
|
||||||
|
[]int{0, 1, 2, 3},
|
||||||
|
[]int{140, 140, 128, 128},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"unequal flags DF set",
|
||||||
|
[][]byte{
|
||||||
|
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 1),
|
||||||
|
tcp4PacketMutateIPFields(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 101, func(fields *header.IPv4Fields) {
|
||||||
|
fields.Flags = 2
|
||||||
|
}),
|
||||||
|
udp4Packet(ip4PortA, ip4PortB, 100),
|
||||||
|
udp4PacketMutateIPFields(ip4PortA, ip4PortB, 100, func(fields *header.IPv4Fields) {
|
||||||
|
fields.Flags = 2
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
true,
|
||||||
|
[]int{0, 1, 2, 3},
|
||||||
|
[]int{140, 140, 128, 128},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ipv6 unequal hop limit",
|
||||||
|
[][]byte{
|
||||||
|
tcp6Packet(ip6PortA, ip6PortB, header.TCPFlagAck, 100, 1),
|
||||||
|
tcp6PacketMutateIPFields(ip6PortA, ip6PortB, header.TCPFlagAck, 100, 101, func(fields *header.IPv6Fields) {
|
||||||
|
fields.HopLimit++
|
||||||
|
}),
|
||||||
|
udp6Packet(ip6PortA, ip6PortB, 100),
|
||||||
|
udp6PacketMutateIPFields(ip6PortA, ip6PortB, 100, func(fields *header.IPv6Fields) {
|
||||||
|
fields.HopLimit++
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
true,
|
||||||
|
[]int{0, 1, 2, 3},
|
||||||
|
[]int{160, 160, 148, 148},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ipv6 unequal traffic class",
|
||||||
|
[][]byte{
|
||||||
|
tcp6Packet(ip6PortA, ip6PortB, header.TCPFlagAck, 100, 1),
|
||||||
|
tcp6PacketMutateIPFields(ip6PortA, ip6PortB, header.TCPFlagAck, 100, 101, func(fields *header.IPv6Fields) {
|
||||||
|
fields.TrafficClass++
|
||||||
|
}),
|
||||||
|
udp6Packet(ip6PortA, ip6PortB, 100),
|
||||||
|
udp6PacketMutateIPFields(ip6PortA, ip6PortB, 100, func(fields *header.IPv6Fields) {
|
||||||
|
fields.TrafficClass++
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
true,
|
||||||
|
[]int{0, 1, 2, 3},
|
||||||
|
[]int{160, 160, 148, 148},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
toWrite := make([]int, 0, len(tt.pktsIn))
|
||||||
|
err := handleGRO(tt.pktsIn, offset, newTCPGROTable(), newUDPGROTable(), tt.canUDPGRO, &toWrite)
|
||||||
|
if err != nil {
|
||||||
|
if tt.wantErr {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
t.Fatalf("got err: %v", err)
|
||||||
|
}
|
||||||
|
if len(toWrite) != len(tt.wantToWrite) {
|
||||||
|
t.Fatalf("got %d packets, wanted %d", len(toWrite), len(tt.wantToWrite))
|
||||||
|
}
|
||||||
|
for i, pktI := range tt.wantToWrite {
|
||||||
|
if tt.wantToWrite[i] != toWrite[i] {
|
||||||
|
t.Fatalf("wantToWrite[%d]: %d != toWrite: %d", i, tt.wantToWrite[i], toWrite[i])
|
||||||
|
}
|
||||||
|
if tt.wantLens[i] != len(tt.pktsIn[pktI][offset:]) {
|
||||||
|
t.Errorf("wanted len %d packet at %d, got: %d", tt.wantLens[i], i, len(tt.pktsIn[pktI][offset:]))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func Test_packetIsGROCandidate(t *testing.T) {
|
||||||
|
tcp4 := tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 1)[virtioNetHdrLen:]
|
||||||
|
tcp4TooShort := tcp4[:39]
|
||||||
|
ip4InvalidHeaderLen := make([]byte, len(tcp4))
|
||||||
|
copy(ip4InvalidHeaderLen, tcp4)
|
||||||
|
ip4InvalidHeaderLen[0] = 0x46
|
||||||
|
ip4InvalidProtocol := make([]byte, len(tcp4))
|
||||||
|
copy(ip4InvalidProtocol, tcp4)
|
||||||
|
ip4InvalidProtocol[9] = unix.IPPROTO_GRE
|
||||||
|
|
||||||
|
tcp6 := tcp6Packet(ip6PortA, ip6PortB, header.TCPFlagAck, 100, 1)[virtioNetHdrLen:]
|
||||||
|
tcp6TooShort := tcp6[:59]
|
||||||
|
ip6InvalidProtocol := make([]byte, len(tcp6))
|
||||||
|
copy(ip6InvalidProtocol, tcp6)
|
||||||
|
ip6InvalidProtocol[6] = unix.IPPROTO_GRE
|
||||||
|
|
||||||
|
udp4 := udp4Packet(ip4PortA, ip4PortB, 100)[virtioNetHdrLen:]
|
||||||
|
udp4TooShort := udp4[:27]
|
||||||
|
|
||||||
|
udp6 := udp6Packet(ip6PortA, ip6PortB, 100)[virtioNetHdrLen:]
|
||||||
|
udp6TooShort := udp6[:47]
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
b []byte
|
||||||
|
canUDPGRO bool
|
||||||
|
want groCandidateType
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
"tcp4",
|
||||||
|
tcp4,
|
||||||
|
true,
|
||||||
|
tcp4GROCandidate,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"tcp6",
|
||||||
|
tcp6,
|
||||||
|
true,
|
||||||
|
tcp6GROCandidate,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"udp4",
|
||||||
|
udp4,
|
||||||
|
true,
|
||||||
|
udp4GROCandidate,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"udp4 no support",
|
||||||
|
udp4,
|
||||||
|
false,
|
||||||
|
notGROCandidate,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"udp6",
|
||||||
|
udp6,
|
||||||
|
true,
|
||||||
|
udp6GROCandidate,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"udp6 no support",
|
||||||
|
udp6,
|
||||||
|
false,
|
||||||
|
notGROCandidate,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"udp4 too short",
|
||||||
|
udp4TooShort,
|
||||||
|
true,
|
||||||
|
notGROCandidate,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"udp6 too short",
|
||||||
|
udp6TooShort,
|
||||||
|
true,
|
||||||
|
notGROCandidate,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"tcp4 too short",
|
||||||
|
tcp4TooShort,
|
||||||
|
true,
|
||||||
|
notGROCandidate,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"tcp6 too short",
|
||||||
|
tcp6TooShort,
|
||||||
|
true,
|
||||||
|
notGROCandidate,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"invalid IP version",
|
||||||
|
[]byte{0x00},
|
||||||
|
true,
|
||||||
|
notGROCandidate,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"invalid IP header len",
|
||||||
|
ip4InvalidHeaderLen,
|
||||||
|
true,
|
||||||
|
notGROCandidate,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ip4 invalid protocol",
|
||||||
|
ip4InvalidProtocol,
|
||||||
|
true,
|
||||||
|
notGROCandidate,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ip6 invalid protocol",
|
||||||
|
ip6InvalidProtocol,
|
||||||
|
true,
|
||||||
|
notGROCandidate,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
if got := packetIsGROCandidate(tt.b, tt.canUDPGRO); got != tt.want {
|
||||||
|
t.Errorf("packetIsGROCandidate() = %v, want %v", got, tt.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func Test_udpPacketsCanCoalesce(t *testing.T) {
|
||||||
|
udp4a := udp4Packet(ip4PortA, ip4PortB, 100)
|
||||||
|
udp4b := udp4Packet(ip4PortA, ip4PortB, 100)
|
||||||
|
udp4c := udp4Packet(ip4PortA, ip4PortB, 110)
|
||||||
|
|
||||||
|
type args struct {
|
||||||
|
pkt []byte
|
||||||
|
iphLen uint8
|
||||||
|
gsoSize uint16
|
||||||
|
item udpGROItem
|
||||||
|
bufs [][]byte
|
||||||
|
bufsOffset int
|
||||||
|
}
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
args args
|
||||||
|
want canCoalesce
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
"coalesceAppend equal gso",
|
||||||
|
args{
|
||||||
|
pkt: udp4a[offset:],
|
||||||
|
iphLen: 20,
|
||||||
|
gsoSize: 100,
|
||||||
|
item: udpGROItem{
|
||||||
|
gsoSize: 100,
|
||||||
|
iphLen: 20,
|
||||||
|
},
|
||||||
|
bufs: [][]byte{
|
||||||
|
udp4a,
|
||||||
|
udp4b,
|
||||||
|
},
|
||||||
|
bufsOffset: offset,
|
||||||
|
},
|
||||||
|
coalesceAppend,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"coalesceAppend smaller gso",
|
||||||
|
args{
|
||||||
|
pkt: udp4a[offset : len(udp4a)-90],
|
||||||
|
iphLen: 20,
|
||||||
|
gsoSize: 10,
|
||||||
|
item: udpGROItem{
|
||||||
|
gsoSize: 100,
|
||||||
|
iphLen: 20,
|
||||||
|
},
|
||||||
|
bufs: [][]byte{
|
||||||
|
udp4a,
|
||||||
|
udp4b,
|
||||||
|
},
|
||||||
|
bufsOffset: offset,
|
||||||
|
},
|
||||||
|
coalesceAppend,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"coalesceUnavailable smaller gso previously appended",
|
||||||
|
args{
|
||||||
|
pkt: udp4a[offset:],
|
||||||
|
iphLen: 20,
|
||||||
|
gsoSize: 100,
|
||||||
|
item: udpGROItem{
|
||||||
|
gsoSize: 100,
|
||||||
|
iphLen: 20,
|
||||||
|
},
|
||||||
|
bufs: [][]byte{
|
||||||
|
udp4c,
|
||||||
|
udp4b,
|
||||||
|
},
|
||||||
|
bufsOffset: offset,
|
||||||
|
},
|
||||||
|
coalesceUnavailable,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"coalesceUnavailable larger following smaller",
|
||||||
|
args{
|
||||||
|
pkt: udp4c[offset:],
|
||||||
|
iphLen: 20,
|
||||||
|
gsoSize: 110,
|
||||||
|
item: udpGROItem{
|
||||||
|
gsoSize: 100,
|
||||||
|
iphLen: 20,
|
||||||
|
},
|
||||||
|
bufs: [][]byte{
|
||||||
|
udp4a,
|
||||||
|
udp4c,
|
||||||
|
},
|
||||||
|
bufsOffset: offset,
|
||||||
|
},
|
||||||
|
coalesceUnavailable,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
if got := udpPacketsCanCoalesce(tt.args.pkt, tt.args.iphLen, tt.args.gsoSize, tt.args.item, tt.args.bufs, tt.args.bufsOffset); got != tt.want {
|
||||||
|
t.Errorf("udpPacketsCanCoalesce() = %v, want %v", got, tt.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
@ -1,411 +0,0 @@
|
|||||||
/* SPDX-License-Identifier: MIT
|
|
||||||
*
|
|
||||||
* Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package tun
|
|
||||||
|
|
||||||
import (
|
|
||||||
"net/netip"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"golang.org/x/sys/unix"
|
|
||||||
"golang.zx2c4.com/wireguard/conn"
|
|
||||||
"gvisor.dev/gvisor/pkg/tcpip"
|
|
||||||
"gvisor.dev/gvisor/pkg/tcpip/header"
|
|
||||||
)
|
|
||||||
|
|
||||||
// offset is the index at which the IP header starts inside the test packet
// buffers; the bytes before it are reserved for the virtio net header.
const (
|
|
||||||
offset = virtioNetHdrLen
|
|
||||||
)
|
|
||||||
|
|
||||||
// Address/port pairs used as source and destination endpoints by the packet
// builders and tests in this file. A/B/C differ only in the host address; all
// use port 1.
var (
|
|
||||||
ip4PortA = netip.MustParseAddrPort("192.0.2.1:1")
|
|
||||||
ip4PortB = netip.MustParseAddrPort("192.0.2.2:1")
|
|
||||||
ip4PortC = netip.MustParseAddrPort("192.0.2.3:1")
|
|
||||||
ip6PortA = netip.MustParseAddrPort("[2001:db8::1]:1")
|
|
||||||
ip6PortB = netip.MustParseAddrPort("[2001:db8::2]:1")
|
|
||||||
ip6PortC = netip.MustParseAddrPort("[2001:db8::3]:1")
|
|
||||||
)
|
|
||||||
|
|
||||||
func tcp4PacketMutateIPFields(srcIPPort, dstIPPort netip.AddrPort, flags header.TCPFlags, segmentSize, seq uint32, ipFn func(*header.IPv4Fields)) []byte {
|
|
||||||
totalLen := 40 + segmentSize
|
|
||||||
b := make([]byte, offset+int(totalLen), 65535)
|
|
||||||
ipv4H := header.IPv4(b[offset:])
|
|
||||||
srcAs4 := srcIPPort.Addr().As4()
|
|
||||||
dstAs4 := dstIPPort.Addr().As4()
|
|
||||||
ipFields := &header.IPv4Fields{
|
|
||||||
SrcAddr: tcpip.AddrFromSlice(srcAs4[:]),
|
|
||||||
DstAddr: tcpip.AddrFromSlice(dstAs4[:]),
|
|
||||||
Protocol: unix.IPPROTO_TCP,
|
|
||||||
TTL: 64,
|
|
||||||
TotalLength: uint16(totalLen),
|
|
||||||
}
|
|
||||||
if ipFn != nil {
|
|
||||||
ipFn(ipFields)
|
|
||||||
}
|
|
||||||
ipv4H.Encode(ipFields)
|
|
||||||
tcpH := header.TCP(b[offset+20:])
|
|
||||||
tcpH.Encode(&header.TCPFields{
|
|
||||||
SrcPort: srcIPPort.Port(),
|
|
||||||
DstPort: dstIPPort.Port(),
|
|
||||||
SeqNum: seq,
|
|
||||||
AckNum: 1,
|
|
||||||
DataOffset: 20,
|
|
||||||
Flags: flags,
|
|
||||||
WindowSize: 3000,
|
|
||||||
})
|
|
||||||
ipv4H.SetChecksum(^ipv4H.CalculateChecksum())
|
|
||||||
pseudoCsum := header.PseudoHeaderChecksum(unix.IPPROTO_TCP, ipv4H.SourceAddress(), ipv4H.DestinationAddress(), uint16(20+segmentSize))
|
|
||||||
tcpH.SetChecksum(^tcpH.CalculateChecksum(pseudoCsum))
|
|
||||||
return b
|
|
||||||
}
|
|
||||||
|
|
||||||
func tcp4Packet(srcIPPort, dstIPPort netip.AddrPort, flags header.TCPFlags, segmentSize, seq uint32) []byte {
|
|
||||||
return tcp4PacketMutateIPFields(srcIPPort, dstIPPort, flags, segmentSize, seq, nil)
|
|
||||||
}
|
|
||||||
|
|
||||||
func tcp6PacketMutateIPFields(srcIPPort, dstIPPort netip.AddrPort, flags header.TCPFlags, segmentSize, seq uint32, ipFn func(*header.IPv6Fields)) []byte {
|
|
||||||
totalLen := 60 + segmentSize
|
|
||||||
b := make([]byte, offset+int(totalLen), 65535)
|
|
||||||
ipv6H := header.IPv6(b[offset:])
|
|
||||||
srcAs16 := srcIPPort.Addr().As16()
|
|
||||||
dstAs16 := dstIPPort.Addr().As16()
|
|
||||||
ipFields := &header.IPv6Fields{
|
|
||||||
SrcAddr: tcpip.AddrFromSlice(srcAs16[:]),
|
|
||||||
DstAddr: tcpip.AddrFromSlice(dstAs16[:]),
|
|
||||||
TransportProtocol: unix.IPPROTO_TCP,
|
|
||||||
HopLimit: 64,
|
|
||||||
PayloadLength: uint16(segmentSize + 20),
|
|
||||||
}
|
|
||||||
if ipFn != nil {
|
|
||||||
ipFn(ipFields)
|
|
||||||
}
|
|
||||||
ipv6H.Encode(ipFields)
|
|
||||||
tcpH := header.TCP(b[offset+40:])
|
|
||||||
tcpH.Encode(&header.TCPFields{
|
|
||||||
SrcPort: srcIPPort.Port(),
|
|
||||||
DstPort: dstIPPort.Port(),
|
|
||||||
SeqNum: seq,
|
|
||||||
AckNum: 1,
|
|
||||||
DataOffset: 20,
|
|
||||||
Flags: flags,
|
|
||||||
WindowSize: 3000,
|
|
||||||
})
|
|
||||||
pseudoCsum := header.PseudoHeaderChecksum(unix.IPPROTO_TCP, ipv6H.SourceAddress(), ipv6H.DestinationAddress(), uint16(20+segmentSize))
|
|
||||||
tcpH.SetChecksum(^tcpH.CalculateChecksum(pseudoCsum))
|
|
||||||
return b
|
|
||||||
}
|
|
||||||
|
|
||||||
func tcp6Packet(srcIPPort, dstIPPort netip.AddrPort, flags header.TCPFlags, segmentSize, seq uint32) []byte {
|
|
||||||
return tcp6PacketMutateIPFields(srcIPPort, dstIPPort, flags, segmentSize, seq, nil)
|
|
||||||
}
|
|
||||||
|
|
||||||
func Test_handleVirtioRead(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
name string
|
|
||||||
hdr virtioNetHdr
|
|
||||||
pktIn []byte
|
|
||||||
wantLens []int
|
|
||||||
wantErr bool
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
"tcp4",
|
|
||||||
virtioNetHdr{
|
|
||||||
flags: unix.VIRTIO_NET_HDR_F_NEEDS_CSUM,
|
|
||||||
gsoType: unix.VIRTIO_NET_HDR_GSO_TCPV4,
|
|
||||||
gsoSize: 100,
|
|
||||||
hdrLen: 40,
|
|
||||||
csumStart: 20,
|
|
||||||
csumOffset: 16,
|
|
||||||
},
|
|
||||||
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck|header.TCPFlagPsh, 200, 1),
|
|
||||||
[]int{140, 140},
|
|
||||||
false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"tcp6",
|
|
||||||
virtioNetHdr{
|
|
||||||
flags: unix.VIRTIO_NET_HDR_F_NEEDS_CSUM,
|
|
||||||
gsoType: unix.VIRTIO_NET_HDR_GSO_TCPV6,
|
|
||||||
gsoSize: 100,
|
|
||||||
hdrLen: 60,
|
|
||||||
csumStart: 40,
|
|
||||||
csumOffset: 16,
|
|
||||||
},
|
|
||||||
tcp6Packet(ip6PortA, ip6PortB, header.TCPFlagAck|header.TCPFlagPsh, 200, 1),
|
|
||||||
[]int{160, 160},
|
|
||||||
false,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, tt := range tests {
|
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
|
||||||
out := make([][]byte, conn.IdealBatchSize)
|
|
||||||
sizes := make([]int, conn.IdealBatchSize)
|
|
||||||
for i := range out {
|
|
||||||
out[i] = make([]byte, 65535)
|
|
||||||
}
|
|
||||||
tt.hdr.encode(tt.pktIn)
|
|
||||||
n, err := handleVirtioRead(tt.pktIn, out, sizes, offset)
|
|
||||||
if err != nil {
|
|
||||||
if tt.wantErr {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
t.Fatalf("got err: %v", err)
|
|
||||||
}
|
|
||||||
if n != len(tt.wantLens) {
|
|
||||||
t.Fatalf("got %d packets, wanted %d", n, len(tt.wantLens))
|
|
||||||
}
|
|
||||||
for i := range tt.wantLens {
|
|
||||||
if tt.wantLens[i] != sizes[i] {
|
|
||||||
t.Fatalf("wantLens[%d]: %d != outSizes: %d", i, tt.wantLens[i], sizes[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func flipTCP4Checksum(b []byte) []byte {
|
|
||||||
at := virtioNetHdrLen + 20 + 16 // 20 byte ipv4 header; tcp csum offset is 16
|
|
||||||
b[at] ^= 0xFF
|
|
||||||
b[at+1] ^= 0xFF
|
|
||||||
return b
|
|
||||||
}
|
|
||||||
|
|
||||||
func Fuzz_handleGRO(f *testing.F) {
|
|
||||||
pkt0 := tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 1)
|
|
||||||
pkt1 := tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 101)
|
|
||||||
pkt2 := tcp4Packet(ip4PortA, ip4PortC, header.TCPFlagAck, 100, 201)
|
|
||||||
pkt3 := tcp6Packet(ip6PortA, ip6PortB, header.TCPFlagAck, 100, 1)
|
|
||||||
pkt4 := tcp6Packet(ip6PortA, ip6PortB, header.TCPFlagAck, 100, 101)
|
|
||||||
pkt5 := tcp6Packet(ip6PortA, ip6PortC, header.TCPFlagAck, 100, 201)
|
|
||||||
f.Add(pkt0, pkt1, pkt2, pkt3, pkt4, pkt5, offset)
|
|
||||||
f.Fuzz(func(t *testing.T, pkt0, pkt1, pkt2, pkt3, pkt4, pkt5 []byte, offset int) {
|
|
||||||
pkts := [][]byte{pkt0, pkt1, pkt2, pkt3, pkt4, pkt5}
|
|
||||||
toWrite := make([]int, 0, len(pkts))
|
|
||||||
handleGRO(pkts, offset, newTCPGROTable(), newTCPGROTable(), &toWrite)
|
|
||||||
if len(toWrite) > len(pkts) {
|
|
||||||
t.Errorf("len(toWrite): %d > len(pkts): %d", len(toWrite), len(pkts))
|
|
||||||
}
|
|
||||||
seenWriteI := make(map[int]bool)
|
|
||||||
for _, writeI := range toWrite {
|
|
||||||
if writeI < 0 || writeI > len(pkts)-1 {
|
|
||||||
t.Errorf("toWrite value (%d) outside bounds of len(pkts): %d", writeI, len(pkts))
|
|
||||||
}
|
|
||||||
if seenWriteI[writeI] {
|
|
||||||
t.Errorf("duplicate toWrite value: %d", writeI)
|
|
||||||
}
|
|
||||||
seenWriteI[writeI] = true
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func Test_handleGRO(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
name string
|
|
||||||
pktsIn [][]byte
|
|
||||||
wantToWrite []int
|
|
||||||
wantLens []int
|
|
||||||
wantErr bool
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
"multiple flows",
|
|
||||||
[][]byte{
|
|
||||||
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 1), // v4 flow 1
|
|
||||||
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 101), // v4 flow 1
|
|
||||||
tcp4Packet(ip4PortA, ip4PortC, header.TCPFlagAck, 100, 201), // v4 flow 2
|
|
||||||
tcp6Packet(ip6PortA, ip6PortB, header.TCPFlagAck, 100, 1), // v6 flow 1
|
|
||||||
tcp6Packet(ip6PortA, ip6PortB, header.TCPFlagAck, 100, 101), // v6 flow 1
|
|
||||||
tcp6Packet(ip6PortA, ip6PortC, header.TCPFlagAck, 100, 201), // v6 flow 2
|
|
||||||
},
|
|
||||||
[]int{0, 2, 3, 5},
|
|
||||||
[]int{240, 140, 260, 160},
|
|
||||||
false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"PSH interleaved",
|
|
||||||
[][]byte{
|
|
||||||
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 1), // v4 flow 1
|
|
||||||
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck|header.TCPFlagPsh, 100, 101), // v4 flow 1
|
|
||||||
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 201), // v4 flow 1
|
|
||||||
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 301), // v4 flow 1
|
|
||||||
tcp6Packet(ip6PortA, ip6PortB, header.TCPFlagAck, 100, 1), // v6 flow 1
|
|
||||||
tcp6Packet(ip6PortA, ip6PortB, header.TCPFlagAck|header.TCPFlagPsh, 100, 101), // v6 flow 1
|
|
||||||
tcp6Packet(ip6PortA, ip6PortB, header.TCPFlagAck, 100, 201), // v6 flow 1
|
|
||||||
tcp6Packet(ip6PortA, ip6PortB, header.TCPFlagAck, 100, 301), // v6 flow 1
|
|
||||||
},
|
|
||||||
[]int{0, 2, 4, 6},
|
|
||||||
[]int{240, 240, 260, 260},
|
|
||||||
false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"coalesceItemInvalidCSum",
|
|
||||||
[][]byte{
|
|
||||||
flipTCP4Checksum(tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 1)), // v4 flow 1 seq 1 len 100
|
|
||||||
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 101), // v4 flow 1 seq 101 len 100
|
|
||||||
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 201), // v4 flow 1 seq 201 len 100
|
|
||||||
},
|
|
||||||
[]int{0, 1},
|
|
||||||
[]int{140, 240},
|
|
||||||
false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"out of order",
|
|
||||||
[][]byte{
|
|
||||||
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 101), // v4 flow 1 seq 101 len 100
|
|
||||||
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 1), // v4 flow 1 seq 1 len 100
|
|
||||||
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 201), // v4 flow 1 seq 201 len 100
|
|
||||||
},
|
|
||||||
[]int{0},
|
|
||||||
[]int{340},
|
|
||||||
false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"tcp4 unequal TTL",
|
|
||||||
[][]byte{
|
|
||||||
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 1),
|
|
||||||
tcp4PacketMutateIPFields(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 101, func(fields *header.IPv4Fields) {
|
|
||||||
fields.TTL++
|
|
||||||
}),
|
|
||||||
},
|
|
||||||
[]int{0, 1},
|
|
||||||
[]int{140, 140},
|
|
||||||
false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"tcp4 unequal ToS",
|
|
||||||
[][]byte{
|
|
||||||
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 1),
|
|
||||||
tcp4PacketMutateIPFields(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 101, func(fields *header.IPv4Fields) {
|
|
||||||
fields.TOS++
|
|
||||||
}),
|
|
||||||
},
|
|
||||||
[]int{0, 1},
|
|
||||||
[]int{140, 140},
|
|
||||||
false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"tcp4 unequal flags more fragments set",
|
|
||||||
[][]byte{
|
|
||||||
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 1),
|
|
||||||
tcp4PacketMutateIPFields(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 101, func(fields *header.IPv4Fields) {
|
|
||||||
fields.Flags = 1
|
|
||||||
}),
|
|
||||||
},
|
|
||||||
[]int{0, 1},
|
|
||||||
[]int{140, 140},
|
|
||||||
false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"tcp4 unequal flags DF set",
|
|
||||||
[][]byte{
|
|
||||||
tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 1),
|
|
||||||
tcp4PacketMutateIPFields(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 101, func(fields *header.IPv4Fields) {
|
|
||||||
fields.Flags = 2
|
|
||||||
}),
|
|
||||||
},
|
|
||||||
[]int{0, 1},
|
|
||||||
[]int{140, 140},
|
|
||||||
false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"tcp6 unequal hop limit",
|
|
||||||
[][]byte{
|
|
||||||
tcp6Packet(ip6PortA, ip6PortB, header.TCPFlagAck, 100, 1),
|
|
||||||
tcp6PacketMutateIPFields(ip6PortA, ip6PortB, header.TCPFlagAck, 100, 101, func(fields *header.IPv6Fields) {
|
|
||||||
fields.HopLimit++
|
|
||||||
}),
|
|
||||||
},
|
|
||||||
[]int{0, 1},
|
|
||||||
[]int{160, 160},
|
|
||||||
false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"tcp6 unequal traffic class",
|
|
||||||
[][]byte{
|
|
||||||
tcp6Packet(ip6PortA, ip6PortB, header.TCPFlagAck, 100, 1),
|
|
||||||
tcp6PacketMutateIPFields(ip6PortA, ip6PortB, header.TCPFlagAck, 100, 101, func(fields *header.IPv6Fields) {
|
|
||||||
fields.TrafficClass++
|
|
||||||
}),
|
|
||||||
},
|
|
||||||
[]int{0, 1},
|
|
||||||
[]int{160, 160},
|
|
||||||
false,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, tt := range tests {
|
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
|
||||||
toWrite := make([]int, 0, len(tt.pktsIn))
|
|
||||||
err := handleGRO(tt.pktsIn, offset, newTCPGROTable(), newTCPGROTable(), &toWrite)
|
|
||||||
if err != nil {
|
|
||||||
if tt.wantErr {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
t.Fatalf("got err: %v", err)
|
|
||||||
}
|
|
||||||
if len(toWrite) != len(tt.wantToWrite) {
|
|
||||||
t.Fatalf("got %d packets, wanted %d", len(toWrite), len(tt.wantToWrite))
|
|
||||||
}
|
|
||||||
for i, pktI := range tt.wantToWrite {
|
|
||||||
if tt.wantToWrite[i] != toWrite[i] {
|
|
||||||
t.Fatalf("wantToWrite[%d]: %d != toWrite: %d", i, tt.wantToWrite[i], toWrite[i])
|
|
||||||
}
|
|
||||||
if tt.wantLens[i] != len(tt.pktsIn[pktI][offset:]) {
|
|
||||||
t.Errorf("wanted len %d packet at %d, got: %d", tt.wantLens[i], i, len(tt.pktsIn[pktI][offset:]))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func Test_isTCP4NoIPOptions(t *testing.T) {
|
|
||||||
valid := tcp4Packet(ip4PortA, ip4PortB, header.TCPFlagAck, 100, 1)[virtioNetHdrLen:]
|
|
||||||
invalidLen := valid[:39]
|
|
||||||
invalidHeaderLen := make([]byte, len(valid))
|
|
||||||
copy(invalidHeaderLen, valid)
|
|
||||||
invalidHeaderLen[0] = 0x46
|
|
||||||
invalidProtocol := make([]byte, len(valid))
|
|
||||||
copy(invalidProtocol, valid)
|
|
||||||
invalidProtocol[9] = unix.IPPROTO_TCP + 1
|
|
||||||
|
|
||||||
tests := []struct {
|
|
||||||
name string
|
|
||||||
b []byte
|
|
||||||
want bool
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
"valid",
|
|
||||||
valid,
|
|
||||||
true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"invalid length",
|
|
||||||
invalidLen,
|
|
||||||
false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"invalid version",
|
|
||||||
[]byte{0x00},
|
|
||||||
false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"invalid header len",
|
|
||||||
invalidHeaderLen,
|
|
||||||
false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"invalid protocol",
|
|
||||||
invalidProtocol,
|
|
||||||
false,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
for _, tt := range tests {
|
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
|
||||||
if got := isTCP4NoIPOptions(tt.b); got != tt.want {
|
|
||||||
t.Errorf("isTCP4NoIPOptions() = %v, want %v", got, tt.want)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,8 +0,0 @@
|
|||||||
go test fuzz v1
|
|
||||||
[]byte("0")
|
|
||||||
[]byte("0")
|
|
||||||
[]byte("0")
|
|
||||||
[]byte("0")
|
|
||||||
[]byte("0")
|
|
||||||
[]byte("0")
|
|
||||||
int(34)
|
|
@ -1,8 +0,0 @@
|
|||||||
go test fuzz v1
|
|
||||||
[]byte("0")
|
|
||||||
[]byte("0")
|
|
||||||
[]byte("0")
|
|
||||||
[]byte("0")
|
|
||||||
[]byte("0")
|
|
||||||
[]byte("0")
|
|
||||||
int(-48)
|
|
@ -38,6 +38,7 @@ type NativeTun struct {
|
|||||||
statusListenersShutdown chan struct{}
|
statusListenersShutdown chan struct{}
|
||||||
batchSize int
|
batchSize int
|
||||||
vnetHdr bool
|
vnetHdr bool
|
||||||
|
udpGSO bool
|
||||||
|
|
||||||
closeOnce sync.Once
|
closeOnce sync.Once
|
||||||
|
|
||||||
@ -48,9 +49,10 @@ type NativeTun struct {
|
|||||||
readOpMu sync.Mutex // readOpMu guards readBuff
|
readOpMu sync.Mutex // readOpMu guards readBuff
|
||||||
readBuff [virtioNetHdrLen + 65535]byte // if vnetHdr every read() is prefixed by virtioNetHdr
|
readBuff [virtioNetHdrLen + 65535]byte // if vnetHdr every read() is prefixed by virtioNetHdr
|
||||||
|
|
||||||
writeOpMu sync.Mutex // writeOpMu guards toWrite, tcp4GROTable, tcp6GROTable
|
writeOpMu sync.Mutex // writeOpMu guards toWrite, tcpGROTable
|
||||||
toWrite []int
|
toWrite []int
|
||||||
tcp4GROTable, tcp6GROTable *tcpGROTable
|
tcpGROTable *tcpGROTable
|
||||||
|
udpGROTable *udpGROTable
|
||||||
}
|
}
|
||||||
|
|
||||||
func (tun *NativeTun) File() *os.File {
|
func (tun *NativeTun) File() *os.File {
|
||||||
@ -333,8 +335,8 @@ func (tun *NativeTun) nameSlow() (string, error) {
|
|||||||
func (tun *NativeTun) Write(bufs [][]byte, offset int) (int, error) {
|
func (tun *NativeTun) Write(bufs [][]byte, offset int) (int, error) {
|
||||||
tun.writeOpMu.Lock()
|
tun.writeOpMu.Lock()
|
||||||
defer func() {
|
defer func() {
|
||||||
tun.tcp4GROTable.reset()
|
tun.tcpGROTable.reset()
|
||||||
tun.tcp6GROTable.reset()
|
tun.udpGROTable.reset()
|
||||||
tun.writeOpMu.Unlock()
|
tun.writeOpMu.Unlock()
|
||||||
}()
|
}()
|
||||||
var (
|
var (
|
||||||
@ -343,7 +345,7 @@ func (tun *NativeTun) Write(bufs [][]byte, offset int) (int, error) {
|
|||||||
)
|
)
|
||||||
tun.toWrite = tun.toWrite[:0]
|
tun.toWrite = tun.toWrite[:0]
|
||||||
if tun.vnetHdr {
|
if tun.vnetHdr {
|
||||||
err := handleGRO(bufs, offset, tun.tcp4GROTable, tun.tcp6GROTable, &tun.toWrite)
|
err := handleGRO(bufs, offset, tun.tcpGROTable, tun.udpGROTable, tun.udpGSO, &tun.toWrite)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
@ -394,37 +396,42 @@ func handleVirtioRead(in []byte, bufs [][]byte, sizes []int, offset int) (int, e
|
|||||||
sizes[0] = n
|
sizes[0] = n
|
||||||
return 1, nil
|
return 1, nil
|
||||||
}
|
}
|
||||||
if hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV4 && hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV6 {
|
if hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV4 && hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV6 && hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_UDP_L4 {
|
||||||
return 0, fmt.Errorf("unsupported virtio GSO type: %d", hdr.gsoType)
|
return 0, fmt.Errorf("unsupported virtio GSO type: %d", hdr.gsoType)
|
||||||
}
|
}
|
||||||
|
|
||||||
ipVersion := in[0] >> 4
|
ipVersion := in[0] >> 4
|
||||||
switch ipVersion {
|
switch ipVersion {
|
||||||
case 4:
|
case 4:
|
||||||
if hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV4 {
|
if hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV4 && hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_UDP_L4 {
|
||||||
return 0, fmt.Errorf("ip header version: %d, GSO type: %d", ipVersion, hdr.gsoType)
|
return 0, fmt.Errorf("ip header version: %d, GSO type: %d", ipVersion, hdr.gsoType)
|
||||||
}
|
}
|
||||||
case 6:
|
case 6:
|
||||||
if hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV6 {
|
if hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV6 && hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_UDP_L4 {
|
||||||
return 0, fmt.Errorf("ip header version: %d, GSO type: %d", ipVersion, hdr.gsoType)
|
return 0, fmt.Errorf("ip header version: %d, GSO type: %d", ipVersion, hdr.gsoType)
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
return 0, fmt.Errorf("invalid ip header version: %d", ipVersion)
|
return 0, fmt.Errorf("invalid ip header version: %d", ipVersion)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(in) <= int(hdr.csumStart+12) {
|
|
||||||
return 0, errors.New("packet is too short")
|
|
||||||
}
|
|
||||||
// Don't trust hdr.hdrLen from the kernel as it can be equal to the length
|
// Don't trust hdr.hdrLen from the kernel as it can be equal to the length
|
||||||
// of the entire first packet when the kernel is handling it as part of a
|
// of the entire first packet when the kernel is handling it as part of a
|
||||||
// FORWARD path. Instead, parse the TCP header length and add it onto
|
// FORWARD path. Instead, parse the transport header length and add it onto
|
||||||
// csumStart, which is synonymous for IP header length.
|
// csumStart, which is synonymous for IP header length.
|
||||||
tcpHLen := uint16(in[hdr.csumStart+12] >> 4 * 4)
|
if hdr.gsoType == unix.VIRTIO_NET_HDR_GSO_UDP_L4 {
|
||||||
if tcpHLen < 20 || tcpHLen > 60 {
|
hdr.hdrLen = hdr.csumStart + 8
|
||||||
// A TCP header must be between 20 and 60 bytes in length.
|
} else {
|
||||||
return 0, fmt.Errorf("tcp header len is invalid: %d", tcpHLen)
|
if len(in) <= int(hdr.csumStart+12) {
|
||||||
|
return 0, errors.New("packet is too short")
|
||||||
|
}
|
||||||
|
|
||||||
|
tcpHLen := uint16(in[hdr.csumStart+12] >> 4 * 4)
|
||||||
|
if tcpHLen < 20 || tcpHLen > 60 {
|
||||||
|
// A TCP header must be between 20 and 60 bytes in length.
|
||||||
|
return 0, fmt.Errorf("tcp header len is invalid: %d", tcpHLen)
|
||||||
|
}
|
||||||
|
hdr.hdrLen = hdr.csumStart + tcpHLen
|
||||||
}
|
}
|
||||||
hdr.hdrLen = hdr.csumStart + tcpHLen
|
|
||||||
|
|
||||||
if len(in) < int(hdr.hdrLen) {
|
if len(in) < int(hdr.hdrLen) {
|
||||||
return 0, fmt.Errorf("length of packet (%d) < virtioNetHdr.hdrLen (%d)", len(in), hdr.hdrLen)
|
return 0, fmt.Errorf("length of packet (%d) < virtioNetHdr.hdrLen (%d)", len(in), hdr.hdrLen)
|
||||||
@ -438,7 +445,7 @@ func handleVirtioRead(in []byte, bufs [][]byte, sizes []int, offset int) (int, e
|
|||||||
return 0, fmt.Errorf("end of checksum offset (%d) exceeds packet length (%d)", cSumAt+1, len(in))
|
return 0, fmt.Errorf("end of checksum offset (%d) exceeds packet length (%d)", cSumAt+1, len(in))
|
||||||
}
|
}
|
||||||
|
|
||||||
return tcpTSO(in, hdr, bufs, sizes, offset)
|
return gsoSplit(in, hdr, bufs, sizes, offset, ipVersion == 6)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (tun *NativeTun) Read(bufs [][]byte, sizes []int, offset int) (int, error) {
|
func (tun *NativeTun) Read(bufs [][]byte, sizes []int, offset int) (int, error) {
|
||||||
@ -497,7 +504,8 @@ func (tun *NativeTun) BatchSize() int {
|
|||||||
|
|
||||||
const (
|
const (
|
||||||
// TODO: support TSO with ECN bits
|
// TODO: support TSO with ECN bits
|
||||||
tunOffloads = unix.TUN_F_CSUM | unix.TUN_F_TSO4 | unix.TUN_F_TSO6
|
tunTCPOffloads = unix.TUN_F_CSUM | unix.TUN_F_TSO4 | unix.TUN_F_TSO6
|
||||||
|
tunUDPOffloads = unix.TUN_F_USO4 | unix.TUN_F_USO6
|
||||||
)
|
)
|
||||||
|
|
||||||
func (tun *NativeTun) initFromFlags(name string) error {
|
func (tun *NativeTun) initFromFlags(name string) error {
|
||||||
@ -519,12 +527,17 @@ func (tun *NativeTun) initFromFlags(name string) error {
|
|||||||
}
|
}
|
||||||
got := ifr.Uint16()
|
got := ifr.Uint16()
|
||||||
if got&unix.IFF_VNET_HDR != 0 {
|
if got&unix.IFF_VNET_HDR != 0 {
|
||||||
err = unix.IoctlSetInt(int(fd), unix.TUNSETOFFLOAD, tunOffloads)
|
// tunTCPOffloads were added in Linux v2.6. We require their support
|
||||||
|
// if IFF_VNET_HDR is set.
|
||||||
|
err = unix.IoctlSetInt(int(fd), unix.TUNSETOFFLOAD, tunTCPOffloads)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
tun.vnetHdr = true
|
tun.vnetHdr = true
|
||||||
tun.batchSize = conn.IdealBatchSize
|
tun.batchSize = conn.IdealBatchSize
|
||||||
|
// tunUDPOffloads were added in Linux v6.2. We do not return an
|
||||||
|
// error if they are unsupported at runtime.
|
||||||
|
tun.udpGSO = unix.IoctlSetInt(int(fd), unix.TUNSETOFFLOAD, tunTCPOffloads|tunUDPOffloads) == nil
|
||||||
} else {
|
} else {
|
||||||
tun.batchSize = 1
|
tun.batchSize = 1
|
||||||
}
|
}
|
||||||
@ -575,8 +588,8 @@ func CreateTUNFromFile(file *os.File, mtu int) (Device, error) {
|
|||||||
events: make(chan Event, 5),
|
events: make(chan Event, 5),
|
||||||
errors: make(chan error, 5),
|
errors: make(chan error, 5),
|
||||||
statusListenersShutdown: make(chan struct{}),
|
statusListenersShutdown: make(chan struct{}),
|
||||||
tcp4GROTable: newTCPGROTable(),
|
tcpGROTable: newTCPGROTable(),
|
||||||
tcp6GROTable: newTCPGROTable(),
|
udpGROTable: newUDPGROTable(),
|
||||||
toWrite: make([]int, 0, conn.IdealBatchSize),
|
toWrite: make([]int, 0, conn.IdealBatchSize),
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -628,12 +641,12 @@ func CreateUnmonitoredTUNFromFD(fd int) (Device, string, error) {
|
|||||||
}
|
}
|
||||||
file := os.NewFile(uintptr(fd), "/dev/tun")
|
file := os.NewFile(uintptr(fd), "/dev/tun")
|
||||||
tun := &NativeTun{
|
tun := &NativeTun{
|
||||||
tunFile: file,
|
tunFile: file,
|
||||||
events: make(chan Event, 5),
|
events: make(chan Event, 5),
|
||||||
errors: make(chan error, 5),
|
errors: make(chan error, 5),
|
||||||
tcp4GROTable: newTCPGROTable(),
|
tcpGROTable: newTCPGROTable(),
|
||||||
tcp6GROTable: newTCPGROTable(),
|
udpGROTable: newUDPGROTable(),
|
||||||
toWrite: make([]int, 0, conn.IdealBatchSize),
|
toWrite: make([]int, 0, conn.IdealBatchSize),
|
||||||
}
|
}
|
||||||
name, err := tun.Name()
|
name, err := tun.Name()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
Loading…
Reference in New Issue
Block a user