895d6c23cd
$ benchstat old.txt new.txt goos: linux goarch: amd64 pkg: golang.zx2c4.com/wireguard/tun cpu: 12th Gen Intel(R) Core(TM) i5-12400 │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ Checksum/64-12 10.670n ± 2% 4.769n ± 0% -55.30% (p=0.000 n=10) Checksum/128-12 19.665n ± 2% 8.032n ± 0% -59.16% (p=0.000 n=10) Checksum/256-12 37.68n ± 1% 16.06n ± 0% -57.37% (p=0.000 n=10) Checksum/512-12 76.61n ± 3% 32.13n ± 0% -58.06% (p=0.000 n=10) Checksum/1024-12 160.55n ± 4% 64.25n ± 0% -59.98% (p=0.000 n=10) Checksum/1500-12 231.05n ± 7% 94.12n ± 0% -59.26% (p=0.000 n=10) Checksum/2048-12 309.5n ± 3% 128.5n ± 0% -58.48% (p=0.000 n=10) Checksum/4096-12 603.8n ± 4% 257.2n ± 0% -57.41% (p=0.000 n=10) Checksum/8192-12 1185.0n ± 3% 515.5n ± 0% -56.50% (p=0.000 n=10) Checksum/9000-12 1328.5n ± 5% 564.8n ± 0% -57.49% (p=0.000 n=10) Checksum/9001-12 1340.5n ± 3% 564.8n ± 0% -57.87% (p=0.000 n=10) geomean 185.3n 77.99n -57.92% Reviewed-by: Adrian Dewhurst <adrian@tailscale.com> Signed-off-by: Jordan Whited <jordan@tailscale.com> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
119 lines
4.2 KiB
Go
119 lines
4.2 KiB
Go
package tun
|
|
|
|
import "encoding/binary"
|
|
|
|
// TODO: Explore SIMD and/or other assembly optimizations.
|
|
// TODO: Test native endian loads. See RFC 1071 section 2 part B.
|
|
// checksumNoFold adds the contents of b to initial and returns the raw 64-bit
// accumulator. Carries are not folded back into 16 bits here; see checksum for
// the fold.
//
// b is consumed as big-endian 32-bit words for as long as at least four bytes
// remain, then as one big-endian 16-bit word if two or three bytes remain. A
// final odd byte is added as the high-order byte of a 16-bit word whose
// low-order byte is zero.
//
// The body is unrolled by hand: a 128-byte main loop followed by one-shot
// 64/32/16/8/4/2/1-byte steps that mop up the remainder.
func checksumNoFold(b []byte, initial uint64) uint64 {
	be := binary.BigEndian
	sum := initial

	// Main loop: thirty-two 32-bit words (128 bytes) per iteration.
	for ; len(b) >= 128; b = b[128:] {
		sum += uint64(be.Uint32(b[0:4])) + uint64(be.Uint32(b[4:8])) +
			uint64(be.Uint32(b[8:12])) + uint64(be.Uint32(b[12:16]))
		sum += uint64(be.Uint32(b[16:20])) + uint64(be.Uint32(b[20:24])) +
			uint64(be.Uint32(b[24:28])) + uint64(be.Uint32(b[28:32]))
		sum += uint64(be.Uint32(b[32:36])) + uint64(be.Uint32(b[36:40])) +
			uint64(be.Uint32(b[40:44])) + uint64(be.Uint32(b[44:48]))
		sum += uint64(be.Uint32(b[48:52])) + uint64(be.Uint32(b[52:56])) +
			uint64(be.Uint32(b[56:60])) + uint64(be.Uint32(b[60:64]))
		sum += uint64(be.Uint32(b[64:68])) + uint64(be.Uint32(b[68:72])) +
			uint64(be.Uint32(b[72:76])) + uint64(be.Uint32(b[76:80]))
		sum += uint64(be.Uint32(b[80:84])) + uint64(be.Uint32(b[84:88])) +
			uint64(be.Uint32(b[88:92])) + uint64(be.Uint32(b[92:96]))
		sum += uint64(be.Uint32(b[96:100])) + uint64(be.Uint32(b[100:104])) +
			uint64(be.Uint32(b[104:108])) + uint64(be.Uint32(b[108:112]))
		sum += uint64(be.Uint32(b[112:116])) + uint64(be.Uint32(b[116:120])) +
			uint64(be.Uint32(b[120:124])) + uint64(be.Uint32(b[124:128]))
	}

	// Fewer than 128 bytes remain; each step below runs at most once.
	if len(b) >= 64 {
		sum += uint64(be.Uint32(b[0:4])) + uint64(be.Uint32(b[4:8])) +
			uint64(be.Uint32(b[8:12])) + uint64(be.Uint32(b[12:16]))
		sum += uint64(be.Uint32(b[16:20])) + uint64(be.Uint32(b[20:24])) +
			uint64(be.Uint32(b[24:28])) + uint64(be.Uint32(b[28:32]))
		sum += uint64(be.Uint32(b[32:36])) + uint64(be.Uint32(b[36:40])) +
			uint64(be.Uint32(b[40:44])) + uint64(be.Uint32(b[44:48]))
		sum += uint64(be.Uint32(b[48:52])) + uint64(be.Uint32(b[52:56])) +
			uint64(be.Uint32(b[56:60])) + uint64(be.Uint32(b[60:64]))
		b = b[64:]
	}
	if len(b) >= 32 {
		sum += uint64(be.Uint32(b[0:4])) + uint64(be.Uint32(b[4:8])) +
			uint64(be.Uint32(b[8:12])) + uint64(be.Uint32(b[12:16]))
		sum += uint64(be.Uint32(b[16:20])) + uint64(be.Uint32(b[20:24])) +
			uint64(be.Uint32(b[24:28])) + uint64(be.Uint32(b[28:32]))
		b = b[32:]
	}
	if len(b) >= 16 {
		sum += uint64(be.Uint32(b[0:4])) + uint64(be.Uint32(b[4:8])) +
			uint64(be.Uint32(b[8:12])) + uint64(be.Uint32(b[12:16]))
		b = b[16:]
	}
	if len(b) >= 8 {
		sum += uint64(be.Uint32(b[0:4])) + uint64(be.Uint32(b[4:8]))
		b = b[8:]
	}
	if len(b) >= 4 {
		sum += uint64(be.Uint32(b[0:4]))
		b = b[4:]
	}
	if len(b) >= 2 {
		sum += uint64(be.Uint16(b[0:2]))
		b = b[2:]
	}
	// At most one byte is left; it occupies the high half of a 16-bit word.
	if len(b) > 0 {
		sum += uint64(b[0]) << 8
	}

	return sum
}
|
|
|
|
func checksum(b []byte, initial uint64) uint16 {
|
|
ac := checksumNoFold(b, initial)
|
|
ac = (ac >> 16) + (ac & 0xffff)
|
|
ac = (ac >> 16) + (ac & 0xffff)
|
|
ac = (ac >> 16) + (ac & 0xffff)
|
|
ac = (ac >> 16) + (ac & 0xffff)
|
|
return uint16(ac)
|
|
}
|
|
|
|
func pseudoHeaderChecksumNoFold(protocol uint8, srcAddr, dstAddr []byte, totalLen uint16) uint64 {
|
|
sum := checksumNoFold(srcAddr, 0)
|
|
sum = checksumNoFold(dstAddr, sum)
|
|
sum = checksumNoFold([]byte{0, protocol}, sum)
|
|
tmp := make([]byte, 2)
|
|
binary.BigEndian.PutUint16(tmp, totalLen)
|
|
return checksumNoFold(tmp, sum)
|
|
}
|