Improve lock retention when sending

NDIS may call MINIPORT_SEND_NET_BUFFER_LISTS from parallel threads to
queue as many packets as quickly as possible. The initial ring-buffer
implementation used a spin lock to completely serialize packet sending,
which was sub-optimal and burned a large amount of CPU.

This commit holds the lock only to allocate space for the packet(s) in
the ring. It copies the packets with the lock released, then locks
again to adjust the ring tail.
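
In outline, each sender now (1) takes the spin lock just long enough to
reserve ring space and record a private tail, (2) copies payload into its
reserved region with the lock dropped, and (3) re-acquires the lock to
publish completions in allocation order, so the client-visible tail never
moves past an unfinished packet. Below is a minimal user-mode C sketch of
that pattern, not the driver code itself: a pthread mutex stands in for the
in-stack queued spin lock, a small pending FIFO plays the role of the
ActiveNbls list, and the names (ring_reserve, ring_fill, ring_publish,
MAX_PENDING) are hypothetical.

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>

#define CAPACITY 4096u                /* ring size; power of two */
#define WRAP(x) ((x) & (CAPACITY - 1))
#define MAX_PENDING 64                /* max in-flight reservations */

struct pending { uint32_t End; bool Done; };

struct ring {
    pthread_mutex_t Lock;
    uint32_t Head;                    /* consumer-owned read offset */
    uint32_t Tail;                    /* consumer-visible write offset */
    uint32_t AllocTail;               /* private tail tracking reservations */
    struct pending Q[MAX_PENDING];    /* FIFO of reservations, oldest first */
    uint32_t QHead, QCount;
    uint8_t Data[CAPACITY];
};

/* Step 1 (locked): reserve Size bytes and enqueue a pending entry.
 * Returns a slot handle for ring_publish(), or -1 when full. */
static int ring_reserve(struct ring *R, uint32_t Size, uint32_t *Start)
{
    pthread_mutex_lock(&R->Lock);
    uint32_t Space = WRAP(R->Head - R->AllocTail - 1);
    if (Size > Space || R->QCount == MAX_PENDING) {
        pthread_mutex_unlock(&R->Lock);
        return -1;
    }
    *Start = R->AllocTail;
    R->AllocTail = WRAP(R->AllocTail + Size);
    int Slot = (int)((R->QHead + R->QCount++) % MAX_PENDING);
    R->Q[Slot] = (struct pending){ .End = R->AllocTail, .Done = false };
    pthread_mutex_unlock(&R->Lock);
    return Slot;
}

/* Step 2 (unlocked): copy the payload; other producers may reserve and
 * copy concurrently, because each one owns a disjoint region. */
static void ring_fill(struct ring *R, uint32_t Start, const uint8_t *Src, uint32_t Size)
{
    for (uint32_t i = 0; i < Size; i++)   /* byte loop handles wrap-around */
        R->Data[WRAP(Start + i)] = Src[i];
}

/* Step 3 (locked): mark our reservation done, then advance the visible
 * tail past every completed reservation at the FIFO head, so the tail
 * only ever moves forward in allocation order. */
static void ring_publish(struct ring *R, int Slot)
{
    pthread_mutex_lock(&R->Lock);
    R->Q[Slot].Done = true;
    while (R->QCount && R->Q[R->QHead].Done) {
        R->Tail = R->Q[R->QHead].End;     /* consumer may read up to here */
        R->QHead = (R->QHead + 1) % MAX_PENDING;
        R->QCount--;
    }
    pthread_mutex_unlock(&R->Lock);
}

The driver avoids the side table entirely: each reservation's end offset is
stashed in the NBL itself (NET_BUFFER_LIST_MINIPORT_RESERVED), with bit 0 of
that slot serving as the Done flag.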

Signed-off-by: Simon Rozman <simon@rozman.si>
Simon Rozman, 2019-07-17 11:53:25 +02:00
parent 1914547ab3
commit e7fad38a07

wintun.c (171 changed lines)

@@ -28,7 +28,8 @@
 /* Memory alignment of packets and rings */
 #define TUN_ALIGNMENT sizeof(ULONG)
-#define TUN_ALIGN(Size) (((ULONG)(Size) + (ULONG)(TUN_ALIGNMENT - 1)) & ~(ULONG)(TUN_ALIGNMENT - 1))
+#define TUN_ALIGN(Size) (((ULONG)(Size) + ((ULONG)TUN_ALIGNMENT - 1)) & ~((ULONG)TUN_ALIGNMENT - 1))
+#define TUN_IS_ALIGNED(Size) (!((ULONG)(Size) & ((ULONG)TUN_ALIGNMENT - 1)))
 
 /* Maximum IP packet size */
 #define TUN_MAX_IP_PACKET_SIZE 0xFFFF
 /* Maximum packet size */
@@ -140,6 +141,11 @@ typedef struct _TUN_CTX
         ULONG Capacity;
         KEVENT *TailMoved;
         KSPIN_LOCK Lock;
+        ULONG RingTail; /* We need a private tail offset to keep track of ring allocation without disturbing the client. */
+        struct
+        {
+            NET_BUFFER_LIST *Head, *Tail;
+        } ActiveNbls;
     } Send;
 
     struct
@@ -225,6 +231,31 @@ TunIndicateStatus(_In_ NDIS_HANDLE MiniportAdapterHandle, _In_ NDIS_MEDIA_CONNEC
     NdisMIndicateStatusEx(MiniportAdapterHandle, &Indication);
 }
 
+static void
+TunNblSetTailAndMarkActive(_Inout_ NET_BUFFER_LIST *Nbl, _In_ ULONG Tail)
+{
+    ASSERT(TUN_IS_ALIGNED(Tail)); /* Alignment ensures bit 0 will be 0 (0=active, 1=completed). */
+    NET_BUFFER_LIST_MINIPORT_RESERVED(Nbl)[0] = (VOID *)Tail;
+}
+
+static ULONG
+TunNblGetTail(_In_ NET_BUFFER_LIST *Nbl)
+{
+    return (ULONG)((ULONG_PTR)(NET_BUFFER_LIST_MINIPORT_RESERVED(Nbl)[0]) & ~((ULONG_PTR)TUN_ALIGNMENT - 1));
+}
+
+static void
+TunNblMarkCompleted(_Inout_ NET_BUFFER_LIST *Nbl)
+{
+    *(ULONG_PTR *)&NET_BUFFER_LIST_MINIPORT_RESERVED(Nbl)[0] |= 1;
+}
+
+static BOOLEAN
+TunNblIsCompleted(_In_ NET_BUFFER_LIST *Nbl)
+{
+    return (ULONG_PTR)(NET_BUFFER_LIST_MINIPORT_RESERVED(Nbl)[0]) & 1;
+}
+
 static MINIPORT_SEND_NET_BUFFER_LISTS TunSendNetBufferLists;
 _Use_decl_annotations_
 static void
@@ -235,76 +266,120 @@ TunSendNetBufferLists(
     ULONG SendFlags)
 {
     TUN_CTX *Ctx = (TUN_CTX *)MiniportAdapterContext;
-    LONG64 SentPacketsCount = 0, SentPacketsSize = 0, DiscardedPacketsCount = 0;
+    LONG64 SentPacketsCount = 0, SentPacketsSize = 0, ErrorPacketsCount = 0, DiscardedPacketsCount = 0;
 
-    /* TODO: This handler is called by NDIS in parallel. Consider implementing a lock-less MPSC ring. */
     KIRQL Irql = ExAcquireSpinLockShared(&Ctx->TransitionLock);
     LONG Flags = InterlockedGet(&Ctx->Flags);
+    TUN_RING *Ring = Ctx->Device.Send.Ring;
+    ULONG RingCapacity = Ctx->Device.Send.Capacity;
 
-    for (NET_BUFFER_LIST *Nbl = NetBufferLists; Nbl; Nbl = NET_BUFFER_LIST_NEXT_NBL(Nbl))
+    for (NET_BUFFER_LIST *Nbl = NetBufferLists, *NblNext; Nbl; Nbl = NblNext)
     {
+        NblNext = NET_BUFFER_LIST_NEXT_NBL(Nbl);
+
+        /* Measure NBL. */
+        ULONG PacketsCount = 0, RequiredRingSpace = 0;
         for (NET_BUFFER *Nb = NET_BUFFER_LIST_FIRST_NB(Nbl); Nb; Nb = NET_BUFFER_NEXT_NB(Nb))
         {
-            NDIS_STATUS Status;
-            if ((Status = NDIS_STATUS_ADAPTER_REMOVED, !(Flags & TUN_FLAGS_PRESENT)) ||
-                (Status = NDIS_STATUS_PAUSED, !(Flags & TUN_FLAGS_RUNNING)) ||
-                (Status = NDIS_STATUS_MEDIA_DISCONNECTED, !(Flags & TUN_FLAGS_CONNECTED)))
-                goto cleanupDiscardPacket;
-
-            TUN_RING *Ring = Ctx->Device.Send.Ring;
-            ULONG RingCapacity = Ctx->Device.Send.Capacity;
-            ULONG PacketSize = NET_BUFFER_DATA_LENGTH(Nb);
+            PacketsCount++;
+            UINT PacketSize = NET_BUFFER_DATA_LENGTH(Nb);
+            if (PacketSize <= TUN_MAX_IP_PACKET_SIZE)
+                RequiredRingSpace += TUN_ALIGN(sizeof(TUN_PACKET) + PacketSize);
+        }
+
+        NDIS_STATUS Status;
+        if ((Status = NDIS_STATUS_ADAPTER_REMOVED, !(Flags & TUN_FLAGS_PRESENT)) ||
+            (Status = NDIS_STATUS_PAUSED, !(Flags & TUN_FLAGS_RUNNING)) ||
+            (Status = NDIS_STATUS_MEDIA_DISCONNECTED, !(Flags & TUN_FLAGS_CONNECTED)))
+            goto skipNbl;
+
+        /* Allocate space for packet(s) in the ring. */
+        ULONG RingHead = InterlockedGetU(&Ring->Head);
+        if (Status = NDIS_STATUS_ADAPTER_NOT_READY, RingHead >= RingCapacity)
+            goto skipNbl;
+
+        KLOCK_QUEUE_HANDLE LockHandle;
+        KeAcquireInStackQueuedSpinLock(&Ctx->Device.Send.Lock, &LockHandle);
+
+        ULONG RingTail = Ctx->Device.Send.RingTail;
+        ASSERT(RingTail < RingCapacity);
+
+        ULONG RingSpace = TUN_RING_WRAP(RingHead - RingTail - TUN_ALIGNMENT, RingCapacity);
+        if (Status = NDIS_STATUS_BUFFER_OVERFLOW, RingSpace < RequiredRingSpace)
+            goto cleanupKeReleaseInStackQueuedSpinLock;
+
+        TunNblSetTailAndMarkActive(
+            Nbl, Ctx->Device.Send.RingTail = TUN_RING_WRAP(RingTail + RequiredRingSpace, RingCapacity));
+        *(Ctx->Device.Send.ActiveNbls.Head ? &NET_BUFFER_LIST_NEXT_NBL(Ctx->Device.Send.ActiveNbls.Tail)
+                                           : &Ctx->Device.Send.ActiveNbls.Head) = Nbl;
+        Ctx->Device.Send.ActiveNbls.Tail = Nbl;
+
+        KeReleaseInStackQueuedSpinLock(&LockHandle);
+
+        /* Copy packet(s). */
+        for (NET_BUFFER *Nb = NET_BUFFER_LIST_FIRST_NB(Nbl); Nb; Nb = NET_BUFFER_NEXT_NB(Nb))
+        {
+            UINT PacketSize = NET_BUFFER_DATA_LENGTH(Nb);
             if (Status = NDIS_STATUS_INVALID_LENGTH, PacketSize > TUN_MAX_IP_PACKET_SIZE)
-                goto cleanupDiscardPacket;
-            ULONG AlignedPacketSize = TUN_ALIGN(sizeof(TUN_PACKET) + PacketSize);
-
-            KLOCK_QUEUE_HANDLE LockHandle;
-            KeAcquireInStackQueuedSpinLock(&Ctx->Device.Send.Lock, &LockHandle);
-
-            ULONG RingHead = InterlockedGetU(&Ring->Head);
-            if (Status = NDIS_STATUS_ADAPTER_NOT_READY, RingHead >= RingCapacity)
-                goto cleanupReleaseSpinLock;
-
-            ULONG RingTail = InterlockedGetU(&Ring->Tail);
-            if (Status = NDIS_STATUS_ADAPTER_NOT_READY, RingTail >= RingCapacity)
-                goto cleanupReleaseSpinLock;
-
-            ULONG RingSpace = TUN_RING_WRAP(RingHead - RingTail - TUN_ALIGNMENT, RingCapacity);
-            if (Status = NDIS_STATUS_BUFFER_OVERFLOW, AlignedPacketSize > RingSpace)
-                goto cleanupReleaseSpinLock;
+                goto skipPacket;
 
             TUN_PACKET *Packet = (TUN_PACKET *)(Ring->Data + RingTail);
             Packet->Size = PacketSize;
             void *NbData = NdisGetDataBuffer(Nb, PacketSize, Packet->Data, 1, 0);
             if (!NbData)
-                goto cleanupReleaseSpinLock;
-            if (NbData != Packet->Data)
-                NdisMoveMemory(Packet->Data, NbData, PacketSize);
-            InterlockedExchangeU(&Ring->Tail, TUN_RING_WRAP(RingTail + AlignedPacketSize, RingCapacity));
-            KeSetEvent(Ctx->Device.Send.TailMoved, IO_NETWORK_INCREMENT, FALSE);
-
-            KeReleaseInStackQueuedSpinLock(&LockHandle);
-            SentPacketsCount++;
-            SentPacketsSize += PacketSize;
+            {
+                NdisZeroMemory(
+                    Packet->Data, PacketSize); /* The space for the packet has already been allocated in the ring. Write
+                                                  null-packet rather than fixing the gap in the ring. */
+                DiscardedPacketsCount++;
+            }
+            else
+            {
+                if (NbData != Packet->Data)
+                    NdisMoveMemory(Packet->Data, NbData, PacketSize);
+                SentPacketsCount++;
+                SentPacketsSize += PacketSize;
+            }
+
+            RingTail = TUN_RING_WRAP(RingTail + TUN_ALIGN(sizeof(TUN_PACKET) + PacketSize), RingCapacity);
             continue;
 
-        cleanupReleaseSpinLock:
-            KeReleaseInStackQueuedSpinLock(&LockHandle);
-        cleanupDiscardPacket:
-            DiscardedPacketsCount++;
+        skipPacket:
+            ErrorPacketsCount++;
             NET_BUFFER_LIST_STATUS(Nbl) = Status;
         }
-    }
-
-    NdisMSendNetBufferListsComplete(
-        Ctx->MiniportAdapterHandle, NetBufferLists, NDIS_SEND_COMPLETE_FLAGS_DISPATCH_LEVEL);
-
+        ASSERT(RingTail == TunNblGetTail(Nbl));
+
+        /* Adjust the ring tail. */
+        TunNblMarkCompleted(Nbl);
+        KeAcquireInStackQueuedSpinLock(&Ctx->Device.Send.Lock, &LockHandle);
+        while (Ctx->Device.Send.ActiveNbls.Head && TunNblIsCompleted(Ctx->Device.Send.ActiveNbls.Head))
+        {
+            NET_BUFFER_LIST *CompletedNbl = Ctx->Device.Send.ActiveNbls.Head;
+            Ctx->Device.Send.ActiveNbls.Head = NET_BUFFER_LIST_NEXT_NBL(CompletedNbl);
+            InterlockedExchangeU(&Ring->Tail, TunNblGetTail(CompletedNbl));
+            KeSetEvent(Ctx->Device.Send.TailMoved, IO_NETWORK_INCREMENT, FALSE);
+            NET_BUFFER_LIST_NEXT_NBL(CompletedNbl) = NULL;
+            NdisMSendNetBufferListsComplete(
+                Ctx->MiniportAdapterHandle, CompletedNbl, NDIS_SEND_COMPLETE_FLAGS_DISPATCH_LEVEL);
+        }
+        KeReleaseInStackQueuedSpinLock(&LockHandle);
+        continue;
+
+    cleanupKeReleaseInStackQueuedSpinLock:
+        KeReleaseInStackQueuedSpinLock(&LockHandle);
+    skipNbl:
+        NET_BUFFER_LIST_STATUS(Nbl) = Status;
+        NET_BUFFER_LIST_NEXT_NBL(Nbl) = NULL;
+        NdisMSendNetBufferListsComplete(Ctx->MiniportAdapterHandle, Nbl, 0);
+        DiscardedPacketsCount += PacketsCount;
+    }
+
     ExReleaseSpinLockShared(&Ctx->TransitionLock, Irql);
 
     InterlockedAdd64((LONG64 *)&Ctx->Statistics.ifHCOutOctets, SentPacketsSize);
     InterlockedAdd64((LONG64 *)&Ctx->Statistics.ifHCOutUcastOctets, SentPacketsSize);
     InterlockedAdd64((LONG64 *)&Ctx->Statistics.ifHCOutUcastPkts, SentPacketsCount);
+    InterlockedAdd64((LONG64 *)&Ctx->Statistics.ifOutErrors, ErrorPacketsCount);
     InterlockedAdd64((LONG64 *)&Ctx->Statistics.ifOutDiscards, DiscardedPacketsCount);
 }
@@ -519,6 +594,10 @@ TunDispatchRegisterBuffers(_Inout_ TUN_CTX *Ctx, _Inout_ IRP *Irp)
     if (Status = STATUS_INSUFFICIENT_RESOURCES, !Ctx->Device.Send.Ring)
         goto cleanupSendUnlockPages;
 
+    Ctx->Device.Send.RingTail = InterlockedGetU(&Ctx->Device.Send.Ring->Tail);
+    if (Status = STATUS_INVALID_PARAMETER, Ctx->Device.Send.RingTail >= Ctx->Device.Send.Capacity)
+        goto cleanupSendUnlockPages;
+
     /* Analyze and lock receive ring. */
     Ctx->Device.Receive.Capacity = TUN_RING_CAPACITY(Rrb->Receive.RingSize);
     if (Status = STATUS_INVALID_PARAMETER,
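
An aside on the NBL tagging introduced above: because TUN_ALIGNMENT is
sizeof(ULONG), every offset produced by TUN_ALIGN has its low bits clear,
which is what lets the driver pack a tail offset and an active/completed
flag into the single pointer-sized NET_BUFFER_LIST_MINIPORT_RESERVED slot.
A standalone sketch of the arithmetic, using a hypothetical ALIGN macro
that mirrors TUN_ALIGN:

#include <assert.h>
#include <stdint.h>

#define ALIGNMENT 4u /* sizeof(ULONG), as TUN_ALIGNMENT */
#define ALIGN(Size) (((Size) + (ALIGNMENT - 1)) & ~(ALIGNMENT - 1))

int main(void)
{
    /* ALIGN rounds up to a multiple of 4: a 41-byte header+payload pair
     * occupies 44 bytes of ring space. */
    assert(ALIGN(41u) == 44u);

    /* An aligned tail offset always has bit 0 clear, so that bit can carry
     * the NBL state (0 = active, 1 = completed) alongside the offset. */
    uintptr_t Slot = ALIGN(41u); /* store the tail; implicitly active */
    assert((Slot & 1) == 0);     /* guaranteed by alignment */
    Slot |= 1;                   /* mark completed */
    assert((Slot & 1) == 1);
    assert((uint32_t)(Slot & ~(uintptr_t)(ALIGNMENT - 1)) == 44u); /* tail recovered intact */
    return 0;
}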