Broken checkpoint

This commit is contained in:
JackDoan
2025-11-11 11:38:43 -06:00
parent c645a45438
commit e7f01390a3
8 changed files with 271 additions and 113 deletions

View File

@@ -349,6 +349,74 @@ func (dt *DescriptorTable) getDescriptorChain(head uint16) (outBuffers, inBuffer
return
}
// getDescriptorChainContents copies the contents of every buffer in the
// descriptor chain that starts at the given head index into out and returns
// the total number of bytes copied.
//
// The chain is walked twice while holding the table lock: the first pass
// validates the chain and sums up the buffer lengths, the second pass copies
// the data. Nothing is written to out unless validation succeeded.
//
// An error is returned when:
//   - head or any next index lies outside the descriptor table,
//   - the chain runs into the head of the free chain,
//   - a descriptor in the chain is not device-writable,
//   - the chain loops or does not terminate within the queue size,
//   - the capacity of out is smaller than the total chain length.
func (dt *DescriptorTable) getDescriptorChainContents(head uint16, out []byte) (int, error) {
	// Valid indexes are 0..len-1, so head == len must be rejected as well.
	if int(head) >= len(dt.descriptors) {
		return 0, fmt.Errorf("%w: index out of range", ErrInvalidDescriptorChain)
	}

	dt.mu.Lock()
	defer dt.mu.Unlock()

	// First pass: validate the chain and find its total length. The iteration
	// is limited to the queue size to avoid ending up in an endless loop when
	// things go very wrong.
	length := 0
	next := head
	terminated := false
	for range len(dt.descriptors) {
		if next == dt.freeHeadIndex {
			return 0, fmt.Errorf("%w: must not be part of the free chain", ErrInvalidDescriptorChain)
		}

		desc := &dt.descriptors[next]
		if desc.flags&descriptorFlagWritable == 0 {
			return 0, fmt.Errorf("receive queue contains device-readable buffer")
		}
		length += int(desc.length)

		// Is this the tail of the chain?
		if desc.flags&descriptorFlagHasNext == 0 {
			terminated = true
			break
		}

		// Detect loops back to the head.
		if desc.next == head {
			return 0, fmt.Errorf("%w: contains a loop", ErrInvalidDescriptorChain)
		}
		// Never follow a link that points outside of the table.
		if int(desc.next) >= len(dt.descriptors) {
			return 0, fmt.Errorf("%w: index out of range", ErrInvalidDescriptorChain)
		}
		next = desc.next
	}
	// A chain that did not end within the queue size must contain a loop
	// that does not pass through the head.
	if !terminated {
		return 0, fmt.Errorf("%w: contains a loop", ErrInvalidDescriptorChain)
	}

	// Reslicing beyond the capacity would panic, so report it instead.
	if length > cap(out) {
		return 0, fmt.Errorf("output buffer too small: need %d bytes but capacity is %d", length, cap(out))
	}
	out = out[:length]

	// Second pass: copy the buffers. The walk has to restart at the head,
	// because the first pass left next pointing at the tail of the chain.
	copied := 0
	next = head
	for range len(dt.descriptors) {
		desc := &dt.descriptors[next]

		// The descriptor address points to memory not managed by Go, so this
		// conversion is safe. See https://github.com/golang/go/issues/58625
		//goland:noinspection GoVetUnsafePointer
		bs := unsafe.Slice((*byte)(unsafe.Pointer(desc.address)), desc.length)
		copied += copy(out[copied:], bs)

		// Is this the tail of the chain?
		if desc.flags&descriptorFlagHasNext == 0 {
			break
		}

		// The chain was validated above, no need to detect loops again.
		next = desc.next
	}

	// This can only differ if the descriptors changed between the two passes.
	if copied != length {
		return 0, fmt.Errorf("%w: expected to copy %d bytes but only copied %d bytes", ErrInvalidDescriptorChain, length, copied)
	}
	return length, nil
}
// freeDescriptorChain can be used to free a descriptor chain when it is no
// longer in use. The descriptor chain that starts with the given index will be
// put back into the free chain, so the descriptors can be used for later calls

View File

@@ -49,6 +49,8 @@ type SplitQueue struct {
offerMutex sync.Mutex
pageSize int
itemSize int
epoll eventfd.Epoll
}
// NewSplitQueue allocates a new [SplitQueue] in memory. The given queue size
@@ -132,6 +134,15 @@ func NewSplitQueue(queueSize int) (_ *SplitQueue, err error) {
sq.usedChains = make(chan UsedElement, queueSize)
sq.moreFreeDescriptors = make(chan struct{})
sq.epoll, err = eventfd.NewEpoll()
if err != nil {
return nil, err
}
err = sq.epoll.AddEvent(sq.callEventFD.FD())
if err != nil {
return nil, err
}
// Consume used buffer notifications in the background.
sq.stop = sq.startConsumeUsedRing()
@@ -194,25 +205,9 @@ func (sq *SplitQueue) UsedDescriptorChains() chan UsedElement {
}
// startConsumeUsedRing starts a goroutine that runs [consumeUsedRing].
// A function is returned that can be used to gracefully cancel it.
// A function is returned that can be used to gracefully cancel it. todo rename
func (sq *SplitQueue) startConsumeUsedRing() func() error {
ctx, cancel := context.WithCancel(context.Background())
done := make(chan error)
ep, err := eventfd.NewEpoll()
if err != nil {
panic(err)
}
err = ep.AddEvent(sq.callEventFD.FD())
if err != nil {
panic(err)
}
go func() {
done <- sq.consumeUsedRing(ctx, &ep)
}()
return func() error {
cancel()
// The goroutine blocks until it receives a signal on the event file
// descriptor, so it will never notice the context being canceled.
@@ -221,43 +216,28 @@ func (sq *SplitQueue) startConsumeUsedRing() func() error {
if err := sq.callEventFD.Kick(); err != nil {
return fmt.Errorf("wake up goroutine: %w", err)
}
// Wait for the goroutine to end. This prevents the event file
// descriptor to be closed while it's still being used.
// If the goroutine failed, this is the last chance to propagate the
// error so it at least doesn't go unnoticed, even though the error may
// be older already.
if err := <-done; err != nil {
return fmt.Errorf("goroutine: consume used ring: %w", err)
}
return nil
}
}
// consumeUsedRing runs in a goroutine, waits for the device to signal that it
// has used descriptor chains and puts all new [UsedElement]s into the channel
// for them.
func (sq *SplitQueue) consumeUsedRing(ctx context.Context, epoll *eventfd.Epoll) error {
// BlockAndGetHeads waits for the device to signal that it has used descriptor chains and returns all [UsedElement]s
func (sq *SplitQueue) BlockAndGetHeads(ctx context.Context) ([]UsedElement, error) {
var n int
var err error
for ctx.Err() == nil {
// Wait for a signal from the device.
if n, err = epoll.Block(); err != nil {
return fmt.Errorf("wait: %w", err)
if n, err = sq.epoll.Block(); err != nil {
return nil, fmt.Errorf("wait: %w", err)
}
if n > 0 {
_ = epoll.Clear() //???
// Process all new used elements.
for _, usedElement := range sq.usedRing.take() {
sq.usedChains <- usedElement
}
out := sq.usedRing.take()
_ = sq.epoll.Clear() //???
return out, nil
}
}
return nil
return nil, ctx.Err()
}
// blockForMoreDescriptors blocks on a channel waiting for more descriptors to free up.
@@ -345,6 +325,55 @@ func (sq *SplitQueue) OfferDescriptorChain(outBuffers [][]byte, numInBuffers int
return head, nil
}
// OfferInDescriptorChains creates a single descriptor chain consisting of
// numInBuffers device-writable buffers (no out buffers), makes it available to
// the device and notifies the device about it.
//
// When the descriptor table reports [ErrNotEnoughFreeDescriptors] and waitFree
// is true, the call blocks until more descriptors were freed and then retries.
// With waitFree set to false that error is returned unwrapped right away.
//
// The head index of the new chain is returned. It is also returned alongside
// the error when only the device notification failed, because the chain has
// already been placed into the available ring at that point.
func (sq *SplitQueue) OfferInDescriptorChains(numInBuffers int, waitFree bool) (uint16, error) {
	sq.ensureInitialized()

	// Offering is serialized so that chains become available to the device in
	// exactly the order in which this method was called. The descriptor table
	// and the available ring have their own synchronization, but that does not
	// prevent interleaved calls from reordering chains.
	sq.offerMutex.Lock()
	defer sq.offerMutex.Unlock()

	head, err := sq.descriptorTable.createDescriptorChain(nil, numInBuffers)
	for err != nil {
		// Direct comparison on purpose: errors.Is is slower.
		//goland:noinspection GoDirectComparisonOfErrors
		if err != ErrNotEnoughFreeDescriptors {
			return 0, fmt.Errorf("create descriptor chain: %w", err)
		}
		if !waitFree {
			return 0, err
		}

		// Wait for descriptors to be put back into the queue and try again.
		// Should there still not be enough of them, we end up here once more.
		sq.blockForMoreDescriptors()
		head, err = sq.descriptorTable.createDescriptorChain(nil, numInBuffers)
	}

	// Publish the chain in the available ring.
	chain := []uint16{head}
	sq.availableRing.offer(chain)

	// Tell the device that the available ring has been updated.
	if kickErr := sq.kickEventFD.Kick(); kickErr != nil {
		return head, fmt.Errorf("notify device: %w", kickErr)
	}
	return head, nil
}
func (sq *SplitQueue) OfferOutDescriptorChains(prepend []byte, outBuffers [][]byte, waitFree bool) ([]uint16, error) {
sq.ensureInitialized()
@@ -420,6 +449,11 @@ func (sq *SplitQueue) GetDescriptorChain(head uint16) (outBuffers, inBuffers [][
return sq.descriptorTable.getDescriptorChain(head)
}
// GetDescriptorChainContents copies the buffer contents of the descriptor
// chain with the given head index into out by delegating to the descriptor
// table. It returns the number of bytes the descriptor table reports for the
// chain, or the error it produced.
func (sq *SplitQueue) GetDescriptorChainContents(head uint16, out []byte) (int, error) {
	sq.ensureInitialized()

	n, err := sq.descriptorTable.getDescriptorChainContents(head, out)
	return n, err
}
// FreeDescriptorChain frees the descriptor chain with the given head index.
// The head index must be one that was returned by a previous call to
// [SplitQueue.OfferDescriptorChain] and the descriptor chain must not have been
@@ -447,6 +481,35 @@ func (sq *SplitQueue) FreeDescriptorChain(head uint16) error {
return nil
}
// FreeAndOfferDescriptorChains puts the descriptor chain with the given head
// index back into the available ring and notifies the device about it.
//
// Despite its name, the chain is currently not freed in the descriptor table;
// the very same chain is simply offered to the device again.
//
// TODO: offerMutex is deliberately not taken here (probably not needed?).
// Without it, concurrent offers are not guaranteed to reach the device in call
// order.
// TODO: descriptorTable.freeDescriptorChain(head) is not called (and this
// method is not called under lock); skipping it may break eventually.
func (sq *SplitQueue) FreeAndOfferDescriptorChains(head uint16) error {
	sq.ensureInitialized()

	// Hand the chain to the device via the available ring.
	chain := []uint16{head}
	sq.availableRing.offer(chain)

	// Tell the device that the available ring has been updated.
	err := sq.kickEventFD.Kick()
	if err == nil {
		return nil
	}
	return fmt.Errorf("notify device: %w", err)
}
// Close releases all resources used for this queue.
// The implementation will try to release as many resources as possible and
// collect potential errors before returning them.