switch Bits to a packed u64 (#1705)

2026-05-15 20:37:36 +02:00 · 2026-05-06 10:22:26 -05:00
parent b7e9939e92
commit ff91c37529
2 changed files with 452 additions and 164 deletions
--- a/bits.go
+++ b/bits.go
@@ -2,24 +2,42 @@ package nebula
 import (
 	"context"
 	"fmt"
 	"log/slog"
 	"math"
 	mathbits "math/bits"
 	"github.com/rcrowley/go-metrics"
 )
 const bitsPerWord = 64
 // Bits is a sliding-window anti-replay tracker. The window is stored as a
 // circular bitmap packed into uint64 words (8x denser than a []bool), so a
 // length-N window costs N/8 bytes. length must be a power of two.
 type Bits struct {
 	length             uint64
 	lengthMask         uint64
 	current            uint64
-	bits               []bool
+	bits               []uint64
 	lostCounter        metrics.Counter
 	dupeCounter        metrics.Counter
 	outOfWindowCounter metrics.Counter
 }
-func NewBits(bits uint64) *Bits {
+func NewBits(length uint64) *Bits {
 	if length == 0 || length&(length-1) != 0 {
 		panic(fmt.Sprintf("Bits length must be a power of two, got %d", length))
 	}
 	nWords := length / bitsPerWord
 	if nWords == 0 {
 		nWords = 1
 	}
 	b := &Bits{
-		length:             bits,
+		length:             length,
-		bits:               make([]bool, bits, bits),
+		lengthMask:         length - 1,
 		bits:               make([]uint64, nWords),
 		current:            0,
 		lostCounter:        metrics.GetOrRegisterCounter("network.packets.lost", nil),
 		dupeCounter:        metrics.GetOrRegisterCounter("network.packets.duplicate", nil),
@@ -27,71 +45,194 @@ func NewBits(bits uint64) *Bits {
 	}
 	// There is no counter value 0, mark it to avoid counting a lost packet later.
-	b.bits[0] = true
+	b.bits[0] = 1
 	b.current = 0
 	return b
 }
 // get reports whether the bit tracking counter i is currently set. The
 // counter is first reduced to a circular bit position with lengthMask.
 func (b *Bits) get(i uint64) bool {
 	slot := i & b.lengthMask
 	// slot is a bit index, not a word index: the upper bits (slot>>6) pick
 	// the uint64 that holds the bit and the low six bits pick the bit in it.
 	word := b.bits[slot>>6]
 	return (word>>(slot&63))&1 == 1
 }
 // set marks the bit tracking counter i. The counter is reduced to a circular
 // bit position with lengthMask; setting an already-set bit changes nothing.
 func (b *Bits) set(i uint64) {
 	slot := i & b.lengthMask
 	idx, flag := slot>>6, uint64(1)<<(slot&63)
 	b.bits[idx] = b.bits[idx] | flag
 }
 // clearRange zeroes `count` consecutive bits of the circular bitmap, starting
 // at bit position `startPos` and wrapping past the end of the window back to
 // position 0. It returns the number of those bits that were set beforehand.
 //
 // startPos must already be masked into [0, length) and count must be in
 // [1, length]. A count covering the whole window takes a shortcut that tallies
 // and wipes every word.
 func (b *Bits) clearRange(startPos, count uint64) uint64 {
 	var evicted uint64
 	if count >= b.length {
 		for _, w := range b.bits {
 			evicted += uint64(mathbits.OnesCount64(w))
 		}
 		clear(b.bits)
 		return evicted
 	}
 	// Walk the range one chunk at a time. A chunk never crosses a word
 	// boundary or the end of the window, so the first chunk may be a partial
 	// word, the middle chunks are whole words, and the last may be partial.
 	pos, left := startPos, count
 	for left > 0 {
 		off := pos & 63
 		span := uint64(64) - off
 		if left < span {
 			span = left
 		}
 		if tail := b.length - pos; tail < span {
 			span = tail
 		}
 		// A shift by 64 would yield 0, so the whole-word mask is special-cased.
 		mask := uint64(math.MaxUint64)
 		if span < 64 {
 			mask = (uint64(1)<<span - 1) << off
 		}
 		idx := pos >> 6
 		evicted += uint64(mathbits.OnesCount64(b.bits[idx] & mask))
 		b.bits[idx] &^= mask
 		left -= span
 		pos = (pos + span) & b.lengthMask
 	}
 	return evicted
 }
 // strictlyWithinWindow reports whether counter i lies inside the window that
 // trails b.current, i.e. whether i is strictly greater than current-length.
 func (b *Bits) strictlyWithinWindow(i uint64) bool {
 	// Until the cursor has advanced a full window length, current-length
 	// wraps around in uint64 arithmetic, so counters below length are
 	// accepted directly while the window has not slid yet.
 	if b.current < b.length && i < b.length {
 		return true
 	}
 	// Otherwise i is in-window only when it is newer than the oldest slot.
 	oldest := b.current - b.length
 	return i > oldest
 }
 // Check returns true if i is within (or way out in front of) the window, and not a replay
 func (b *Bits) Check(l *slog.Logger, i uint64) bool {
 	// If i is the next number, return true.
 	if i > b.current {
 		return true
 	}
-	// If i is within the window, check if it's been set already.
+	if b.strictlyWithinWindow(i) {
-	if i > b.current-b.length || i < b.length && b.current < b.length {
+		return !b.get(i)
 		return !b.bits[i%b.length]
 	}
 	// Not within the window
 	if l.Enabled(context.Background(), slog.LevelDebug) {
-		l.Debug("rejected a packet (top)",
+		l.Debug("rejected a packet (top)", "current", b.current, "incoming", i)
 			"current", b.current,
 			"incoming", i,
 		)
 	}
 	return false
 }
 // Update has three branches:
 //   - i == b.current+1: fast path; advance the cursor by one and lose-count
 //     the slot we just stomped (only past warmup; see the i > b.length guard
 //     below).
 //   - i  >  b.current+1: jump path; clear all slots between current and i
 //     (or up to a full window's worth, whichever is smaller) via clearRange,
 //     then mark i. Two arms here: a warmup arm that handles the very first
 //     window before the cursor has slid, and a steady-state arm that treats
 //     every cleared empty slot as a lost packet.
 //   - i  <= b.current: in-window check for duplicates; out-of-window otherwise.
 //
 // NewBits seeds bits[0]=1 so counter 0 looks "received" — Update never
 // clears that marker during warmup (clearRange skips position 0 when
 // startPos=1), and once b.current >= b.length the marker is no longer
 // consulted. The marker prevents a fictitious "lost" hit on the first real
 // counter.
 func (b *Bits) Update(l *slog.Logger, i uint64) bool {
-	// If i is the next number, return true and update current.
+	// Fast path: i is the next expected counter. Split out so the function
 	// stays small and avoids paying for the slow paths' slog argument-build
 	// stack frame on every call. The bit read/test/write is inlined to
 	// touch the backing word once.
 	if i == b.current+1 {
-		// Check if the oldest bit was lost since we are shifting the window by 1 and occupying it with this counter
+		pos := i & b.lengthMask
-		// The very first window can only be tracked as lost once we are on the 2nd window or greater
+		word := pos >> 6
-		if b.bits[i%b.length] == false && i > b.length {
+		mask := uint64(1) << (pos & 63)
 		w := b.bits[word]
 		if i > b.length && w&mask == 0 {
 			b.lostCounter.Inc(1)
 		}
-		b.bits[i%b.length] = true
+		b.bits[word] = w | mask
 		b.current = i
 		return true
 	}
 	return b.updateSlow(l, i)
 }
 // updateSlow handles jumps, in-window backfill, dupes, and out-of-window.
 func (b *Bits) updateSlow(l *slog.Logger, i uint64) bool {
 	// If i is a jump, adjust the window, record lost, update current, and return true
 	if i > b.current {
-		lost := int64(0)
+		end := i
-		// Zero out the bits between the current and the new counter value, limited by the window size,
+		if end > b.current+b.length {
-		// since the window is shifting
+			end = b.current + b.length
-		for n := b.current + 1; n <= min(i, b.current+b.length); n++ {
+		}
-			if b.bits[n%b.length] == false && n > b.length {
+		count := end - b.current
 		startPos := (b.current + 1) & b.lengthMask
 		var lost int64
 		if b.current >= b.length {
 			// Steady state: every cleared slot is past warmup, so any unset
 			// bit we evict is a lost packet from the previous cycle.
 			wasSet := b.clearRange(startPos, count)
 			lost = int64(count) - int64(wasSet)
 		} else {
 			// Warmup (the very first window). Some cleared slots represent
 			// packets <= length where eviction is not "lost" in the usual
 			// sense. This branch is taken at most once per connection so we
 			// don't bother optimizing it.
 			for n := b.current + 1; n <= end; n++ {
 				if !b.get(n) && n > b.length {
 					lost++
 				}
-			b.bits[n%b.length] = false
+			}
 			b.clearRange(startPos, count)
 		}
-		// Only record any skipped packets as a result of the window moving further than the window length
+		// Anything past the new window can never be backfilled, so it's lost.
-		// Any loss within the new window will be accounted for in future calls
+		if i > b.current+b.length {
-		lost += max(0, int64(i-b.current-b.length))
+			lost += int64(i - b.current - b.length)
 		}
 		b.lostCounter.Inc(lost)
-		b.bits[i%b.length] = true
+		b.set(i)
 		b.current = i
 		return true
 	}
-	// If i is within the current window but below the current counter,
+	// If i is within the current window but below the current counter, check to see if it's a duplicate
-	// Check to see if it's a duplicate
+	if b.strictlyWithinWindow(i) {
-	if i > b.current-b.length || i < b.length && b.current < b.length {
+		pos := i & b.lengthMask
-		if b.current == i || b.bits[i%b.length] == true {
+		word := pos >> 6
 		mask := uint64(1) << (pos & 63)
 		w := b.bits[word]
 		if b.current == i || w&mask != 0 {
 			if l.Enabled(context.Background(), slog.LevelDebug) {
 				l.Debug("Receive window",
 					"accepted", false,
@@ -104,7 +245,7 @@ func (b *Bits) Update(l *slog.Logger, i uint64) bool {
 			return false
 		}
-		b.bits[i%b.length] = true
+		b.bits[word] = w | mask
 		return true
 	}
--- a/bits_test.go
+++ b/bits_test.go
@@ -7,61 +7,79 @@ import (
 	"github.com/stretchr/testify/assert"
 )
 // snapshot expands the packed []uint64 bitmap into one bool per window
 // position (b.length entries) so tests can assert against a readable slice.
 func (b *Bits) snapshot() []bool {
 	view := make([]bool, b.length)
 	for idx := range view {
 		view[idx] = b.get(uint64(idx))
 	}
 	return view
 }
 // TestBitsRequiresPowerOfTwo checks that NewBits panics for lengths that are
 // zero or not a power of two, and accepts lengths that are.
 func TestBitsRequiresPowerOfTwo(t *testing.T) {
 	for _, rejected := range []uint64{10, 0} {
 		assert.Panics(t, func() { NewBits(rejected) })
 	}
 	for _, accepted := range []uint64{1, 16, 1024, 16384} {
 		assert.NotPanics(t, func() { NewBits(accepted) })
 	}
 }
 func TestBits(t *testing.T) {
 	l := test.NewLogger()
-	b := NewBits(10)
+	b := NewBits(16)
-
+	assert.EqualValues(t, 16, b.length)
 	// make sure it is the right size
 	assert.Len(t, b.bits, 10)
 	// This is initialized to zero - receive one. This should work.
 	assert.True(t, b.Check(l, 1))
 	assert.True(t, b.Update(l, 1))
 	assert.EqualValues(t, 1, b.current)
-	g := []bool{true, true, false, false, false, false, false, false, false, false}
+	g := []bool{true, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false}
-	assert.Equal(t, g, b.bits)
+	assert.Equal(t, g, b.snapshot())
 	// Receive two
 	assert.True(t, b.Check(l, 2))
 	assert.True(t, b.Update(l, 2))
 	assert.EqualValues(t, 2, b.current)
-	g = []bool{true, true, true, false, false, false, false, false, false, false}
+	g = []bool{true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, false}
-	assert.Equal(t, g, b.bits)
+	assert.Equal(t, g, b.snapshot())
 	// Receive two again - it will fail
 	assert.False(t, b.Check(l, 2))
 	assert.False(t, b.Update(l, 2))
 	assert.EqualValues(t, 2, b.current)
-	// Jump ahead to 15, which should clear everything and set the 6th element
+	// Jump ahead to 25, which clears the window and sets slot 25%16 = 9.
-	assert.True(t, b.Check(l, 15))
+	assert.True(t, b.Check(l, 25))
-	assert.True(t, b.Update(l, 15))
+	assert.True(t, b.Update(l, 25))
-	assert.EqualValues(t, 15, b.current)
+	assert.EqualValues(t, 25, b.current)
-	g = []bool{false, false, false, false, false, true, false, false, false, false}
+	g = []bool{false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false}
-	assert.Equal(t, g, b.bits)
+	assert.Equal(t, g, b.snapshot())
-	// Mark 14, which is allowed because it is in the window
+	// Mark 24, which is in window (current 25, length 16, window covers [10,25]).
-	assert.True(t, b.Check(l, 14))
+	assert.True(t, b.Check(l, 24))
-	assert.True(t, b.Update(l, 14))
+	assert.True(t, b.Update(l, 24))
-	assert.EqualValues(t, 15, b.current)
+	assert.EqualValues(t, 25, b.current)
-	g = []bool{false, false, false, false, true, true, false, false, false, false}
+	g = []bool{false, false, false, false, false, false, false, false, true, true, false, false, false, false, false, false}
-	assert.Equal(t, g, b.bits)
+	assert.Equal(t, g, b.snapshot())
-	// Mark 5, which is not allowed because it is not in the window
+	// Mark 5, not allowed because 5 <= current-length (25-16=9).
 	assert.False(t, b.Check(l, 5))
 	assert.False(t, b.Update(l, 5))
-	assert.EqualValues(t, 15, b.current)
+	assert.EqualValues(t, 25, b.current)
-	g = []bool{false, false, false, false, true, true, false, false, false, false}
+	g = []bool{false, false, false, false, false, false, false, false, true, true, false, false, false, false, false, false}
-	assert.Equal(t, g, b.bits)
+	assert.Equal(t, g, b.snapshot())
-	// make sure we handle wrapping around once to the current position
+	// Make sure we handle wrapping around once to the same slot. With
-	b = NewBits(10)
+	// length=16, packets 1 and 17 share slot 1.
 	b = NewBits(16)
 	assert.True(t, b.Update(l, 1))
-	assert.True(t, b.Update(l, 11))
+	assert.True(t, b.Update(l, 17))
-	assert.Equal(t, []bool{false, true, false, false, false, false, false, false, false, false}, b.bits)
+	assert.Equal(t, []bool{false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false}, b.snapshot())
 	// Walk through a few windows in order
-	b = NewBits(10)
+	b = NewBits(16)
 	for i := uint64(1); i <= 100; i++ {
 		assert.True(t, b.Check(l, i), "Error while checking %v", i)
 		assert.True(t, b.Update(l, i), "Error while updating %v", i)
@@ -72,24 +90,31 @@ func TestBits(t *testing.T) {
 func TestBitsLargeJumps(t *testing.T) {
 	l := test.NewLogger()
-	b := NewBits(10)
+
 	// length=16. Update(55) from current=0:
 	//   warmup, per-bit loop sees no n>16 with unset bits (slot 0 was set by
 	//   NewBits and gets re-evaluated when n=16; n=16 is not strictly > 16),
 	//   so the loop contributes 0. The jump exceeds the window so we record
 	//   55 - 0 - 16 = 39 packets fell out the back.
 	b := NewBits(16)
 	b.lostCounter.Clear()
 	assert.True(t, b.Update(l, 55))
 	assert.Equal(t, int64(39), b.lostCounter.Count())
-	b = NewBits(10)
+	// Update(100): clears 16 slots starting at slot 56%16=8. Only slot 7 (for
-	b.lostCounter.Clear()
+	// packet 55) was set, so 16 - 1 = 15 evicted slots had unset bits.
-	assert.True(t, b.Update(l, 55)) // We saw packet 55 and can still track 45,46,47,48,49,50,51,52,53,54
+	// Plus 100 - 55 - 16 = 29 packets fell past the window. Total 44.
-	assert.Equal(t, int64(45), b.lostCounter.Count())
+	assert.True(t, b.Update(l, 100))
 	assert.Equal(t, int64(39+44), b.lostCounter.Count())
-	assert.True(t, b.Update(l, 100)) // We saw packet 55 and 100 and can still track 90,91,92,93,94,95,96,97,98,99
+	// Update(200): same shape: 16 - 1 = 15 evicted unset, plus 200 - 100 - 16 = 84 past window. Total 99.
-	assert.Equal(t, int64(89), b.lostCounter.Count())
+	assert.True(t, b.Update(l, 200))
-
+	assert.Equal(t, int64(39+44+99), b.lostCounter.Count())
 	assert.True(t, b.Update(l, 200)) // We saw packet 55, 100, and 200 and can still track 190,191,192,193,194,195,196,197,198,199
 	assert.Equal(t, int64(188), b.lostCounter.Count())
 }
 func TestBitsDupeCounter(t *testing.T) {
 	l := test.NewLogger()
-	b := NewBits(10)
+	b := NewBits(16)
 	b.lostCounter.Clear()
 	b.dupeCounter.Clear()
 	b.outOfWindowCounter.Clear()
@@ -114,120 +139,117 @@ func TestBitsDupeCounter(t *testing.T) {
 func TestBitsOutOfWindowCounter(t *testing.T) {
 	l := test.NewLogger()
-	b := NewBits(10)
+	b := NewBits(16)
 	b.lostCounter.Clear()
 	b.dupeCounter.Clear()
 	b.outOfWindowCounter.Clear()
 	// Jump to 20 (warmup branch + 4 past-window packets).
 	assert.True(t, b.Update(l, 20))
 	assert.Equal(t, int64(0), b.outOfWindowCounter.Count())
-	assert.True(t, b.Update(l, 21))
+	// 9 single-step advances, each evicts a slot whose bit was cleared during
-	assert.True(t, b.Update(l, 22))
+	// the jump above and whose value was never seen, so each contributes 1
-	assert.True(t, b.Update(l, 23))
+	// to lostCounter.
-	assert.True(t, b.Update(l, 24))
+	for n := uint64(21); n <= 29; n++ {
-	assert.True(t, b.Update(l, 25))
+		assert.True(t, b.Update(l, n))
-	assert.True(t, b.Update(l, 26))
+	}
 	assert.True(t, b.Update(l, 27))
 	assert.True(t, b.Update(l, 28))
 	assert.True(t, b.Update(l, 29))
 	assert.Equal(t, int64(0), b.outOfWindowCounter.Count())
 	// 0 is below current-length (29-16=13) so it falls outside the window.
 	assert.False(t, b.Update(l, 0))
 	assert.Equal(t, int64(1), b.outOfWindowCounter.Count())
-	assert.Equal(t, int64(19), b.lostCounter.Count()) // packet 0 wasn't lost
+	// 4 from the Update(20) jump + 9 from 21..29.
 	assert.Equal(t, int64(13), b.lostCounter.Count())
 	assert.Equal(t, int64(0), b.dupeCounter.Count())
 	assert.Equal(t, int64(1), b.outOfWindowCounter.Count())
 }
 func TestBitsLostCounter(t *testing.T) {
 	l := test.NewLogger()
-	b := NewBits(10)
+	b := NewBits(16)
 	b.lostCounter.Clear()
 	b.dupeCounter.Clear()
 	b.outOfWindowCounter.Clear()
-	assert.True(t, b.Update(l, 20))
+	// Walk 20..29 like the original, just with a bigger window. Same
-	assert.True(t, b.Update(l, 21))
+	// reasoning as TestBitsOutOfWindowCounter: 4 past-window from Update(20),
-	assert.True(t, b.Update(l, 22))
+	// then 9 more from the unit advances.
-	assert.True(t, b.Update(l, 23))
+	for n := uint64(20); n <= 29; n++ {
-	assert.True(t, b.Update(l, 24))
+		assert.True(t, b.Update(l, n))
-	assert.True(t, b.Update(l, 25))
+	}
-	assert.True(t, b.Update(l, 26))
+	assert.Equal(t, int64(13), b.lostCounter.Count())
 	assert.True(t, b.Update(l, 27))
 	assert.True(t, b.Update(l, 28))
 	assert.True(t, b.Update(l, 29))
 	assert.Equal(t, int64(19), b.lostCounter.Count()) // packet 0 wasn't lost
 	assert.Equal(t, int64(0), b.dupeCounter.Count())
 	assert.Equal(t, int64(0), b.outOfWindowCounter.Count())
-	b = NewBits(10)
+	b = NewBits(16)
 	b.lostCounter.Clear()
 	b.dupeCounter.Clear()
 	b.outOfWindowCounter.Clear()
-	assert.True(t, b.Update(l, 9))
+	// Update(15) clears the warmup window (no lost), sets slot 15.
 	assert.Equal(t, int64(0), b.lostCounter.Count())
 	// 10 will set 0 index, 0 was already set, no lost packets
 	assert.True(t, b.Update(l, 10))
 	assert.Equal(t, int64(0), b.lostCounter.Count())
 	// 11 will set 1 index, 1 was missed, we should see 1 packet lost
 	assert.True(t, b.Update(l, 11))
 	assert.Equal(t, int64(1), b.lostCounter.Count())
 	// Now let's fill in the window, should end up with 8 lost packets
 	assert.True(t, b.Update(l, 12))
 	assert.True(t, b.Update(l, 13))
 	assert.True(t, b.Update(l, 14))
 	assert.True(t, b.Update(l, 15))
 	assert.Equal(t, int64(0), b.lostCounter.Count())
 	// Update(16): slot 0 was already set (NewBits seeded it), and 16 is not
 	// strictly > length, so nothing is recorded as lost.
 	assert.True(t, b.Update(l, 16))
 	assert.Equal(t, int64(0), b.lostCounter.Count())
 	// Update(17): we jumped straight from 0 to 15, so slot 1 was cleared
 	// (and never re-set). 17 > 16 is past warmup, so packet 1 is recorded lost.
 	assert.True(t, b.Update(l, 17))
-	assert.True(t, b.Update(l, 18))
+	assert.Equal(t, int64(1), b.lostCounter.Count())
 	assert.True(t, b.Update(l, 19))
 	assert.Equal(t, int64(8), b.lostCounter.Count())
-	// Jump ahead by a window size
+	// Fill in 18..30 in single steps. Each i evicts slot i%16. Slots 2..14
-	assert.True(t, b.Update(l, 29))
+	// were all cleared during Update(15), and we never re-set any of them,
-	assert.Equal(t, int64(8), b.lostCounter.Count())
+	// so each i in 18..30 is a fresh lost packet — 13 more.
-	// Now lets walk ahead normally through the window, the missed packets should fill in
+	for n := uint64(18); n <= 30; n++ {
-	assert.True(t, b.Update(l, 30))
+		assert.True(t, b.Update(l, n))
-	assert.True(t, b.Update(l, 31))
+	}
-	assert.True(t, b.Update(l, 32))
+	assert.Equal(t, int64(14), b.lostCounter.Count())
 	assert.True(t, b.Update(l, 33))
 	assert.True(t, b.Update(l, 34))
 	assert.True(t, b.Update(l, 35))
 	assert.True(t, b.Update(l, 36))
 	assert.True(t, b.Update(l, 37))
 	assert.True(t, b.Update(l, 38))
 	// 39 packets tracked, 22 seen, 17 lost
 	assert.Equal(t, int64(17), b.lostCounter.Count())
-	// Jump ahead by 2 windows, should have recording 1 full window missing
+	// Jump ahead by exactly one window size.
-	assert.True(t, b.Update(l, 58))
+	assert.True(t, b.Update(l, 46))
-	assert.Equal(t, int64(27), b.lostCounter.Count())
+	// end = min(46, 30+16) = 46, count = 16, all slots cleared. Before the
-	// Now lets walk ahead normally through the window, the missed packets should fill in from this window
+	// jump every slot 0..15 had been set (Update(15), (16), (17), 18..30),
-	assert.True(t, b.Update(l, 59))
+	// so wasSet=16 and 46 == current+length means no past-window slack:
-	assert.True(t, b.Update(l, 60))
+	// lost contribution = 0.
-	assert.True(t, b.Update(l, 61))
+	assert.Equal(t, int64(14), b.lostCounter.Count())
-	assert.True(t, b.Update(l, 62))
+
-	assert.True(t, b.Update(l, 63))
+	// Walk 47..55. The Update(46) jump cleared every slot, so only slot 14
-	assert.True(t, b.Update(l, 64))
+	// (for packet 46) is set when we start. Each subsequent unit step lands
-	assert.True(t, b.Update(l, 65))
+	// on a slot that was cleared and is past warmup, so it counts as lost.
-	assert.True(t, b.Update(l, 66))
+	// 9 more = 23.
-	assert.True(t, b.Update(l, 67))
+	for n := uint64(47); n <= 55; n++ {
-	// 68 packets tracked, 32 seen, 36 missed
+		assert.True(t, b.Update(l, n))
-	assert.Equal(t, int64(36), b.lostCounter.Count())
+	}
 	assert.Equal(t, int64(23), b.lostCounter.Count())
 	// Jump ahead by two windows: clears the window plus past-window loss.
 	assert.True(t, b.Update(l, 87))
 	// current=55, length=16. end = min(87, 71) = 71. count=16, all slots
 	// cleared. Slots set before the clear are slots 14,15,0..7 (10 total).
 	// Lost from clear = 16 - 10 = 6. Past window: 87 - 55 - 16 = 16. +22.
 	assert.Equal(t, int64(45), b.lostCounter.Count())
 	assert.Equal(t, int64(0), b.dupeCounter.Count())
 	assert.Equal(t, int64(0), b.outOfWindowCounter.Count())
 }
 func TestBitsLostCounterIssue1(t *testing.T) {
 	l := test.NewLogger()
-	b := NewBits(10)
+	b := NewBits(16)
 	b.lostCounter.Clear()
 	b.dupeCounter.Clear()
 	b.outOfWindowCounter.Clear()
 	// Receive 4, backfill 1, then 9, 2, 3, 5, 6, 7 (skip 8), 10, 11, 14.
 	// Then jump to 25 — slot 25%16=9 is being evicted, but it had been set
 	// (we received packet 9), so no spurious lost increment. The original
 	// regression was about double-counting a missing packet when its slot
 	// got cleared on a jump. With the jump path now using clearRange's
 	// word-level wasSet count, the same semantics hold.
 	assert.True(t, b.Update(l, 4))
 	assert.Equal(t, int64(0), b.lostCounter.Count())
 	assert.True(t, b.Update(l, 1))
@@ -244,7 +266,7 @@ func TestBitsLostCounterIssue1(t *testing.T) {
 	assert.Equal(t, int64(0), b.lostCounter.Count())
 	assert.True(t, b.Update(l, 7))
 	assert.Equal(t, int64(0), b.lostCounter.Count())
-	// assert.True(t, b.Update(l, 8))
+	// Skip packet 8.
 	assert.True(t, b.Update(l, 10))
 	assert.Equal(t, int64(0), b.lostCounter.Count())
 	assert.True(t, b.Update(l, 11))
@@ -252,9 +274,23 @@ func TestBitsLostCounterIssue1(t *testing.T) {
 	assert.True(t, b.Update(l, 14))
 	assert.Equal(t, int64(0), b.lostCounter.Count())
-	// Issue seems to be here, we reset missing packet 8 to false here and don't increment the lost counter
+
-	assert.True(t, b.Update(l, 19))
+	// Jump to 25. With length=16, slot 25%16=9 corresponds to packet 9
 	// (which we DID receive), so its bit is set and no lost++ from that
 	// eviction. The trace below shows the only loss is packet 8.
 	assert.True(t, b.Update(l, 25))
 	// current was 14, i=25. end=min(25,30)=25. count=11. startPos=15.
 	// steady? current=14<16, so warmup branch: per-bit n=15..25, count those
 	// with !get(n) AND n>16. n=17..25 are >16. Among slots 17%16=1..25%16=9
 	// did we set slots 1..9 (packets 1..9)? Yes for all but slot 8 (packet 8
 	// was skipped). n=24 maps to slot 8 which is FALSE → lost++. All other
 	// n in 17..25 map to slots that are set. n=16 is not strictly > 16. So
 	// lost = 1.
 	assert.Equal(t, int64(1), b.lostCounter.Count())
 	// Fill in 12, 13, 15, 16. Each is below current=25 (in-window). 16 must
 	// recheck slot 0 — it was set by NewBits and then cleared by the
 	// Update(25) jump, so 16 backfills cleanly.
 	assert.True(t, b.Update(l, 12))
 	assert.Equal(t, int64(1), b.lostCounter.Count())
 	assert.True(t, b.Update(l, 13))
@@ -263,29 +299,140 @@ func TestBitsLostCounterIssue1(t *testing.T) {
 	assert.Equal(t, int64(1), b.lostCounter.Count())
 	assert.True(t, b.Update(l, 16))
 	assert.Equal(t, int64(1), b.lostCounter.Count())
 	assert.True(t, b.Update(l, 17))
 	assert.Equal(t, int64(1), b.lostCounter.Count())
 	assert.True(t, b.Update(l, 18))
 	assert.Equal(t, int64(1), b.lostCounter.Count())
 	assert.True(t, b.Update(l, 20))
 	assert.Equal(t, int64(1), b.lostCounter.Count())
 	assert.True(t, b.Update(l, 21))
-	// We missed packet 8 above
+	// We missed packet 8 above and that loss is still recorded once, never
 	// double-counted, never zeroed.
 	assert.Equal(t, int64(1), b.lostCounter.Count())
 	assert.Equal(t, int64(0), b.dupeCounter.Count())
 	assert.Equal(t, int64(0), b.outOfWindowCounter.Count())
 }
-func BenchmarkBits(b *testing.B) {
+// TestBitsWarmupOvershoot exercises the jump path's warmup arm with an
-	z := NewBits(10)
+// overshoot past one full window. NewBits leaves current=0 with only slot 0
-	for n := 0; n < b.N; n++ {
+// "set" by the marker. Jumping straight to length+k must (a) clear every
-		for i := range z.bits {
+// slot the jump straddles, (b) count only past-window slack (not the
-			z.bits[i] = true
+// in-window slots, which never had a "lost" tenant during warmup), and
-		}
+// (c) leave the cursor at the new counter so subsequent unit advances
-		for i := range z.bits {
+// count from steady state. The marker bit at slot 0 is irrelevant once
-			z.bits[i] = false
+// current >= length.
-		}
+func TestBitsWarmupOvershoot(t *testing.T) {
 	l := test.NewLogger()
 	b := NewBits(16)
 	b.lostCounter.Clear()
 	// Jump from current=0 to i=20 (length=16, overshoot=4).
 	// Warmup arm: counts slots in [1..16] where bit unset and n>length.
 	// Only n=16 was unset and >length: but slot 16%16=0 is the marker,
 	// so b.get(16) reads bits[0]=1 and skips. Result: 0 lost from the loop.
 	// Past-window: i - current - length = 20 - 0 - 16 = 4 lost.
 	assert.True(t, b.Update(l, 20))
 	assert.Equal(t, int64(4), b.lostCounter.Count())
 	assert.Equal(t, uint64(20), b.current)
 	// Steady state now (current=20 >= length=16). Unit advance to 21
 	// stomps slot 21%16=5, which was cleared by the jump and not reset,
 	// so this is +1 lost.
 	assert.True(t, b.Update(l, 21))
 	assert.Equal(t, int64(5), b.lostCounter.Count())
 }
 // TestBitsCheckAcrossWarmupBoundary pins how Check decides "in window" on
 // both sides of the warmup boundary. While current < length the subtraction
 // current-length wraps around in uint64, so the comparison against it cannot
 // admit small counters and the explicit warmup test (i < length) carries the
 // decision. Once current >= length the comparison i > current-length takes
 // over.
 func TestBitsCheckAcrossWarmupBoundary(t *testing.T) {
 	l := test.NewLogger()
 	b := NewBits(16)
 	// Warmup with current=0: counter 0 is the seeded marker, so it must look
 	// already received.
 	assert.False(t, b.Check(l, 0), "marker slot should look already-received")
 	// Every other counter below length is in-window and unset, so accepted.
 	for i := uint64(1); i < 16; i++ {
 		assert.True(t, b.Check(l, i), "warmup in-window i=%d should be accepted", i)
 	}
 	// Counters ahead of current are accepted no matter how far ahead.
 	for _, ahead := range []uint64{16, 1_000_000} {
 		assert.True(t, b.Check(l, ahead))
 	}
 	// Leave warmup: current=100, length=16, so the window is [85..100].
 	assert.True(t, b.Update(l, 100))
 	steady := []struct {
 		counter uint64
 		accept  bool
 	}{
 		{84, false},  // 84 > 100-16 is false: just outside the window
 		{85, true},   // oldest in-window counter, never seen
 		{100, false}, // current itself: in window but already set
 		{50, false},  // far behind the window
 	}
 	for _, tc := range steady {
 		if tc.accept {
 			assert.True(t, b.Check(l, tc.counter))
 		} else {
 			assert.False(t, b.Check(l, tc.counter))
 		}
 	}
 }
 // TestBitsMarkerInvariant follows the bit that NewBits seeds for counter 0
 // through the first window of length 8. While current < length the marker
 // must make counter 0 look like a duplicate, and the unit advance that
 // finally reuses position 0 (counter 8) must not be charged as a lost packet.
 func TestBitsMarkerInvariant(t *testing.T) {
 	logger := test.NewLogger()
 	window := NewBits(8)
 	// The seeded marker makes counter 0 read as already received.
 	assert.False(t, window.Check(logger, 0))
 	// Update(0) with current=0 is rejected and is expected to bump the
 	// duplicate counter exactly once.
 	window.dupeCounter.Clear()
 	assert.False(t, window.Update(logger, 0))
 	assert.Equal(t, int64(1), window.dupeCounter.Count())
 	// Advance one counter at a time through the rest of warmup.
 	for n := uint64(1); n < 8; n++ {
 		assert.True(t, window.Update(logger, n))
 	}
 	// Nothing above touched position 0, so counter 0 still reads as seen.
 	assert.False(t, window.Check(logger, 0))
 	// Counter 8 lands on position 0. The fast path only counts a loss when
 	// i > length, and 8 == 8, so no lost packet is recorded here.
 	window.lostCounter.Clear()
 	assert.True(t, window.Update(logger, 8))
 	assert.Equal(t, int64(0), window.lostCounter.Count())
 	// Position 0 now belongs to counter 8, which is therefore a replay.
 	assert.False(t, window.Check(logger, 8))
 }
 // BenchmarkBitsUpdateInOrder measures Update when every counter is exactly
 // current+1, i.e. the in-order fast path, on a 16384-slot window.
 func BenchmarkBitsUpdateInOrder(b *testing.B) {
 	logger := test.NewLogger()
 	window := NewBits(16384)
 	for counter := uint64(1); counter <= uint64(b.N); counter++ {
 		window.Update(logger, counter)
 	}
 }
 // BenchmarkBitsUpdateReorder measures Update under light reordering: each
 // iteration delivers an even counter first and then the odd counter just
 // below it, so the second call is always behind current but inside the window.
 func BenchmarkBitsUpdateReorder(b *testing.B) {
 	logger := test.NewLogger()
 	window := NewBits(16384)
 	for ahead := uint64(2); ahead <= 2*uint64(b.N); ahead += 2 {
 		window.Update(logger, ahead)
 		window.Update(logger, ahead-1)
 	}
 }
 // BenchmarkBitsUpdateLargeJumps measures Update when every call jumps the
 // counter forward by 1000 on a 16384-slot window, so each call goes through
 // the jump path that clears a range of slots.
 func BenchmarkBitsUpdateLargeJumps(b *testing.B) {
 	logger := test.NewLogger()
 	window := NewBits(16384)
 	for step := 1; step <= b.N; step++ {
 		window.Update(logger, uint64(step)*1000)
 	}
 }