bitmagnet-io · abitofevrything · Jun 29, 2025 · Jun 29, 2025 · mgdigital · Jul 1, 2025
diff --git a/internal/dhtcrawler/crawler.go b/internal/dhtcrawler/crawler.go
@@ -48,7 +48,8 @@ type crawler struct {
 	// containing every hash it has already encountered.
 	// This avoids multiple attempts to crawl the same hash, and takes a lot of load off the database query
 	// that checks if a hash has already been indexed.
-	ignoreHashes    *ignoreHashes
+	ignoreHashes    *ignoreFilter
+	ignoreNodes     *ignoreFilter
 	blockingManager blocking.Manager
 	// soughtNodeID is a random node ID used as the target for find_node and sample_infohashes requests.
 	// It is rotated every 10 seconds.
@@ -101,18 +102,32 @@ type infoHashWithScrape struct {
 	bfpe bloom.Filter
 }
 
-type ignoreHashes struct {
+type ignoreFilter struct {
 	mutex sync.Mutex
 	bloom *boom.StableBloomFilter
 }
 
-func (i *ignoreHashes) testAndAdd(id protocol.ID) bool {
+func (i *ignoreFilter) testAndAdd(id protocol.ID) bool {
 	i.mutex.Lock()
 	defer i.mutex.Unlock()
 
 	return i.bloom.TestAndAdd(id[:])
 }
 
+func (i *ignoreFilter) test(id protocol.ID) bool {
+	i.mutex.Lock()
+	defer i.mutex.Unlock()
+
+	return i.bloom.Test(id[:])
+}
+
+func (i *ignoreFilter) add(id protocol.ID) {
+	i.mutex.Lock()
+	defer i.mutex.Unlock()
+
+	i.bloom.Add(id[:])
+}
+
 func (c *crawler) rotateSoughtNodeID(ctx context.Context) {
 	for {
 		select {

diff --git a/internal/dhtcrawler/discovered_nodes.go b/internal/dhtcrawler/discovered_nodes.go
@@ -45,16 +45,22 @@ func (c *crawler) runDiscoveredNodes(ctx context.Context) {
 					addrs = append(addrs, p.Addr().Addr())
 				}
 			}
-			// for any discovered node not already in the routing table,
-			// we will block until it can be sent to any one of the pipeline channels.
-			unknownAddrs := c.kTable.FilterKnownAddrs(addrs)
-			for _, addr := range unknownAddrs {
+
+			// For each discovered node not already in the ignoreNodes filter,
+			// we will block until it can be sent to one of the pipeline channels.
+			for _, addr := range addrs {
 				p := m[addr.String()]
+
+				if c.ignoreNodes.test(p.ID()) {
+					continue
+				}
+
 				select {
 				case <-ctx.Done():
 					return
 				case c.nodesForFindNode.In() <- p:
 				case c.nodesForSampleInfoHashes.In() <- p:
+					c.ignoreNodes.add(p.ID())
 				case c.nodesForPing.In() <- p:
 				}
 			}

diff --git a/internal/dhtcrawler/factory.go b/internal/dhtcrawler/factory.go
@@ -117,9 +117,12 @@ func New(params Params) Result {
 						savePieces:         params.Config.SavePieces,
 						rescrapeThreshold:  params.Config.RescrapeThreshold,
 						dao:                query,
-						ignoreHashes: &ignoreHashes{
+						ignoreHashes: &ignoreFilter{
 							bloom: boom.NewStableBloomFilter(10_000_000, 2, 0.001),
 						},
+						ignoreNodes: &ignoreFilter{
+							bloom: boom.NewStableBloomFilter(200_000*uint(scalingFactor), 2, 0.001),
+						},
 						blockingManager: blockingManager,
 						soughtNodeID:    &concurrency.AtomicValue[protocol.ID]{},
 						stopped:         make(chan struct{}),

diff --git a/internal/protocol/dht/ktable/mocks/Table.go b/internal/protocol/dht/ktable/mocks/Table.go
diff --git a/internal/protocol/dht/ktable/query.go b/internal/protocol/dht/ktable/query.go
@@ -1,7 +1,6 @@
 package ktable
 
 import (
-	"net/netip"
 	"sort"
 	"time"
 )
@@ -40,24 +39,6 @@ func (c GetOldestPeers) execReturn(t *table) []Node {
 	return peers
 }
 
-var _ Query[[]netip.Addr] = FilterKnownAddrs{}
-
-type FilterKnownAddrs struct {
-	Addrs []netip.Addr
-}
-
-func (c FilterKnownAddrs) execReturn(t *table) []netip.Addr {
-	var unknown []netip.Addr
-
-	for _, addr := range c.Addrs {
-		if _, ok := t.addrs.addrs[addr.String()]; !ok {
-			unknown = append(unknown, addr)
-		}
-	}
-
-	return unknown
-}
-
 var _ Query[[]Node] = GetNodesForSampleInfoHashes{}
 
 type GetNodesForSampleInfoHashes struct {

diff --git a/internal/protocol/dht/ktable/table.go b/internal/protocol/dht/ktable/table.go
@@ -25,7 +25,6 @@ type TableQuery interface {
 	GetClosestNodes(id ID) []Node
 	GetOldestNodes(cutoff time.Time, n int) []Node
 	GetNodesForSampleInfoHashes(n int) []Node
-	FilterKnownAddrs(addrs []netip.Addr) []netip.Addr
 	GetHashOrClosestNodes(id ID) GetHashOrClosestNodesResult
 	// SampleHashesAndNodes returns a random sample of up to 8 hashes and nodes, and the total hashes count.
 	SampleHashesAndNodes() SampleHashesAndNodesResult
@@ -133,15 +132,6 @@ func (t *table) GetNodesForSampleInfoHashes(n int) []Node {
 	}.execReturn(t)
 }
 
-func (t *table) FilterKnownAddrs(addrs []netip.Addr) []netip.Addr {
-	t.mutex.RLock()
-	defer t.mutex.RUnlock()
-
-	return FilterKnownAddrs{
-		Addrs: addrs,
-	}.execReturn(t)
-}
-
 func (t *table) GetHashOrClosestNodes(id ID) GetHashOrClosestNodesResult {
 	t.mutex.RLock()
 	defer t.mutex.RUnlock()