@@ -61,15 +61,16 @@ async def fetch_prices_batch(urls: List[str]) -> Dict[str, Optional[float]]:
6161 """Fetch multiple prices concurrently using asyncio.
6262
6363 Amazon URLs are routed through the stealth extractor when enabled.
64- Other URLs use standard aiohttp fetching.
64+ Amazon stealth requests run SEQUENTIALLY (one at a time) to reduce memory usage.
65+ Other URLs use standard aiohttp fetching concurrently.
6566 """
6667 results = {}
6768
6869 # Separate Amazon URLs from others for different handling
6970 amazon_urls = [url for url in urls if url and _is_amazon_url (url )]
7071 other_urls = [url for url in urls if url and not _is_amazon_url (url )]
7172
72- # We'll use a semaphore to limit concurrency
73+ # We'll use a semaphore to limit concurrency for standard requests
7374 semaphore = asyncio .Semaphore (MAX_CONCURRENT_REQUESTS )
7475
7576 async def fetch_one_standard (url : str ):
@@ -79,54 +80,55 @@ async def fetch_one_standard(url: str):
7980 await asyncio .sleep (random .uniform (0.1 , 1.0 ))
8081 price = await _fetch_price_async_standard (url )
8182 return url , price
82-
83- async def fetch_one_amazon (url : str , identity , manager ):
84- """Fetch Amazon URL using stealth extractor."""
85- async with semaphore :
86- # Add more jitter for Amazon to avoid rate limits
87- await asyncio .sleep (random .uniform (0.5 , 2.0 ))
88- price = await _fetch_amazon_stealth (url , identity , manager )
89- return url , price
9083
9184 try :
92- # Handle non-Amazon URLs with standard fetching
85+ # Handle non-Amazon URLs with standard fetching (concurrent)
9386 standard_tasks = [fetch_one_standard (url ) for url in other_urls ]
9487
95- # Handle Amazon URLs with stealth extraction if enabled
96- amazon_tasks = []
88+ if standard_tasks :
89+ completed = await asyncio .gather (* standard_tasks , return_exceptions = True )
90+ for result in completed :
91+ if isinstance (result , Exception ):
92+ logger .error (f"Async batch error: { result } " )
93+ continue
94+ if isinstance (result , tuple ) and len (result ) == 2 :
95+ url , price = result
96+ results [url ] = price
97+
98+ # Handle Amazon URLs with stealth extraction SEQUENTIALLY (one at a time)
99+ # This prevents memory exhaustion from multiple Playwright browsers
97100 if amazon_urls and AMAZON_STEALTH_ENABLED :
98101 manager = _get_identity_manager ()
99102 if manager :
103+ logger .info (f"Processing { len (amazon_urls )} Amazon URLs sequentially via stealth extraction" )
100104 for url in amazon_urls :
101105 identity = manager .get_healthy_identity ()
102106 if identity :
103- amazon_tasks .append (fetch_one_amazon (url , identity , manager ))
107+ # Add jitter between requests
108+ await asyncio .sleep (random .uniform (1.0 , 3.0 ))
109+ price = await _fetch_amazon_stealth (url , identity , manager )
110+ results [url ] = price
104111 else :
105112 logger .warning (f"No healthy identity available for { url } " )
106- # Return None for this URL
107113 results [url ] = None
108114 else :
109115 logger .warning ("IdentityManager not available, skipping Amazon stealth extraction" )
110- # Fall back to standard fetching for Amazon URLs
111- amazon_tasks = [fetch_one_standard (url ) for url in amazon_urls ]
116+ # Fall back to standard fetching for Amazon URLs (likely to fail)
117+ for url in amazon_urls :
118+ async with semaphore :
119+ await asyncio .sleep (random .uniform (0.1 , 1.0 ))
120+ price = await _fetch_price_async_standard (url )
121+ results [url ] = price
112122 elif amazon_urls :
113123 # Stealth not enabled, use standard fetching (likely to fail)
114124 logger .info ("Amazon stealth disabled, using standard fetch for Amazon URLs" )
115- amazon_tasks = [fetch_one_standard (url ) for url in amazon_urls ]
116-
117- all_tasks = standard_tasks + amazon_tasks
118- if not all_tasks :
119- return results
120-
121- completed = await asyncio .gather (* all_tasks , return_exceptions = True )
125+ for url in amazon_urls :
126+ async with semaphore :
127+ await asyncio .sleep (random .uniform (0.1 , 1.0 ))
128+ price = await _fetch_price_async_standard (url )
129+ results [url ] = price
122130
123- for result in completed :
124- if isinstance (result , Exception ):
125- logger .error (f"Async batch error: { result } " )
126- continue
127- if isinstance (result , tuple ) and len (result ) == 2 :
128- url , price = result
129- results [url ] = price
131+ return results
130132
131133 except Exception as e :
132134 logger .error (f"Global async batch failure: { e } " )
0 commit comments