Skip to content

Commit e82fce4

Browse files
committed
Serialize Amazon stealth requests to reduce memory usage
- Run only one Playwright browser at a time for Amazon URLs
- Non-Amazon URLs still run concurrently via aiohttp
- Add jitter between sequential Amazon requests (1-3 seconds)
- This prevents R14/R15 memory errors on Heroku
1 parent 69d842c commit e82fce4

File tree

1 file changed

+33
-31
lines changed

1 file changed

+33
-31
lines changed

services/price_async.py

Lines changed: 33 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -61,15 +61,16 @@ async def fetch_prices_batch(urls: List[str]) -> Dict[str, Optional[float]]:
6161
"""Fetch multiple prices concurrently using asyncio.
6262
6363
Amazon URLs are routed through the stealth extractor when enabled.
64-
Other URLs use standard aiohttp fetching.
64+
Amazon stealth requests run SEQUENTIALLY (one at a time) to reduce memory usage.
65+
Other URLs use standard aiohttp fetching concurrently.
6566
"""
6667
results = {}
6768

6869
# Separate Amazon URLs from others for different handling
6970
amazon_urls = [url for url in urls if url and _is_amazon_url(url)]
7071
other_urls = [url for url in urls if url and not _is_amazon_url(url)]
7172

72-
# We'll use a semaphore to limit concurrency
73+
# We'll use a semaphore to limit concurrency for standard requests
7374
semaphore = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)
7475

7576
async def fetch_one_standard(url: str):
@@ -79,54 +80,55 @@ async def fetch_one_standard(url: str):
7980
await asyncio.sleep(random.uniform(0.1, 1.0))
8081
price = await _fetch_price_async_standard(url)
8182
return url, price
82-
83-
async def fetch_one_amazon(url: str, identity, manager):
84-
"""Fetch Amazon URL using stealth extractor."""
85-
async with semaphore:
86-
# Add more jitter for Amazon to avoid rate limits
87-
await asyncio.sleep(random.uniform(0.5, 2.0))
88-
price = await _fetch_amazon_stealth(url, identity, manager)
89-
return url, price
9083

9184
try:
92-
# Handle non-Amazon URLs with standard fetching
85+
# Handle non-Amazon URLs with standard fetching (concurrent)
9386
standard_tasks = [fetch_one_standard(url) for url in other_urls]
9487

95-
# Handle Amazon URLs with stealth extraction if enabled
96-
amazon_tasks = []
88+
if standard_tasks:
89+
completed = await asyncio.gather(*standard_tasks, return_exceptions=True)
90+
for result in completed:
91+
if isinstance(result, Exception):
92+
logger.error(f"Async batch error: {result}")
93+
continue
94+
if isinstance(result, tuple) and len(result) == 2:
95+
url, price = result
96+
results[url] = price
97+
98+
# Handle Amazon URLs with stealth extraction SEQUENTIALLY (one at a time)
99+
# This prevents memory exhaustion from multiple Playwright browsers
97100
if amazon_urls and AMAZON_STEALTH_ENABLED:
98101
manager = _get_identity_manager()
99102
if manager:
103+
logger.info(f"Processing {len(amazon_urls)} Amazon URLs sequentially via stealth extraction")
100104
for url in amazon_urls:
101105
identity = manager.get_healthy_identity()
102106
if identity:
103-
amazon_tasks.append(fetch_one_amazon(url, identity, manager))
107+
# Add jitter between requests
108+
await asyncio.sleep(random.uniform(1.0, 3.0))
109+
price = await _fetch_amazon_stealth(url, identity, manager)
110+
results[url] = price
104111
else:
105112
logger.warning(f"No healthy identity available for {url}")
106-
# Return None for this URL
107113
results[url] = None
108114
else:
109115
logger.warning("IdentityManager not available, skipping Amazon stealth extraction")
110-
# Fall back to standard fetching for Amazon URLs
111-
amazon_tasks = [fetch_one_standard(url) for url in amazon_urls]
116+
# Fall back to standard fetching for Amazon URLs (likely to fail)
117+
for url in amazon_urls:
118+
async with semaphore:
119+
await asyncio.sleep(random.uniform(0.1, 1.0))
120+
price = await _fetch_price_async_standard(url)
121+
results[url] = price
112122
elif amazon_urls:
113123
# Stealth not enabled, use standard fetching (likely to fail)
114124
logger.info("Amazon stealth disabled, using standard fetch for Amazon URLs")
115-
amazon_tasks = [fetch_one_standard(url) for url in amazon_urls]
116-
117-
all_tasks = standard_tasks + amazon_tasks
118-
if not all_tasks:
119-
return results
120-
121-
completed = await asyncio.gather(*all_tasks, return_exceptions=True)
125+
for url in amazon_urls:
126+
async with semaphore:
127+
await asyncio.sleep(random.uniform(0.1, 1.0))
128+
price = await _fetch_price_async_standard(url)
129+
results[url] = price
122130

123-
for result in completed:
124-
if isinstance(result, Exception):
125-
logger.error(f"Async batch error: {result}")
126-
continue
127-
if isinstance(result, tuple) and len(result) == 2:
128-
url, price = result
129-
results[url] = price
131+
return results
130132

131133
except Exception as e:
132134
logger.error(f"Global async batch failure: {e}")

0 commit comments

Comments
 (0)