-
Notifications
You must be signed in to change notification settings - Fork 271
Expand file tree
/
Copy pathconfig.toml
More file actions
139 lines (115 loc) · 5.87 KB
/
config.toml
File metadata and controls
139 lines (115 loc) · 5.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#:schema ./config-schema.json
# Debug logging: set to true to see the detailed checking process for each proxy.
# Warning: extremely verbose - every connection attempt and failure is logged.
debug = false
[output]
# Output directory.
# Under Docker this is overridden to /app/out via a volume mount.
path = "./out"
# Ordering of proxies in the output:
#   true  - by response time, fastest proxies first
#   false - by IP address, in natural order by protocol, IP and port
sort_by_speed = true
# Plain text output (.txt files); set enabled = false to turn it off
[output.txt]
enabled = true
# JSON output with metadata (.json files); set enabled = false to turn it off
[output.json]
enabled = true
# Include ASN (network provider) info in the JSON output.
# Looked up in an offline database.
include_asn = true
# Include geolocation data in the JSON output.
# Looked up in an offline database.
include_geolocation = true
[checking]
# URL used to check whether a proxy works.
# Recognized response formats:
#   httpbin-compatible - returns JSON with IP info for ASN/geo data
#   plain-text         - returns just the IP address for basic connectivity
# Examples:
# "https://httpbin.org/ip" - JSON with "origin" key. Full-featured checking.
# "https://ipv4.icanhazip.com" - Simple IP return. Full-featured checking.
# "https://google.com" - Basic connect/read check only
# "" - Skip checking entirely (scrape only)
check_url = "https://ipv4.icanhazip.com"
# Number of proxies checked simultaneously.
# Higher values check faster but use more RAM and network.
# Lower this if you experience crashes or timeouts.
max_concurrent_checks = 512
# Proxy response timeout, in seconds.
#   Higher - finds more slow proxies, but checking takes longer
#   Lower  - faster checking, but slow proxies are discarded
timeout = 10.0
# NOTE(review): undocumented in this file; presumably the limit (in seconds) on
# establishing the connection, as opposed to the overall timeout above - confirm.
connect_timeout = 5.0
# User-Agent header sent with proxy check requests
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"
[scraping]
# Per-source cap on the number of proxies collected; 0 means unlimited.
# A source that reaches the cap is skipped with a warning, to prevent memory issues.
max_proxies_per_source = 100_000

# Timeout, in seconds, for the requests that fetch proxy sources.
# Raising it lets slower sources complete, at the cost of a longer total scraping time.
timeout = 10.0
# NOTE(review): undocumented in this file; presumably the limit (in seconds) on
# establishing the connection, as opposed to the overall timeout above - confirm.
connect_timeout = 5.0

# Optional HTTP(S), SOCKS4, or SOCKS5 proxy through which sources are fetched,
# e.g. "socks5://user:pass@host:port". An empty string disables it.
proxy = ""

# User-Agent header sent with scraping requests
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"
# Proxy sources configuration
[scraping.http]
enabled = true
urls = [
    # A source may be a local file, for example:
    # "./my_http_proxies.txt",
    # "/home/user/my_http_proxies.txt",
    # "C:/Users/user/Desktop/my_http_proxies.txt",
    # "file:///home/user/my_http_proxies.txt",

    # A source may also be a table, for endpoints that need basic auth or custom headers.
    # With HTTP Basic Auth:
    # { url = "https://some.api/endpoint", basic_auth = { username = "user", password = "password123" } },
    # With custom headers:
    # { url = "https://some.api/endpoint", headers = { Authorization = "Bearer YOUR_API_KEY" } },

    "https://api.proxyscrape.com/v3/free-proxy-list/get?request=getproxies&protocol=http",
    "https://api.proxyscrape.com/v3/free-proxy-list/get?request=getproxies&protocol=https",
    "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/refs/heads/master/http.txt",
    "https://raw.githubusercontent.com/proxifly/free-proxy-list/refs/heads/main/proxies/protocols/http/data.txt",
    "https://raw.githubusercontent.com/proxifly/free-proxy-list/refs/heads/main/proxies/protocols/https/data.txt",
    "https://raw.githubusercontent.com/roosterkid/openproxylist/refs/heads/main/HTTPS_RAW.txt",
    "https://raw.githubusercontent.com/sunny9577/proxy-scraper/refs/heads/master/generated/http_proxies.txt",
]
[scraping.socks4]
enabled = true
urls = [
    # A source may be a local file, for example:
    # "./my_socks4_proxies.txt",
    # "/home/user/my_socks4_proxies.txt",
    # "C:/Users/user/Desktop/my_socks4_proxies.txt",
    # "file:///home/user/my_socks4_proxies.txt",

    # A source may also be a table, for endpoints that need basic auth or custom headers.
    # With HTTP Basic Auth:
    # { url = "https://some.api/endpoint", basic_auth = { username = "user", password = "password123" } },
    # With custom headers:
    # { url = "https://some.api/endpoint", headers = { Authorization = "Bearer YOUR_API_KEY" } },

    "https://api.proxyscrape.com/v3/free-proxy-list/get?request=getproxies&protocol=socks4",
    "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/refs/heads/master/socks4.txt",
    "https://raw.githubusercontent.com/proxifly/free-proxy-list/refs/heads/main/proxies/protocols/socks4/data.txt",
    "https://raw.githubusercontent.com/roosterkid/openproxylist/refs/heads/main/SOCKS4_RAW.txt",
    "https://raw.githubusercontent.com/sunny9577/proxy-scraper/refs/heads/master/generated/socks4_proxies.txt",
]
[scraping.socks5]
enabled = true
urls = [
    # A source may be a local file, for example:
    # "./my_socks5_proxies.txt",
    # "/home/user/my_socks5_proxies.txt",
    # "C:/Users/user/Desktop/my_socks5_proxies.txt",
    # "file:///home/user/my_socks5_proxies.txt",

    # A source may also be a table, for endpoints that need basic auth or custom headers.
    # With HTTP Basic Auth:
    # { url = "https://some.api/endpoint", basic_auth = { username = "user", password = "password123" } },
    # With custom headers:
    # { url = "https://some.api/endpoint", headers = { Authorization = "Bearer YOUR_API_KEY" } },

    "https://api.proxyscrape.com/v3/free-proxy-list/get?request=getproxies&protocol=socks5",
    "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/refs/heads/master/socks5.txt",
    "https://raw.githubusercontent.com/hookzof/socks5_list/refs/heads/master/proxy.txt",
    "https://raw.githubusercontent.com/proxifly/free-proxy-list/refs/heads/main/proxies/protocols/socks5/data.txt",
    "https://raw.githubusercontent.com/roosterkid/openproxylist/refs/heads/main/SOCKS5_RAW.txt",
    "https://raw.githubusercontent.com/sunny9577/proxy-scraper/refs/heads/master/generated/socks5_proxies.txt",
]