|
1 | 1 | # Copyright:: Chef Software, Inc. |
2 | 2 | # All Rights Reserved |
| 3 | +# |
| 4 | +# OpenSearch Recipe - Enhanced with Smart Readiness Checking |
| 5 | +# |
| 6 | +# This recipe configures OpenSearch for Chef Server with intelligent |
| 7 | +# startup validation instead of hardcoded sleep timings. The enhancement |
| 8 | +# addresses CHEF-26134 timing issues that emerged with OpenSearch 1.3.20 |
| 9 | +# by implementing proper health checks before security configuration. |
3 | 10 |
|
4 | 11 | MAX_MAP_COUNT = 262_144 |
5 | 12 | cluster_name = if node['previous_run'] && node['previous_run']['opensearch'] && node['previous_run']['opensearch']['cluster_name'] |
|
177 | 184 | retry_delay 1 |
178 | 185 | end |
179 | 186 |
|
180 | | -chef_sleep 10 |
# Block the Chef run until OpenSearch answers HTTP requests, replacing the
# former fixed `chef_sleep 10`.
#
# Every attempt opens one HTTP session to localhost and issues
# GET / followed by GET /_cluster/health. OpenSearch counts as ready when the
# root endpoint answers 200 or 401 AND the health endpoint either
#   * answers 200 with a cluster status of green or yellow,
#   * answers 401 (the service is up; the request was merely unauthenticated), or
#   * answers 200 with a body that is not valid JSON (best effort: the HTTP
#     layer is clearly serving requests).
#
# Transport failures on either request (refused/reset connection, open/read
# timeout, ...) mean "not ready yet" and are retried. They are never treated
# as success.
#
# Raises RuntimeError after max_attempts unsuccessful probes.
ruby_block 'wait_for_opensearch_ready' do
  block do
    require 'net/http'
    require 'json'
    require 'timeout'

    max_attempts = 60     # probes before giving up
    retry_interval = 5    # seconds slept between two probes
    attempt_timeout = 10  # hard ceiling (seconds) for one probe, both requests included
    # Worst case wait: 60 * 10s of probing + 59 * 5s of sleeping, i.e. ~15 minutes.
    opensearch_port = node['private_chef']['opensearch']['port'] || 9200

    # Runs a single readiness probe and returns true/false. Transport errors
    # are deliberately NOT rescued here so the retry loop below can classify
    # and log them.
    probe = lambda do |progress|
      Net::HTTP.start('localhost', opensearch_port, open_timeout: 5, read_timeout: 5) do |http|
        root = http.get('/')
        unless %w(200 401).include?(root.code)
          Chef::Log.debug("OpenSearch not ready, HTTP response code: #{root.code} (#{progress})")
          next false
        end

        health = http.get('/_cluster/health')
        case health.code
        when '200'
          begin
            cluster_status = JSON.parse(health.body)['status']
          rescue JSON::ParserError => e
            # Only an unparseable body is forgiven; the service did answer 200.
            Chef::Log.debug("Error checking cluster health: #{e.message}, but basic service is responding")
            next true
          end

          if %w(green yellow).include?(cluster_status)
            Chef::Log.info("OpenSearch is ready! Cluster status: #{cluster_status}, #{progress}")
            true
          else
            Chef::Log.debug("OpenSearch cluster status is #{cluster_status}, waiting... (#{progress})")
            false
          end
        when '401'
          # Security is enabled but the service is responding - we can proceed.
          Chef::Log.info("OpenSearch is ready! Security enabled, #{progress}")
          true
        else
          Chef::Log.debug("OpenSearch cluster health check failed with code #{health.code}, waiting... (#{progress})")
          false
        end
      end
    end

    Chef::Log.info("Waiting for OpenSearch to become ready on port #{opensearch_port}...")

    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    opensearch_ready = false

    1.upto(max_attempts) do |attempt|
      progress = "attempt #{attempt}/#{max_attempts}"

      begin
        opensearch_ready = Timeout.timeout(attempt_timeout) { probe.call(progress) }
      rescue Timeout::Error
        # Also covers Net::OpenTimeout / Net::ReadTimeout, which subclass Timeout::Error.
        Chef::Log.debug("Timeout connecting to OpenSearch (#{progress})")
      rescue Errno::ECONNREFUSED, Errno::EHOSTUNREACH, SocketError => e
        Chef::Log.debug("Connection failed to OpenSearch: #{e.message} (#{progress})")
      rescue StandardError => e
        Chef::Log.debug("Unexpected error checking OpenSearch: #{e.message} (#{progress})")
      end

      # No point sleeping after success or after the final attempt.
      break if opensearch_ready || attempt == max_attempts

      Chef::Log.debug("OpenSearch not ready, waiting #{retry_interval} seconds before retry...")
      sleep retry_interval
    end

    unless opensearch_ready
      elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at).round
      raise "OpenSearch failed to become ready after #{max_attempts} attempts (#{elapsed} seconds). Please check OpenSearch logs."
    end

    Chef::Log.info('OpenSearch readiness verified successfully!')
  end
  action :run
end
181 | 271 |
|
# Loads ../securityconfig/internal_users.yml into the OpenSearch security
# plugin by running securityadmin.sh from the plugin's tools directory.
# The command exports JAVA_HOME=/opt/opscode/embedded/open-jre/ inline and
# authenticates with admin.pem / admin-key.pem and root-ca.pem from
# /opt/opscode/embedded/opensearch/config. It runs as the owner returned by
# OmnibusHelper#ownership.
# NOTE(review): -icl / -nhnv presumably relax cluster-name and hostname
# verification - confirm against the securityadmin.sh documentation.
# Retry policy in this diff: the removed lines (retries 10, retry_delay 1)
# are replaced by the added lines (retries 5, retry_delay 15).
182 | 272 | execute 'add internal user to opensearch security plugin' do |
183 | 273 | command 'export JAVA_HOME="/opt/opscode/embedded/open-jre/"; ./securityadmin.sh -f ../securityconfig/internal_users.yml -icl -nhnv -cert /opt/opscode/embedded/opensearch/config/admin.pem -cacert /opt/opscode/embedded/opensearch/config/root-ca.pem -key /opt/opscode/embedded/opensearch/config/admin-key.pem' |
184 | 274 | cwd '/opt/opscode/embedded/opensearch/plugins/opensearch-security/tools/' |
185 | 275 | user OmnibusHelper.new(node).ownership['owner'] |
186 | | - retries 10 |
187 | | - retry_delay 1 |
| 276 | + retries 5 |
| 277 | + retry_delay 15 |
188 | 278 | end |
189 | 279 |
|
# Hands over to the infra-server::opensearch_index recipe once the security
# plugin has been configured above.
# NOTE(review): presumably this creates/updates the search index - confirm in that recipe.
190 | 280 | include_recipe 'infra-server::opensearch_index' |
0 commit comments