Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Copyright:: Chef Software, Inc.
# All Rights Reserved
#
# OpenSearch Recipe - Enhanced with Smart Readiness Checking
#
# This recipe configures OpenSearch for Chef Server with intelligent
# startup validation instead of hardcoded sleep timings. The enhancement
# addresses CHEF-26134 timing issues that emerged with OpenSearch 1.3.20
# by implementing proper health checks before security configuration.

MAX_MAP_COUNT = 262_144
cluster_name = if node['previous_run'] && node['previous_run']['opensearch'] && node['previous_run']['opensearch']['cluster_name']
Expand Down Expand Up @@ -177,14 +184,101 @@
retry_delay 1
end

# Wait for OpenSearch to answer HTTP requests before the security plugin is
# configured. This readiness probe takes the place of the former fixed
# `chef_sleep 10`.
#
# Each attempt issues GET / and, when that answers 200 or 401, GET
# /_cluster/health on the same port. OpenSearch counts as ready when:
#   * cluster health answers 200 with status green or yellow,
#   * cluster health answers 401 (service is responding, security enabled), or
#   * the health request or its JSON parsing raises a StandardError after the
#     root request already succeeded (best-effort: the service is responding).
# Every other outcome is retried. Raises RuntimeError once max_attempts
# attempts have been used without success.
ruby_block 'wait_for_opensearch_ready' do
  block do
    require 'net/http'
    require 'json'
    require 'timeout'

    max_attempts = 60    # attempts before giving up
    retry_interval = 5   # seconds slept between two attempts
    attempt_timeout = 10 # upper bound in seconds for one attempt (both requests)
    opensearch_port = node['private_chef']['opensearch']['port'] || 9200
    opensearch_ready = false
    # Monotonic clock so the reported wait time is unaffected by wall-clock changes.
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

    Chef::Log.info("Waiting for OpenSearch to become ready on port #{opensearch_port}...")

    1.upto(max_attempts) do |attempt|
      begin
        Timeout.timeout(attempt_timeout) do
          # Check if the OpenSearch API is responding at all.
          uri = URI("http://localhost:#{opensearch_port}/")
          http = Net::HTTP.new(uri.host, uri.port)
          http.read_timeout = 5
          http.open_timeout = 5

          response = http.request(Net::HTTP::Get.new(uri))

          # 200 = OK, 401 = Unauthorized but the service is running.
          if %w(200 401).include?(response.code)
            begin
              # Additional check: verify cluster health.
              health_uri = URI("http://localhost:#{opensearch_port}/_cluster/health")
              health_response = http.request(Net::HTTP::Get.new(health_uri))

              case health_response.code
              when '200'
                cluster_status = JSON.parse(health_response.body)['status']

                if %w(green yellow).include?(cluster_status)
                  Chef::Log.info("OpenSearch is ready! Cluster status: #{cluster_status}, attempt #{attempt}/#{max_attempts}")
                  opensearch_ready = true
                else
                  Chef::Log.debug("OpenSearch cluster status is #{cluster_status}, waiting... (attempt #{attempt}/#{max_attempts})")
                end
              when '401'
                # Security is enabled but the service is responding - we can proceed.
                Chef::Log.info("OpenSearch is ready! Security enabled, attempt #{attempt}/#{max_attempts}")
                opensearch_ready = true
              else
                Chef::Log.debug("OpenSearch cluster health check failed with code #{health_response.code}, waiting... (attempt #{attempt}/#{max_attempts})")
              end
            rescue StandardError => e
              # Covers JSON::ParserError as well as request errors: the root
              # endpoint already answered, so a failing health check is
              # deliberately not treated as "not ready".
              Chef::Log.debug("Error checking cluster health: #{e.message}, but basic service is responding")
              opensearch_ready = true
            end
          else
            Chef::Log.debug("OpenSearch not ready, HTTP response code: #{response.code} (attempt #{attempt}/#{max_attempts})")
          end
        end
      rescue Timeout::Error
        Chef::Log.debug("Timeout connecting to OpenSearch (attempt #{attempt}/#{max_attempts})")
      rescue Errno::ECONNREFUSED, Errno::EHOSTUNREACH, SocketError => e
        Chef::Log.debug("Connection failed to OpenSearch: #{e.message} (attempt #{attempt}/#{max_attempts})")
      rescue StandardError => e
        Chef::Log.debug("Unexpected error checking OpenSearch: #{e.message} (attempt #{attempt}/#{max_attempts})")
      end

      break if opensearch_ready

      # No point sleeping after the final attempt; fall through to the raise.
      if attempt < max_attempts
        Chef::Log.debug("OpenSearch not ready, waiting #{retry_interval} seconds before retry...")
        sleep retry_interval
      end
    end

    unless opensearch_ready
      # Report the time actually spent: attempts may block up to
      # attempt_timeout each, so attempts * retry_interval would be misleading.
      elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at).round
      raise "OpenSearch failed to become ready after #{max_attempts} attempts (#{elapsed} seconds). Please check OpenSearch logs."
    end

    Chef::Log.info('OpenSearch readiness verified successfully!')
  end
  action :run
end

# Run securityadmin.sh from the opensearch-security plugin's tools directory
# against ../securityconfig/internal_users.yml, authenticating with the admin
# certificate and key under /opt/opscode/embedded/opensearch/config.
# The command runs as the owner reported by OmnibusHelper and is retried up to
# 5 times, 15 seconds apart, if it fails.
execute 'add internal user to opensearch security plugin' do
  command 'export JAVA_HOME="/opt/opscode/embedded/open-jre/"; ./securityadmin.sh -f ../securityconfig/internal_users.yml -icl -nhnv -cert /opt/opscode/embedded/opensearch/config/admin.pem -cacert /opt/opscode/embedded/opensearch/config/root-ca.pem -key /opt/opscode/embedded/opensearch/config/admin-key.pem'
  cwd '/opt/opscode/embedded/opensearch/plugins/opensearch-security/tools/'
  user OmnibusHelper.new(node).ownership['owner']
  retries 5
  retry_delay 15
end

# Pull in the infra-server::opensearch_index recipe after the resources above.
include_recipe 'infra-server::opensearch_index'
Loading