Skip to content

Commit 7d6e1ee

Browse files
kalroy authored and sreepuramsudheer committed
Update opensearch cookbook to check when OpenSearch is up instead of a fixed timeout
Signed-off-by: Kallol Roy <kallol.roy@progress.com>
1 parent df73ea5 commit 7d6e1ee

1 file changed

Lines changed: 93 additions & 3 deletions

File tree

  • omnibus/files/server-ctl-cookbooks/infra-server/recipes

omnibus/files/server-ctl-cookbooks/infra-server/recipes/opensearch.rb

Lines changed: 93 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
# Copyright:: Chef Software, Inc.
22
# All Rights Reserved
3+
#
4+
# OpenSearch Recipe - Enhanced with Smart Readiness Checking
5+
#
6+
# This recipe configures OpenSearch for Chef Server with intelligent
7+
# startup validation instead of hardcoded sleep timings. The enhancement
8+
# addresses CHEF-26134 timing issues that emerged with OpenSearch 1.3.20
9+
# by implementing proper health checks before security configuration.
310

411
MAX_MAP_COUNT = 262_144
512
cluster_name = if node['previous_run'] && node['previous_run']['opensearch'] && node['previous_run']['opensearch']['cluster_name']
@@ -177,14 +184,97 @@
177184
retry_delay 1
178185
end
179186

180-
chef_sleep 10
187+
# Smart OpenSearch readiness check instead of hardcoded sleep
188+
# Polls the local OpenSearch HTTP endpoint until it reports a usable state, so
# that resources declared after this one run against a responding service.
#
# A probe succeeds when GET / answers 200 or 401 AND one of:
#   * GET /_cluster/health answers 200 with status "green" or "yellow"
#   * GET /_cluster/health answers 401 (the service is answering requests but
#     requires authentication)
#   * GET /_cluster/health answers 200 with a body that is not valid JSON
#     (best-effort: the HTTP layer is responding)
#
# Any network-level failure (refused/reset connection, timeout), on either
# request, counts as "not ready yet" and is retried.
#
# Raises RuntimeError when no probe succeeds within max_attempts.
ruby_block 'wait_for_opensearch_ready' do
  block do
    require 'net/http'
    require 'json'
    require 'timeout'

    max_attempts = 60    # number of probes before giving up
    retry_interval = 5   # seconds slept between two probes
    probe_timeout = 10   # hard cap in seconds for one probe (both requests)
    opensearch_port = node['private_chef']['opensearch']['port'] || 9200

    # Runs a single probe. Returns true when OpenSearch should be treated as
    # ready and false when it answered but is not usable yet. Network errors
    # are NOT rescued here; they propagate to the retry loop below.
    probe = lambda do |attempt|
      progress = "attempt #{attempt}/#{max_attempts}"

      uri = URI("http://localhost:#{opensearch_port}/")
      http = Net::HTTP.new(uri.host, uri.port)
      http.read_timeout = 5
      http.open_timeout = 5

      # 200 = OK, 401 = Unauthorized but the service is running.
      response = http.request(Net::HTTP::Get.new(uri))
      unless %w[200 401].include?(response.code)
        Chef::Log.debug("OpenSearch not ready, HTTP response code: #{response.code} (#{progress})")
        return false
      end

      health_uri = URI("http://localhost:#{opensearch_port}/_cluster/health")
      health_response = http.request(Net::HTTP::Get.new(health_uri))

      case health_response.code
      when '200'
        begin
          cluster_status = JSON.parse(health_response.body)['status']
        rescue JSON::ParserError => e
          # Only an unparseable body is tolerated; the HTTP layer did answer.
          Chef::Log.debug("Error checking cluster health: #{e.message}, but basic service is responding")
          return true
        end

        if %w[green yellow].include?(cluster_status)
          Chef::Log.info("OpenSearch is ready! Cluster status: #{cluster_status}, #{progress}")
          true
        else
          Chef::Log.debug("OpenSearch cluster status is #{cluster_status}, waiting... (#{progress})")
          false
        end
      when '401'
        # Security is enabled but the service is responding - we can proceed.
        Chef::Log.info("OpenSearch is ready! Security enabled, #{progress}")
        true
      else
        Chef::Log.debug("OpenSearch cluster health check failed with code #{health_response.code}, waiting... (#{progress})")
        false
      end
    end

    Chef::Log.info("Waiting for OpenSearch to become ready on port #{opensearch_port}...")
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

    opensearch_ready = (1..max_attempts).any? do |attempt|
      progress = "attempt #{attempt}/#{max_attempts}"

      ready = begin
        Timeout.timeout(probe_timeout) { probe.call(attempt) }
      rescue Timeout::Error
        # Also covers Net::OpenTimeout / Net::ReadTimeout (Timeout::Error subclasses).
        Chef::Log.debug("Timeout connecting to OpenSearch (#{progress})")
        false
      rescue Errno::ECONNREFUSED, Errno::EHOSTUNREACH, SocketError => e
        Chef::Log.debug("Connection failed to OpenSearch: #{e.message} (#{progress})")
        false
      rescue StandardError => e
        Chef::Log.debug("Unexpected error checking OpenSearch: #{e.message} (#{progress})")
        false
      end

      # No point sleeping after the final probe; fall through to the raise.
      unless ready || attempt == max_attempts
        Chef::Log.debug("OpenSearch not ready, waiting #{retry_interval} seconds before retry...")
        sleep retry_interval
      end

      ready
    end

    unless opensearch_ready
      # Report measured wall time: probes may each take up to probe_timeout
      # seconds on top of the sleeps, so attempts * interval would understate it.
      elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at).round
      raise "OpenSearch failed to become ready after #{max_attempts} attempts (#{elapsed} seconds). Please check OpenSearch logs."
    end

    Chef::Log.info('OpenSearch readiness verified successfully!')
  end
  action :run
end
181271

182272
execute 'add internal user to opensearch security plugin' do
183273
command 'export JAVA_HOME="/opt/opscode/embedded/open-jre/"; ./securityadmin.sh -f ../securityconfig/internal_users.yml -icl -nhnv -cert /opt/opscode/embedded/opensearch/config/admin.pem -cacert /opt/opscode/embedded/opensearch/config/root-ca.pem -key /opt/opscode/embedded/opensearch/config/admin-key.pem'
184274
cwd '/opt/opscode/embedded/opensearch/plugins/opensearch-security/tools/'
185275
user OmnibusHelper.new(node).ownership['owner']
186-
retries 10
187-
retry_delay 1
276+
retries 5
277+
retry_delay 15
188278
end
189279

190280
include_recipe 'infra-server::opensearch_index'

0 commit comments

Comments
 (0)