diff --git a/components-rs/ddtrace.h b/components-rs/ddtrace.h index df9d034a8d..aa5f209afb 100644 --- a/components-rs/ddtrace.h +++ b/components-rs/ddtrace.h @@ -19,8 +19,6 @@ extern ddog_VecRemoteConfigCapabilities DDTRACE_REMOTE_CONFIG_CAPABILITIES; extern const uint8_t *DDOG_PHP_FUNCTION; -extern struct ddog_SidecarTransport *ddtrace_sidecar; - /** * # Safety * Must be called from a single-threaded context, such as MINIT. diff --git a/components-rs/sidecar.rs b/components-rs/sidecar.rs index 210f0ad9d1..d7e6a68766 100644 --- a/components-rs/sidecar.rs +++ b/components-rs/sidecar.rs @@ -170,10 +170,6 @@ pub extern "C" fn ddog_sidecar_connect_php( MaybeError::None } -#[no_mangle] -#[allow(non_upper_case_globals)] -pub static mut ddtrace_sidecar: *mut SidecarTransport = std::ptr::null_mut(); - #[no_mangle] pub extern "C" fn ddtrace_sidecar_reconnect( transport: &mut Box, diff --git a/ext/auto_flush.c b/ext/auto_flush.c index 90b1912d89..98130d30d2 100644 --- a/ext/auto_flush.c +++ b/ext/auto_flush.c @@ -47,7 +47,7 @@ ZEND_RESULT_CODE ddtrace_flush_tracer(bool force_on_startup, bool collect_cycles char *url = ddtrace_agent_url(); if (get_global_DD_TRACE_SIDECAR_TRACE_SENDER()) { - if (ddtrace_sidecar) { + if (DDTRACE_G(sidecar)) { ddog_SenderParameters parameters = { .tracer_headers_tags = { .container_id = ddtrace_get_container_id(), @@ -59,7 +59,7 @@ ZEND_RESULT_CODE ddtrace_flush_tracer(bool force_on_startup, bool collect_cycles .client_computed_top_level = get_DD_TRACE_STATS_COMPUTATION_ENABLED(), .client_computed_stats = !get_global_DD_APM_TRACING_ENABLED() || get_DD_TRACE_STATS_COMPUTATION_ENABLED(), }, - .transport = ddtrace_sidecar, + .transport = DDTRACE_G(sidecar), .instance_id = ddtrace_sidecar_instance_id, .limit = limit, .n_requests = get_global_DD_TRACE_AGENT_FLUSH_AFTER_N_REQUESTS(), diff --git a/ext/coms.c b/ext/coms.c index f9be2711d7..f413a97ac9 100644 --- a/ext/coms.c +++ b/ext/coms.c @@ -771,10 +771,10 @@ static struct curl_slist 
*dd_agent_headers_alloc(void) { dd_append_header(&list, "Datadog-Meta-Lang-Interpreter", sapi_module.name, strlen(sapi_module.name)); dd_append_header(&list, "Datadog-Meta-Lang-Version", php_version_rt.ptr, php_version_rt.len); dd_append_header(&list, "Datadog-Meta-Tracer-Version", ZEND_STRL(PHP_DDTRACE_VERSION)); - if (!get_global_DD_APM_TRACING_ENABLED() || (ddtrace_sidecar && get_global_DD_TRACE_STATS_COMPUTATION_ENABLED())) { + if (!get_global_DD_APM_TRACING_ENABLED() || (ddtrace_sidecar_for_signal && get_global_DD_TRACE_STATS_COMPUTATION_ENABLED())) { dd_append_header(&list, "Datadog-Client-Computed-Stats", ZEND_STRL("true")); } - if (ddtrace_sidecar && get_global_DD_TRACE_STATS_COMPUTATION_ENABLED()) { + if (ddtrace_sidecar_for_signal && get_global_DD_TRACE_STATS_COMPUTATION_ENABLED()) { dd_append_header(&list, "Datadog-Client-Computed-Top-Level", ZEND_STRL("true")); } diff --git a/ext/ddtrace.c b/ext/ddtrace.c index 129e7dcd10..3267c80678 100644 --- a/ext/ddtrace.c +++ b/ext/ddtrace.c @@ -409,21 +409,21 @@ static inline void dd_alter_prop(size_t prop_offset, zval *old_value, zval *new_ bool ddtrace_alter_dd_service(zval *old_value, zval *new_value, zend_string *new_str) { dd_alter_prop(XtOffsetOf(ddtrace_span_properties, property_service), old_value, new_value, new_str); if (DDTRACE_G(request_initialized)) { - ddtrace_sidecar_submit_root_span_data_direct(&ddtrace_sidecar, NULL, new_str, get_DD_ENV(), get_DD_VERSION()); + ddtrace_sidecar_submit_root_span_data_direct(&DDTRACE_G(sidecar), NULL, new_str, get_DD_ENV(), get_DD_VERSION()); } return true; } bool ddtrace_alter_dd_env(zval *old_value, zval *new_value, zend_string *new_str) { dd_alter_prop(XtOffsetOf(ddtrace_span_properties, property_env), old_value, new_value, new_str); if (DDTRACE_G(request_initialized)) { - ddtrace_sidecar_submit_root_span_data_direct(&ddtrace_sidecar, NULL, get_DD_SERVICE(), new_str, get_DD_VERSION()); + ddtrace_sidecar_submit_root_span_data_direct(&DDTRACE_G(sidecar), NULL, 
get_DD_SERVICE(), new_str, get_DD_VERSION()); } return true; } bool ddtrace_alter_dd_version(zval *old_value, zval *new_value, zend_string *new_str) { dd_alter_prop(XtOffsetOf(ddtrace_span_properties, property_version), old_value, new_value, new_str); if (DDTRACE_G(request_initialized)) { - ddtrace_sidecar_submit_root_span_data_direct(&ddtrace_sidecar, NULL, get_DD_SERVICE(), get_DD_ENV(), new_str); + ddtrace_sidecar_submit_root_span_data_direct(&DDTRACE_G(sidecar), NULL, get_DD_SERVICE(), get_DD_ENV(), new_str); } return true; } @@ -697,6 +697,9 @@ static PHP_GSHUTDOWN_FUNCTION(ddtrace) { zend_hash_destroy(&ddtrace_globals->git_metadata); + // Drop the per-thread sidecar transport (thread-lifetime, one per thread). + ddtrace_sidecar_gshutdown(); + tsrm_mutex_free(ddtrace_globals->sidecar_universal_service_tags_mutex); #ifdef CXA_THREAD_ATEXIT_WRAPPER @@ -1594,8 +1597,8 @@ static PHP_MSHUTDOWN_FUNCTION(ddtrace) { ddtrace_coms_mshutdown_proxy_env(); } else /* ! part of the if outside the ifdef */ #endif - if (get_global_DD_TRACE_FORCE_FLUSH_ON_SHUTDOWN() && ddtrace_sidecar) { - ddog_sidecar_flush_traces(&ddtrace_sidecar); + if (get_global_DD_TRACE_FORCE_FLUSH_ON_SHUTDOWN() && DDTRACE_G(sidecar)) { + ddog_sidecar_flush_traces(&DDTRACE_G(sidecar)); } ddtrace_log_mshutdown(); @@ -2809,7 +2812,7 @@ PHP_FUNCTION(DDTrace_dogstatsd_set) { PHP_FUNCTION(DDTrace_are_endpoints_collected) { UNUSED(execute_data); - if (!ddtrace_sidecar || !ddtrace_sidecar_instance_id || !DDTRACE_G(sidecar_queue_id)) { + if (!DDTRACE_G(sidecar) || !ddtrace_sidecar_instance_id || !DDTRACE_G(sidecar_queue_id)) { RETURN_TRUE; // Skip overhead if unnecessary } @@ -2865,7 +2868,7 @@ PHP_FUNCTION(DDTrace_add_endpoint) { RETURN_FALSE; } - if (!ddtrace_sidecar || !ddtrace_sidecar_instance_id || !DDTRACE_G(sidecar_queue_id)) { + if (!DDTRACE_G(sidecar) || !ddtrace_sidecar_instance_id || !DDTRACE_G(sidecar_queue_id)) { RETURN_FALSE; } @@ -2890,7 +2893,7 @@ PHP_FUNCTION(DDTrace_flush_endpoints) { 
UNUSED(execute_data); UNUSED(return_value); - if (!ddtrace_sidecar || !ddtrace_sidecar_instance_id || !DDTRACE_G(sidecar_queue_id) || !DDTRACE_G(telemetry_buffer)) { + if (!DDTRACE_G(sidecar) || !ddtrace_sidecar_instance_id || !DDTRACE_G(sidecar_queue_id) || !DDTRACE_G(telemetry_buffer)) { return; } @@ -2902,7 +2905,7 @@ PHP_FUNCTION(DDTrace_flush_endpoints) { ddog_CharSlice env_name = dd_zend_string_to_CharSlice(DDTRACE_G(last_env_name)); ddtrace_ffi_try("Failed flushing endpoint telemetry buffer", - ddog_sidecar_telemetry_filter_flush(&ddtrace_sidecar, ddtrace_sidecar_instance_id, &DDTRACE_G(sidecar_queue_id), ddtrace_telemetry_buffer(), ddtrace_telemetry_cache(), service_name, env_name)); + ddog_sidecar_telemetry_filter_flush(&DDTRACE_G(sidecar), ddtrace_sidecar_instance_id, &DDTRACE_G(sidecar_queue_id), ddtrace_telemetry_buffer(), ddtrace_telemetry_cache(), service_name, env_name)); } PHP_FUNCTION(dd_trace_send_traces_via_thread) { @@ -3004,7 +3007,7 @@ PHP_FUNCTION(dd_trace_internal_fn) { } } else if (params_count == 1 && FUNCTION_NAME_MATCHES("detect_composer_installed_json")) { ddog_CharSlice path = dd_zend_string_to_CharSlice(Z_STR_P(ZVAL_VARARG_PARAM(params, 0))); - ddtrace_detect_composer_installed_json(&ddtrace_sidecar, ddtrace_sidecar_instance_id, &DDTRACE_G(sidecar_queue_id), path); + ddtrace_detect_composer_installed_json(&DDTRACE_G(sidecar), ddtrace_sidecar_instance_id, &DDTRACE_G(sidecar_queue_id), path); RETVAL_TRUE; } else if (params_count == 2 && FUNCTION_NAME_MATCHES("mark_integration_loaded")) { zval *name = ZVAL_VARARG_PARAM(params, 0); @@ -3032,24 +3035,24 @@ PHP_FUNCTION(dd_trace_internal_fn) { RETVAL_TRUE; } } else if (FUNCTION_NAME_MATCHES("dump_sidecar")) { - if (!ddtrace_sidecar) { + if (!DDTRACE_G(sidecar)) { RETURN_FALSE; } - ddog_CharSlice slice = ddog_sidecar_dump(&ddtrace_sidecar); + ddog_CharSlice slice = ddog_sidecar_dump(&DDTRACE_G(sidecar)); RETVAL_STRINGL(slice.ptr, slice.len); free((void *) slice.ptr); } else if 
(FUNCTION_NAME_MATCHES("stats_sidecar")) { - if (!ddtrace_sidecar) { + if (!DDTRACE_G(sidecar)) { RETURN_FALSE; } - ddog_CharSlice slice = ddog_sidecar_stats(&ddtrace_sidecar); + ddog_CharSlice slice = ddog_sidecar_stats(&DDTRACE_G(sidecar)); RETVAL_STRINGL(slice.ptr, slice.len); free((void *) slice.ptr); } else if (FUNCTION_NAME_MATCHES("break_sidecar_connection")) { - if (!ddtrace_sidecar) { + if (!DDTRACE_G(sidecar)) { RETURN_FALSE; } - ddog_sidecar_send_garbage(&ddtrace_sidecar); + ddog_sidecar_send_garbage(&DDTRACE_G(sidecar)); ddtrace_generate_runtime_id(); ddtrace_force_new_instance_id(); RETURN_TRUE; @@ -3079,8 +3082,8 @@ PHP_FUNCTION(dd_trace_internal_fn) { } } else #endif - if (ddtrace_sidecar) { - ddtrace_ffi_try("Failed synchronously flushing traces", ddog_sidecar_flush_traces(&ddtrace_sidecar)); + if (DDTRACE_G(sidecar)) { + ddtrace_ffi_try("Failed synchronously flushing traces", ddog_sidecar_flush_traces(&DDTRACE_G(sidecar))); } RETVAL_TRUE; #ifndef _WIN32 @@ -3214,8 +3217,8 @@ PHP_FUNCTION(dd_trace_synchronous_flush) { } } else #endif - if (ddtrace_sidecar) { - ddtrace_ffi_try("Failed synchronously flushing traces", ddog_sidecar_flush_traces(&ddtrace_sidecar)); + if (DDTRACE_G(sidecar)) { + ddtrace_ffi_try("Failed synchronously flushing traces", ddog_sidecar_flush_traces(&DDTRACE_G(sidecar))); } RETURN_NULL(); } diff --git a/ext/ddtrace.h b/ext/ddtrace.h index d0a4a2cd62..84def7e4f4 100644 --- a/ext/ddtrace.h +++ b/ext/ddtrace.h @@ -149,6 +149,7 @@ ZEND_BEGIN_MODULE_GLOBALS(ddtrace) zend_reference *curl_multi_injecting_spans; char *cgroup_file; + ddog_SidecarTransport *sidecar; ddog_QueueId sidecar_queue_id; MUTEX_T sidecar_universal_service_tags_mutex; ddog_AgentRemoteConfigReader *agent_config_reader; @@ -166,6 +167,7 @@ ZEND_BEGIN_MODULE_GLOBALS(ddtrace) bool request_initialized; HashTable telemetry_spans_created_per_integration; ddog_SidecarActionsBuffer *telemetry_buffer; + ddog_SidecarActionsBuffer *metrics_buffer; bool asm_event_emitted; diff 
--git a/ext/exception_serialize.c b/ext/exception_serialize.c index 98dcb504a4..2a933f1928 100644 --- a/ext/exception_serialize.c +++ b/ext/exception_serialize.c @@ -384,7 +384,7 @@ static void ddtrace_collect_exception_debug_data(zend_object *exception, zend_ob ddog_add_str_span_meta_str(span, "error.debug_info_captured", "true"); ddog_add_str_span_meta_CharSlice(span, "_dd.debug.error.exception_hash", (ddog_CharSlice){.ptr = exception_hash, .len = hash_len}); - if (!ddog_exception_hash_limiter_inc(ddtrace_sidecar, (uint64_t)exception_long_hash, get_DD_EXCEPTION_REPLAY_CAPTURE_INTERVAL_SECONDS())) { + if (!ddog_exception_hash_limiter_inc(DDTRACE_G(sidecar), (uint64_t)exception_long_hash, get_DD_EXCEPTION_REPLAY_CAPTURE_INTERVAL_SECONDS())) { LOG(TRACE, "Skipping exception replay capture due to hash %.*s already recently hit", hash_len, exception_hash); goto cleanup; } diff --git a/ext/live_debugger.c b/ext/live_debugger.c index c967b0b552..e758ce0cf2 100644 --- a/ext/live_debugger.c +++ b/ext/live_debugger.c @@ -154,7 +154,7 @@ static void dd_probe_resolved(void *data, bool found) { def->probe.status_msg = DDOG_CHARSLICE_C("Method does not exist on the given class"); def->probe.status_exception = DDOG_CHARSLICE_C("METHOD_NOT_FOUND"); } - ddog_send_debugger_diagnostics(DDTRACE_G(remote_config_state), &ddtrace_sidecar, ddtrace_sidecar_instance_id, DDTRACE_G(sidecar_queue_id), &def->probe, ddtrace_nanoseconds_realtime() / 1000000); + ddog_send_debugger_diagnostics(DDTRACE_G(remote_config_state), &DDTRACE_G(sidecar), ddtrace_sidecar_instance_id, DDTRACE_G(sidecar_queue_id), &def->probe, ddtrace_nanoseconds_realtime() / 1000000); } static int64_t dd_init_live_debugger_probe(const ddog_Probe *probe, dd_probe_def *def, zai_hook_begin begin, zai_hook_end end, void (*def_dtor)(void *), size_t dynamic) { @@ -205,14 +205,14 @@ static int64_t dd_init_live_debugger_probe(const ddog_Probe *probe, dd_probe_def def->probe.status_msg = DDOG_CHARSLICE_C("Method does not exist on 
the given class"); def->probe.status_exception = DDOG_CHARSLICE_C("METHOD_NOT_FOUND"); error: - ddog_send_debugger_diagnostics(DDTRACE_G(remote_config_state), &ddtrace_sidecar, ddtrace_sidecar_instance_id, DDTRACE_G(sidecar_queue_id), &def->probe, ddtrace_nanoseconds_realtime() / 1000000); + ddog_send_debugger_diagnostics(DDTRACE_G(remote_config_state), &DDTRACE_G(sidecar), ddtrace_sidecar_instance_id, DDTRACE_G(sidecar_queue_id), &def->probe, ddtrace_nanoseconds_realtime() / 1000000); def_dtor(def); return -1; } if (def->probe.status != DDOG_PROBE_STATUS_INSTALLED) { def->probe.status = DDOG_PROBE_STATUS_RECEIVED; - ddog_send_debugger_diagnostics(DDTRACE_G(remote_config_state), &ddtrace_sidecar, ddtrace_sidecar_instance_id, DDTRACE_G(sidecar_queue_id), &def->probe, ddtrace_nanoseconds_realtime() / 1000000); + ddog_send_debugger_diagnostics(DDTRACE_G(remote_config_state), &DDTRACE_G(sidecar), ddtrace_sidecar_instance_id, DDTRACE_G(sidecar_queue_id), &def->probe, ddtrace_nanoseconds_realtime() / 1000000); } zend_hash_index_add_new_ptr(&DDTRACE_G(active_rc_hooks), id, def); @@ -222,7 +222,7 @@ static int64_t dd_init_live_debugger_probe(const ddog_Probe *probe, dd_probe_def static void dd_probe_mark_active(dd_probe_def *def) { if (def->probe.status != DDOG_PROBE_STATUS_EMITTING) { def->probe.status = DDOG_PROBE_STATUS_EMITTING; - ddog_send_debugger_diagnostics(DDTRACE_G(remote_config_state), &ddtrace_sidecar, ddtrace_sidecar_instance_id, DDTRACE_G(sidecar_queue_id), &def->probe, ddtrace_nanoseconds_realtime() / 1000000); + ddog_send_debugger_diagnostics(DDTRACE_G(remote_config_state), &DDTRACE_G(sidecar), ddtrace_sidecar_instance_id, DDTRACE_G(sidecar_queue_id), &def->probe, ddtrace_nanoseconds_realtime() / 1000000); } } @@ -1654,8 +1654,8 @@ bool ddtrace_alter_dynamic_instrumentation_config(zval *old_value, zval *new_val ddog_set_dynamic_instrumentation_enabled(DDTRACE_G(remote_config_state), enabled); } - if (DDTRACE_G(request_initialized) && ddtrace_sidecar) { - 
ddog_sidecar_set_request_config(&ddtrace_sidecar, ddtrace_sidecar_instance_id, &DDTRACE_G(sidecar_queue_id), enabled ? DDOG_DYNAMIC_INSTRUMENTATION_CONFIG_STATE_ENABLED : DDOG_DYNAMIC_INSTRUMENTATION_CONFIG_STATE_DISABLED); + if (DDTRACE_G(request_initialized) && DDTRACE_G(sidecar)) { + ddog_sidecar_set_request_config(&DDTRACE_G(sidecar), ddtrace_sidecar_instance_id, &DDTRACE_G(sidecar_queue_id), enabled ? DDOG_DYNAMIC_INSTRUMENTATION_CONFIG_STATE_ENABLED : DDOG_DYNAMIC_INSTRUMENTATION_CONFIG_STATE_DISABLED); } return true; } diff --git a/ext/otel_config.c b/ext/otel_config.c index 43c08c0b46..c03fdd4290 100644 --- a/ext/otel_config.c +++ b/ext/otel_config.c @@ -9,8 +9,8 @@ ZEND_EXTERN_MODULE_GLOBALS(ddtrace); static void report_otel_cfg_telemetry_invalid(const char *otel_cfg, const char *dd_cfg, bool pre_rinit) { - if (!pre_rinit && ddtrace_sidecar && get_DD_INSTRUMENTATION_TELEMETRY_ENABLED()) { - ddog_sidecar_telemetry_register_metric(&ddtrace_sidecar, DDOG_CHARSLICE_C("otel.env.invalid"), DDOG_METRIC_TYPE_COUNT, DDOG_METRIC_NAMESPACE_TRACERS); + if (!pre_rinit && DDTRACE_G(sidecar) && get_DD_INSTRUMENTATION_TELEMETRY_ENABLED()) { + ddog_sidecar_telemetry_register_metric(&DDTRACE_G(sidecar), DDOG_CHARSLICE_C("otel.env.invalid"), DDOG_METRIC_TYPE_COUNT, DDOG_METRIC_NAMESPACE_TRACERS); ddog_SidecarActionsBuffer *buffer = ddtrace_telemetry_buffer(); ddog_CharSlice tags; tags.len = asprintf((char **)&tags.ptr, "config_opentelemetry:%s,config_datadog:%s", otel_cfg, dd_cfg); diff --git a/ext/serializer.c b/ext/serializer.c index 28ab4c9521..b4c6cddc79 100644 --- a/ext/serializer.c +++ b/ext/serializer.c @@ -1362,7 +1362,7 @@ ddog_SpanBytes *ddtrace_serialize_span_to_rust_span(ddtrace_span_data *span, ddo // Trace-level filter: when stats computation is enabled, drop the span from the // entire pipeline (trace sending + stats) if its trace is filtered. 
- if (ddtrace_sidecar && get_DD_TRACE_STATS_COMPUTATION_ENABLED()) { + if (DDTRACE_G(sidecar) && get_DD_TRACE_STATS_COMPUTATION_ENABLED()) { if (DDTRACE_G(agent_info_reader)) { ddog_apply_agent_info_concentrator_config(DDTRACE_G(agent_info_reader)); } @@ -1534,7 +1534,7 @@ ddog_SpanBytes *ddtrace_serialize_span_to_rust_span(ddtrace_span_data *span, ddo ZEND_HASH_FOREACH_END(); - if (!span_sampling_applied && ddtrace_sidecar && get_DD_TRACE_STATS_COMPUTATION_ENABLED() && ddog_is_agent_info_ready()) { + if (!span_sampling_applied && DDTRACE_G(sidecar) && get_DD_TRACE_STATS_COMPUTATION_ENABLED() && ddog_is_agent_info_ready()) { if (inferred_span) { // Inferred span won't be serialized, so feed it to the concentrator here. ddtrace_span_precomputed inferred_pre; @@ -1779,7 +1779,7 @@ ddog_SpanBytes *ddtrace_serialize_span_to_rust_span(ddtrace_span_data *span, ddo zend_string_release(Z_STR(prop_root_service_as_string)); zend_string_release(Z_STR(prop_service_as_string)); - if (ddtrace_sidecar && get_DD_TRACE_STATS_COMPUTATION_ENABLED() && ddog_is_agent_info_ready()) { + if (DDTRACE_G(sidecar) && get_DD_TRACE_STATS_COMPUTATION_ENABLED() && ddog_is_agent_info_ready()) { ddtrace_feed_span_to_concentrator(span, &pre); } @@ -1804,7 +1804,7 @@ ddog_SpanBytes *ddtrace_serialize_span_to_rust_span(ddtrace_span_data *span, ddo } } - if (ddtrace_sidecar && get_DD_TRACE_STATS_COMPUTATION_ENABLED() && !is_inferred_span) { + if (DDTRACE_G(sidecar) && get_DD_TRACE_STATS_COMPUTATION_ENABLED() && !is_inferred_span) { bool is_top_level_span = !span->parent; if (span->parent) { zval *parent_service = &SPANDATA(span->parent)->property_service; diff --git a/ext/sidecar.c b/ext/sidecar.c index eb4dc299b5..dc432ee097 100644 --- a/ext/sidecar.c +++ b/ext/sidecar.c @@ -29,6 +29,12 @@ ddog_Endpoint *dogstatsd_endpoint; // always set when ddtrace_endpoint is set struct ddog_InstanceId *ddtrace_sidecar_instance_id; static uint8_t dd_sidecar_formatted_session_id[36]; +// Best-effort pointer for the 
signal handler (SIGTERM/SIGINT). Set to the first +// per-thread connection; cleared on fork, on the owning thread's GSHUTDOWN, and at MSHUTDOWN. Not atomic: concurrent +// shutdown is already a best-effort race for signal handlers, so atomicity of +// the pointer load alone would not prevent the underlying use-after-free. +ddog_SidecarTransport *ddtrace_sidecar_for_signal = NULL; + // Connection mode tracking dd_sidecar_active_mode_t ddtrace_sidecar_active_mode = DD_SIDECAR_CONNECTION_NONE; int32_t ddtrace_sidecar_master_pid = 0; @@ -57,7 +63,7 @@ static void ddtrace_set_non_resettable_sidecar_globals(void) { } } -// Set the globals that must be updated in case of fork +// Build the process-level instance ID (one per PHP process, reset after fork). static void ddtrace_set_resettable_sidecar_globals(void) { uint8_t formatted_run_time_id[36]; ddtrace_format_runtime_id(&formatted_run_time_id); @@ -129,7 +135,7 @@ static void dd_sidecar_post_connect(ddog_SidecarTransport **transport, bool is_f } void ddtrace_sidecar_update_process_tags(void) { - if (!ddtrace_sidecar) { + if (!DDTRACE_G(sidecar)) { return; } @@ -138,7 +144,7 @@ void ddtrace_sidecar_update_process_tags(void) { return; } - ddog_sidecar_session_set_process_tags(&ddtrace_sidecar, process_tags); + ddog_sidecar_session_set_process_tags(&DDTRACE_G(sidecar), process_tags); } static ddog_SidecarTransport *dd_sidecar_connection_factory_ex(bool is_fork); @@ -158,46 +164,18 @@ static void dd_sidecar_on_reconnect(ddog_SidecarTransport *transport) { dd_sidecar_post_connect(&transport, false, logpath); - // update the sidecar connection on all threads on ZTS -#if ZTS - tsrm_mutex_lock(ddtrace_threads_mutex); - - void *TSRMLS_CACHE; // DDTRACE_G() accesses a variable named TSRMLS_CACHE. Make use of variable shadowing in scopes... 
- ZEND_HASH_FOREACH_PTR(&ddtrace_tls_bases, TSRMLS_CACHE) { -#endif - // We need the lock even on NTS as it might originate from the background sender - tsrm_mutex_lock(DDTRACE_G(sidecar_universal_service_tags_mutex)); - - // when we get disconnected during shutdown - if (DDTRACE_G(sidecar_queue_id) && DDTRACE_G(last_service_name)) { - ddog_CharSlice service_name = dd_zend_string_to_CharSlice(DDTRACE_G(last_service_name)); - ddog_CharSlice env_name = dd_zend_string_to_CharSlice(DDTRACE_G(last_env_name)); - ddog_CharSlice version = dd_zend_string_to_CharSlice(DDTRACE_G(last_version)); - - ddog_DynamicInstrumentationConfigState dynamic_instrumentation_state; -#if ZTS - // With the current architecture of config it's not accessible via the TSRMLS_CACHE and thus may be actually invalid on the current thread. - // This is a known issue and will be fixed with refactor of the module_globals usage of config. The current behaviour is not perfect, but has to be considered acceptable. - if (zai_config_memoized_entries[DDTRACE_CONFIG_DD_DYNAMIC_INSTRUMENTATION_ENABLED].name_index >= 0) { - dynamic_instrumentation_state = get_global_DD_DYNAMIC_INSTRUMENTATION_ENABLED() ? 
DDOG_DYNAMIC_INSTRUMENTATION_CONFIG_STATE_ENABLED : DDOG_DYNAMIC_INSTRUMENTATION_CONFIG_STATE_DISABLED; - } else { - dynamic_instrumentation_state = DDOG_DYNAMIC_INSTRUMENTATION_CONFIG_STATE_NOT_SET; - } -#else - dynamic_instrumentation_state = ddtrace_dynamic_instrumentation_state(); -#endif - ddtrace_ffi_try("Failed sending config data", - ddog_sidecar_set_universal_service_tags(&transport, ddtrace_sidecar_instance_id, &DDTRACE_G(sidecar_queue_id), service_name, - env_name, version, &DDTRACE_G(active_global_tags), dynamic_instrumentation_state)); - } - - tsrm_mutex_unlock(DDTRACE_G(sidecar_universal_service_tags_mutex)); -#if ZTS - } ZEND_HASH_FOREACH_END(); + tsrm_mutex_lock(DDTRACE_G(sidecar_universal_service_tags_mutex)); - tsrm_mutex_unlock(ddtrace_threads_mutex); -#endif + if (DDTRACE_G(sidecar_queue_id) && DDTRACE_G(last_service_name)) { + ddog_CharSlice service_name = dd_zend_string_to_CharSlice(DDTRACE_G(last_service_name)); + ddog_CharSlice env_name = dd_zend_string_to_CharSlice(DDTRACE_G(last_env_name)); + ddog_CharSlice version = dd_zend_string_to_CharSlice(DDTRACE_G(last_version)); + ddtrace_ffi_try("Failed sending config data", + ddog_sidecar_set_universal_service_tags(&transport, ddtrace_sidecar_instance_id, &DDTRACE_G(sidecar_queue_id), service_name, + env_name, version, &DDTRACE_G(active_global_tags), ddtrace_dynamic_instrumentation_state())); + } + tsrm_mutex_unlock(DDTRACE_G(sidecar_universal_service_tags_mutex)); } static ddog_SidecarTransport *dd_sidecar_connect(bool as_worker, bool is_fork) { @@ -277,8 +255,8 @@ static void ddtrace_sidecar_setup_thread_mode(bool appsec_activation, bool appse bool listener_available = ddog_sidecar_is_master_listener_active(ddtrace_sidecar_master_pid); if (is_child_process || listener_available) { - ddtrace_sidecar = dd_sidecar_connect(true, false); - if (ddtrace_sidecar) { + DDTRACE_G(sidecar) = dd_sidecar_connect(true, false); + if (DDTRACE_G(sidecar)) { if (is_child_process) { LOG(INFO, "Worker connected to 
sidecar master listener (worker PID=%d, master PID=%d)", (int32_t)current_pid, ddtrace_sidecar_master_pid); @@ -287,14 +265,10 @@ static void ddtrace_sidecar_setup_thread_mode(bool appsec_activation, bool appse } if (!is_child_process) { - // listener_available was true but connect failed (e.g. race: socket not yet bound) LOG(WARN, "Failed to connect to own master listener (PID=%d)", (int32_t)current_pid); return; } - // Worker processes must not start their own listener thread - the master listener - // must be started in MINIT (in the master process) so it survives forking. - // If we can't connect, run without the sidecar rather than starting a per-worker thread. LOG(WARN, "Cannot connect to master sidecar listener from worker (child PID=%d, master PID=%d)", (int32_t)current_pid, ddtrace_sidecar_master_pid); return; @@ -310,8 +284,8 @@ static void ddtrace_sidecar_setup_thread_mode(bool appsec_activation, bool appse LOG(INFO, "Started sidecar master listener thread (PID=%d)", ddtrace_sidecar_master_pid); - ddtrace_sidecar = dd_sidecar_connect(true, false); - if (!ddtrace_sidecar) { + DDTRACE_G(sidecar) = dd_sidecar_connect(true, false); + if (!DDTRACE_G(sidecar)) { LOG(WARN, "Failed to connect master process to sidecar"); return; } @@ -418,23 +392,24 @@ void ddtrace_sidecar_setup(bool appsec_activation, bool appsec_config) { if (mode == DD_TRACE_SIDECAR_CONNECTION_MODE_THREAD) { ddtrace_sidecar_setup_thread_mode(appsec_activation, appsec_config); - return; - } - - ddtrace_sidecar = dd_sidecar_connect(false, false); + } else { + DDTRACE_G(sidecar) = dd_sidecar_connect(false, false); - if (!ddtrace_sidecar) { - if (mode == DD_TRACE_SIDECAR_CONNECTION_MODE_AUTO && ddtrace_endpoint) { - LOG(WARN, "Subprocess connection failed, falling back to thread mode"); - ddtrace_sidecar_setup_thread_mode(appsec_activation, appsec_config); - return; + if (!DDTRACE_G(sidecar)) { + if (mode == DD_TRACE_SIDECAR_CONNECTION_MODE_AUTO && ddtrace_endpoint) { + LOG(WARN, "Subprocess 
connection failed, falling back to thread mode"); + ddtrace_sidecar_setup_thread_mode(appsec_activation, appsec_config); + } else if (ddtrace_endpoint) { + dd_free_endpoints(); + } + } else if (get_global_DD_INSTRUMENTATION_TELEMETRY_ENABLED()) { + ddtrace_telemetry_first_init(); } + } - if (ddtrace_endpoint) { - dd_free_endpoints(); - } - } else if (get_global_DD_INSTRUMENTATION_TELEMETRY_ENABLED()) { - ddtrace_telemetry_first_init(); + // Record the first established connection for best-effort signal-handler use. + if (DDTRACE_G(sidecar) && !ddtrace_sidecar_for_signal) { + ddtrace_sidecar_for_signal = DDTRACE_G(sidecar); } } @@ -465,44 +440,44 @@ void ddtrace_sidecar_handle_fork(void) { ddtrace_force_new_instance_id(); - if (ddtrace_sidecar) { - ddog_sidecar_transport_drop(ddtrace_sidecar); - ddtrace_sidecar = NULL; + // After fork only one thread (the one that called fork) survives, so we only + // need to drop and reconnect the current thread's transport. + if (DDTRACE_G(sidecar)) { + ddog_sidecar_transport_drop(DDTRACE_G(sidecar)); + DDTRACE_G(sidecar) = NULL; } + ddtrace_sidecar_for_signal = NULL; if (ddtrace_sidecar_active_mode == DD_SIDECAR_CONNECTION_THREAD) { ddtrace_ffi_try("Failed clearing inherited listener state", ddog_sidecar_clear_inherited_listener()); - // Try to connect as a worker to parent's listener - ddtrace_sidecar = dd_sidecar_connect(true, true); - if (ddtrace_sidecar) { + DDTRACE_G(sidecar) = dd_sidecar_connect(true, true); + if (DDTRACE_G(sidecar)) { LOG(INFO, "Child process reconnected to parent's sidecar listener after fork (child PID=%d, parent=%d)", (int32_t)getpid(), ddtrace_sidecar_master_pid); - return; - } - - // Parent's listener not available, fall back to starting a new master in this process - LOG(INFO, "Parent's sidecar listener not available after fork (child PID=%d, parent=%d), starting new master", - (int32_t)getpid(), ddtrace_sidecar_master_pid); + } else { + LOG(INFO, "Parent's sidecar listener not available after fork 
(child PID=%d, parent=%d), starting new master", + (int32_t)getpid(), ddtrace_sidecar_master_pid); - ddtrace_sidecar_master_pid = (int32_t)getpid(); - if (!ddtrace_ffi_try("Failed starting sidecar master listener in child process", - ddog_sidecar_connect_master((int32_t)ddtrace_sidecar_master_pid))) { - if (ddtrace_endpoint) { - dd_free_endpoints(); + ddtrace_sidecar_master_pid = (int32_t)getpid(); + if (!ddtrace_ffi_try("Failed starting sidecar master listener in child process", + ddog_sidecar_connect_master((int32_t)ddtrace_sidecar_master_pid))) { + if (ddtrace_endpoint) { + dd_free_endpoints(); + } + return; } - return; - } - ddtrace_sidecar = dd_sidecar_connect(true, false); - if (!ddtrace_sidecar) { - LOG(WARN, "Failed to connect to new sidecar master in child process (PID=%d)", - (int32_t)getpid()); + DDTRACE_G(sidecar) = dd_sidecar_connect(true, false); + if (!DDTRACE_G(sidecar)) { + LOG(WARN, "Failed to connect to new sidecar master in child process (PID=%d)", + (int32_t)getpid()); + } } } else if (ddtrace_sidecar_active_mode == DD_SIDECAR_CONNECTION_SUBPROCESS) { - ddtrace_sidecar = ddtrace_sidecar_connect(true); - if (!ddtrace_sidecar) { + DDTRACE_G(sidecar) = ddtrace_sidecar_connect(true); + if (!DDTRACE_G(sidecar)) { if (ddtrace_endpoint) { dd_free_endpoints(); } @@ -510,17 +485,28 @@ void ddtrace_sidecar_handle_fork(void) { ddtrace_sidecar_submit_root_span_data(); } } + + if (DDTRACE_G(sidecar)) { + ddtrace_sidecar_for_signal = DDTRACE_G(sidecar); + } #endif } void ddtrace_sidecar_ensure_active(void) { - if (ddtrace_sidecar) { - ddtrace_sidecar_reconnect(&ddtrace_sidecar, ddtrace_sidecar_connect_callback); + if (DDTRACE_G(sidecar)) { + ddtrace_sidecar_reconnect(&DDTRACE_G(sidecar), ddtrace_sidecar_connect_callback); + } else if (ddtrace_endpoint) { + // First RINIT on this thread: the process-level setup already ran (endpoint is + // set), so establish this thread's own connection now. 
+ DDTRACE_G(sidecar) = ddtrace_sidecar_connect(false); + if (DDTRACE_G(sidecar) && !ddtrace_sidecar_for_signal) { + ddtrace_sidecar_for_signal = DDTRACE_G(sidecar); + } } } void ddtrace_sidecar_finalize(bool clear_id) { - if (!ddtrace_sidecar) { + if (!DDTRACE_G(sidecar)) { return; } @@ -535,12 +521,17 @@ void ddtrace_sidecar_finalize(bool clear_id) { if (clear_id) { ddtrace_ffi_try("Failed removing application from sidecar", - ddog_sidecar_application_remove(&ddtrace_sidecar, ddtrace_sidecar_instance_id, &queue_id)); + ddog_sidecar_application_remove(&DDTRACE_G(sidecar), ddtrace_sidecar_instance_id, &queue_id)); } } void ddtrace_sidecar_shutdown(void) { - // Shutdown master listener if this is the master process and thread mode is active + ddtrace_sidecar_for_signal = NULL; + + // In thread mode, drop the main thread's connection before shutting down the + // listener to avoid deadlock. GSHUTDOWN owns transport cleanup for all other + // threads; the main thread's GSHUTDOWN runs after MSHUTDOWN on some SAPIs, + // so we handle it here explicitly for the thread-mode case. #ifdef _WIN32 int32_t current_pid = (int32_t)GetCurrentProcessId(); #else @@ -550,18 +541,16 @@ void ddtrace_sidecar_shutdown(void) { ddtrace_sidecar_master_pid != 0 && current_pid == ddtrace_sidecar_master_pid) { - // Close worker connection first to avoid deadlock - if (ddtrace_sidecar) { - ddog_sidecar_transport_drop(ddtrace_sidecar); - ddtrace_sidecar = NULL; + if (DDTRACE_G(sidecar)) { + ddog_sidecar_transport_drop(DDTRACE_G(sidecar)); + DDTRACE_G(sidecar) = NULL; } - // Then shutdown listener thread ddtrace_ffi_try("Failed shutting down master listener", ddog_sidecar_shutdown_master_listener()); } - // Standard cleanup + // Process-level instance ID (dropped once at MSHUTDOWN, not per-thread). 
if (ddtrace_sidecar_instance_id) { ddog_sidecar_instanceId_drop(ddtrace_sidecar_instance_id); ddtrace_sidecar_instance_id = NULL; @@ -571,12 +560,6 @@ void ddtrace_sidecar_shutdown(void) { dd_free_endpoints(); } - if (ddtrace_sidecar) { - ddog_sidecar_transport_drop(ddtrace_sidecar); - ddtrace_sidecar = NULL; - } - - // Reset mode ddtrace_sidecar_active_mode = DD_SIDECAR_CONNECTION_NONE; } @@ -671,62 +654,62 @@ void ddtrace_sidecar_push_tags(ddog_Vec_Tag *vec, zval *tags) { } void ddtrace_sidecar_dogstatsd_count(zend_string *metric, zend_long value, zval *tags) { - if (!ddtrace_sidecar || !get_DD_INTEGRATION_METRICS_ENABLED()) { + if (!DDTRACE_G(sidecar) || !get_DD_INTEGRATION_METRICS_ENABLED()) { return; } ddog_Vec_Tag vec = ddog_Vec_Tag_new(); ddtrace_sidecar_push_tags(&vec, tags); ddtrace_ffi_try("Failed sending dogstatsd count metric", - ddog_sidecar_dogstatsd_count(&ddtrace_sidecar, ddtrace_sidecar_instance_id, dd_zend_string_to_CharSlice(metric), value, &vec)); + ddog_sidecar_dogstatsd_count(&DDTRACE_G(sidecar), ddtrace_sidecar_instance_id, dd_zend_string_to_CharSlice(metric), value, &vec)); ddog_Vec_Tag_drop(vec); } void ddtrace_sidecar_dogstatsd_distribution(zend_string *metric, double value, zval *tags) { - if (!ddtrace_sidecar || !get_DD_INTEGRATION_METRICS_ENABLED()) { + if (!DDTRACE_G(sidecar) || !get_DD_INTEGRATION_METRICS_ENABLED()) { return; } ddog_Vec_Tag vec = ddog_Vec_Tag_new(); ddtrace_sidecar_push_tags(&vec, tags); ddtrace_ffi_try("Failed sending dogstatsd distribution metric", - ddog_sidecar_dogstatsd_distribution(&ddtrace_sidecar, ddtrace_sidecar_instance_id, dd_zend_string_to_CharSlice(metric), value, &vec)); + ddog_sidecar_dogstatsd_distribution(&DDTRACE_G(sidecar), ddtrace_sidecar_instance_id, dd_zend_string_to_CharSlice(metric), value, &vec)); ddog_Vec_Tag_drop(vec); } void ddtrace_sidecar_dogstatsd_gauge(zend_string *metric, double value, zval *tags) { - if (!ddtrace_sidecar || !get_DD_INTEGRATION_METRICS_ENABLED()) { + if 
(!DDTRACE_G(sidecar) || !get_DD_INTEGRATION_METRICS_ENABLED()) { return; } ddog_Vec_Tag vec = ddog_Vec_Tag_new(); ddtrace_sidecar_push_tags(&vec, tags); ddtrace_ffi_try("Failed sending dogstatsd gauge metric", - ddog_sidecar_dogstatsd_gauge(&ddtrace_sidecar, ddtrace_sidecar_instance_id, dd_zend_string_to_CharSlice(metric), value, &vec)); + ddog_sidecar_dogstatsd_gauge(&DDTRACE_G(sidecar), ddtrace_sidecar_instance_id, dd_zend_string_to_CharSlice(metric), value, &vec)); ddog_Vec_Tag_drop(vec); } void ddtrace_sidecar_dogstatsd_histogram(zend_string *metric, double value, zval *tags) { - if (!ddtrace_sidecar || !get_DD_INTEGRATION_METRICS_ENABLED()) { + if (!DDTRACE_G(sidecar) || !get_DD_INTEGRATION_METRICS_ENABLED()) { return; } ddog_Vec_Tag vec = ddog_Vec_Tag_new(); ddtrace_sidecar_push_tags(&vec, tags); ddtrace_ffi_try("Failed sending dogstatsd histogram metric", - ddog_sidecar_dogstatsd_histogram(&ddtrace_sidecar, ddtrace_sidecar_instance_id, dd_zend_string_to_CharSlice(metric), value, &vec)); + ddog_sidecar_dogstatsd_histogram(&DDTRACE_G(sidecar), ddtrace_sidecar_instance_id, dd_zend_string_to_CharSlice(metric), value, &vec)); ddog_Vec_Tag_drop(vec); } void ddtrace_sidecar_dogstatsd_set(zend_string *metric, zend_long value, zval *tags) { - if (!ddtrace_sidecar || !get_DD_INTEGRATION_METRICS_ENABLED()) { + if (!DDTRACE_G(sidecar) || !get_DD_INTEGRATION_METRICS_ENABLED()) { return; } ddog_Vec_Tag vec = ddog_Vec_Tag_new(); ddtrace_sidecar_push_tags(&vec, tags); ddtrace_ffi_try("Failed sending dogstatsd set metric", - ddog_sidecar_dogstatsd_set(&ddtrace_sidecar, ddtrace_sidecar_instance_id, dd_zend_string_to_CharSlice(metric), value, &vec)); + ddog_sidecar_dogstatsd_set(&DDTRACE_G(sidecar), ddtrace_sidecar_instance_id, dd_zend_string_to_CharSlice(metric), value, &vec)); ddog_Vec_Tag_drop(vec); } @@ -801,7 +784,7 @@ void ddtrace_sidecar_submit_root_span_data_direct(ddog_SidecarTransport **transp } // Force resend on reconnect - if (changed || !root || *transport != 
ddtrace_sidecar) { + if (changed || !root || *transport != DDTRACE_G(sidecar)) { tsrm_mutex_lock(DDTRACE_G(sidecar_universal_service_tags_mutex)); if (DDTRACE_G(last_service_name)) { zend_string_release(DDTRACE_G(last_service_name)); @@ -836,19 +819,19 @@ void ddtrace_sidecar_submit_root_span_data(void) { if (DDTRACE_G(active_stack)) { ddtrace_root_span_data *root = DDTRACE_G(active_stack)->root_span; if (root) { - ddtrace_sidecar_submit_root_span_data_direct_defaults(&ddtrace_sidecar, root); + ddtrace_sidecar_submit_root_span_data_direct_defaults(&DDTRACE_G(sidecar), root); } } } void ddtrace_sidecar_send_debugger_data(ddog_Vec_DebuggerPayload payloads) { LOGEV(DEBUG, UNUSED(log); ddog_log_debugger_data(&payloads);); - ddog_sidecar_send_debugger_data(&ddtrace_sidecar, ddtrace_sidecar_instance_id, DDTRACE_G(sidecar_queue_id), payloads); + ddog_sidecar_send_debugger_data(&DDTRACE_G(sidecar), ddtrace_sidecar_instance_id, DDTRACE_G(sidecar_queue_id), payloads); } void ddtrace_sidecar_send_debugger_datum(ddog_DebuggerPayload *payload) { LOGEV(DEBUG, UNUSED(log); ddog_log_debugger_datum(payload);); - ddog_sidecar_send_debugger_datum(&ddtrace_sidecar, ddtrace_sidecar_instance_id, DDTRACE_G(sidecar_queue_id), payload); + ddog_sidecar_send_debugger_datum(&DDTRACE_G(sidecar), ddtrace_sidecar_instance_id, DDTRACE_G(sidecar_queue_id), payload); } void ddtrace_sidecar_activate(void) { @@ -881,19 +864,32 @@ void ddtrace_sidecar_rinit(void) { } } - ddtrace_sidecar_submit_root_span_data_direct_defaults(&ddtrace_sidecar, NULL); + ddtrace_sidecar_submit_root_span_data_direct_defaults(&DDTRACE_G(sidecar), NULL); } void ddtrace_sidecar_rshutdown(void) { ddog_Vec_Tag_drop(DDTRACE_G(active_global_tags)); } +void ddtrace_sidecar_gshutdown(void) { + if (DDTRACE_G(sidecar)) { + if (DDTRACE_G(sidecar) == ddtrace_sidecar_for_signal) { + ddtrace_sidecar_for_signal = NULL; + } + + // Drain any accumulated background-sender metrics before the transport goes away. 
+ ddtrace_telemetry_flush_bgs_metrics_final(); + ddog_sidecar_transport_drop(DDTRACE_G(sidecar)); + DDTRACE_G(sidecar) = NULL; + } +} + bool ddtrace_alter_test_session_token(zval *old_value, zval *new_value, zend_string *new_str) { UNUSED(old_value, new_str); - if (ddtrace_sidecar) { + if (DDTRACE_G(sidecar)) { ddog_endpoint_set_test_token(ddtrace_endpoint, dd_zend_string_to_CharSlice(Z_STR_P(new_value))); ddtrace_ffi_try("Failed updating test session token", - ddog_sidecar_set_test_session_token(&ddtrace_sidecar, dd_zend_string_to_CharSlice(Z_STR_P(new_value)))); + ddog_sidecar_set_test_session_token(&DDTRACE_G(sidecar), dd_zend_string_to_CharSlice(Z_STR_P(new_value)))); } #ifndef _WIN32 ddtrace_coms_set_test_session_token(Z_STRVAL_P(new_value), Z_STRLEN_P(new_value)); @@ -902,7 +898,7 @@ bool ddtrace_alter_test_session_token(zval *old_value, zval *new_value, zend_str } bool ddtrace_exception_debugging_is_active(void) { - return ddtrace_sidecar && ddtrace_sidecar_instance_id && get_DD_EXCEPTION_REPLAY_ENABLED(); + return DDTRACE_G(sidecar) && ddtrace_sidecar_instance_id && get_DD_EXCEPTION_REPLAY_ENABLED(); } ddog_crasht_Metadata ddtrace_setup_crashtracking_metadata(ddog_Vec_Tag *tags) { diff --git a/ext/sidecar.h b/ext/sidecar.h index b18bea433e..610f1d5e77 100644 --- a/ext/sidecar.h +++ b/ext/sidecar.h @@ -14,9 +14,13 @@ typedef enum { DD_SIDECAR_CONNECTION_THREAD = 2 } dd_sidecar_active_mode_t; -extern ddog_SidecarTransport *ddtrace_sidecar; -extern ddog_Endpoint *ddtrace_endpoint; +// ddtrace_sidecar_instance_id is a process global — one identity per PHP process. extern struct ddog_InstanceId *ddtrace_sidecar_instance_id; +// Best-effort pointer for the signal handler (SIGTERM/SIGINT), which cannot call TSRMLS_FETCH() safely; also read by dd_agent_headers_alloc() in coms.c. +// Set by the first thread that connects while it is NULL; reset to NULL when that thread's transport is dropped in ddtrace_sidecar_gshutdown(), and in ddtrace_sidecar_shutdown(). +// Not atomic: concurrent shutdown is a pre-existing best-effort race for signal handlers.
+extern ddog_SidecarTransport *ddtrace_sidecar_for_signal; +extern ddog_Endpoint *ddtrace_endpoint; extern dd_sidecar_active_mode_t ddtrace_sidecar_active_mode; extern int32_t ddtrace_sidecar_master_pid; @@ -56,6 +60,7 @@ void ddtrace_sidecar_send_debugger_datum(ddog_DebuggerPayload *payload); void ddtrace_sidecar_activate(void); void ddtrace_sidecar_rinit(void); void ddtrace_sidecar_rshutdown(void); +void ddtrace_sidecar_gshutdown(void); void ddtrace_sidecar_dogstatsd_count(zend_string *metric, zend_long value, zval *tags); void ddtrace_sidecar_dogstatsd_distribution(zend_string *metric, double value, zval *tags); diff --git a/ext/signals.c b/ext/signals.c index 0373a696d5..09cf043055 100644 --- a/ext/signals.c +++ b/ext/signals.c @@ -363,7 +363,7 @@ static int dd_call_prev_handler(bool flush) { } if (flush) { - ddog_sidecar_flush_traces(&ddtrace_sidecar); + ddog_sidecar_flush_traces(&ddtrace_sidecar_for_signal); } if (prev_handler == SIG_DFL) { @@ -403,7 +403,7 @@ static void dd_sigint_sigterm_handler(int sig, siginfo_t *si, void *uc) { memcpy(&dd_signal_data.si, si, sizeof(*si)); dd_signal_data.uc = uc; - if (ddtrace_sidecar) { + if (ddtrace_sidecar_for_signal) { // Spawn a thread using clone() to perform sidecar cleanup asynchronously to avoid async unsafeness in the signal handler void *stack_top = dd_signal_async_stack + dd_signal_async_stack_size; int flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD; diff --git a/ext/span_stats.c b/ext/span_stats.c index 0344992d41..e3290aff41 100644 --- a/ext/span_stats.c +++ b/ext/span_stats.c @@ -397,7 +397,7 @@ void ddtrace_feed_span_to_concentrator(ddtrace_span_data *span, const ddtrace_sp ddtrace_concentrator_cb_data data = { .span = span, .pre = pre, .needs_ipc = false }; ddog_span_concentrator_with(env_slice, version_slice, service_slice, ddtrace_span_concentrator_feed_cb, &data); - if (data.needs_ipc && ddtrace_sidecar) { - ddog_sidecar_add_php_span_to_concentrator(&ddtrace_sidecar, 
env_slice, version_slice, &data.ipc_stats); + if (data.needs_ipc && DDTRACE_G(sidecar)) { + ddog_sidecar_add_php_span_to_concentrator(&DDTRACE_G(sidecar), env_slice, version_slice, &data.ipc_stats); } } diff --git a/ext/telemetry.c b/ext/telemetry.c index ba2f3cdd08..4bc4f0ee41 100644 --- a/ext/telemetry.c +++ b/ext/telemetry.c @@ -1,5 +1,10 @@ #include "components-rs/sidecar.h" #include "ddtrace.h" +#ifndef _WIN32 +#include +#else +#include +#endif #include "configuration.h" #include "integrations/integrations.h" #include @@ -76,8 +81,8 @@ static bool dd_check_for_composer_autoloader(zend_ulong invocation, zend_execute UNUSED(invocation, auxiliary, dynamic); ddog_CharSlice composer_path = dd_zend_string_to_CharSlice(execute_data->func->op_array.filename); - if (!ddtrace_sidecar // if sidecar connection was broken, let's skip immediately - || ddtrace_detect_composer_installed_json(&ddtrace_sidecar, ddtrace_sidecar_instance_id, &DDTRACE_G(sidecar_queue_id), composer_path)) { + if (!DDTRACE_G(sidecar) // if sidecar connection was broken, let's skip immediately + || ddtrace_detect_composer_installed_json(&DDTRACE_G(sidecar), ddtrace_sidecar_instance_id, &DDTRACE_G(sidecar_queue_id), composer_path)) { zai_hook_remove((zai_str)ZAI_STR_EMPTY, (zai_str)ZAI_STR_EMPTY, dd_composer_hook_id); } return true; @@ -119,12 +124,12 @@ void ddtrace_telemetry_register_services(ddog_SidecarTransport **sidecar) { } void ddtrace_telemetry_lifecycle_end() { - if (!ddtrace_sidecar || !get_global_DD_INSTRUMENTATION_TELEMETRY_ENABLED()) { + if (!DDTRACE_G(sidecar) || !get_global_DD_INSTRUMENTATION_TELEMETRY_ENABLED()) { return; } ddtrace_ffi_try("Failed ending sidecar lifecycle", - ddog_sidecar_lifecycle_end(&ddtrace_sidecar, ddtrace_sidecar_instance_id, &DDTRACE_G(sidecar_queue_id))); + ddog_sidecar_lifecycle_end(&DDTRACE_G(sidecar), ddtrace_sidecar_instance_id, &DDTRACE_G(sidecar_queue_id))); } void ddtrace_telemetry_finalize() { @@ -219,7 +224,7 @@ void ddtrace_telemetry_finalize() { // 
Telemetry metrics ddog_CharSlice metric_name = DDOG_CHARSLICE_C("spans_created"); - ddog_sidecar_telemetry_register_metric(&ddtrace_sidecar, metric_name, DDOG_METRIC_TYPE_COUNT, DDOG_METRIC_NAMESPACE_TRACERS); + ddog_sidecar_telemetry_register_metric(&DDTRACE_G(sidecar), metric_name, DDOG_METRIC_TYPE_COUNT, DDOG_METRIC_NAMESPACE_TRACERS); zend_string *integration_name; zval *metric_value; ZEND_HASH_FOREACH_STR_KEY_VAL(&DDTRACE_G(telemetry_spans_created_per_integration), integration_name, metric_value) { @@ -228,18 +233,18 @@ void ddtrace_telemetry_finalize() { zai_string_destroy(&tags); } ZEND_HASH_FOREACH_END(); - ddog_sidecar_telemetry_register_metric(&ddtrace_sidecar, DDOG_CHARSLICE_C("context_header_style.extracted"), DDOG_METRIC_TYPE_COUNT, DDOG_METRIC_NAMESPACE_TRACERS); + ddog_sidecar_telemetry_register_metric(&DDTRACE_G(sidecar), DDOG_CHARSLICE_C("context_header_style.extracted"), DDOG_METRIC_TYPE_COUNT, DDOG_METRIC_NAMESPACE_TRACERS); ddog_sidecar_telemetry_add_span_metric_point_buffer(buffer, DDOG_CHARSLICE_C("context_header_style.extracted"), DDTRACE_G(baggage_extract_count), DDOG_CHARSLICE_C("header_style:baggage")); - ddog_sidecar_telemetry_register_metric(&ddtrace_sidecar, DDOG_CHARSLICE_C("context_header_style.injected"), DDOG_METRIC_TYPE_COUNT, DDOG_METRIC_NAMESPACE_TRACERS); + ddog_sidecar_telemetry_register_metric(&DDTRACE_G(sidecar), DDOG_CHARSLICE_C("context_header_style.injected"), DDOG_METRIC_TYPE_COUNT, DDOG_METRIC_NAMESPACE_TRACERS); ddog_sidecar_telemetry_add_span_metric_point_buffer(buffer, DDOG_CHARSLICE_C("context_header_style.injected"), DDTRACE_G(baggage_inject_count), DDOG_CHARSLICE_C("header_style:baggage")); - ddog_sidecar_telemetry_register_metric(&ddtrace_sidecar, DDOG_CHARSLICE_C("context_header.truncated"), DDOG_METRIC_TYPE_COUNT, DDOG_METRIC_NAMESPACE_TRACERS); + ddog_sidecar_telemetry_register_metric(&DDTRACE_G(sidecar), DDOG_CHARSLICE_C("context_header.truncated"), DDOG_METRIC_TYPE_COUNT, DDOG_METRIC_NAMESPACE_TRACERS); 
ddog_sidecar_telemetry_add_span_metric_point_buffer(buffer, DDOG_CHARSLICE_C("context_header.truncated"), DDTRACE_G(baggage_max_item_count), DDOG_CHARSLICE_C("truncation_reason:baggage_byte_item_exceeded")); ddog_sidecar_telemetry_add_span_metric_point_buffer(buffer, DDOG_CHARSLICE_C("context_header.truncated"), DDTRACE_G(baggage_max_byte_count), DDOG_CHARSLICE_C("truncation_reason:baggage_byte_count_exceeded")); - ddog_sidecar_telemetry_register_metric(&ddtrace_sidecar, DDOG_CHARSLICE_C("context_header_style.malformed"), DDOG_METRIC_TYPE_COUNT, DDOG_METRIC_NAMESPACE_TRACERS); + ddog_sidecar_telemetry_register_metric(&DDTRACE_G(sidecar), DDOG_CHARSLICE_C("context_header_style.malformed"), DDOG_METRIC_TYPE_COUNT, DDOG_METRIC_NAMESPACE_TRACERS); ddog_sidecar_telemetry_add_span_metric_point_buffer(buffer, DDOG_CHARSLICE_C("context_header_style.malformed"), DDTRACE_G(baggage_malformed_count), DDOG_CHARSLICE_C("header_style:baggage")); metric_name = DDOG_CHARSLICE_C("logs_created"); - ddog_sidecar_telemetry_register_metric(&ddtrace_sidecar, metric_name, DDOG_METRIC_TYPE_COUNT, DDOG_METRIC_NAMESPACE_GENERAL); + ddog_sidecar_telemetry_register_metric(&DDTRACE_G(sidecar), metric_name, DDOG_METRIC_TYPE_COUNT, DDOG_METRIC_NAMESPACE_GENERAL); static struct { ddog_CharSlice level; ddog_CharSlice tags; @@ -260,10 +265,12 @@ void ddtrace_telemetry_finalize() { dd_commit_metrics(); ddtrace_ffi_try("Failed flushing filtered telemetry buffer", - ddog_sidecar_telemetry_filter_flush(&ddtrace_sidecar, ddtrace_sidecar_instance_id, &DDTRACE_G(sidecar_queue_id), buffer, ddtrace_telemetry_cache(), service_name, env_name)); + ddog_sidecar_telemetry_filter_flush(&DDTRACE_G(sidecar), ddtrace_sidecar_instance_id, &DDTRACE_G(sidecar_queue_id), buffer, ddtrace_telemetry_cache(), service_name, env_name)); ddog_sidecar_telemetry_buffer_drop(buffer); + // Flush any accumulated BGS (background sender) metrics if enough time has passed. 
+ ddtrace_telemetry_flush_bgs_metrics_if_due(); } void ddtrace_telemetry_notify_integration(const char *name, size_t name_len) { @@ -271,7 +278,7 @@ void ddtrace_telemetry_notify_integration(const char *name, size_t name_len) { } void ddtrace_telemetry_notify_integration_version(const char *name, size_t name_len, const char *version, size_t version_len) { - if (ddtrace_sidecar && get_global_DD_INSTRUMENTATION_TELEMETRY_ENABLED()) { + if (DDTRACE_G(sidecar) && get_global_DD_INSTRUMENTATION_TELEMETRY_ENABLED()) { ddog_CharSlice integration = (ddog_CharSlice) {.len = name_len, .ptr = name}; ddog_CharSlice ver = (ddog_CharSlice) {.len = version_len, .ptr = version}; ddog_sidecar_telemetry_addIntegration_buffer(ddtrace_telemetry_buffer(), integration, ver, true); @@ -308,74 +315,126 @@ void ddtrace_telemetry_inc_spans_created(ddtrace_span_data *span) { zend_string_release(integration); } +// Process-global atomic accumulators for background-sender metrics. +// Written by the BGS thread (coms.c) without any lock; drained by a PHP request +// thread in ddtrace_telemetry_flush_bgs_metrics_if_due(). +static _Atomic(int) bgs_metric_requests = 0; +static _Atomic(int) bgs_metric_responses_1xx = 0; +static _Atomic(int) bgs_metric_responses_2xx = 0; +static _Atomic(int) bgs_metric_responses_3xx = 0; +static _Atomic(int) bgs_metric_responses_4xx = 0; +static _Atomic(int) bgs_metric_responses_5xx = 0; +static _Atomic(int) bgs_metric_errors_timeout = 0; +static _Atomic(int) bgs_metric_errors_network = 0; +static _Atomic(int) bgs_metric_errors_status_code = 0; +// Timestamp (nanoseconds) of the last flush; used to rate-limit to one flush per interval. +static _Atomic(uint64_t) bgs_metrics_last_flush_ns = 0; + void ddtrace_telemetry_send_trace_api_metrics(trace_api_metrics metrics) { - if (!ddtrace_sidecar || !get_global_DD_INSTRUMENTATION_TELEMETRY_ENABLED()) { + // Pure atomic accumulation — never touches the sidecar. 
+ if (!metrics.requests) { return; } + atomic_fetch_add(&bgs_metric_requests, metrics.requests); + atomic_fetch_add(&bgs_metric_responses_1xx, metrics.responses_1xx); + atomic_fetch_add(&bgs_metric_responses_2xx, metrics.responses_2xx); + atomic_fetch_add(&bgs_metric_responses_3xx, metrics.responses_3xx); + atomic_fetch_add(&bgs_metric_responses_4xx, metrics.responses_4xx); + atomic_fetch_add(&bgs_metric_responses_5xx, metrics.responses_5xx); + atomic_fetch_add(&bgs_metric_errors_timeout, metrics.errors_timeout); + atomic_fetch_add(&bgs_metric_errors_network, metrics.errors_network); + atomic_fetch_add(&bgs_metric_errors_status_code, metrics.errors_status_code); +} - if (!metrics.requests) { +void ddtrace_telemetry_flush_bgs_metrics_if_due(void) { + if (!DDTRACE_G(sidecar) || !get_global_DD_INSTRUMENTATION_TELEMETRY_ENABLED()) { + return; + } + + // Rate-limit: flush at most once per agent flush interval. + uint64_t now_ns = ddtrace_nanoseconds_realtime(); + uint64_t last = atomic_load(&bgs_metrics_last_flush_ns); + uint64_t interval_ns = (uint64_t)get_global_DD_TRACE_AGENT_FLUSH_INTERVAL() * 1000000ULL; + if (now_ns - last < interval_ns) { + return; + } + // CAS ensures only one thread flushes per interval. 
+ if (!atomic_compare_exchange_strong(&bgs_metrics_last_flush_ns, &last, now_ns)) { + return; + } + + int requests = atomic_exchange(&bgs_metric_requests, 0); + if (!requests) { return; } ddog_SidecarActionsBuffer *buffer = ddog_sidecar_telemetry_buffer_alloc(); - ddog_sidecar_telemetry_add_span_metric_point_buffer(buffer, DDOG_CHARSLICE_C("trace_api.requests"), (double)metrics.requests, DDOG_CHARSLICE_C("")); + ddog_sidecar_telemetry_add_span_metric_point_buffer(buffer, DDOG_CHARSLICE_C("trace_api.requests"), requests, DDOG_CHARSLICE_C("")); - if (metrics.responses_1xx) { - ddog_sidecar_telemetry_add_span_metric_point_buffer(buffer, DDOG_CHARSLICE_C("trace_api.responses"), (double)metrics.responses_1xx, DDOG_CHARSLICE_C("status_code:1xx")); + int v; + if ((v = atomic_exchange(&bgs_metric_responses_1xx, 0))) { + ddog_sidecar_telemetry_add_span_metric_point_buffer(buffer, DDOG_CHARSLICE_C("trace_api.responses"), v, DDOG_CHARSLICE_C("status_code:1xx")); } - if (metrics.responses_2xx) { - ddog_sidecar_telemetry_add_span_metric_point_buffer(buffer, DDOG_CHARSLICE_C("trace_api.responses"), (double)metrics.responses_2xx, DDOG_CHARSLICE_C("status_code:2xx")); + if ((v = atomic_exchange(&bgs_metric_responses_2xx, 0))) { + ddog_sidecar_telemetry_add_span_metric_point_buffer(buffer, DDOG_CHARSLICE_C("trace_api.responses"), v, DDOG_CHARSLICE_C("status_code:2xx")); } - if (metrics.responses_3xx) { - ddog_sidecar_telemetry_add_span_metric_point_buffer(buffer, DDOG_CHARSLICE_C("trace_api.responses"), (double)metrics.responses_3xx, DDOG_CHARSLICE_C("status_code:3xx")); + if ((v = atomic_exchange(&bgs_metric_responses_3xx, 0))) { + ddog_sidecar_telemetry_add_span_metric_point_buffer(buffer, DDOG_CHARSLICE_C("trace_api.responses"), v, DDOG_CHARSLICE_C("status_code:3xx")); } - if (metrics.responses_4xx) { - ddog_sidecar_telemetry_add_span_metric_point_buffer(buffer, DDOG_CHARSLICE_C("trace_api.responses"), (double)metrics.responses_4xx, DDOG_CHARSLICE_C("status_code:4xx")); + if ((v 
= atomic_exchange(&bgs_metric_responses_4xx, 0))) { + ddog_sidecar_telemetry_add_span_metric_point_buffer(buffer, DDOG_CHARSLICE_C("trace_api.responses"), v, DDOG_CHARSLICE_C("status_code:4xx")); } - if (metrics.responses_5xx) { - ddog_sidecar_telemetry_add_span_metric_point_buffer(buffer, DDOG_CHARSLICE_C("trace_api.responses"), (double)metrics.responses_5xx, DDOG_CHARSLICE_C("status_code:5xx")); + if ((v = atomic_exchange(&bgs_metric_responses_5xx, 0))) { + ddog_sidecar_telemetry_add_span_metric_point_buffer(buffer, DDOG_CHARSLICE_C("trace_api.responses"), v, DDOG_CHARSLICE_C("status_code:5xx")); } - - if (metrics.errors_timeout) { - ddog_sidecar_telemetry_add_span_metric_point_buffer(buffer, DDOG_CHARSLICE_C("trace_api.errors"), (double)metrics.errors_timeout, DDOG_CHARSLICE_C("type:timeout")); + if ((v = atomic_exchange(&bgs_metric_errors_timeout, 0))) { + ddog_sidecar_telemetry_add_span_metric_point_buffer(buffer, DDOG_CHARSLICE_C("trace_api.errors"), v, DDOG_CHARSLICE_C("type:timeout")); } - if (metrics.errors_network) { - ddog_sidecar_telemetry_add_span_metric_point_buffer(buffer, DDOG_CHARSLICE_C("trace_api.errors"), (double)metrics.errors_network, DDOG_CHARSLICE_C("type:network")); + if ((v = atomic_exchange(&bgs_metric_errors_network, 0))) { + ddog_sidecar_telemetry_add_span_metric_point_buffer(buffer, DDOG_CHARSLICE_C("trace_api.errors"), v, DDOG_CHARSLICE_C("type:network")); } - if (metrics.errors_status_code) { - ddog_sidecar_telemetry_add_span_metric_point_buffer(buffer, DDOG_CHARSLICE_C("trace_api.errors"), (double)metrics.errors_status_code, DDOG_CHARSLICE_C("type:status_code")); + if ((v = atomic_exchange(&bgs_metric_errors_status_code, 0))) { + ddog_sidecar_telemetry_add_span_metric_point_buffer(buffer, DDOG_CHARSLICE_C("trace_api.errors"), v, DDOG_CHARSLICE_C("type:status_code")); } ddtrace_ffi_try("Failed flushing background sender metrics", - ddog_sidecar_telemetry_buffer_flush(&ddtrace_sidecar, ddtrace_sidecar_instance_id, &dd_bgs_queued_id, 
buffer)); + ddog_sidecar_telemetry_buffer_flush(&DDTRACE_G(sidecar), ddtrace_sidecar_instance_id, &dd_bgs_queued_id, buffer)); } -ZEND_TLS ddog_SidecarActionsBuffer *metrics_buffer; +void ddtrace_telemetry_flush_bgs_metrics_final(void) { + if (!DDTRACE_G(sidecar) || !get_global_DD_INSTRUMENTATION_TELEMETRY_ENABLED()) { + return; + } + // Bypass the time gate so any remaining metrics are sent before the transport + // is dropped in GSHUTDOWN. Setting last_flush_ns to 0 makes the time check in + // _if_due always pass; the CAS inside still prevents a concurrent double-flush. + atomic_store(&bgs_metrics_last_flush_ns, 0); + ddtrace_telemetry_flush_bgs_metrics_if_due(); +} -DDTRACE_PUBLIC void ddtrace_metric_register_buffer(zend_string *name, ddog_MetricType type, ddog_MetricNamespace ns) -{ - if (!ddtrace_sidecar) { +DDTRACE_PUBLIC void ddtrace_metric_register_buffer(zend_string *name, ddog_MetricType type, ddog_MetricNamespace ns) { + if (!DDTRACE_G(sidecar)) { return; } ddog_CharSlice metric_name = dd_zend_string_to_CharSlice(name); - ddog_sidecar_telemetry_register_metric(&ddtrace_sidecar, metric_name, type, ns); + ddog_sidecar_telemetry_register_metric(&DDTRACE_G(sidecar), metric_name, type, ns); } DDTRACE_PUBLIC bool ddtrace_metric_add_point(zend_string *name, double value, zend_string *tags) { - if (!metrics_buffer) { - metrics_buffer = ddog_sidecar_telemetry_buffer_alloc(); + if (!DDTRACE_G(metrics_buffer)) { + DDTRACE_G(metrics_buffer) = ddog_sidecar_telemetry_buffer_alloc(); } ddog_CharSlice metric_name = dd_zend_string_to_CharSlice(name); ddog_CharSlice tags_slice = dd_zend_string_to_CharSlice(tags); - ddog_sidecar_telemetry_add_span_metric_point_buffer(metrics_buffer, metric_name, value, tags_slice); + ddog_sidecar_telemetry_add_span_metric_point_buffer(DDTRACE_G(metrics_buffer), metric_name, value, tags_slice); return true; } static void dd_commit_metrics() { - if (!metrics_buffer) { + if (!DDTRACE_G(metrics_buffer)) { return; } 
ddog_sidecar_telemetry_buffer_flush( - &ddtrace_sidecar, ddtrace_sidecar_instance_id, &DDTRACE_G(sidecar_queue_id), metrics_buffer); - metrics_buffer = NULL; + &DDTRACE_G(sidecar), ddtrace_sidecar_instance_id, &DDTRACE_G(sidecar_queue_id), DDTRACE_G(metrics_buffer)); + DDTRACE_G(metrics_buffer) = NULL; } diff --git a/ext/telemetry.h b/ext/telemetry.h index 84a7f0c434..aa891ea32a 100644 --- a/ext/telemetry.h +++ b/ext/telemetry.h @@ -35,7 +35,15 @@ void ddtrace_telemetry_finalize(); void ddtrace_telemetry_lifecycle_end(void); void ddtrace_telemetry_register_services(ddog_SidecarTransport **sidecar); void ddtrace_telemetry_inc_spans_created(ddtrace_span_data *span); +// Called by the background sender thread (coms.c) to accumulate metrics atomically. +// Never touches the sidecar; the request thread flushes via ddtrace_telemetry_flush_bgs_metrics_if_due(). void ddtrace_telemetry_send_trace_api_metrics(trace_api_metrics metrics); +// Called from ddtrace_telemetry_finalize() to flush accumulated BGS metrics through +// the current thread's sidecar connection, at most once per flush interval. +void ddtrace_telemetry_flush_bgs_metrics_if_due(void); +// Force-flush accumulated BGS metrics regardless of the time gate. Call immediately +// before dropping the per-thread transport in GSHUTDOWN so no data is lost. +void ddtrace_telemetry_flush_bgs_metrics_final(void); // public API DDTRACE_PUBLIC void ddtrace_metric_register_buffer(zend_string *name, ddog_MetricType type, ddog_MetricNamespace ns);