Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ struct ProfilingScenario: Scenario {

RUMMonitor.shared().addAttribute(forKey: "scenario", value: "ContinuousProfiling")

Profiling.enable(with: .init(applicationLaunchSampleRate: .maxSampleRate, continuousSampleRate: .maxSampleRate))
Profiling.enable(
with: .init(
applicationLaunchSampleRate: .maxSampleRate,
continuousSampleRate: .maxSampleRate,
featureFlags: [.cpuTimeSamples: true]
)
)
}
}
6 changes: 5 additions & 1 deletion DatadogProfiling/Mach/dd_pprof.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,12 @@
extern "C" {

/**
 * Create a pprof profile aggregator with CPU-time sample values disabled.
 *
 * Forwards to `dd_pprof_create_with_cpu_time` with `record_cpu_time = false`.
 *
 * @param sampling_interval_ns The sampling interval in nanoseconds
 * @return Whatever `dd_pprof_create_with_cpu_time` returns for these arguments
 */
dd_pprof_t* dd_pprof_create(uint64_t sampling_interval_ns) {
    // This entry point never opts into CPU-time recording.
    const bool record_cpu_time = false;
    return dd_pprof_create_with_cpu_time(sampling_interval_ns, record_cpu_time);
}

dd_pprof_t* dd_pprof_create_with_cpu_time(uint64_t sampling_interval_ns, bool record_cpu_time) {
try {
auto* profiler = new dd::profiler::profile(sampling_interval_ns);
auto* profiler = new dd::profiler::profile(sampling_interval_ns, record_cpu_time);
return reinterpret_cast<dd_pprof_t*>(profiler);
} catch (...) {
return nullptr;
Expand Down
25 changes: 24 additions & 1 deletion DatadogProfiling/Mach/dd_profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,23 @@ static double read_profiling_sample_rate() {
return sample_rate;
}

static bool read_profiling_record_cpu_time() {
CFStringRef suiteName = CFSTR(DD_PROFILING_USER_DEFAULTS_SUITE_NAME);
CFStringRef key = CFSTR(DD_PROFILING_RECORD_CPU_TIME_KEY);
CFPropertyListRef value = CFPreferencesCopyAppValue(key, suiteName);

bool result = false;

if (value) {
if (CFGetTypeID(value) == CFBooleanGetTypeID()) {
result = CFBooleanGetValue((CFBooleanRef)value);
}
CFRelease(value);
}

return result;
}

/**
* Deletes the DatadogProfiling defaults from the `UserDefaults`
* to be re-evaluated during `Profiling.enable()`.
Expand All @@ -163,9 +180,11 @@ void dd_delete_profiling_defaults() {
CFStringRef suiteName = CFSTR(DD_PROFILING_USER_DEFAULTS_SUITE_NAME);
CFStringRef isEnabledKey = CFSTR(DD_PROFILING_IS_ENABLED_KEY);
CFStringRef sampleRateKey = CFSTR(DD_PROFILING_APP_LAUNCH_SAMPLE_RATE_KEY);
CFStringRef recordCPUTimeKey = CFSTR(DD_PROFILING_RECORD_CPU_TIME_KEY);

CFPreferencesSetValue(isEnabledKey, NULL, suiteName, kCFPreferencesCurrentUser, kCFPreferencesAnyHost);
CFPreferencesSetValue(sampleRateKey, NULL, suiteName, kCFPreferencesCurrentUser, kCFPreferencesAnyHost);
CFPreferencesSetValue(recordCPUTimeKey, NULL, suiteName, kCFPreferencesCurrentUser, kCFPreferencesAnyHost);
CFPreferencesSynchronize(suiteName, kCFPreferencesCurrentUser, kCFPreferencesAnyHost);
}

Expand Down Expand Up @@ -353,7 +372,9 @@ class dd_profiler {

if (profiler) return true;

profile = new (std::nothrow) dd::profiler::profile(sampling_interval_ns);
record_cpu_time = read_profiling_record_cpu_time();

profile = new (std::nothrow) dd::profiler::profile(sampling_interval_ns, record_cpu_time);
if (!profile) {
status = DD_PROFILER_STATUS_ALLOCATION_FAILED;
return false;
Expand All @@ -362,6 +383,7 @@ class dd_profiler {

sampling_config_t config = SAMPLING_CONFIG_DEFAULT;
config.sampling_interval_nanos = sampling_interval_ns;
config.record_cpu_time = record_cpu_time ? 1 : 0;

profiler = new (std::nothrow) mach_sampling_profiler(&config, callback, this, hard_limit_bytes);
if (!profiler) {
Expand All @@ -383,6 +405,7 @@ class dd_profiler {
uint64_t hard_limit_bytes = DD_PROFILER_DEFAULT_HARD_LIMIT_BYTES;
uint64_t sampling_interval_ns = SAMPLING_CONFIG_DEFAULT_INTERVAL_NANOS;
int64_t server_time_offset_ns = 0;
bool record_cpu_time = false;

/**
* Mutex protecting the profile pointer.
Expand Down
9 changes: 9 additions & 0 deletions DatadogProfiling/Mach/include/dd_pprof.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,15 @@ typedef struct profile dd_pprof_t;
*/
dd_pprof_t* dd_pprof_create(uint64_t sampling_interval_ns);

/**
* Create a new pprof profile aggregator with optional CPU-time sample values.
*
* @param sampling_interval_ns The sampling interval in nanoseconds
* @param record_cpu_time Whether samples should include CPU time as a second value
* @return Pointer to the created profile, or NULL on failure
*/
dd_pprof_t* dd_pprof_create_with_cpu_time(uint64_t sampling_interval_ns, bool record_cpu_time);

/**
* Destroy a pprof profile aggregator and free all associated memory
*
Expand Down
8 changes: 7 additions & 1 deletion DatadogProfiling/Mach/include/dd_profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ typedef struct stack_trace {
uint64_t timestamp;
/** Actual sampling interval in nanoseconds for this sample */
uint64_t sampling_interval_nanos;
/** CPU time consumed by this thread since the previous sample */
uint64_t cpu_time_nanos;
/** The stack frames array */
stack_frame_t* frames;
/** Number of frames in the trace */
Expand All @@ -74,6 +76,8 @@ typedef struct sampling_config {
uint32_t max_thread_count; // default: 100
/** QoS class for the sampling thread */
qos_class_t qos_class;
/** Whether samples should include a CPU-time value */
uint8_t record_cpu_time;
} sampling_config_t;

/**
Expand All @@ -100,7 +104,8 @@ static const sampling_config_t SAMPLING_CONFIG_DEFAULT = {
SAMPLING_CONFIG_DEFAULT_BUFFER_SIZE, // max_buffer_size
SAMPLING_CONFIG_DEFAULT_STACK_DEPTH, // max_stack_depth
SAMPLING_CONFIG_DEFAULT_THREAD_COUNT, // max_thread_count
QOS_CLASS_USER_INTERACTIVE // qos_class
QOS_CLASS_USER_INTERACTIVE, // qos_class
0 // record_cpu_time
};

/**
Expand All @@ -122,6 +127,7 @@ typedef void (*stack_trace_callback_t)(stack_trace_t* traces, size_t count, void
#define DD_PROFILING_USER_DEFAULTS_SUITE_NAME "com.datadoghq.ios-sdk.profiling"
#define DD_PROFILING_IS_ENABLED_KEY "is_profiling_enabled"
#define DD_PROFILING_APP_LAUNCH_SAMPLE_RATE_KEY "profiling_app_launch_sample_rate"
#define DD_PROFILING_RECORD_CPU_TIME_KEY "profiling_record_cpu_time"

#ifdef __cplusplus

Expand Down
14 changes: 13 additions & 1 deletion DatadogProfiling/Mach/include/mach_sampling_profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <memory>
#include <mutex>
#include <pthread.h>
#include <unordered_map>
#include <vector>

#ifdef __cplusplus
Expand Down Expand Up @@ -165,13 +166,23 @@ class mach_sampling_profiler {
* @param thread The thread to sample
* @param interval_nanos The actual sampling interval in nanoseconds for this sample
* @param cpu_time_nanos CPU time consumed by the thread since the previous sample, in nanoseconds
*/
void sample_thread(thread_t thread, uint64_t interval_nanos);
void sample_thread(thread_t thread, uint64_t interval_nanos, uint64_t cpu_time_nanos);

/**
* @brief Returns true when the thread is owned by the profiler itself.
*/
bool is_profiler_internal_thread(thread_t thread) const;

/**
* @brief Returns CPU time consumed since the previous observation for this thread.
*/
uint64_t thread_cpu_time_delta_nanos(thread_t thread);

/**
* @brief Removes CPU-time state for threads no longer present in the task.
*/
void prune_thread_cpu_time_state(const thread_t* threads, mach_msg_type_number_t count);

private:
/**
* @brief Static entry point for the sampling thread
Expand All @@ -184,6 +195,7 @@ class mach_sampling_profiler {
std::mutex state_mutex;
/// Indicates whether `sampling_thread` currently refers to a live session thread.
std::atomic<bool> has_sampling_thread{false};
std::unordered_map<thread_t, uint64_t> previous_thread_cpu_time_nanos;
};

} // namespace dd::profiler
Expand Down
14 changes: 13 additions & 1 deletion DatadogProfiling/Mach/include/profile.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ class profile {
* @brief Construct a new profile aggregator
* @param sampling_interval_ns Sampling interval in nanoseconds
* @param record_cpu_time Whether samples include CPU time as a second value
*/
explicit profile(uint64_t sampling_interval_ns);
explicit profile(uint64_t sampling_interval_ns, bool record_cpu_time = false);
~profile() = default;

profile(const profile&) = delete;
Expand Down Expand Up @@ -189,10 +189,16 @@ class profile {

/** @brief Get cached string ID for "wall-time" */
uint32_t wall_time_str_id() const { return _wall_time_str_id; }

/** @brief Get cached string ID for "cpu-time" */
uint32_t cpu_time_str_id() const { return _cpu_time_str_id; }

/** @brief Get cached string ID for "nanoseconds" */
uint32_t nanoseconds_str_id() const { return _nanoseconds_str_id; }

/** @brief Whether samples include a CPU-time value in addition to wall-time */
bool cpu_time_enabled() const { return _record_cpu_time; }

/** @brief Number of labels exported for the sample */
size_t label_count(const sample_t& sample) const { return sample.labels.size() + 1; }

Expand Down Expand Up @@ -238,12 +244,18 @@ class profile {

/** @brief Profile sampling interval in nanoseconds */
uint64_t _sampling_interval_ns;

/** @brief Whether samples include CPU time as a second value */
bool _record_cpu_time;

/** @brief Cached string ID for empty string */
uint32_t _empty_str_id;

/** @brief Cached string ID for "wall-time" */
uint32_t _wall_time_str_id;

/** @brief Cached string ID for "cpu-time" */
uint32_t _cpu_time_str_id;

/** @brief Cached string ID for "nanoseconds" */
uint32_t _nanoseconds_str_id;
Expand Down
80 changes: 77 additions & 3 deletions DatadogProfiling/Mach/mach_sampling_profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <setjmp.h>
#include <mach/thread_act.h>
#include <mach/thread_status.h>
#include <mach/thread_info.h>
#include <mach/machine/thread_state.h>
#include <new>
#include <utility>
Expand Down Expand Up @@ -160,11 +161,32 @@ bool stack_trace_init(stack_trace_t* trace, uint32_t max_depth, uint64_t interva
trace->thread_name = nullptr;
trace->timestamp = 0;
trace->sampling_interval_nanos = interval_nanos;
trace->cpu_time_nanos = 0;
trace->frame_count = 0;
trace->frames = (stack_frame_t*)malloc(max_depth * sizeof(stack_frame_t));
return trace->frames != nullptr;
}

/**
 * Reads the CPU time (user + system) that `thread_info(THREAD_BASIC_INFO)`
 * reports for a thread.
 *
 * @param thread The thread to query
 * @param cpu_time_nanos Output: user plus system time converted to nanoseconds.
 *                       Left untouched when the function returns false.
 * @return true on success; false when `cpu_time_nanos` is null or `thread_info` fails
 */
static bool thread_cpu_time_nanos(thread_t thread, uint64_t* cpu_time_nanos) {
    if (cpu_time_nanos == nullptr) {
        return false;
    }

    thread_basic_info_data_t basic_info{};
    mach_msg_type_number_t info_count = THREAD_BASIC_INFO_COUNT;
    const auto status = thread_info(
        thread,
        THREAD_BASIC_INFO,
        reinterpret_cast<thread_info_t>(&basic_info),
        &info_count);
    if (status != KERN_SUCCESS) {
        return false;
    }

    // Each time value carries whole seconds plus a microseconds remainder.
    const auto to_nanos = [](const auto& time_value) -> uint64_t {
        return (static_cast<uint64_t>(time_value.seconds) * 1000000000ULL)
            + (static_cast<uint64_t>(time_value.microseconds) * 1000ULL);
    };

    *cpu_time_nanos = to_nanos(basic_info.user_time) + to_nanos(basic_info.system_time);
    return true;
}

/**
* Destroys a stack trace, freeing the thread name and frames array.
*
Expand Down Expand Up @@ -396,6 +418,7 @@ bool mach_sampling_profiler::start_sampling() {

// Clear any leftover data from previous runs
sample_buffer.clear();
previous_thread_cpu_time_nanos.clear();
if (sample_buffer.capacity() < config.max_buffer_size) {
sample_buffer.reserve(config.max_buffer_size);
}
Expand Down Expand Up @@ -491,9 +514,10 @@ bool mach_sampling_profiler::is_profiler_internal_thread(thread_t thread) const
* @param thread The thread to sample
* @param interval_nanos The actual sampling interval in nanoseconds for this sample
* @param cpu_time_nanos CPU time consumed by the thread since the previous sample, in nanoseconds
*/
void mach_sampling_profiler::sample_thread(thread_t thread, uint64_t interval_nanos) {
void mach_sampling_profiler::sample_thread(thread_t thread, uint64_t interval_nanos, uint64_t cpu_time_nanos) {
stack_trace_t trace;
if (!stack_trace_init(&trace, config.max_stack_depth, interval_nanos)) return;
trace.cpu_time_nanos = cpu_time_nanos;

// Get thread info
stack_trace_get_thread_info(&trace, thread);
Expand All @@ -519,6 +543,53 @@ void mach_sampling_profiler::sample_thread(thread_t thread, uint64_t interval_na
}
}

/**
 * Returns the CPU time the thread consumed since it was last observed here,
 * and records the current reading as the new baseline.
 *
 * Returns 0 when CPU-time recording is disabled, when the CPU time cannot be
 * read, on the first observation of a thread (only the baseline is stored),
 * or when the new reading is lower than the stored one.
 *
 * @param thread The thread to measure
 * @return CPU-time delta in nanoseconds
 */
uint64_t mach_sampling_profiler::thread_cpu_time_delta_nanos(thread_t thread) {
    if (!config.record_cpu_time) return 0;

    uint64_t now_nanos = 0;
    const bool did_read = thread_cpu_time_nanos(thread, &now_nanos);
    if (!did_read) return 0;

    // emplace() only inserts when the thread has no baseline yet.
    const auto [entry, is_first_observation] = previous_thread_cpu_time_nanos.emplace(thread, now_nanos);
    if (is_first_observation) return 0;

    const uint64_t last_nanos = entry->second;
    entry->second = now_nanos;

    // Never report a negative delta as a huge unsigned value.
    return now_nanos >= last_nanos ? now_nanos - last_nanos : 0;
}

void mach_sampling_profiler::prune_thread_cpu_time_state(const thread_t* threads, mach_msg_type_number_t count) {
if (!config.record_cpu_time || previous_thread_cpu_time_nanos.empty()) {
return;
}

for (auto it = previous_thread_cpu_time_nanos.begin(); it != previous_thread_cpu_time_nanos.end();) {
bool is_live_thread = false;
for (mach_msg_type_number_t i = 0; i < count; i++) {
if (threads[i] == it->first) {
is_live_thread = true;
break;
}
}

if (is_live_thread) {
++it;
} else {
it = previous_thread_cpu_time_nanos.erase(it);
}
}
}

/**
* Main sampling loop that collects stack traces from threads.
*/
Expand All @@ -535,7 +606,8 @@ void mach_sampling_profiler::main() {
}

if (config.profile_current_thread_only) {
sample_thread(pthread_mach_thread_np(target_thread), interval_nanos);
const thread_t thread = pthread_mach_thread_np(target_thread);
sample_thread(thread, interval_nanos, thread_cpu_time_delta_nanos(thread));
if (sample_buffer.size() >= config.max_buffer_size) {
worker->enqueue_active_buffer(sample_buffer);
}
Expand All @@ -557,13 +629,15 @@ void mach_sampling_profiler::main() {
// Skip profiler-owned threads to avoid self-noise in customer profiles.
if (is_profiler_internal_thread(threads[i])) continue;

sample_thread(threads[i], interval_nanos);
sample_thread(threads[i], interval_nanos, thread_cpu_time_delta_nanos(threads[i]));

if (sample_buffer.size() >= config.max_buffer_size) {
worker->enqueue_active_buffer(sample_buffer);
}
}

prune_thread_cpu_time_state(threads, count);

// Clean up thread references
for (mach_msg_type_number_t i = 0; i < count; i++) {
mach_port_deallocate(mach_task_self(), threads[i]);
Expand Down
7 changes: 6 additions & 1 deletion DatadogProfiling/Mach/profile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,9 @@ std::string uuid_string(const uuid_t uuid) {
*
* @param sampling_interval_ns Sampling interval in nanoseconds
* @param record_cpu_time Whether samples include CPU time as a second value
*/
profile::profile(uint64_t sampling_interval_ns)
profile::profile(uint64_t sampling_interval_ns, bool record_cpu_time)
: _sampling_interval_ns(sampling_interval_ns)
, _record_cpu_time(record_cpu_time)
, _epoch_offset(uptime_epoch_offset())
, _server_time_offset_ns(0)
, _start_timestamp(0)
Expand All @@ -95,6 +96,7 @@ profile::profile(uint64_t sampling_interval_ns)
// Pre-intern common strings for performance
_empty_str_id = intern_string("");
_wall_time_str_id = intern_string("wall-time");
_cpu_time_str_id = _record_cpu_time ? intern_string("cpu-time") : 0;
_nanoseconds_str_id = intern_string("nanoseconds");
_end_timestamp_ns_str_id = intern_string("end_timestamp_ns");
_thread_id_str_id = intern_string("thread id");
Expand Down Expand Up @@ -176,6 +178,9 @@ void profile::add_samples(const stack_trace_t* traces, size_t count, binary_imag
sample.timestamp_uptime_ns = trace.timestamp;
sample.labels = std::move(labels);
sample.values = {static_cast<int64_t>(trace.sampling_interval_nanos)};
if (_record_cpu_time) {
sample.values.push_back(static_cast<int64_t>(trace.cpu_time_nanos));
}

_samples.push_back(std::move(sample));

Expand Down
Loading