Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public void GetThreadTebTest()
IAbstractRuntime runtimeService = runtime.GetService<IAbstractRuntime>();
Assert.NotNull(runtimeService);

ClrThreadInfo[] threads = runtimeService.EnumerateThreads().ToArray();
ClrThreadInfo[] threads = runtimeService.EnumerateThreads(int.MaxValue).ToArray();

foreach (ClrThread thread in runtime.Threads)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ namespace Microsoft.Diagnostics.Runtime.AbstractDac
/// </summary>
public interface IAbstractRuntime
{
IEnumerable<ClrThreadInfo> EnumerateThreads();
IEnumerable<AppDomainInfo> EnumerateAppDomains();
IEnumerable<ulong> GetModuleList(ulong appDomain);
IEnumerable<ClrThreadInfo> EnumerateThreads(int maxCount);
IEnumerable<AppDomainInfo> EnumerateAppDomains(int maxCount);
IEnumerable<ulong> GetModuleList(ulong appDomain, int maxCount);
IEnumerable<ClrHandleInfo> EnumerateHandles();
IEnumerable<JitManagerInfo> EnumerateClrJitManagers();
string? GetJitHelperFunctionName(ulong address);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Licensed to the .NET Foundation under one or more agreements.
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections.Generic;
Expand Down Expand Up @@ -26,15 +26,16 @@ public interface IAbstractThreadHelpers

/// <summary>
/// Enumerates the stack trace of this method. Note that in the event of bugs or corrupted
/// state, this can occasionally produce bad data and run "forever", so be sure to break out of
/// the loop when a threshold is reached when calling it.
/// state, this can occasionally produce bad data and run "forever", so <paramref name="maxFrames" />
/// is enforced inside the DAC stack walk.
/// </summary>
/// <param name="osThreadId">The thread to enumerate.</param>
/// <param name="includeContext">Whether to calculate and include the thread's CONTEXT record or not.
/// Registers are always in the Windows CONTEXT format, as that's what the OS uses.</param>
/// <param name="maxFrames">The maximum number of stack frames to walk.</param>
/// <param name="traceErrors">Whether or not to Trace any errors encountered.</param>
/// <returns>An enumeration of stack frames.</returns>
IEnumerable<StackFrameInfo> EnumerateStackTrace(uint osThreadId, bool includeContext, bool traceErrors);
IEnumerable<StackFrameInfo> EnumerateStackTrace(uint osThreadId, bool includeContext, int maxFrames, bool traceErrors);
}

/// <summary>
Expand Down
62 changes: 46 additions & 16 deletions src/Microsoft.Diagnostics.Runtime/ClrHeap.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ public sealed class ClrHeap : IClrHeap
private volatile SyncBlockContainer? _syncBlocks;
private volatile ClrSegment? _currSegment;
private volatile ArrayPool<ObjectCorruption>? _objectCorruptionPool;
private readonly object _heapCacheLock = new();
private ulong _lastComFlags;

internal ClrHeap(ClrRuntime runtime, IMemoryReader memoryReader, IAbstractHeap helpers, IAbstractTypeHelpers typeHelpers)
Expand Down Expand Up @@ -796,13 +797,23 @@ public IEnumerable<MemoryRange> EnumerateAllocationContexts()
private SyncBlockContainer GetSyncBlocks()
{
SyncBlockContainer? container = _syncBlocks;
if (container is null)
if (container is not null)
return container;

// Serialize cache population so two concurrent DAC EnumerateSyncBlocks
// calls don't each build a SyncBlockContainer (with one's partial
// results getting permanently published).
lock (_heapCacheLock)
{
container = new SyncBlockContainer(Helpers.EnumerateSyncBlocks());
Interlocked.CompareExchange(ref _syncBlocks, container, null);
}
container = _syncBlocks;
if (container is null)
{
container = new SyncBlockContainer(Helpers.EnumerateSyncBlocks());
_syncBlocks = container;
}

return container;
return container;
}
}

internal ClrThinLock? GetThinlock(ulong address)
Expand Down Expand Up @@ -1165,16 +1176,26 @@ private Dictionary<ulong, ulong> GetAllocationContexts()
if (result is not null)
return result;

result = new();
foreach (MemoryRange allocContext in Helpers.EnumerateThreadAllocationContexts())
result[allocContext.Start] = allocContext.End;
// Serialize cache population so the DAC's per-runtime
// EnumerateThreadAllocationContexts call doesn't race with itself
// and produce two divergent dictionaries (the winner's published).
lock (_heapCacheLock)
{
result = _allocationContexts;
if (result is not null)
return result;

foreach (ClrSubHeap subHeap in SubHeaps)
if (subHeap.AllocationContext.Start < subHeap.AllocationContext.End)
result[subHeap.AllocationContext.Start] = subHeap.AllocationContext.End;
result = new();
foreach (MemoryRange allocContext in Helpers.EnumerateThreadAllocationContexts())
result[allocContext.Start] = allocContext.End;

Interlocked.CompareExchange(ref _allocationContexts, result, null);
return result;
foreach (ClrSubHeap subHeap in SubHeaps)
if (subHeap.AllocationContext.Start < subHeap.AllocationContext.End)
result[subHeap.AllocationContext.Start] = subHeap.AllocationContext.End;

_allocationContexts = result;
return result;
}
}

private (ulong Source, ulong Target)[] GetDependentHandles()
Expand All @@ -1183,10 +1204,19 @@ private Dictionary<ulong, ulong> GetAllocationContexts()
if (handles is not null)
return handles;

handles = Helpers.EnumerateDependentHandles().OrderBy(r => r.Source).ToArray();
// Serialize cache population: concurrent DAC handle enumerations can return
// partial results, and the previous CompareExchange pattern would publish
// the first-to-finish (possibly truncated) array as the cached value.
lock (_heapCacheLock)
{
handles = _dependentHandles;
if (handles is not null)
return handles;

Interlocked.CompareExchange(ref _dependentHandles, handles, null);
return handles;
handles = Helpers.EnumerateDependentHandles().OrderBy(r => r.Source).ToArray();
_dependentHandles = handles;
return handles;
}
}

private IEnumerable<ClrRoot> EnumerateFinalizers(IEnumerable<MemoryRange> memoryRanges)
Expand Down
141 changes: 105 additions & 36 deletions src/Microsoft.Diagnostics.Runtime/ClrRuntime.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Licensed to the .NET Foundation under one or more agreements.
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
Expand All @@ -25,7 +25,13 @@ public sealed class ClrRuntime : IClrRuntime
private readonly IServiceProvider _services;
private volatile ClrHeap? _heap;
private ImmutableArray<ClrThread> _threads;
// ImmutableArray<T> is a struct and cannot be marked volatile, so the lock-free
// double-checked reads of _threads/_handles below use these volatile flags as the
// acquire/release barrier that publishes the array contents safely.
private volatile bool _threadsPublished;
private volatile DomainAndModules? _domainAndModules;
private ImmutableArray<ClrHandle> _handles;
private volatile bool _handlesPublished;

private IAbstractRuntime? _runtime;
private IAbstractComHelpers? _comHelpers;
Expand All @@ -36,6 +42,7 @@ public sealed class ClrRuntime : IClrRuntime
private string? _stressLogFailureReason;
private bool _stressLogProbed;
private readonly object _stressLogGate = new();
private readonly object _runtimeCacheLock = new();

internal ClrRuntime(ClrInfo clrInfo, IServiceProvider services)
{
Expand All @@ -59,9 +66,20 @@ public void FlushCachedData()
throw new NotSupportedException($"This version of CLR debugging does not support flushing the runtime.");

_controller.Flush();
_domainAndModules = null;
_threads = default;
_heap = null;

// Reset the managed caches under the same lock their builders publish under, so a
// builder that is mid-population cannot republish pre-flush data after the reset.
lock (_runtimeCacheLock)
{
_domainAndModules = null;
// Clear the published flag (volatile) before resetting the array so a racing
// reader either sees the old published array or falls into the lock to rebuild.
_threadsPublished = false;
_threads = default;
_handlesPublished = false;
_handles = default;
_heap = null;
}

lock (_stressLogGate)
{
Expand Down Expand Up @@ -205,25 +223,33 @@ public ImmutableArray<ClrThread> Threads
{
get
{
if (!_threads.IsDefault)
if (_threadsPublished)
return _threads;

IAbstractThreadHelpers? threadHelpers = GetService<IAbstractThreadHelpers>();
ImmutableArray<ClrThread>.Builder builder = ImmutableArray.CreateBuilder<ClrThread>();

int max = DataTarget.Options.Limits.MaxThreads;
foreach (ClrThreadInfo data in GetDacRuntime().EnumerateThreads())
// Serialize cache population: concurrent DAC EnumerateThreads calls can
// truncate each other and the CAS winner's partial result would become
// the permanent cached value.
lock (_runtimeCacheLock)
{
builder.Add(new ClrThread(DataTarget.DataReader, this, threadHelpers, data));
if (_threadsPublished)
return _threads;

if (max-- == 0)
break;
}
IAbstractThreadHelpers? threadHelpers = GetService<IAbstractThreadHelpers>();
ImmutableArray<ClrThread>.Builder builder = ImmutableArray.CreateBuilder<ClrThread>();

ImmutableArray<ClrThread> threads = builder.MoveOrCopyToImmutable();
ImmutableInterlocked.InterlockedCompareExchange(ref _threads, threads, _threads);
int max = DataTarget.Options.Limits.MaxThreads;
foreach (ClrThreadInfo data in GetDacRuntime().EnumerateThreads(max))
{
if (builder.Count >= max)
break;

return _threads;
builder.Add(new ClrThread(DataTarget.DataReader, this, threadHelpers, data));
}

_threads = builder.MoveOrCopyToImmutable();
_threadsPublished = true;
return _threads;
}
}
}

Expand Down Expand Up @@ -275,7 +301,29 @@ public ImmutableArray<ClrThread> Threads
/// depending on the state of the process when we attempt to walk the handle table.
/// </summary>
/// <returns>An enumeration of GC handles in the process.</returns>
public IEnumerable<ClrHandle> EnumerateHandles() => GetDacRuntime().EnumerateHandles().Select(r => new ClrHandle(this, r));
public IEnumerable<ClrHandle> EnumerateHandles()
{
if (_handlesPublished)
return _handles;

// Serialize cache population: SOSDac.EnumerateHandles is a stateful DAC
// enumerator and concurrent calls on the same SOSDac can return truncated
// (or wildly wrong) results, manifesting as off-by-one or order-of-magnitude
// root-count mismatches during parallel EnumerateRoots() calls.
lock (_runtimeCacheLock)
{
if (_handlesPublished)
return _handles;

ImmutableArray<ClrHandle>.Builder builder = ImmutableArray.CreateBuilder<ClrHandle>();
foreach (ClrHandleInfo info in GetDacRuntime().EnumerateHandles())
builder.Add(new ClrHandle(this, info));

_handles = builder.MoveOrCopyToImmutable();

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This wasn't previously cached - is caching it intentional?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, walking the handle table is expensive so I wanted to cache it. But this did remind me that there's a correctness issue with Flush. Using a volatile publish will fix this. It's ugly, but it's fast. I wish I hadn't chosen to use ImmutableArray so long ago.

_handlesPublished = true;
return _handles;
}
}

/// <summary>
/// Gets the GC heap of the process.
Expand All @@ -285,21 +333,31 @@ public ClrHeap Heap
get
{
ClrHeap? heap = _heap;
while (heap is null) // Flush can cause a race.
if (heap is not null)
return heap;

// Serialize cache population so we never construct two ClrHeap
// instances for the same runtime (each carries its own internal
// caches; readers obtaining different heaps would observe
// divergent state).
lock (_runtimeCacheLock)
{
IAbstractHeap? heapHelpers = GetService<IAbstractHeap>();
IAbstractTypeHelpers? typeHelpers = GetService<IAbstractTypeHelpers>();
heap = _heap;
if (heap is null)
{
IAbstractHeap? heapHelpers = GetService<IAbstractHeap>();
IAbstractTypeHelpers? typeHelpers = GetService<IAbstractTypeHelpers>();

// These are defined as non-nullable but just in case, double check we have a non-null instance.
if (heapHelpers is null || typeHelpers is null)
throw new InvalidDataException("Unable to create a ClrHeap for this runtime. This may indicate that the CLR wasn't fully initialized at the time the dump was taken, or the dump is corrupt or truncated.");
// These are defined as non-nullable but just in case, double check we have a non-null instance.
if (heapHelpers is null || typeHelpers is null)
throw new InvalidDataException("Unable to create a ClrHeap for this runtime. This may indicate that the CLR wasn't fully initialized at the time the dump was taken, or the dump is corrupt or truncated.");

heap = new(this, DataTarget.DataReader, heapHelpers, typeHelpers);
Interlocked.CompareExchange(ref _heap, heap, null);
heap = _heap;
}
heap = new(this, DataTarget.DataReader, heapHelpers, typeHelpers);
_heap = heap;
}

return heap;
return heap;
}
}
}

Expand Down Expand Up @@ -486,13 +544,24 @@ public void Dispose()
private DomainAndModules GetAppDomainData()
{
DomainAndModules? data = _domainAndModules;
if (data is null)
if (data is not null)
return data;

// Serialize cache population: InitAppDomainData performs heavy DAC
// enumeration of AppDomains and Modules. Concurrent callers would
// race on the underlying DAC (truncating results) AND the previous
// unguarded `_domainAndModules = data` write left the "winner" as
// whichever thread happened to assign last.
lock (_runtimeCacheLock)
{
data = InitAppDomainData();
_domainAndModules = data;
data = _domainAndModules;
if (data is null)
{
data = InitAppDomainData();
_domainAndModules = data;
}
return data;
}

return data;
}

private DomainAndModules InitAppDomainData()
Expand All @@ -506,7 +575,7 @@ private DomainAndModules InitAppDomainData()

int domainCount = 0;
ImmutableArray<ClrAppDomain>.Builder builder = ImmutableArray.CreateBuilder<ClrAppDomain>();
foreach (AppDomainInfo domainInfo in GetDacRuntime().EnumerateAppDomains())
foreach (AppDomainInfo domainInfo in GetDacRuntime().EnumerateAppDomains(limits.MaxAppDomains))
{
if (domainCount++ >= limits.MaxAppDomains)
break;
Expand Down Expand Up @@ -534,7 +603,7 @@ private DomainAndModules InitAppDomainData()
IAbstractModuleHelpers moduleHelpers = GetServiceOrThrow<IAbstractModuleHelpers>();
IAbstractClrNativeHeaps? nativeHeaps = GetService<IAbstractClrNativeHeaps>();
ImmutableArray<ClrModule>.Builder moduleBuilder = ImmutableArray.CreateBuilder<ClrModule>();
foreach (ulong moduleAddress in GetDacRuntime().GetModuleList(domain.Address))
foreach (ulong moduleAddress in GetDacRuntime().GetModuleList(domain.Address, limits.MaxModules - modules.Count))
{
if (modules.Count >= limits.MaxModules)
break;
Expand Down
Loading