Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 16 additions & 10 deletions build/RunTestsOnHelix.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ set DOTNET_ROOT=%HELIX_CORRELATION_PAYLOAD%\d
set PATH=%DOTNET_ROOT%;%PATH%
set TestFullMSBuild=%1

REM Set DOTNET_HOST_PATH so MSBuild task hosts can locate the dotnet executable.
REM Without this, tasks from NuGet packages that use TaskHostFactory fail with MSB4216.
set DOTNET_HOST_PATH=%DOTNET_ROOT%\dotnet.exe

REM Ensure Visual Studio instances allow preview SDKs
PowerShell -ExecutionPolicy ByPass -NoProfile -File "%HELIX_CORRELATION_PAYLOAD%\t\eng\enable-preview-sdks.ps1"

Expand All @@ -35,14 +39,16 @@ dotnet new --debug:ephemeral-hive
dotnet nuget list source --configfile %TestExecutionDirectory%\nuget.config
if exist %TestExecutionDirectory%\Testpackages dotnet nuget add source %TestExecutionDirectory%\Testpackages --name testpackages --configfile %TestExecutionDirectory%\nuget.config

dotnet nuget remove source dotnet6-transport --configfile %TestExecutionDirectory%\nuget.config
dotnet nuget remove source dotnet6-internal-transport --configfile %TestExecutionDirectory%\nuget.config
dotnet nuget remove source dotnet7-transport --configfile %TestExecutionDirectory%\nuget.config
dotnet nuget remove source dotnet7-internal-transport --configfile %TestExecutionDirectory%\nuget.config
dotnet nuget remove source richnav --configfile %TestExecutionDirectory%\nuget.config
dotnet nuget remove source vs-impl --configfile %TestExecutionDirectory%\nuget.config
dotnet nuget remove source dotnet-libraries-transport --configfile %TestExecutionDirectory%\nuget.config
dotnet nuget remove source dotnet-tools-transport --configfile %TestExecutionDirectory%\nuget.config
dotnet nuget remove source dotnet-libraries --configfile %TestExecutionDirectory%\nuget.config
dotnet nuget remove source dotnet-eng --configfile %TestExecutionDirectory%\nuget.config
REM Drop the package feeds the tests never use, then print what is left.
REM stderr is sent to nul, so the error printed for a feed that is not in
REM nuget.config (e.g. internal-transport feeds only present in internal builds) is ignored.
for %%F in (
    dotnet6-transport
    dotnet6-internal-transport
    dotnet7-transport
    dotnet7-internal-transport
    richnav
    vs-impl
    dotnet-libraries-transport
    dotnet-tools-transport
    dotnet-libraries
    dotnet-eng
) do dotnet nuget remove source %%F --configfile %TestExecutionDirectory%\nuget.config 2>nul
dotnet nuget list source --configfile %TestExecutionDirectory%\nuget.config
30 changes: 19 additions & 11 deletions build/RunTestsOnHelix.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ export MicrosoftNETBuildExtensionsTargets=$HELIX_CORRELATION_PAYLOAD/ex/msbuildE
# Use the SDK that ships inside the Helix correlation payload and put it first on PATH.
export DOTNET_ROOT="$HELIX_CORRELATION_PAYLOAD/d"
export PATH="$DOTNET_ROOT:$PATH"

# Set DOTNET_HOST_PATH so MSBuild task hosts can locate the dotnet executable.
# Without this, tasks from NuGet packages that use TaskHostFactory (e.g. ComputeWasmBuildAssets
# from WebAssembly SDK, ComputeManagedAssemblies from ILLink) fail with MSB4216 on macOS
# because the task host process cannot find the dotnet host to launch.
export DOTNET_HOST_PATH="$DOTNET_ROOT/dotnet"

# Create a fresh scratch directory for this run. The assignment is kept separate from
# `export` so that a mktemp/realpath failure is not hidden behind export's own exit
# status; without a usable directory every later step would write to the wrong place.
TestExecutionDirectory=$(realpath "$(mktemp -d "${TMPDIR:-/tmp}"/dotnetSdkTests.XXXXXXXX)") || exit 1
export TestExecutionDirectory
export DOTNET_CLI_HOME="$TestExecutionDirectory/.dotnet"

# Seed the scratch directory with the payload's test execution files. Both paths are
# quoted so a payload or temp location containing spaces is not split into several words.
cp -a "$HELIX_CORRELATION_PAYLOAD/t/TestExecutionDirectoryFiles/." "$TestExecutionDirectory/"
Expand All @@ -22,15 +28,17 @@ dotnet new --debug:ephemeral-hive

# Show the configured feeds, then register the local Testpackages folder as a source.
# Paths are quoted so a scratch directory containing spaces stays a single argument.
dotnet nuget list source --configfile "$TestExecutionDirectory/NuGet.config"
dotnet nuget add source "$TestExecutionDirectory/Testpackages" --configfile "$TestExecutionDirectory/NuGet.config"
#Remove feeds not needed for tests
dotnet nuget remove source dotnet6-transport --configfile $TestExecutionDirectory/NuGet.config
dotnet nuget remove source dotnet6-internal-transport --configfile $TestExecutionDirectory/NuGet.config
dotnet nuget remove source dotnet7-transport --configfile $TestExecutionDirectory/NuGet.config
dotnet nuget remove source dotnet7-internal-transport --configfile $TestExecutionDirectory/NuGet.config
dotnet nuget remove source richnav --configfile $TestExecutionDirectory/NuGet.config
dotnet nuget remove source vs-impl --configfile $TestExecutionDirectory/NuGet.config
dotnet nuget remove source dotnet-libraries-transport --configfile $TestExecutionDirectory/NuGet.config
dotnet nuget remove source dotnet-tools-transport --configfile $TestExecutionDirectory/NuGet.config
dotnet nuget remove source dotnet-libraries --configfile $TestExecutionDirectory/NuGet.config
dotnet nuget remove source dotnet-eng --configfile $TestExecutionDirectory/NuGet.config
# Remove feeds not needed for tests. `|| true` is deliberate: removing a source that
# doesn't exist (e.g. internal-transport feeds are only present in internal builds)
# returns non-zero, and that must not count as a failure of this script.
for feed in \
  dotnet6-transport \
  dotnet6-internal-transport \
  dotnet7-transport \
  dotnet7-internal-transport \
  richnav \
  vs-impl \
  dotnet-libraries-transport \
  dotnet-tools-transport \
  dotnet-libraries \
  dotnet-eng; do
  # Quoted so a scratch directory containing spaces stays a single argument.
  dotnet nuget remove source "$feed" --configfile "$TestExecutionDirectory/NuGet.config" || true
done
unset feed

# Print the remaining feeds so the final configuration is visible in the log.
dotnet nuget list source --configfile "$TestExecutionDirectory/NuGet.config"

37 changes: 27 additions & 10 deletions src/BlazorWasmSdk/Tasks/GZipCompress.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ public class GZipCompress : Task
[Required]
public string OutputDirectory { get; set; }

// Maximum number of attempts (the initial try plus retries) for transient file I/O errors (e.g., antivirus locks on CI machines).
private const int MaxRetries = 3;
private const int RetryDelayMs = 200;

public override bool Execute()
{
CompressedFiles = new ITaskItem[FilesToCompress.Length];
Expand Down Expand Up @@ -56,18 +60,31 @@ public override bool Execute()
Log.LogMessage(MessageImportance.Low, "Compressing '{0}' because file is newer than '{1}'.", inputFullPath, outputRelativePath);
}

try
// Retry on IOException to handle transient file locks from antivirus, file
// indexing, or parallel MSBuild nodes on CI machines (see dotnet/sdk#53424).
for (int attempt = 1; attempt <= MaxRetries; attempt++)
{
using var sourceStream = File.OpenRead(file.ItemSpec);
using var fileStream = File.Create(outputRelativePath);
using var stream = new GZipStream(fileStream, CompressionLevel.Optimal);
try
{
using var sourceStream = File.OpenRead(file.ItemSpec);
using var fileStream = File.Create(outputRelativePath);
using var stream = new GZipStream(fileStream, CompressionLevel.Optimal);

sourceStream.CopyTo(stream);
}
catch (Exception e)
{
Log.LogErrorFromException(e);
return;
sourceStream.CopyTo(stream);
return; // Success
}
catch (IOException) when (attempt < MaxRetries)
{
Log.LogMessage(MessageImportance.Low,
"Retrying compression of '{0}' (attempt {1}/{2}) due to transient I/O error.",
file.ItemSpec, attempt, MaxRetries);
Thread.Sleep(RetryDelayMs * attempt);
}
catch (Exception e)
{
Log.LogErrorFromException(e);
return;
}
}
});

Expand Down
15 changes: 14 additions & 1 deletion src/Dotnet.Watch/Watch/Aspire/AspireServiceFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,16 @@ public async ValueTask DisposeAsync()
_isDisposed = true;

// wait for all in-flight process initialization to complete:
// If no session initialization is in-flight (_pendingSessionInitializationCount == 0),
// the semaphore will never be released by StartProjectAsync's finally block.
// Release it here to prevent a deadlock. Protect against the race where
// StartProjectAsync's finally block releases concurrently.
if (Volatile.Read(ref _pendingSessionInitializationCount) == 0)
{
try { _postDisposalSessionInitializationCompleted.Release(); }
catch (SemaphoreFullException) { }
}

await _postDisposalSessionInitializationCompleted.WaitAsync(CancellationToken.None);

// terminate all active sessions:
Expand Down Expand Up @@ -174,7 +184,10 @@ public async ValueTask StartProjectAsync(string dcpId, string sessionId, Project
{
if (Interlocked.Decrement(ref _pendingSessionInitializationCount) == 0 && _isDisposed)
{
_postDisposalSessionInitializationCompleted.Release();
// Guard against double-release: DisposeAsync may have already released
// the semaphore if it observed count==0 before we decremented.
try { _postDisposalSessionInitializationCompleted.Release(); }
catch (SemaphoreFullException) { }
}
}

Expand Down
38 changes: 34 additions & 4 deletions test/Microsoft.DotNet.HotReload.Test.Utilities/AwaitableProcess.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,19 @@ namespace Microsoft.DotNet.Watch.UnitTests
{
internal sealed class AwaitableProcess : IAsyncDisposable
{
// cancel just before we hit timeout used on CI (XUnitWorkItemTimeout value in sdk\test\UnitTests.proj)
// Maximum time to wait for a single line of output from the process.
// On CI (Helix), cap at 5 minutes. The HELIX_WORK_ITEM_TIMEOUT is the total budget
// for ALL tests in the work item (~2h), which is far too long for a single
// wait-for-output operation. If a process produces no output for 5 minutes,
// it's deadlocked (e.g., dotnet-watch shutdown race in AspireServiceFactory).
// Capping here turns a 2-hour partition-blocking hang into a 5-minute clean failure.
private static readonly TimeSpan s_maxPerOperationTimeout = TimeSpan.FromMinutes(5);

private static readonly TimeSpan s_timeout = Environment.GetEnvironmentVariable("HELIX_WORK_ITEM_TIMEOUT") is { } value
? TimeSpan.Parse(value).Subtract(TimeSpan.FromSeconds(10)) : TimeSpan.FromMinutes(10);
? Min(TimeSpan.Parse(value).Subtract(TimeSpan.FromSeconds(10)), s_maxPerOperationTimeout)
: TimeSpan.FromMinutes(10);

private static TimeSpan Min(TimeSpan a, TimeSpan b) => a < b ? a : b;

private readonly List<string> _lines = [];

Expand Down Expand Up @@ -226,6 +236,17 @@ public async ValueTask DisposeAsync()
{
}

// Close stdin before killing. This unblocks PhysicalConsole.ListenToStandardInputAsync()
// which reads from stdin with CancellationToken.None and no timeout.
// Without this, the stdin reader can keep the process alive after Kill() on some platforms.
try
{
Process.StandardInput.Close();
}
catch
{
}

try
{
Process.Kill(entireProcessTree: true);
Expand All @@ -234,8 +255,17 @@ public async ValueTask DisposeAsync()
{
}

// ensure process has exited
await _processExitAwaiter;
// Wait for process exit with a timeout to prevent hanging the test if Kill() fails.
// The WaitForProcessExitAsync loop checks HasExited every 1 second, so 30s is generous.
using var exitTimeout = new CancellationTokenSource(TimeSpan.FromSeconds(30));
try
{
await _processExitAwaiter.WaitAsync(exitTimeout.Token);
}
catch (OperationCanceledException)
{
Logger.Log($"Process {Id} did not exit within 30 seconds after Kill()");
}

Process.Dispose();

Expand Down
14 changes: 9 additions & 5 deletions test/Microsoft.DotNet.HotReload.Test.Utilities/WatchableApp.cs
Original file line number Diff line number Diff line change
Expand Up @@ -204,12 +204,16 @@ public ProcessStartInfo GetProcessStartInfo(string workingDirectory, string test
info.Environment.Add("Microsoft_CodeAnalysis_EditAndContinue_LogDir", testOutputPath);
info.Environment.Add("DOTNET_CLI_CONTEXT_VERBOSE", "trace");

// suppress all timeouts:
info.Environment.Add("DCP_IDE_REQUEST_TIMEOUT_SECONDS", "100000");
info.Environment.Add("DCP_IDE_NOTIFICATION_TIMEOUT_SECONDS", "100000");
info.Environment.Add("DCP_IDE_NOTIFICATION_KEEPALIVE_SECONDS", "100000");
// Use generous but bounded timeouts for DCP operations in CI.
// Previous values of 100,000 seconds (~27 hours) effectively disabled timeouts,
// causing tests to hang for the full Helix work item duration (~2 hours) when
// a DCP operation deadlocked. 300 seconds (5 minutes) per operation is generous
// for slow CI machines while ensuring natural failure recovery.
info.Environment.Add("DCP_IDE_REQUEST_TIMEOUT_SECONDS", "300");
info.Environment.Add("DCP_IDE_NOTIFICATION_TIMEOUT_SECONDS", "300");
info.Environment.Add("DCP_IDE_NOTIFICATION_KEEPALIVE_SECONDS", "300");
info.Environment.Add("ASPIRE_ALLOW_UNSECURED_TRANSPORT", "1");
info.Environment.Add("ASPIRE_WATCH_PIPE_CONNECTION_TIMEOUT_SECONDS", "100000");
info.Environment.Add("ASPIRE_WATCH_PIPE_CONNECTION_TIMEOUT_SECONDS", "300");

// override defaults:
foreach (var (name, value) in EnvironmentVariables)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ public async Task Dispatcher_ProcessSimultaneousConnections_HitsKeepAliveTimeout
return connectionTask;
}

readySource.SetResult(true);
readySource.TrySetResult(true);
return new TaskCompletionSource<Connection>().Task;
});

Expand All @@ -382,11 +382,18 @@ public async Task Dispatcher_ProcessSimultaneousConnections_HitsKeepAliveTimeout
}
};
var keepAlive = TimeSpan.FromSeconds(1);
var dispatcherTask = Task.Run(() =>

// Use Task.Factory.StartNew with LongRunning to run the dispatcher on a dedicated
// OS thread instead of a thread pool thread. The dispatcher's Run() method uses
// blocking Task.WaitAny() which permanently blocks its thread. On Helix CI agents
// running many tests in parallel, blocking a thread pool thread contributes to pool
// starvation, which prevents Task.Delay timer callbacks from firing, causing the
// keep-alive timeout to never complete and the test to hang indefinitely.
var dispatcherTask = Task.Factory.StartNew(() =>
{
var dispatcher = new DefaultRequestDispatcher(connectionHost.Object, compilerHost, CancellationToken.None, eventBus, keepAlive);
dispatcher.Run();
});
}, CancellationToken.None, TaskCreationOptions.LongRunning, TaskScheduler.Default);

// Wait for all connections to be created.
await readySource.Task;
Expand All @@ -402,7 +409,10 @@ public async Task Dispatcher_ProcessSimultaneousConnections_HitsKeepAliveTimeout

// Act
// Now dispatcher should be in an idle state with no active connections.
await dispatcherTask;
// Use WaitAsync as a safety net: if the keep-alive timeout still can't fire
// (e.g. extreme thread pool starvation), fail the test after 60s instead of
// hanging for 60+ minutes and blocking the entire CI job.
await dispatcherTask.WaitAsync(TimeSpan.FromSeconds(60));

// Assert
Assert.False(eventBus.HasDetectedBadConnection);
Expand Down
17 changes: 17 additions & 0 deletions test/TestAssets/Directory.Build.targets
Original file line number Diff line number Diff line change
@@ -1,4 +1,21 @@
<!-- Prevent test asset projects from picking up the repo's root Directory.Build.targets. -->
<Project>

<!-- For packable Exe projects (DotNetCliToolReference tools targeting netcoreapp2.2),
include the auto-generated runtimeconfig.json in the NuGet package so the dotnet
host can find it adjacent to the DLL. This enables RollForward=LatestMajor to
work correctly, allowing tools to run on machines that only have .NET 6.0+
installed (common on Helix CI agents that lack .NET Core 2.2). Without this,
tools fail with FrameworkMissingFailure (exit code 0x80008096) because the host
cannot roll forward from 2.2.0 without a runtimeconfig.json specifying the
rollForward policy. -->
<!-- The target is hooked after Build and is skipped unless the project is an Exe,
is packable, and has not switched off runtimeconfig generation.
NOTE(review): because of AfterTargets="Build" the item is only added in a project
instance where Build actually runs. NuGet's documented way to contribute
BuildOutputInPackage items is a target listed in $(TargetsForTfmSpecificBuildOutput);
confirm the item added here is still visible when pack collects the build output. -->
<Target Name="IncludeRuntimeConfigInPackage"
AfterTargets="Build"
Condition="'$(OutputType)' == 'Exe' AND '$(IsPackable)' == 'true' AND '$(GenerateRuntimeConfigurationFiles)' != 'false'">
<ItemGroup>
<!-- Add the file only when the path property is set and the file exists on disk. -->
<BuildOutputInPackage Include="$(ProjectRuntimeConfigFilePath)"
Condition="'$(ProjectRuntimeConfigFilePath)' != '' AND Exists('$(ProjectRuntimeConfigFilePath)')" />
</ItemGroup>
</Target>

</Project>
Loading