From dc57c96831366786f0434c2b3aa4e9198ad860f6 Mon Sep 17 00:00:00 2001 From: Tingluo Huang Date: Thu, 12 Mar 2026 10:46:50 -0400 Subject: [PATCH 1/2] Report infra_error for action download failures. --- src/Runner.Worker/ActionManager.cs | 149 ++++++++++++++------------ src/Sdk/DTWebApi/WebApi/Exceptions.cs | 19 ++++ 2 files changed, 102 insertions(+), 66 deletions(-) diff --git a/src/Runner.Worker/ActionManager.cs b/src/Runner.Worker/ActionManager.cs index 38c2ab8b320..6c066a150b4 100644 --- a/src/Runner.Worker/ActionManager.cs +++ b/src/Runner.Worker/ActionManager.cs @@ -115,6 +115,14 @@ public sealed class ActionManager : RunnerService, IActionManager executionContext.Result = TaskResult.Failed; throw; } + catch (FailedToDownloadActionException ex) + { + // Log the error and fail the PrepareActionsAsync Initialization. + Trace.Error($"Caught exception from PrepareActionsAsync Initialization: {ex}"); + executionContext.InfrastructureError(ex.InnerException?.Message ?? ex.Message, category: "error_download_action"); + executionContext.Result = TaskResult.Failed; + throw; + } catch (InvalidActionArchiveException ex) { // Log the error and fail the PrepareActionsAsync Initialization. @@ -1157,93 +1165,102 @@ private async Task DownloadRepositoryArchive(IExecutionContext executionContext, // Allow up to 20 * 60s for any action to be downloaded from github graph. 
int timeoutSeconds = 20 * 60; - while (retryCount < 3) + try { - string requestId = string.Empty; - using (var actionDownloadTimeout = new CancellationTokenSource(TimeSpan.FromSeconds(timeoutSeconds))) - using (var actionDownloadCancellation = CancellationTokenSource.CreateLinkedTokenSource(actionDownloadTimeout.Token, executionContext.CancellationToken)) + while (retryCount < 3) { - try + string requestId = string.Empty; + using (var actionDownloadTimeout = new CancellationTokenSource(TimeSpan.FromSeconds(timeoutSeconds))) + using (var actionDownloadCancellation = CancellationTokenSource.CreateLinkedTokenSource(actionDownloadTimeout.Token, executionContext.CancellationToken)) { - //open zip stream in async mode - using (FileStream fs = new(archiveFile, FileMode.Create, FileAccess.Write, FileShare.None, bufferSize: _defaultFileStreamBufferSize, useAsync: true)) - using (var httpClientHandler = HostContext.CreateHttpClientHandler()) - using (var httpClient = new HttpClient(httpClientHandler)) + try { - httpClient.DefaultRequestHeaders.Authorization = CreateAuthHeader(downloadAuthToken); - - httpClient.DefaultRequestHeaders.UserAgent.AddRange(HostContext.UserAgents); - using (var response = await httpClient.GetAsync(downloadUrl)) + //open zip stream in async mode + using (FileStream fs = new(archiveFile, FileMode.Create, FileAccess.Write, FileShare.None, bufferSize: _defaultFileStreamBufferSize, useAsync: true)) + using (var httpClientHandler = HostContext.CreateHttpClientHandler()) + using (var httpClient = new HttpClient(httpClientHandler)) { - requestId = UrlUtil.GetGitHubRequestId(response.Headers); - if (!string.IsNullOrEmpty(requestId)) - { - Trace.Info($"Request URL: {downloadUrl} X-GitHub-Request-Id: {requestId} Http Status: {response.StatusCode}"); - } + httpClient.DefaultRequestHeaders.Authorization = CreateAuthHeader(downloadAuthToken); - if (response.IsSuccessStatusCode) + httpClient.DefaultRequestHeaders.UserAgent.AddRange(HostContext.UserAgents); + 
using (var response = await httpClient.GetAsync(downloadUrl)) { - using (var result = await response.Content.ReadAsStreamAsync()) + requestId = UrlUtil.GetGitHubRequestId(response.Headers); + if (!string.IsNullOrEmpty(requestId)) { - await result.CopyToAsync(fs, _defaultCopyBufferSize, actionDownloadCancellation.Token); - await fs.FlushAsync(actionDownloadCancellation.Token); + Trace.Info($"Request URL: {downloadUrl} X-GitHub-Request-Id: {requestId} Http Status: {response.StatusCode}"); + } - // download succeed, break out the retry loop. - break; + if (response.IsSuccessStatusCode) + { + using (var result = await response.Content.ReadAsStreamAsync()) + { + await result.CopyToAsync(fs, _defaultCopyBufferSize, actionDownloadCancellation.Token); + await fs.FlushAsync(actionDownloadCancellation.Token); + + // download succeed, break out the retry loop. + break; + } + } + else if (response.StatusCode == HttpStatusCode.NotFound) + { + // It doesn't make sense to retry in this case, so just stop + throw new ActionNotFoundException(new Uri(downloadUrl), requestId); + } + else + { + // Something else bad happened, let's go to our retry logic + response.EnsureSuccessStatusCode(); } - } - else if (response.StatusCode == HttpStatusCode.NotFound) - { - // It doesn't make sense to retry in this case, so just stop - throw new ActionNotFoundException(new Uri(downloadUrl), requestId); - } - else - { - // Something else bad happened, let's go to our retry logic - response.EnsureSuccessStatusCode(); } } } - } - catch (OperationCanceledException) when (executionContext.CancellationToken.IsCancellationRequested) - { - Trace.Info("Action download has been cancelled."); - throw; - } - catch (OperationCanceledException ex) when (!executionContext.CancellationToken.IsCancellationRequested && retryCount >= 2) - { - Trace.Info($"Action download final retry timeout after {timeoutSeconds} seconds."); - throw new TimeoutException($"Action '{downloadUrl}' download has timed out. 
Error: {ex.Message} {requestId}"); - } - catch (ActionNotFoundException) - { - Trace.Info($"The action at '{downloadUrl}' does not exist"); - throw; - } - catch (Exception ex) when (retryCount < 2) - { - retryCount++; - Trace.Error($"Fail to download archive '{downloadUrl}' -- Attempt: {retryCount}"); - Trace.Error(ex); - if (actionDownloadTimeout.Token.IsCancellationRequested) + catch (OperationCanceledException) when (executionContext.CancellationToken.IsCancellationRequested) { - // action download didn't finish within timeout - executionContext.Warning($"Action '{downloadUrl}' didn't finish download within {timeoutSeconds} seconds. {requestId}"); + Trace.Info("Action download has been cancelled."); + throw; } - else + catch (OperationCanceledException ex) when (!executionContext.CancellationToken.IsCancellationRequested && retryCount >= 2) + { + Trace.Info($"Action download final retry timeout after {timeoutSeconds} seconds."); + throw new TimeoutException($"Action '{downloadUrl}' download has timed out. Error: {ex.Message} {requestId}"); + } + catch (ActionNotFoundException) { - executionContext.Warning($"Failed to download action '{downloadUrl}'. Error: {ex.Message} {requestId}"); + Trace.Info($"The action at '{downloadUrl}' does not exist"); + throw; + } + catch (Exception ex) when (retryCount < 2) + { + retryCount++; + Trace.Error($"Fail to download archive '{downloadUrl}' -- Attempt: {retryCount}"); + Trace.Error(ex); + if (actionDownloadTimeout.Token.IsCancellationRequested) + { + // action download didn't finish within timeout + executionContext.Warning($"Action '{downloadUrl}' didn't finish download within {timeoutSeconds} seconds. {requestId}"); + } + else + { + executionContext.Warning($"Failed to download action '{downloadUrl}'. 
Error: {ex.Message} {requestId}"); + } } } - } - if (String.IsNullOrEmpty(Environment.GetEnvironmentVariable("_GITHUB_ACTION_DOWNLOAD_NO_BACKOFF"))) - { - var backOff = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromSeconds(10), TimeSpan.FromSeconds(30)); - executionContext.Warning($"Back off {backOff.TotalSeconds} seconds before retry."); - await Task.Delay(backOff); + if (String.IsNullOrEmpty(Environment.GetEnvironmentVariable("_GITHUB_ACTION_DOWNLOAD_NO_BACKOFF"))) + { + var backOff = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromSeconds(10), TimeSpan.FromSeconds(30)); + executionContext.Warning($"Back off {backOff.TotalSeconds} seconds before retry."); + await Task.Delay(backOff); + } } } + catch (Exception ex) when (!(ex is OperationCanceledException) && !executionContext.CancellationToken.IsCancellationRequested) + { + Trace.Error($"Failed to download archive '{downloadUrl}' after {retryCount + 1} attempts."); + Trace.Error(ex); + throw new FailedToDownloadActionException($"Failed to download archive '{downloadUrl}' after {retryCount + 1} attempts.", ex); + } ArgUtil.NotNullOrEmpty(archiveFile, nameof(archiveFile)); executionContext.Debug($"Download '{downloadUrl}' to '{archiveFile}'"); diff --git a/src/Sdk/DTWebApi/WebApi/Exceptions.cs b/src/Sdk/DTWebApi/WebApi/Exceptions.cs index ee47f137063..90b93e04ec3 100644 --- a/src/Sdk/DTWebApi/WebApi/Exceptions.cs +++ b/src/Sdk/DTWebApi/WebApi/Exceptions.cs @@ -2556,6 +2556,25 @@ private FailedToResolveActionDownloadInfoException(SerializationInfo info, Strea } } + [Serializable] + public sealed class FailedToDownloadActionException : DistributedTaskException + { + public FailedToDownloadActionException(String message) + : base(message) + { + } + + public FailedToDownloadActionException(String message, Exception innerException) + : base(message, innerException) + { + } + + private FailedToDownloadActionException(SerializationInfo info, StreamingContext context) + : base(info, context) + { + } + } + 
[Serializable] public sealed class InvalidActionArchiveException : DistributedTaskException { From 1ea77d68a38480def5bffd92c60d8b10e9dc78cc Mon Sep 17 00:00:00 2001 From: Tingluo Huang Date: Thu, 12 Mar 2026 12:51:55 -0400 Subject: [PATCH 2/2] . --- src/Test/L0/Worker/ActionManagerL0.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Test/L0/Worker/ActionManagerL0.cs b/src/Test/L0/Worker/ActionManagerL0.cs index 5aa1f2dbc20..c16dd55ae04 100644 --- a/src/Test/L0/Worker/ActionManagerL0.cs +++ b/src/Test/L0/Worker/ActionManagerL0.cs @@ -198,7 +198,8 @@ public async void PrepareActions_DownloadUnknownActionFromGraph_OnPremises_Legac Func<Task> action = async () => await _actionManager.PrepareActionsAsync(_ec.Object, actions); //Assert - await Assert.ThrowsAsync<ActionNotFoundException>(action); + var ex = await Assert.ThrowsAsync<FailedToDownloadActionException>(action); + Assert.IsType<ActionNotFoundException>(ex.InnerException); var watermarkFile = Path.Combine(_hc.GetDirectory(WellKnownDirectory.Actions), ActionName, "main.completed"); Assert.False(File.Exists(watermarkFile));