Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
b3cb470
support background steps
lokesh755 May 12, 2026
1002fd1
fix l0 tests
lokesh755 May 12, 2026
f74e5c6
resolve PR comments
lokesh755 May 14, 2026
4d32df6
only run main stage in background
lokesh755 May 14, 2026
9937faf
implicit wait-all only for uncovered steps
lokesh755 May 14, 2026
3520669
increase delay to avoid flakiness
lokesh755 May 14, 2026
ce32f15
Configurable background steps concurrency cap
lokesh755 May 14, 2026
000b3bd
fix l0 test
lokesh755 May 14, 2026
2fbefec
resolve pr comments
lokesh755 May 15, 2026
804a893
resolve pr comments
lokesh755 May 26, 2026
5913ea5
resolve pr comments
lokesh755 May 26, 2026
15762cf
Merge branch 'main' into lokesh755-background-steps
lokesh755 May 26, 2026
59bece8
unblock foreground steps from bg semaphore
lokesh755 May 26, 2026
4944e63
use timelinerecord to store the background step metadata
lokesh755 May 27, 2026
c787934
move deferred outputs outside setoutput method
lokesh755 May 27, 2026
826a427
prefer max bg steps value from the job context
lokesh755 May 27, 2026
5ccf8d3
merged wait and wait-all implementations
lokesh755 May 27, 2026
e7c7f88
refactored cancellation to make it cleaner
lokesh755 May 27, 2026
d2b95ea
added debug statements in deferred code paths
lokesh755 May 27, 2026
1736185
use JobExtensionRunner for executing background wait,cancel steps
lokesh755 Jun 2, 2026
4f57f13
add parallel group id
lokesh755 Jun 2, 2026
428f25d
Allow an array of step IDs for the cancel step
lokesh755 Jun 2, 2026
c54a06e
resolve tests
lokesh755 Jun 2, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/Runner.Common/JobServerQueue.cs
Original file line number Diff line number Diff line change
Expand Up @@ -837,6 +837,16 @@ private List<TimelineRecord> MergeTimelineRecords(List<TimelineRecord> timelineR
timelineRecord.Variables[variable.Key] = variable.Value.Clone();
}
}

// Merge background step metadata
if (rec.IsBackground)
{
timelineRecord.IsBackground = rec.IsBackground;
}
timelineRecord.StepType = rec.StepType ?? timelineRecord.StepType;
timelineRecord.WaitStepIds = rec.WaitStepIds ?? timelineRecord.WaitStepIds;
timelineRecord.CancelStepId = rec.CancelStepId ?? timelineRecord.CancelStepId;
timelineRecord.ParallelGroupId = rec.ParallelGroupId ?? timelineRecord.ParallelGroupId;
}
else
{
Expand Down
6 changes: 3 additions & 3 deletions src/Runner.Worker/ActionCommandManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ private void ValidateStopToken(IExecutionContext context, string stopToken)
Message = $"Invoked ::stopCommand:: with token: [{stopToken}]",
Type = JobTelemetryType.ActionCommand
};
context.Global.JobTelemetry.Add(telemetry);
lock (context.Global.CollectionLock) { context.Global.JobTelemetry.Add(telemetry); }
}

if (isTokenInvalid && !allowUnsecureStopCommandTokens)
Expand Down Expand Up @@ -326,7 +326,7 @@ public void ProcessCommand(IExecutionContext context, string line, ActionCommand
Type = JobTelemetryType.ActionCommand,
Message = "DeprecatedCommand: set-output"
};
context.Global.JobTelemetry.Add(telemetry);
lock (context.Global.CollectionLock) { context.Global.JobTelemetry.Add(telemetry); }
}

if (!command.Properties.TryGetValue(SetOutputCommandProperties.Name, out string outputName) || string.IsNullOrEmpty(outputName))
Expand Down Expand Up @@ -372,7 +372,7 @@ public void ProcessCommand(IExecutionContext context, string line, ActionCommand
Type = JobTelemetryType.ActionCommand,
Message = "DeprecatedCommand: save-state"
};
context.Global.JobTelemetry.Add(telemetry);
lock (context.Global.CollectionLock) { context.Global.JobTelemetry.Add(telemetry); }
}

if (!command.Properties.TryGetValue(SaveStateCommandProperties.Name, out string stateName) || string.IsNullOrEmpty(stateName))
Expand Down
22 changes: 14 additions & 8 deletions src/Runner.Worker/ActionManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1068,11 +1068,14 @@ private async Task DownloadRepositoryActionAsync(IExecutionContext executionCont
}

executionContext.Debug($"Created symlink from cached directory '{cacheDirectory}' to '{destDirectory}'");
executionContext.Global.JobTelemetry.Add(new JobTelemetry()
lock (executionContext.Global.CollectionLock)
{
Type = JobTelemetryType.General,
Message = $"Action archive cache usage: {downloadInfo.ResolvedNameWithOwner}@{downloadInfo.ResolvedSha} use cache {useActionArchiveCache} has cache {hasActionArchiveCache} via symlink"
});
executionContext.Global.JobTelemetry.Add(new JobTelemetry()
{
Type = JobTelemetryType.General,
Message = $"Action archive cache usage: {downloadInfo.ResolvedNameWithOwner}@{downloadInfo.ResolvedSha} use cache {useActionArchiveCache} has cache {hasActionArchiveCache} via symlink"
});
}

Trace.Info("Finished getting action repository.");
return;
Expand Down Expand Up @@ -1108,11 +1111,14 @@ private async Task DownloadRepositoryActionAsync(IExecutionContext executionCont
}
}

executionContext.Global.JobTelemetry.Add(new JobTelemetry()
lock (executionContext.Global.CollectionLock)
{
Type = JobTelemetryType.General,
Message = $"Action archive cache usage: {downloadInfo.ResolvedNameWithOwner}@{downloadInfo.ResolvedSha} use cache {useActionArchiveCache} has cache {hasActionArchiveCache}"
});
executionContext.Global.JobTelemetry.Add(new JobTelemetry()
{
Type = JobTelemetryType.General,
Message = $"Action archive cache usage: {downloadInfo.ResolvedNameWithOwner}@{downloadInfo.ResolvedSha} use cache {useActionArchiveCache} has cache {hasActionArchiveCache}"
});
}

if (!useActionArchiveCache)
{
Expand Down
4 changes: 2 additions & 2 deletions src/Runner.Worker/ActionManifestManagerWrapper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ private void RecordMismatch(IExecutionContext context, string methodName)
{
context.Global.HasActionManifestMismatch = true;
var telemetry = new JobTelemetry { Type = JobTelemetryType.General, Message = $"ActionManifestMismatch: {methodName}" };
context.Global.JobTelemetry.Add(telemetry);
lock (context.Global.CollectionLock) { context.Global.JobTelemetry.Add(telemetry); }
}
}

Expand All @@ -456,7 +456,7 @@ private void RecordComparisonError(IExecutionContext context, string errorDetail
{
context.Global.HasActionManifestMismatch = true;
var telemetry = new JobTelemetry { Type = JobTelemetryType.General, Message = $"ActionManifestComparisonError: {errorDetails}" };
context.Global.JobTelemetry.Add(telemetry);
lock (context.Global.CollectionLock) { context.Global.JobTelemetry.Add(telemetry); }
}
}

Expand Down
26 changes: 26 additions & 0 deletions src/Runner.Worker/BackgroundStepContext.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
using System;
using System.Threading;
using System.Threading.Tasks;

namespace GitHub.Runner.Worker
{
    /// <summary>
    /// Bookkeeping entry that tracks the execution state of a single background step:
    /// the step itself, the task running it, its cancellation source and its result.
    /// </summary>
    internal sealed class BackgroundStepContext
    {
        /// <summary>
        /// Creates a tracking entry for <paramref name="step"/> under the given identifier.
        /// </summary>
        /// <param name="stepId">Identifier stored in <see cref="StepId"/>.</param>
        /// <param name="step">Step stored in <see cref="Step"/>; it is dereferenced by <see cref="ExternalId"/>.</param>
        public BackgroundStepContext(string stepId, IStep step)
        {
            Step = step;
            StepId = stepId;
        }

        /// <summary>Identifier supplied at construction; read-only afterwards.</summary>
        public string StepId { get; }

        /// <summary>The step being tracked; read-only after construction.</summary>
        public IStep Step { get; }

        /// <summary>Task associated with the step's execution; null until assigned by the owner.</summary>
        public Task ExecutionTask { get; set; }

        /// <summary>Cancellation source associated with this step; assigned by the owner.</summary>
        public CancellationTokenSource Cts { get; set; }

        /// <summary>Result recorded for the step, or null when none has been recorded.</summary>
        public GitHub.DistributedTask.WebApi.TaskResult? Result { get; set; }

        /// <summary>
        /// True only when <see cref="ExecutionTask"/> has been assigned and reports completion;
        /// false while no task is assigned.
        /// </summary>
        public bool IsCompleted
        {
            get
            {
                // Read the property once so the null check and the completion check see the same task.
                var runningTask = ExecutionTask;
                return runningTask != null && runningTask.IsCompleted;
            }
        }

        /// <summary>
        /// The Id of the step's execution context, formatted with the "N" format specifier.
        /// </summary>
        public string ExternalId
        {
            get { return Step.ExecutionContext.Id.ToString("N"); }
        }
    }
}
31 changes: 31 additions & 0 deletions src/Runner.Worker/ControlFlowStepData.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
using System;

namespace GitHub.Runner.Worker
{
    /// <summary>
    /// The kinds of control-flow step: wait, wait-all and cancel.
    /// </summary>
    public enum ControlFlowType
    {
        // Explicit values match the implicit numbering (declaration order).
        Wait = 0,
        WaitAll = 1,
        Cancel = 2,
    }

    /// <summary>
    /// Plain data holder describing a control-flow step (wait, wait-all, cancel).
    /// It is used with JobExtensionRunner rather than through dedicated IStep implementations.
    /// </summary>
    public sealed class ControlFlowStepData
    {
        /// <summary>Which kind of control-flow step this data describes.</summary>
        public ControlFlowType Type { get; set; }

        /// <summary>Identifier of the step (presumably the control-flow step itself; confirm against callers).</summary>
        public Guid StepId { get; set; }

        /// <summary>Name of the step.</summary>
        public string StepName { get; set; }

        /// <summary>Wait: IDs of the background steps to wait for.</summary>
        public string[] WaitStepIds { get; set; }

        /// <summary>Cancel: IDs of the background steps to cancel.</summary>
        public string[] CancelStepIds { get; set; }

        /// <summary>Parallel group ID used for grouping steps in the UI.</summary>
        public string ParallelGroupId { get; set; }
    }
}
91 changes: 87 additions & 4 deletions src/Runner.Worker/ExecutionContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,12 @@ public interface IExecutionContext : IRunnerService
IExecutionContext CreateChild(Guid recordId, string displayName, string refName, string scopeName, string contextName, ActionRunStage stage, Dictionary<string, string> intraActionState = null, int? recordOrder = null, IPagingLogger logger = null, bool isEmbedded = false, List<Issue> embeddedIssueCollector = null, CancellationTokenSource cancellationTokenSource = null, Guid embeddedId = default(Guid), string siblingScopeName = null, TimeSpan? timeout = null);
IExecutionContext CreateEmbeddedChild(string scopeName, string contextName, Guid embeddedId, ActionRunStage stage, Dictionary<string, string> intraActionState = null, string siblingScopeName = null);

// Background step deferral properties
Dictionary<string, string> DeferredOutputs { get; set; }
Dictionary<string, string> DeferredEnvironmentVariables { get; set; }
List<string> DeferredPrependPath { get; set; }
bool DeferOutcomeConclusion { get; set; }

// logging
long Write(string tag, string message);
void QueueAttachFile(string type, string name, string filePath);
Expand All @@ -100,11 +106,18 @@ public interface IExecutionContext : IRunnerService
void SetGitHubContext(string name, string value);
void SetOutput(string name, string value, out string reference);
void SetTimeout(TimeSpan? timeout);

// Background step deferral flush methods
void FlushDeferredOutputs();
void FlushDeferredEnvironment();
void FlushDeferredOutcomeConclusion();

void AddIssue(Issue issue, ExecutionContextLogOptions logOptions);
void Progress(int percentage, string currentOperation = null);
void UpdateDetailTimelineRecord(TimelineRecord record);

void UpdateTimelineRecordDisplayName(string displayName);
void SetBackgroundStepMetadata(bool isBackground = false, string stepType = null, string[] waitStepIds = null, string cancelStepId = null, string parallelGroupId = null);

// matchers
void Add(OnMatcherChanged handler);
Expand Down Expand Up @@ -279,6 +292,12 @@ public JobContext JobContext

public List<string> StepEnvironmentOverrides { get; } = new List<string>();

// Background step deferral properties
public Dictionary<string, string> DeferredOutputs { get; set; }
public Dictionary<string, string> DeferredEnvironmentVariables { get; set; }
public List<string> DeferredPrependPath { get; set; }
public bool DeferOutcomeConclusion { get; set; }

public override void Initialize(IHostContext hostContext)
{
base.Initialize(hostContext);
Expand Down Expand Up @@ -511,6 +530,12 @@ public TaskResult Complete(TaskResult? result = null, string currentOperation =
Annotations = new List<Annotation>()
};

// Populate background step metadata from timeline record fields
stepResult.IsBackground = _record.IsBackground;
stepResult.StepType = _record.StepType;
stepResult.WaitStepIds = _record.WaitStepIds;
stepResult.CancelStepId = _record.CancelStepId;

_record.Issues?.ForEach(issue =>
{
var annotation = issue.ToAnnotation();
Expand All @@ -536,7 +561,7 @@ public TaskResult Complete(TaskResult? result = null, string currentOperation =
var annotation = issue.ToAnnotation();
if (annotation != null)
{
Global.JobAnnotations.Add(annotation.Value);
lock (Global.CollectionLock) { Global.JobAnnotations.Add(annotation.Value); }
if (annotation.Value.IsInfrastructureIssue && string.IsNullOrEmpty(Global.InfrastructureFailureCategory))
{
Global.InfrastructureFailureCategory = issue.Category;
Expand All @@ -554,11 +579,22 @@ public TaskResult Complete(TaskResult? result = null, string currentOperation =

_logger.End();

UpdateGlobalStepsContext();
if (!DeferOutcomeConclusion)
Comment thread
lokesh755 marked this conversation as resolved.
{
UpdateGlobalStepsContext();
}

return Result.Value;
}

/// <summary>
/// Calls <see cref="UpdateGlobalStepsContext"/> when <see cref="DeferOutcomeConclusion"/> is set;
/// otherwise does nothing.
/// </summary>
public void FlushDeferredOutcomeConclusion()
{
    // Nothing was deferred for this context, so there is nothing to publish here.
    if (!DeferOutcomeConclusion)
    {
        return;
    }

    UpdateGlobalStepsContext();
}

public void UpdateGlobalStepsContext()
{
// Skip if generated context name. Generated context names start with "__". After 3.2 the server will never send an empty context name.
Expand Down Expand Up @@ -634,6 +670,40 @@ public void SetOutput(string name, string value, out string reference)
Global.StepsContext.SetOutput(ScopeName, ContextName, name, value, out reference);
}

/// <summary>
/// Writes every buffered entry in <see cref="DeferredOutputs"/> to the global steps context
/// under this context's scope name and context name. Does nothing when the buffer is null or empty.
/// The buffer itself is left untouched (it is not cleared here).
/// </summary>
public void FlushDeferredOutputs()
{
    if (DeferredOutputs != null && DeferredOutputs.Count != 0)
    {
        foreach (var bufferedOutput in DeferredOutputs)
        {
            // The output reference produced by SetOutput is not needed when flushing.
            Global.StepsContext.SetOutput(ScopeName, ContextName, bufferedOutput.Key, bufferedOutput.Value, out _);
        }
    }
}

/// <summary>
/// Applies buffered environment changes to the global state.
/// Each entry in <see cref="DeferredEnvironmentVariables"/> is written to Global.EnvironmentVariables
/// and to the env context; each entry in <see cref="DeferredPrependPath"/> is moved to the end of
/// Global.PrependPath. Null buffers are skipped, and neither buffer is cleared here.
/// </summary>
public void FlushDeferredEnvironment()
{
    var bufferedVariables = DeferredEnvironmentVariables;
    if (bufferedVariables != null)
    {
        foreach (var variable in bufferedVariables)
        {
            Global.EnvironmentVariables[variable.Key] = variable.Value;
            SetEnvContext(variable.Key, variable.Value);
        }
    }

    var bufferedPaths = DeferredPrependPath;
    if (bufferedPaths == null)
    {
        return;
    }

    foreach (var path in bufferedPaths)
    {
        // Drop any existing occurrence first so the path ends up exactly once, at the end of the list.
        Global.PrependPath.RemoveAll(existing => string.Equals(existing, path, StringComparison.CurrentCulture));
        Global.PrependPath.Add(path);
    }
}

public void SetTimeout(TimeSpan? timeout)
{
if (timeout != null)
Expand Down Expand Up @@ -807,6 +877,16 @@ public void UpdateTimelineRecordDisplayName(string displayName)
_jobServerQueue.QueueTimelineRecordUpdate(_mainTimelineId, _record);
}

/// <summary>
/// Stores background-step metadata on this context's timeline record and queues the record
/// for a timeline update.
/// </summary>
/// <remarks>
/// All five fields are overwritten unconditionally: any argument left at its default resets the
/// corresponding record field to false/null rather than preserving its previous value.
/// </remarks>
/// <param name="isBackground">Value written to the record's IsBackground field.</param>
/// <param name="stepType">Value written to the record's StepType field.</param>
/// <param name="waitStepIds">Value written to the record's WaitStepIds field.</param>
/// <param name="cancelStepId">Value written to the record's CancelStepId field.</param>
/// <param name="parallelGroupId">Value written to the record's ParallelGroupId field.</param>
public void SetBackgroundStepMetadata(bool isBackground = false, string stepType = null, string[] waitStepIds = null, string cancelStepId = null, string parallelGroupId = null)
{
_record.IsBackground = isBackground;
_record.StepType = stepType;
_record.WaitStepIds = waitStepIds;
_record.CancelStepId = cancelStepId;
_record.ParallelGroupId = parallelGroupId;
// Queue the update only after every field is set, so the queued record carries the full metadata.
_jobServerQueue.QueueTimelineRecordUpdate(_mainTimelineId, _record);
}

public void InitializeJob(Pipelines.AgentJobRequestMessage message, CancellationToken token)
{
// Validation
Expand Down Expand Up @@ -1191,7 +1271,7 @@ public void PublishStepTelemetry()
}

Trace.Info($"Publish step telemetry for current step {StringUtil.ConvertToJson(StepTelemetry)}.");
Global.StepsTelemetry.Add(StepTelemetry);
lock (Global.CollectionLock) { Global.StepsTelemetry.Add(StepTelemetry); }
_stepTelemetryPublished = true;
}
}
Expand Down Expand Up @@ -1330,7 +1410,10 @@ public void ApplyContinueOnError(TemplateToken continueOnErrorToken)
Trace.Info($"Updated step result (continue on error)");
}

UpdateGlobalStepsContext();
if (!DeferOutcomeConclusion)
{
UpdateGlobalStepsContext();
}
}

internal IPipelineTemplateEvaluator ToPipelineTemplateEvaluatorInternal(bool allowServiceContainerCommand, ObjectTemplating.ITraceWriter traceWriter = null)
Expand Down
38 changes: 33 additions & 5 deletions src/Runner.Worker/FileCommandManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,18 @@ public void ProcessCommand(IExecutionContext context, string filePath, Container
{
continue;
}
context.Global.PrependPath.RemoveAll(x => string.Equals(x, line, StringComparison.CurrentCulture));
context.Global.PrependPath.Add(line);
if (context.DeferredPrependPath != null)
{
// Background step: buffer path additions until wait/wait-all
context.DeferredPrependPath.RemoveAll(x => string.Equals(x, line, StringComparison.CurrentCulture));
context.DeferredPrependPath.Add(line);
context.Debug($"Deferred prepend path '{line}' for background step");
}
else
{
context.Global.PrependPath.RemoveAll(x => string.Equals(x, line, StringComparison.CurrentCulture));
context.Global.PrependPath.Add(line);
}
}
}
}
Expand Down Expand Up @@ -172,8 +182,17 @@ private static void SetEnvironmentVariable(
string name,
string value)
{
context.Global.EnvironmentVariables[name] = value;
context.SetEnvContext(name, value);
if (context.DeferredEnvironmentVariables != null)
{
// Background step: buffer env changes until wait/wait-all
context.DeferredEnvironmentVariables[name] = value;
Comment thread
lokesh755 marked this conversation as resolved.
context.Debug($"Deferred env '{name}' for background step");
}
else
{
context.Global.EnvironmentVariables[name] = value;
context.SetEnvContext(name, value);
}
context.Debug($"{name}='{value}'");
}

Expand Down Expand Up @@ -302,7 +321,16 @@ public void ProcessCommand(IExecutionContext context, string filePath, Container
var pairs = new EnvFileKeyValuePairs(context, filePath);
foreach (var pair in pairs)
{
context.SetOutput(pair.Key, pair.Value, out var reference);
if (context.DeferredOutputs != null)
{
// Background step: buffer outputs until wait/wait-all
context.DeferredOutputs[pair.Key] = pair.Value;
context.Debug($"Deferred output '{pair.Key}' for background step");
}
else
{
context.SetOutput(pair.Key, pair.Value, out var reference);
}
context.Debug($"Set output {pair.Key} = {pair.Value}");
}
}
Expand Down
3 changes: 3 additions & 0 deletions src/Runner.Worker/GlobalContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ namespace GitHub.Runner.Worker
{
public sealed class GlobalContext
{
// Lock for thread-safe access to shared collections during concurrent background step execution
public readonly object CollectionLock = new object();
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of using a lock, should we change everything to be thread-safe, e.g. by using ConcurrentBag/ConcurrentDictionary/ConcurrentQueue, etc.?
That way we won't have problems with any future code that forgets to take the lock.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, I looked into this. The main challenge is that List uses .AddRange() in a couple of places and there's no ConcurrentHashSet in .NET, so swapping types ends up touching ~20 call sites across 8 files. Feels too invasive for this PR, happy to do it as a follow-up.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can front-load the change in its own PR and update all the consumers.
I am pretty sure we will forget to take this lock in the future and cause bugs.


public ContainerInfo Container { get; set; }
public List<ServiceEndpoint> Endpoints { get; set; }
public IDictionary<String, String> EnvironmentVariables { get; set; }
Expand Down
Loading
Loading