diff --git a/apis/actions.github.com/v1alpha1/autoscalingrunnerset_types.go b/apis/actions.github.com/v1alpha1/autoscalingrunnerset_types.go index 90049d27bc..8cff49c8d4 100644 --- a/apis/actions.github.com/v1alpha1/autoscalingrunnerset_types.go +++ b/apis/actions.github.com/v1alpha1/autoscalingrunnerset_types.go @@ -315,7 +315,7 @@ type AutoscalingRunnerSetStatus struct { CurrentRunners int `json:"currentRunners"` // +optional - State string `json:"state"` + Phase AutoscalingRunnerSetPhase `json:"phase"` // EphemeralRunner counts separated by the stage ephemeral runners are in, taken from the EphemeralRunnerSet @@ -327,6 +327,30 @@ type AutoscalingRunnerSetStatus struct { FailedEphemeralRunners int `json:"failedEphemeralRunners"` } +type AutoscalingRunnerSetPhase string + +const ( + // AutoscalingRunnerSetPhasePending phase means that the listener is not + // yet started + AutoscalingRunnerSetPhasePending AutoscalingRunnerSetPhase = "Pending" + AutoscalingRunnerSetPhaseRunning AutoscalingRunnerSetPhase = "Running" + AutoscalingRunnerSetPhaseOutdated AutoscalingRunnerSetPhase = "Outdated" +) + +func (ars *AutoscalingRunnerSet) Hash() string { + type data struct { + Spec *AutoscalingRunnerSetSpec + Labels map[string]string + } + + d := &data{ + Spec: ars.Spec.DeepCopy(), + Labels: ars.Labels, + } + + return hash.ComputeTemplateHash(d) +} + func (ars *AutoscalingRunnerSet) ListenerSpecHash() string { arsSpec := ars.Spec.DeepCopy() spec := arsSpec diff --git a/apis/actions.github.com/v1alpha1/ephemeralrunner_types.go b/apis/actions.github.com/v1alpha1/ephemeralrunner_types.go index 295bf25b6d..f9b4df528c 100644 --- a/apis/actions.github.com/v1alpha1/ephemeralrunner_types.go +++ b/apis/actions.github.com/v1alpha1/ephemeralrunner_types.go @@ -48,7 +48,7 @@ type EphemeralRunner struct { } func (er *EphemeralRunner) IsDone() bool { - return er.Status.Phase == corev1.PodSucceeded || er.Status.Phase == corev1.PodFailed + return er.Status.Phase == EphemeralRunnerPhaseSucceeded || er.Status.Phase == EphemeralRunnerPhaseFailed || er.Status.Phase == EphemeralRunnerPhaseOutdated } func (er *EphemeralRunner) HasJob() bool { @@ -143,14 +143,14 @@ type EphemeralRunnerStatus struct { // The PodSucceded phase should be set only when confirmed that EphemeralRunner // actually executed the job and has been removed from the service. // +optional - Phase corev1.PodPhase `json:"phase,omitempty"` + Phase EphemeralRunnerPhase `json:"phase,omitempty"` // +optional Reason string `json:"reason,omitempty"` // +optional Message string `json:"message,omitempty"` // +optional - RunnerId int `json:"runnerId,omitempty"` + RunnerID int `json:"runnerId,omitempty"` // +optional RunnerName string `json:"runnerName,omitempty"` @@ -158,7 +158,7 @@ type EphemeralRunnerStatus struct { Failures map[string]metav1.Time `json:"failures,omitempty"` // +optional - JobRequestId int64 `json:"jobRequestId,omitempty"` + JobRequestID int64 `json:"jobRequestId,omitempty"` // +optional JobID string `json:"jobId,omitempty"` @@ -170,12 +170,33 @@ type EphemeralRunnerStatus struct { JobWorkflowRef string `json:"jobWorkflowRef,omitempty"` // +optional - WorkflowRunId int64 `json:"workflowRunId,omitempty"` + WorkflowRunID int64 `json:"workflowRunId,omitempty"` // +optional JobDisplayName string `json:"jobDisplayName,omitempty"` } +// EphemeralRunnerPhase is the phase of the ephemeral runner. +// It must be a superset of the pod phase. +type EphemeralRunnerPhase string + +const ( + // EphemeralRunnerPhasePending is a phase set when the ephemeral runner is + // being provisioned and is not yet online. + EphemeralRunnerPhasePending EphemeralRunnerPhase = "Pending" + // EphemeralRunnerPhaseRunning is a phase set when the ephemeral runner is online and + // waiting for a job to execute. + EphemeralRunnerPhaseRunning EphemeralRunnerPhase = "Running" + // EphemeralRunnerPhaseSucceeded is a phase set when the ephemeral runner + // successfully executed the job and has been removed from the service. + EphemeralRunnerPhaseSucceeded EphemeralRunnerPhase = "Succeeded" + // EphemeralRunnerPhaseFailed is a phase set when the ephemeral runner + // fails with unrecoverable failure. + EphemeralRunnerPhaseFailed EphemeralRunnerPhase = "Failed" + // EphemeralRunnerPhaseOutdated is a special phase that indicates the runner is outdated and should be upgraded. + EphemeralRunnerPhaseOutdated EphemeralRunnerPhase = "Outdated" +) + func (s *EphemeralRunnerStatus) LastFailure() metav1.Time { var maxTime metav1.Time if len(s.Failures) == 0 { diff --git a/apis/actions.github.com/v1alpha1/ephemeralrunnerset_types.go b/apis/actions.github.com/v1alpha1/ephemeralrunnerset_types.go index a105fd4eff..229bb252fc 100644 --- a/apis/actions.github.com/v1alpha1/ephemeralrunnerset_types.go +++ b/apis/actions.github.com/v1alpha1/ephemeralrunnerset_types.go @@ -42,8 +42,20 @@ type EphemeralRunnerSetStatus struct { RunningEphemeralRunners int `json:"runningEphemeralRunners"` // +optional FailedEphemeralRunners int `json:"failedEphemeralRunners"` + // +optional + Phase EphemeralRunnerSetPhase `json:"phase"` } +// EphemeralRunnerSetPhase is the phase of the ephemeral runner set resource +type EphemeralRunnerSetPhase string + +const ( + EphemeralRunnerSetPhaseRunning EphemeralRunnerSetPhase = "Running" + // EphemeralRunnerSetPhaseOutdated is set when at least one ephemeral runner + // contains the outdated phase + EphemeralRunnerSetPhaseOutdated EphemeralRunnerSetPhase = "Outdated" +) + // +kubebuilder:object:root=true // +kubebuilder:subresource:status // +kubebuilder:printcolumn:JSONPath=".spec.replicas",name="DesiredReplicas",type="integer" diff --git a/charts/gha-runner-scale-set-controller-experimental/crds/actions.github.com_autoscalingrunnersets.yaml b/charts/gha-runner-scale-set-controller-experimental/crds/actions.github.com_autoscalingrunnersets.yaml index 1b18467f7e..a24d52bfd6 100644 --- a/charts/gha-runner-scale-set-controller-experimental/crds/actions.github.com_autoscalingrunnersets.yaml +++ b/charts/gha-runner-scale-set-controller-experimental/crds/actions.github.com_autoscalingrunnersets.yaml @@ -16547,10 +16547,10 @@ spec: type: integer pendingEphemeralRunners: type: integer + phase: + type: string runningEphemeralRunners: type: integer - state: - type: string type: object type: object served: true diff --git a/charts/gha-runner-scale-set-controller-experimental/crds/actions.github.com_ephemeralrunnersets.yaml b/charts/gha-runner-scale-set-controller-experimental/crds/actions.github.com_ephemeralrunnersets.yaml index 5c4103fbd2..a2c8f787be 100644 --- a/charts/gha-runner-scale-set-controller-experimental/crds/actions.github.com_ephemeralrunnersets.yaml +++ b/charts/gha-runner-scale-set-controller-experimental/crds/actions.github.com_ephemeralrunnersets.yaml @@ -8318,6 +8318,9 @@ spec: type: integer pendingEphemeralRunners: type: integer + phase: + description: EphemeralRunnerSetPhase is the phase of the ephemeral runner set resource + type: string runningEphemeralRunners: type: integer required: diff --git a/charts/gha-runner-scale-set-controller/crds/actions.github.com_autoscalingrunnersets.yaml b/charts/gha-runner-scale-set-controller/crds/actions.github.com_autoscalingrunnersets.yaml index 1b18467f7e..a24d52bfd6 100644 --- a/charts/gha-runner-scale-set-controller/crds/actions.github.com_autoscalingrunnersets.yaml +++ b/charts/gha-runner-scale-set-controller/crds/actions.github.com_autoscalingrunnersets.yaml @@ -16547,10 +16547,10 @@ spec: type: integer pendingEphemeralRunners: type: integer + phase: + type: string runningEphemeralRunners: type: integer - state: - type: string type: object type: object served: true diff --git a/charts/gha-runner-scale-set-controller/crds/actions.github.com_ephemeralrunnersets.yaml b/charts/gha-runner-scale-set-controller/crds/actions.github.com_ephemeralrunnersets.yaml index 5c4103fbd2..a2c8f787be 100644 --- a/charts/gha-runner-scale-set-controller/crds/actions.github.com_ephemeralrunnersets.yaml +++ b/charts/gha-runner-scale-set-controller/crds/actions.github.com_ephemeralrunnersets.yaml @@ -8318,6 +8318,9 @@ spec: type: integer pendingEphemeralRunners: type: integer + phase: + description: EphemeralRunnerSetPhase is the phase of the ephemeral runner set resource + type: string runningEphemeralRunners: type: integer required: diff --git a/cmd/ghalistener/scaler/scaler.go b/cmd/ghalistener/scaler/scaler.go index 3159b1db35..51cb0362ae 100644 --- a/cmd/ghalistener/scaler/scaler.go +++ b/cmd/ghalistener/scaler/scaler.go @@ -110,10 +110,10 @@ func (w *Scaler) HandleJobStarted(ctx context.Context, jobInfo *scaleset.JobStar patch, err := json.Marshal( &v1alpha1.EphemeralRunner{ Status: v1alpha1.EphemeralRunnerStatus{ - JobRequestId: jobInfo.RunnerRequestID, + JobRequestID: jobInfo.RunnerRequestID, JobRepositoryName: fmt.Sprintf("%s/%s", jobInfo.OwnerName, jobInfo.RepositoryName), JobID: jobInfo.JobID, - WorkflowRunId: jobInfo.WorkflowRunID, + WorkflowRunID: jobInfo.WorkflowRunID, JobWorkflowRef: jobInfo.JobWorkflowRef, JobDisplayName: jobInfo.JobDisplayName, }, diff --git a/config/crd/bases/actions.github.com_autoscalingrunnersets.yaml b/config/crd/bases/actions.github.com_autoscalingrunnersets.yaml index 1b18467f7e..a24d52bfd6 100644 --- a/config/crd/bases/actions.github.com_autoscalingrunnersets.yaml +++ b/config/crd/bases/actions.github.com_autoscalingrunnersets.yaml @@ -16547,10 +16547,10 @@ spec: type: integer pendingEphemeralRunners: type: integer + phase: + type: string runningEphemeralRunners: type: integer - state: - type: string type: object type: object served: true diff --git a/config/crd/bases/actions.github.com_ephemeralrunnersets.yaml b/config/crd/bases/actions.github.com_ephemeralrunnersets.yaml index 5c4103fbd2..a2c8f787be 100644 --- a/config/crd/bases/actions.github.com_ephemeralrunnersets.yaml +++ b/config/crd/bases/actions.github.com_ephemeralrunnersets.yaml @@ -8318,6 +8318,9 @@ spec: type: integer pendingEphemeralRunners: type: integer + phase: + description: EphemeralRunnerSetPhase is the phase of the ephemeral runner set resource + type: string runningEphemeralRunners: type: integer required: diff --git a/controllers/actions.github.com/autoscalingrunnerset_controller.go b/controllers/actions.github.com/autoscalingrunnerset_controller.go index c2d48d1614..d32aa93b5d 100644 --- a/controllers/actions.github.com/autoscalingrunnerset_controller.go +++ b/controllers/actions.github.com/autoscalingrunnerset_controller.go @@ -22,6 +22,7 @@ import ( "sort" "strconv" "strings" + "time" "github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1" "github.com/actions/actions-runner-controller/build" @@ -46,6 +47,7 @@ const ( // This is used to determine if the values have changed, so we can // re-create listener. annotationKeyValuesHash = "actions.github.com/values-hash" + annotationKeyChangeHash = "actions.github.com/change-hash" autoscalingRunnerSetFinalizerName = "autoscalingrunnerset.actions.github.com/finalizer" runnerScaleSetIDAnnotationKey = "runner-scale-set-id" @@ -104,32 +106,16 @@ func (r *AutoscalingRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl } log.Info("Deleting resources") - done, err := r.cleanupListener(ctx, autoscalingRunnerSet, log) + done, err := r.cleanUpResources(ctx, autoscalingRunnerSet, nil, log) if err != nil { - log.Error(err, "Failed to clean up listener") + log.Error(err, "Failed to clean up resources during deletion") return ctrl.Result{}, err } if !done { - // we are going to get notified anyway to proceed with rest of the - // cleanup. No need to re-queue - log.Info("Waiting for listener to be deleted") - return ctrl.Result{}, nil - } - - done, err = r.cleanupEphemeralRunnerSets(ctx, autoscalingRunnerSet, log) - if err != nil { - log.Error(err, "Failed to clean up ephemeral runner sets") - return ctrl.Result{}, err - } - if !done { - log.Info("Waiting for ephemeral runner sets to be deleted") - return ctrl.Result{}, nil - } - - err = r.deleteRunnerScaleSet(ctx, autoscalingRunnerSet, log) - if err != nil { - log.Error(err, "Failed to delete runner scale set") - return ctrl.Result{}, err + log.Info("Waiting for resources to be cleaned up before removing finalizer") + return ctrl.Result{ + RequeueAfter: 5 * time.Second, + }, nil } if err := r.removeFinalizersFromDependentResources(ctx, autoscalingRunnerSet, log); err != nil { @@ -176,34 +162,54 @@ func (r *AutoscalingRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl } log.Info("Successfully added finalizer") - return ctrl.Result{}, nil } - scaleSetIDRaw, ok := autoscalingRunnerSet.Annotations[runnerScaleSetIDAnnotationKey] - if !ok { - // Need to create a new runner scale set on Actions service - log.Info("Runner scale set id annotation does not exist. Creating a new runner scale set.") - return r.createRunnerScaleSet(ctx, autoscalingRunnerSet, log) - } + if targetHash := autoscalingRunnerSet.Hash(); autoscalingRunnerSet.Annotations[annotationKeyChangeHash] != targetHash { + if err := patch(ctx, r.Client, autoscalingRunnerSet, func(obj *v1alpha1.AutoscalingRunnerSet) { + if obj.Annotations == nil { + obj.Annotations = map[string]string{} + } + obj.Annotations[annotationKeyChangeHash] = targetHash + }); err != nil { + log.Error(err, "Failed to update autoscaling runner set with change hash annotation") + return ctrl.Result{}, err + } - if id, err := strconv.Atoi(scaleSetIDRaw); err != nil || id <= 0 { - log.Info("Runner scale set id annotation is not an id, or is <= 0. Creating a new runner scale set.") - // something modified the scaleSetId. Try to create one - return r.createRunnerScaleSet(ctx, autoscalingRunnerSet, log) + if err := r.updateStatus(ctx, autoscalingRunnerSet, nil, v1alpha1.AutoscalingRunnerSetPhasePending, log); err != nil { + log.Error(err, "Failed to update autoscaling runner set status to pending") + return ctrl.Result{}, err + } } - // Make sure the runner group of the scale set is up to date - currentRunnerGroupName, ok := autoscalingRunnerSet.Annotations[AnnotationKeyGitHubRunnerGroupName] - if !ok || (len(autoscalingRunnerSet.Spec.RunnerGroup) > 0 && !strings.EqualFold(currentRunnerGroupName, autoscalingRunnerSet.Spec.RunnerGroup)) { - log.Info("AutoScalingRunnerSet runner group changed. Updating the runner scale set.") - return r.updateRunnerScaleSetRunnerGroup(ctx, autoscalingRunnerSet, log) - } + outdated := autoscalingRunnerSet.Status.Phase == v1alpha1.AutoscalingRunnerSetPhaseOutdated + + if !outdated { + scaleSetIDRaw, ok := autoscalingRunnerSet.Annotations[runnerScaleSetIDAnnotationKey] + if !ok { + // Need to create a new runner scale set on Actions service + log.Info("Runner scale set id annotation does not exist. Creating a new runner scale set.") + return r.createRunnerScaleSet(ctx, autoscalingRunnerSet, log) + } + + if id, err := strconv.Atoi(scaleSetIDRaw); err != nil || id <= 0 { + log.Info("Runner scale set id annotation is not an id, or is <= 0. Creating a new runner scale set.") + // something modified the scaleSetId. Try to create one + return r.createRunnerScaleSet(ctx, autoscalingRunnerSet, log) + } - // Make sure the runner scale set name is up to date - currentRunnerScaleSetName, ok := autoscalingRunnerSet.Annotations[AnnotationKeyGitHubRunnerScaleSetName] - if !ok || (len(autoscalingRunnerSet.Spec.RunnerScaleSetName) > 0 && !strings.EqualFold(currentRunnerScaleSetName, autoscalingRunnerSet.Spec.RunnerScaleSetName)) { - log.Info("AutoScalingRunnerSet runner scale set name changed. Updating the runner scale set.") - return r.updateRunnerScaleSetName(ctx, autoscalingRunnerSet, log) + // Make sure the runner group of the scale set is up to date + currentRunnerGroupName, ok := autoscalingRunnerSet.Annotations[AnnotationKeyGitHubRunnerGroupName] + if !ok || (len(autoscalingRunnerSet.Spec.RunnerGroup) > 0 && !strings.EqualFold(currentRunnerGroupName, autoscalingRunnerSet.Spec.RunnerGroup)) { + log.Info("AutoScalingRunnerSet runner group changed. Updating the runner scale set.") + return r.updateRunnerScaleSetRunnerGroup(ctx, autoscalingRunnerSet, log) + } + + // Make sure the runner scale set name is up to date + currentRunnerScaleSetName, ok := autoscalingRunnerSet.Annotations[AnnotationKeyGitHubRunnerScaleSetName] + if !ok || (len(autoscalingRunnerSet.Spec.RunnerScaleSetName) > 0 && !strings.EqualFold(currentRunnerScaleSetName, autoscalingRunnerSet.Spec.RunnerScaleSetName)) { + log.Info("AutoScalingRunnerSet runner scale set name changed. Updating the runner scale set.") + return r.updateRunnerScaleSetName(ctx, autoscalingRunnerSet, log) + } } existingRunnerSets, err := r.listEphemeralRunnerSets(ctx, autoscalingRunnerSet) @@ -213,7 +219,7 @@ func (r *AutoscalingRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl } latestRunnerSet := existingRunnerSets.latest() - if latestRunnerSet == nil { + if latestRunnerSet == nil && !outdated { log.Info("Latest runner set does not exist. Creating a new runner set.") return r.createEphemeralRunnerSet(ctx, autoscalingRunnerSet, log) } @@ -222,6 +228,8 @@ func (r *AutoscalingRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl log.Info("Find existing ephemeral runner set", "name", runnerSet.Name, "specHash", runnerSet.Annotations[annotationKeyRunnerSpecHash]) } + outdated = outdated || (latestRunnerSet != nil && latestRunnerSet.Status.Phase == v1alpha1.EphemeralRunnerSetPhaseOutdated) + // Make sure the AutoscalingListener is up and running in the controller namespace listener := new(v1alpha1.AutoscalingListener) listenerFound := true @@ -235,6 +243,30 @@ func (r *AutoscalingRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl log.Info("AutoscalingListener does not exist.") } + if outdated { + log.Info("Ephemeral runner set is outdated") + if autoscalingRunnerSet.Status.Phase != v1alpha1.AutoscalingRunnerSetPhaseOutdated { + if err := r.updateStatus(ctx, autoscalingRunnerSet, latestRunnerSet, v1alpha1.AutoscalingRunnerSetPhaseOutdated, log); err != nil { + log.Error(err, "Failed to update autoscaling runner set status to outdated") + return ctrl.Result{}, err + } + } + + done, err := r.cleanUpResources(ctx, autoscalingRunnerSet, latestRunnerSet, log) + if err != nil { + log.Error(err, "Failed to clean up resources for outdated runner set") + return ctrl.Result{}, err + } + if done { + return ctrl.Result{}, nil + } + + log.Info("Waiting for resources to be cleaned up for outdated runner set") + return ctrl.Result{ + RequeueAfter: 5 * time.Second, + }, nil + } + // Our listener pod is out of date, so we need to delete it to get a new recreate. listenerValuesHashChanged := listener.Annotations[annotationKeyValuesHash] != autoscalingRunnerSet.Annotations[annotationKeyValuesHash] listenerSpecHashChanged := listener.Annotations[annotationKeyRunnerSpecHash] != autoscalingRunnerSet.ListenerSpecHash() @@ -291,20 +323,69 @@ func (r *AutoscalingRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl return r.createAutoScalingListenerForRunnerSet(ctx, autoscalingRunnerSet, latestRunnerSet, log) } - // Update the status of autoscaling runner set. - if latestRunnerSet.Status.CurrentReplicas != autoscalingRunnerSet.Status.CurrentRunners { + if err := r.updateStatus(ctx, autoscalingRunnerSet, latestRunnerSet, v1alpha1.AutoscalingRunnerSetPhaseRunning, log); err != nil { + log.Error(err, "Failed to update autoscaling runner set status to running") + return ctrl.Result{}, err + } + + return ctrl.Result{}, nil +} + +func (r *AutoscalingRunnerSetReconciler) cleanUpResources(ctx context.Context, autoscalingRunnerSet *v1alpha1.AutoscalingRunnerSet, latestRunnerSet *v1alpha1.EphemeralRunnerSet, log logr.Logger) (bool, error) { + log.Info("Deleting the listener") + done, err := r.cleanupListener(ctx, autoscalingRunnerSet, log) + if err != nil { + log.Error(err, "Failed to clean up listener") + return false, err + } + + if !done { + log.Info("Waiting for listener to be deleted") + return false, nil + } + + log.Info("deleting ephemeral runner sets") + done, err = r.cleanupEphemeralRunnerSets(ctx, autoscalingRunnerSet, log) + if err != nil { + log.Error(err, "Failed to clean up ephemeral runner sets") + return false, err + } + if !done { + log.Info("Waiting for ephemeral runner sets to be deleted") + return false, nil + } + + log.Info("deleting runner scale set") + err = r.deleteRunnerScaleSet(ctx, autoscalingRunnerSet, log) + if err != nil { + log.Error(err, "Failed to delete runner scale set") + return false, err + } + + return true, nil +} + +// Update the status of autoscaling runner set if necessary +func (r *AutoscalingRunnerSetReconciler) updateStatus(ctx context.Context, autoscalingRunnerSet *v1alpha1.AutoscalingRunnerSet, ephemeralRunnerSet *v1alpha1.EphemeralRunnerSet, phase v1alpha1.AutoscalingRunnerSetPhase, log logr.Logger) error { + countDiff := ephemeralRunnerSet != nil && ephemeralRunnerSet.Status.CurrentReplicas != autoscalingRunnerSet.Status.CurrentRunners + phaseDiff := phase != autoscalingRunnerSet.Status.Phase + if countDiff || phaseDiff { if err := patchSubResource(ctx, r.Status(), autoscalingRunnerSet, func(obj *v1alpha1.AutoscalingRunnerSet) { - obj.Status.CurrentRunners = latestRunnerSet.Status.CurrentReplicas - obj.Status.PendingEphemeralRunners = latestRunnerSet.Status.PendingEphemeralRunners - obj.Status.RunningEphemeralRunners = latestRunnerSet.Status.RunningEphemeralRunners - obj.Status.FailedEphemeralRunners = latestRunnerSet.Status.FailedEphemeralRunners + obj.Status.Phase = phase + var ephemeralRunnerSetStatus v1alpha1.EphemeralRunnerSetStatus + if ephemeralRunnerSet != nil { + ephemeralRunnerSetStatus = ephemeralRunnerSet.Status + } + obj.Status.CurrentRunners = ephemeralRunnerSetStatus.CurrentReplicas + obj.Status.PendingEphemeralRunners = ephemeralRunnerSetStatus.PendingEphemeralRunners + obj.Status.RunningEphemeralRunners = ephemeralRunnerSetStatus.RunningEphemeralRunners + obj.Status.FailedEphemeralRunners = ephemeralRunnerSetStatus.FailedEphemeralRunners }); err != nil { log.Error(err, "Failed to update autoscaling runner set status with current runner count") - return ctrl.Result{}, err + return err } } - - return ctrl.Result{}, nil + return nil } // Prevents overprovisioning of runners. diff --git a/controllers/actions.github.com/autoscalingrunnerset_controller_test.go b/controllers/actions.github.com/autoscalingrunnerset_controller_test.go index b5d63346ee..a2d9a6b259 100644 --- a/controllers/actions.github.com/autoscalingrunnerset_controller_test.go +++ b/controllers/actions.github.com/autoscalingrunnerset_controller_test.go @@ -171,10 +171,18 @@ var _ = Describe("Test AutoScalingRunnerSet controller", Ordered, func() { return "", nil } - return fmt.Sprintf("%s_%s", created.Annotations[runnerScaleSetIDAnnotationKey], created.Annotations[AnnotationKeyGitHubRunnerGroupName]), nil + return fmt.Sprintf( + "%s_%s", + created.Annotations[runnerScaleSetIDAnnotationKey], + created.Annotations[AnnotationKeyGitHubRunnerGroupName], + ), nil }, autoscalingRunnerSetTestTimeout, - autoscalingRunnerSetTestInterval).Should(BeEquivalentTo("1_testgroup"), "RunnerScaleSet should be created/fetched and update the AutoScalingRunnerSet's annotation") + autoscalingRunnerSetTestInterval, + ).Should( + BeEquivalentTo("1_testgroup"), + "RunnerScaleSet should be created/fetched and update the AutoScalingRunnerSet's annotation", + ) Eventually( func() (string, error) { @@ -548,7 +556,7 @@ var _ = Describe("Test AutoScalingRunnerSet controller", Ordered, func() { desiredStatus := v1alpha1.AutoscalingRunnerSetStatus{ CurrentRunners: activeRunnerSet.Status.CurrentReplicas, - State: "", + Phase: v1alpha1.AutoscalingRunnerSetPhaseRunning, PendingEphemeralRunners: activeRunnerSet.Status.PendingEphemeralRunners, RunningEphemeralRunners: activeRunnerSet.Status.RunningEphemeralRunners, FailedEphemeralRunners: activeRunnerSet.Status.FailedEphemeralRunners, @@ -654,7 +662,7 @@ var _ = Describe("Test AutoScalingRunnerSet controller", Ordered, func() { desiredStatus := v1alpha1.AutoscalingRunnerSetStatus{ CurrentRunners: statusUpdate.Status.CurrentReplicas, - State: "", + Phase: v1alpha1.AutoscalingRunnerSetPhaseRunning, PendingEphemeralRunners: statusUpdate.Status.PendingEphemeralRunners, RunningEphemeralRunners: statusUpdate.Status.RunningEphemeralRunners, FailedEphemeralRunners: statusUpdate.Status.FailedEphemeralRunners, diff --git a/controllers/actions.github.com/constants.go b/controllers/actions.github.com/constants.go index ad1841fca2..588c95beb7 100644 --- a/controllers/actions.github.com/constants.go +++ b/controllers/actions.github.com/constants.go @@ -12,6 +12,9 @@ const ( const ( EnvVarRunnerJITConfig = "ACTIONS_RUNNER_INPUT_JITCONFIG" EnvVarRunnerExtraUserAgent = "GITHUB_ACTIONS_RUNNER_EXTRA_USER_AGENT" + // Environment variable setting the exit code to return when the runner version is deprecated. + // This is used by the runner to signal to the controller that it should switch off the scaleset. + EnvVarRunnerDeprecatedExitCode = "ACTIONS_RUNNER_RETURN_VERSION_DEPRECATED_EXIT_CODE" ) // Environment variable names used to set proxy variables for containers diff --git a/controllers/actions.github.com/ephemeralrunner_controller.go b/controllers/actions.github.com/ephemeralrunner_controller.go index bb06790d62..616c0e044b 100644 --- a/controllers/actions.github.com/ephemeralrunner_controller.go +++ b/controllers/actions.github.com/ephemeralrunner_controller.go @@ -201,7 +201,7 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ } } - if ephemeralRunner.Status.RunnerId == 0 { + if ephemeralRunner.Status.RunnerID == 0 { log.Info("Updating ephemeral runner status with runnerId and runnerName") runnerID, err := strconv.Atoi(string(secret.Data["runnerId"])) if err != nil { @@ -216,12 +216,12 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ runnerName := string(secret.Data["runnerName"]) if err := patchSubResource(ctx, r.Status(), ephemeralRunner, func(obj *v1alpha1.EphemeralRunner) { - obj.Status.RunnerId = runnerID + obj.Status.RunnerID = runnerID obj.Status.RunnerName = runnerName }); err != nil { return ctrl.Result{}, fmt.Errorf("failed to update runner status for RunnerId/RunnerName/RunnerJITConfig: %w", err) } - ephemeralRunner.Status.RunnerId = runnerID + ephemeralRunner.Status.RunnerID = runnerID ephemeralRunner.Status.RunnerName = runnerName log.Info("Updated ephemeral runner status with runnerId and runnerName") } @@ -324,7 +324,8 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ return ctrl.Result{}, r.deleteEphemeralRunnerOrPod(ctx, ephemeralRunner, pod, log) } - if cs.State.Terminated.ExitCode == 0 { + switch cs.State.Terminated.ExitCode { + case 0: log.Info("Runner container has succeeded but pod is in failed phase; Assume successful exit") // If the pod is in a failed state, that means that at least one container exited with non-zero exit code. // If the runner container exits with 0, we assume that the runner has finished successfully. @@ -335,6 +336,12 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ return ctrl.Result{}, err } return ctrl.Result{}, nil + case 7: + if err := r.markAsOutdated(ctx, ephemeralRunner, log); err != nil { + log.Error(err, "Failed to set ephemeral runner to phase Outdated") + return ctrl.Result{}, err + } + return ctrl.Result{}, nil } log.Error( @@ -357,6 +364,13 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ } return ctrl.Result{}, nil + case cs.State.Terminated.ExitCode == 7: // outdated + if err := r.markAsOutdated(ctx, ephemeralRunner, log); err != nil { + log.Error(err, "Failed to set ephemeral runner to phase Outdated") + return ctrl.Result{}, err + } + return ctrl.Result{}, nil + case cs.State.Terminated.ExitCode != 0: // failed log.Info("Ephemeral runner container failed", "exitCode", cs.State.Terminated.ExitCode) return ctrl.Result{}, r.deleteEphemeralRunnerOrPod(ctx, ephemeralRunner, pod, log) @@ -390,7 +404,7 @@ func (r *EphemeralRunnerReconciler) deleteEphemeralRunnerOrPod(ctx context.Conte log.Error(err, "Failed to get actions client for removing the runner from the service") return nil } - if err := actionsClient.RemoveRunner(ctx, int64(ephemeralRunner.Status.RunnerId)); err != nil { + if err := actionsClient.RemoveRunner(ctx, int64(ephemeralRunner.Status.RunnerID)); err != nil { log.Error(err, "Failed to remove the runner from the service") return nil } @@ -549,7 +563,7 @@ func (r *EphemeralRunnerReconciler) cleanupRunnerLinkedSecrets(ctx context.Conte func (r *EphemeralRunnerReconciler) markAsFailed(ctx context.Context, ephemeralRunner *v1alpha1.EphemeralRunner, errMessage string, reason string, log logr.Logger) error { log.Info("Updating ephemeral runner status to Failed") if err := patchSubResource(ctx, r.Status(), ephemeralRunner, func(obj *v1alpha1.EphemeralRunner) { - obj.Status.Phase = corev1.PodFailed + obj.Status.Phase = v1alpha1.EphemeralRunnerPhaseFailed obj.Status.Reason = reason obj.Status.Message = errMessage }); err != nil { @@ -565,6 +579,24 @@ func (r *EphemeralRunnerReconciler) markAsFailed(ctx context.Context, ephemeralR return nil } +func (r *EphemeralRunnerReconciler) markAsOutdated(ctx context.Context, ephemeralRunner *v1alpha1.EphemeralRunner, log logr.Logger) error { + log.Info("Updating ephemeral runner status to Outdated") + + if err := patchSubResource(ctx, r.Status(), ephemeralRunner, func(obj *v1alpha1.EphemeralRunner) { + obj.Status.Phase = v1alpha1.EphemeralRunnerPhaseOutdated + obj.Status.Reason = "Outdated" + obj.Status.Message = "Runner is deprecated" + }); err != nil { + return fmt.Errorf("failed to update ephemeral runner status Phase/Message: %w", err) + } + + log.Info("Removing the runner from the service") + if err := r.deleteRunnerFromService(ctx, ephemeralRunner, log); err != nil { + return fmt.Errorf("failed to remove the runner from service: %w", err) + } + return nil +} + // deletePodAsFailed is responsible for deleting the pod and updating the .Status.Failures for tracking failure count. // It should not be responsible for setting the status to Failed. // @@ -722,7 +754,7 @@ func (r *EphemeralRunnerReconciler) createPod(ctx context.Context, runner *v1alp log.Info("Created ephemeral runner pod", "runnerScaleSetId", runner.Spec.RunnerScaleSetID, "runnerName", runner.Status.RunnerName, - "runnerId", runner.Status.RunnerId, + "runnerId", runner.Status.RunnerID, "configUrl", runner.Spec.GitHubConfigUrl, "podName", newPod.Name) @@ -765,7 +797,8 @@ func (r *EphemeralRunnerReconciler) updateRunStatusFromPod(ctx context.Context, } } - phaseChanged := ephemeralRunner.Status.Phase != pod.Status.Phase + phase := v1alpha1.EphemeralRunnerPhase(pod.Status.Phase) + phaseChanged := ephemeralRunner.Status.Phase != phase readyChanged := ready != ephemeralRunner.Status.Ready if !phaseChanged && !readyChanged { @@ -780,7 +813,7 @@ func (r *EphemeralRunnerReconciler) updateRunStatusFromPod(ctx context.Context, "ready", ready, ) err := patchSubResource(ctx, r.Status(), ephemeralRunner, func(obj *v1alpha1.EphemeralRunner) { - obj.Status.Phase = pod.Status.Phase + obj.Status.Phase = phase obj.Status.Ready = ready obj.Status.Reason = pod.Status.Reason obj.Status.Message = pod.Status.Message @@ -799,13 +832,13 @@ func (r *EphemeralRunnerReconciler) deleteRunnerFromService(ctx context.Context, return fmt.Errorf("failed to get actions client for runner: %w", err) } - log.Info("Removing runner from the service", "runnerId", ephemeralRunner.Status.RunnerId) - err = client.RemoveRunner(ctx, int64(ephemeralRunner.Status.RunnerId)) + log.Info("Removing runner from the service", "runnerId", ephemeralRunner.Status.RunnerID) + err = client.RemoveRunner(ctx, int64(ephemeralRunner.Status.RunnerID)) if err != nil { return fmt.Errorf("failed to remove runner from the service: %w", err) } - log.Info("Removed runner from the service", "runnerId", ephemeralRunner.Status.RunnerId) + log.Info("Removed runner from the service", "runnerId", ephemeralRunner.Status.RunnerID) return nil } diff --git a/controllers/actions.github.com/ephemeralrunner_controller_test.go b/controllers/actions.github.com/ephemeralrunner_controller_test.go index bbb8727527..559d9469e7 100644 --- a/controllers/actions.github.com/ephemeralrunner_controller_test.go +++ b/controllers/actions.github.com/ephemeralrunner_controller_test.go @@ -511,7 +511,7 @@ var _ = Describe("EphemeralRunner", func() { updated := new(v1alpha1.EphemeralRunner) Eventually( - func() (corev1.PodPhase, error) { + func() (v1alpha1.EphemeralRunnerPhase, error) { err := k8sClient.Get( ctx, client.ObjectKey{Name: invalideEphemeralRunner.Name, Namespace: invalideEphemeralRunner.Namespace}, @@ -524,7 +524,7 @@ var _ = Describe("EphemeralRunner", func() { }, ephemeralRunnerTimeout, ephemeralRunnerInterval, - ).Should(BeEquivalentTo(corev1.PodFailed)) + ).Should(BeEquivalentTo(v1alpha1.EphemeralRunnerPhaseFailed)) Expect(updated.Status.Reason).Should(Equal("InvalidPod")) Expect(updated.Status.Message).Should(Equal("Failed to create the pod: pods \"invalid-ephemeral-runner\" is forbidden: no PriorityClass with name notexist was found")) @@ -676,7 +676,7 @@ var _ = Describe("EphemeralRunner", func() { if err != nil { return 0, err } - return updatedEphemeralRunner.Status.RunnerId, nil + return updatedEphemeralRunner.Status.RunnerID, nil }, ephemeralRunnerTimeout, ephemeralRunnerInterval, @@ -711,7 +711,7 @@ var _ = Describe("EphemeralRunner", func() { var updated *v1alpha1.EphemeralRunner Eventually( - func() (corev1.PodPhase, error) { + func() (v1alpha1.EphemeralRunnerPhase, error) { updated = new(v1alpha1.EphemeralRunner) err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, updated) if err != nil { @@ -833,10 +833,10 @@ var _ = Describe("EphemeralRunner", func() { Expect(err).To(BeNil(), "failed to patch pod status") Consistently( - func() (corev1.PodPhase, error) { + func() (v1alpha1.EphemeralRunnerPhase, error) { updated := new(v1alpha1.EphemeralRunner) if err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, updated); err != nil { - return corev1.PodUnknown, err + return "Unknown", err } return updated.Status.Phase, nil }, @@ -1059,7 +1059,7 @@ var _ = Describe("EphemeralRunner", func() { Expect(err).To(BeNil()) Eventually( - func() (corev1.PodPhase, error) { + func() (v1alpha1.EphemeralRunnerPhase, error) { updated := new(v1alpha1.EphemeralRunner) if err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, updated); err != nil { return "", err @@ -1068,7 +1068,7 @@ var _ = Describe("EphemeralRunner", func() { }, ephemeralRunnerTimeout, ephemeralRunnerInterval, - ).Should(BeEquivalentTo(corev1.PodRunning)) + ).Should(BeEquivalentTo(v1alpha1.EphemeralRunnerPhaseRunning)) // set phase to succeeded pod.Status.Phase = corev1.PodSucceeded @@ -1076,7 +1076,7 @@ var _ = Describe("EphemeralRunner", func() { Expect(err).To(BeNil()) Consistently( - func() (corev1.PodPhase, error) { + func() (v1alpha1.EphemeralRunnerPhase, error) { updated := new(v1alpha1.EphemeralRunner) if err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, updated); err != nil { return "", err @@ -1084,7 +1084,7 @@ var _ = Describe("EphemeralRunner", func() { return updated.Status.Phase, nil }, ephemeralRunnerTimeout, - ).Should(BeEquivalentTo(corev1.PodRunning)) + ).Should(BeEquivalentTo(v1alpha1.EphemeralRunnerPhaseRunning)) }) }) diff --git a/controllers/actions.github.com/ephemeralrunnerset_controller.go b/controllers/actions.github.com/ephemeralrunnerset_controller.go index 75c4667e85..7f81216d1d 100644 --- a/controllers/actions.github.com/ephemeralrunnerset_controller.go +++ b/controllers/actions.github.com/ephemeralrunnerset_controller.go @@ -125,6 +125,11 @@ func (r *EphemeralRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.R return ctrl.Result{}, nil } + if ephemeralRunnerSet.Status.Phase == v1alpha1.EphemeralRunnerSetPhaseOutdated { + log.Info("ephemeral runner set is outdated, waiting for autoscaling runner set to remove it") + return ctrl.Result{}, nil + } + // Create proxy secret if not present if ephemeralRunnerSet.Spec.EphemeralRunnerSpec.Proxy != nil { proxySecret := new(corev1.Secret) @@ -145,25 +150,25 @@ func (r *EphemeralRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.R // Find all EphemeralRunner with matching namespace and own by this EphemeralRunnerSet. ephemeralRunnerList := new(v1alpha1.EphemeralRunnerList) - err := r.List( + if err := r.List( ctx, ephemeralRunnerList, client.InNamespace(req.Namespace), client.MatchingFields{resourceOwnerKey: req.Name}, - ) - if err != nil { + ); err != nil { log.Error(err, "Unable to list child ephemeral runners") return ctrl.Result{}, err } - ephemeralRunnerState := newEphemeralRunnerState(ephemeralRunnerList) + ephemeralRunnersByState := newEphemeralRunnersByStates(ephemeralRunnerList) log.Info("Ephemeral runner counts", - "pending", len(ephemeralRunnerState.pending), - "running", len(ephemeralRunnerState.running), - "finished", len(ephemeralRunnerState.finished), - "failed", len(ephemeralRunnerState.failed), - "deleting", len(ephemeralRunnerState.deleting), + "outdated", len(ephemeralRunnersByState.outdated), + "pending", len(ephemeralRunnersByState.pending), + "running", len(ephemeralRunnersByState.running), + "finished", len(ephemeralRunnersByState.finished), + "failed", len(ephemeralRunnersByState.failed), + "deleting", len(ephemeralRunnersByState.deleting), ) if r.PublishMetrics { @@ -183,16 +188,16 @@ func (r *EphemeralRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.R Organization: parsedURL.Organization, Enterprise: parsedURL.Enterprise, }, - len(ephemeralRunnerState.pending), - len(ephemeralRunnerState.running), - len(ephemeralRunnerState.failed), + len(ephemeralRunnersByState.pending), + len(ephemeralRunnersByState.running), + len(ephemeralRunnersByState.failed), ) } - total := ephemeralRunnerState.scaleTotal() - if ephemeralRunnerSet.Spec.PatchID == 0 || ephemeralRunnerSet.Spec.PatchID != ephemeralRunnerState.latestPatchID { + total := ephemeralRunnersByState.scaleTotal() + if ephemeralRunnerSet.Spec.PatchID == 0 || ephemeralRunnerSet.Spec.PatchID != ephemeralRunnersByState.latestPatchID { defer func() { - if err := r.cleanupFinishedEphemeralRunners(ctx, ephemeralRunnerState.finished, log); err != nil { + if err := r.cleanupFinishedEphemeralRunners(ctx, ephemeralRunnersByState.finished, log); err != nil { log.Error(err, "failed to cleanup finished ephemeral runners") } }() @@ -217,8 +222,8 @@ func (r *EphemeralRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.R if err := r.deleteIdleEphemeralRunners( ctx, ephemeralRunnerSet, - ephemeralRunnerState.pending, - ephemeralRunnerState.running, + ephemeralRunnersByState.pending, + ephemeralRunnersByState.running, count, log, ); err != nil { @@ -228,25 +233,41 @@ func (r *EphemeralRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.R } } + return ctrl.Result{}, r.updateStatus(ctx, ephemeralRunnerSet, ephemeralRunnersByState, log) +} + +func (r *EphemeralRunnerSetReconciler) updateStatus(ctx context.Context, ephemeralRunnerSet *v1alpha1.EphemeralRunnerSet, state *ephemeralRunnersByState, log logr.Logger) error { + total := state.scaleTotal() + var phase v1alpha1.EphemeralRunnerSetPhase + switch { + case len(state.outdated) > 0: + phase = v1alpha1.EphemeralRunnerSetPhaseOutdated + case ephemeralRunnerSet.Status.Phase == "": + phase = v1alpha1.EphemeralRunnerSetPhaseRunning + default: + phase = ephemeralRunnerSet.Status.Phase + } desiredStatus := v1alpha1.EphemeralRunnerSetStatus{ CurrentReplicas: total, - PendingEphemeralRunners: len(ephemeralRunnerState.pending), - RunningEphemeralRunners: len(ephemeralRunnerState.running), - FailedEphemeralRunners: len(ephemeralRunnerState.failed), + Phase: phase, + PendingEphemeralRunners: len(state.pending), + RunningEphemeralRunners: len(state.running), + FailedEphemeralRunners: len(state.failed), } // Update the status if needed. if ephemeralRunnerSet.Status != desiredStatus { log.Info("Updating status with current runners count", "count", total) + ephemeralRunnerSet := ephemeralRunnerSet.DeepCopy() + ephemeralRunnerSet.Status.CurrentReplicas = -1 // ALWAYS update current replicas if err := patchSubResource(ctx, r.Status(), ephemeralRunnerSet, func(obj *v1alpha1.EphemeralRunnerSet) { obj.Status = desiredStatus }); err != nil { log.Error(err, "Failed to update status with current runners count") - return ctrl.Result{}, err + return err } } - - return ctrl.Result{}, nil + return nil } func (r *EphemeralRunnerSetReconciler) cleanupFinishedEphemeralRunners(ctx context.Context, finishedEphemeralRunners []*v1alpha1.EphemeralRunner, log logr.Logger) error { @@ -290,7 +311,6 @@ func (r *EphemeralRunnerSetReconciler) cleanUpEphemeralRunners(ctx context.Conte return false, fmt.Errorf("failed to list child ephemeral runners: %w", err) } - log.Info("Actual Ephemeral runner counts", "count", len(ephemeralRunnerList.Items)) // only if there are no ephemeral runners left, return true if len(ephemeralRunnerList.Items) == 0 { err := r.cleanUpProxySecret(ctx, ephemeralRunnerSet, log) @@ -301,7 +321,7 @@ func (r *EphemeralRunnerSetReconciler) cleanUpEphemeralRunners(ctx context.Conte return true, nil } - ephemeralRunnerState := newEphemeralRunnerState(ephemeralRunnerList) + ephemeralRunnerState := newEphemeralRunnersByStates(ephemeralRunnerList) log.Info("Clean up runner counts", "pending", len(ephemeralRunnerState.pending), @@ -309,11 +329,12 @@ func (r *EphemeralRunnerSetReconciler) cleanUpEphemeralRunners(ctx context.Conte "finished", len(ephemeralRunnerState.finished), "failed", len(ephemeralRunnerState.failed), "deleting", len(ephemeralRunnerState.deleting), + "outdated", len(ephemeralRunnerState.outdated), ) - log.Info("Cleanup finished or failed ephemeral runners") + log.Info("Cleanup terminated ephemeral runners") var errs []error - for _, ephemeralRunner := range append(ephemeralRunnerState.finished, ephemeralRunnerState.failed...) { + for _, ephemeralRunner := range ephemeralRunnerState.terminated() { log.Info("Deleting ephemeral runner", "name", ephemeralRunner.Name) if err := r.Delete(ctx, ephemeralRunner); err != nil && !kerrors.IsNotFound(err) { errs = append(errs, err) @@ -359,7 +380,7 @@ func (r *EphemeralRunnerSetReconciler) cleanUpEphemeralRunners(ctx context.Conte func (r *EphemeralRunnerSetReconciler) createEphemeralRunners(ctx context.Context, runnerSet *v1alpha1.EphemeralRunnerSet, count int, log logr.Logger) error { // Track multiple errors at once and return the bundle. errs := make([]error, 0) - for i := 0; i < count; i++ { + for i := range count { ephemeralRunner := r.newEphemeralRunner(runnerSet) if runnerSet.Spec.EphemeralRunnerSpec.Proxy != nil { ephemeralRunner.Spec.ProxySecretRef = proxyEphemeralRunnerSetSecretName(runnerSet) @@ -448,7 +469,7 @@ func (r *EphemeralRunnerSetReconciler) deleteIdleEphemeralRunners(ctx context.Co for runners.next() { ephemeralRunner := runners.object() isDone := ephemeralRunner.IsDone() - if !isDone && ephemeralRunner.Status.RunnerId == 0 { + if !isDone && ephemeralRunner.Status.RunnerID == 0 { log.Info("Skipping ephemeral runner since it is not registered yet", "name", ephemeralRunner.Name) continue } @@ -457,7 +478,7 @@ func (r *EphemeralRunnerSetReconciler) deleteIdleEphemeralRunners(ctx context.Co log.Info( "Skipping ephemeral runner since it is running a job", "name", ephemeralRunner.Name, - "workflowRunId", ephemeralRunner.Status.WorkflowRunId, + "workflowRunId", ephemeralRunner.Status.WorkflowRunID, "jobId", ephemeralRunner.Status.JobID, ) continue @@ -482,21 +503,21 @@ func (r *EphemeralRunnerSetReconciler) deleteIdleEphemeralRunners(ctx context.Co } func (r *EphemeralRunnerSetReconciler) deleteEphemeralRunnerWithActionsClient(ctx context.Context, ephemeralRunner *v1alpha1.EphemeralRunner, actionsClient multiclient.Client, log logr.Logger) (bool, error) { - if err := actionsClient.RemoveRunner(ctx, int64(ephemeralRunner.Status.RunnerId)); err != nil { + if err := actionsClient.RemoveRunner(ctx, int64(ephemeralRunner.Status.RunnerID)); err != nil { if errors.Is(err, scaleset.JobStillRunningError) { - log.Info("Runner is still running a job, skipping deletion", "name", ephemeralRunner.Name, "runnerId", ephemeralRunner.Status.RunnerId) + log.Info("Runner is still running a job, skipping deletion", "name", ephemeralRunner.Name, "runnerId", ephemeralRunner.Status.RunnerID) return false, nil } return false, err } - log.Info("Deleting ephemeral runner after removing from the service", "name", ephemeralRunner.Name, "runnerId", ephemeralRunner.Status.RunnerId) + log.Info("Deleting ephemeral runner after removing from the service", "name", ephemeralRunner.Name, "runnerId", ephemeralRunner.Status.RunnerID) if err := r.Delete(ctx, ephemeralRunner); err != nil && !kerrors.IsNotFound(err) { return false, err } - log.Info("Deleted ephemeral runner", "name", ephemeralRunner.Name, "runnerId", ephemeralRunner.Status.RunnerId) + log.Info("Deleted ephemeral runner", "name", ephemeralRunner.Name, "runnerId", ephemeralRunner.Status.RunnerID) return true, nil } @@ -553,18 +574,19 @@ func (s *ephemeralRunnerStepper) len() int { return len(s.items) } -type ephemeralRunnerState struct { +type ephemeralRunnersByState struct { pending []*v1alpha1.EphemeralRunner running []*v1alpha1.EphemeralRunner finished []*v1alpha1.EphemeralRunner failed []*v1alpha1.EphemeralRunner deleting []*v1alpha1.EphemeralRunner + outdated []*v1alpha1.EphemeralRunner latestPatchID int } -func newEphemeralRunnerState(ephemeralRunnerList *v1alpha1.EphemeralRunnerList) *ephemeralRunnerState { - var ephemeralRunnerState ephemeralRunnerState +func newEphemeralRunnersByStates(ephemeralRunnerList *v1alpha1.EphemeralRunnerList) *ephemeralRunnersByState { + var ephemeralRunnerState ephemeralRunnersByState for i := range ephemeralRunnerList.Items { r := &ephemeralRunnerList.Items[i] @@ -578,12 +600,14 @@ func newEphemeralRunnerState(ephemeralRunnerList *v1alpha1.EphemeralRunnerList) } switch r.Status.Phase { - case corev1.PodRunning: + case v1alpha1.EphemeralRunnerPhaseRunning: ephemeralRunnerState.running = append(ephemeralRunnerState.running, r) - case corev1.PodSucceeded: + case v1alpha1.EphemeralRunnerPhaseSucceeded: ephemeralRunnerState.finished = append(ephemeralRunnerState.finished, r) - case corev1.PodFailed: + case v1alpha1.EphemeralRunnerPhaseFailed: ephemeralRunnerState.failed = append(ephemeralRunnerState.failed, r) + case v1alpha1.EphemeralRunnerPhaseOutdated: + ephemeralRunnerState.outdated = append(ephemeralRunnerState.outdated, r) default: // Pending or no phase should be considered as pending. // @@ -595,6 +619,10 @@ func newEphemeralRunnerState(ephemeralRunnerList *v1alpha1.EphemeralRunnerList) return &ephemeralRunnerState } -func (s *ephemeralRunnerState) scaleTotal() int { +func (s *ephemeralRunnersByState) terminated() []*v1alpha1.EphemeralRunner { + return append(s.finished, append(s.failed, s.outdated...)...) +} + +func (s *ephemeralRunnersByState) scaleTotal() int { return len(s.pending) + len(s.running) + len(s.failed) } diff --git a/controllers/actions.github.com/ephemeralrunnerset_controller_test.go b/controllers/actions.github.com/ephemeralrunnerset_controller_test.go index cd8a15306b..4a0a41b378 100644 --- a/controllers/actions.github.com/ephemeralrunnerset_controller_test.go +++ b/controllers/actions.github.com/ephemeralrunnerset_controller_test.go @@ -159,10 +159,10 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() { // Set status to simulate a configured EphemeralRunner refetch := false for i, runner := range runnerList.Items { - if runner.Status.RunnerId == 0 { + if runner.Status.RunnerID == 0 { updatedRunner := runner.DeepCopy() - updatedRunner.Status.Phase = corev1.PodRunning - updatedRunner.Status.RunnerId = i + 100 + updatedRunner.Status.Phase = v1alpha1.EphemeralRunnerPhaseRunning + updatedRunner.Status.RunnerID = i + 100 err = k8sClient.Status().Patch(ctx, updatedRunner, client.MergeFrom(&runner)) Expect(err).NotTo(HaveOccurred(), "failed to update EphemeralRunner") refetch = true @@ -219,10 +219,10 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() { // Set status to simulate a configured EphemeralRunner refetch := false for i, runner := range runnerList.Items { - if runner.Status.RunnerId == 0 { + if runner.Status.RunnerID == 0 { updatedRunner := runner.DeepCopy() - updatedRunner.Status.Phase = corev1.PodRunning - updatedRunner.Status.RunnerId = i + 100 + updatedRunner.Status.Phase = v1alpha1.EphemeralRunnerPhaseRunning + updatedRunner.Status.RunnerID = i + 100 err = k8sClient.Status().Patch(ctx, updatedRunner, client.MergeFrom(&runner)) Expect(err).NotTo(HaveOccurred(), "failed to update EphemeralRunner") refetch = true @@ -382,12 +382,12 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() { ).Should(BeEquivalentTo(2), "2 EphemeralRunner should be created") updatedRunner := runnerList.Items[0].DeepCopy() - updatedRunner.Status.Phase = corev1.PodSucceeded + updatedRunner.Status.Phase = v1alpha1.EphemeralRunnerPhaseSucceeded err = k8sClient.Status().Patch(ctx, updatedRunner, client.MergeFrom(&runnerList.Items[0])) Expect(err).NotTo(HaveOccurred(), "failed to update EphemeralRunner") updatedRunner = runnerList.Items[1].DeepCopy() - updatedRunner.Status.Phase = corev1.PodRunning + updatedRunner.Status.Phase = v1alpha1.EphemeralRunnerPhaseRunning err = k8sClient.Status().Patch(ctx, updatedRunner, client.MergeFrom(&runnerList.Items[1])) Expect(err).NotTo(HaveOccurred(), "failed to update EphemeralRunner") @@ -460,12 +460,12 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() { ).Should(BeEquivalentTo(2), "2 EphemeralRunner should be created") updatedRunner := runnerList.Items[0].DeepCopy() - updatedRunner.Status.Phase = corev1.PodSucceeded + updatedRunner.Status.Phase = v1alpha1.EphemeralRunnerPhaseSucceeded err = k8sClient.Status().Patch(ctx, updatedRunner, client.MergeFrom(&runnerList.Items[0])) Expect(err).NotTo(HaveOccurred(), "failed to update EphemeralRunner") updatedRunner = runnerList.Items[1].DeepCopy() - updatedRunner.Status.Phase = corev1.PodPending + updatedRunner.Status.Phase = v1alpha1.EphemeralRunnerPhasePending err = k8sClient.Status().Patch(ctx, updatedRunner, client.MergeFrom(&runnerList.Items[1])) Expect(err).NotTo(HaveOccurred(), "failed to update EphemeralRunner") @@ -512,12 +512,12 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() { ).Should(BeEquivalentTo(2), "2 EphemeralRunner should be created") updatedRunner := runnerList.Items[0].DeepCopy() - updatedRunner.Status.Phase = corev1.PodSucceeded + updatedRunner.Status.Phase = v1alpha1.EphemeralRunnerPhaseSucceeded err = k8sClient.Status().Patch(ctx, updatedRunner, client.MergeFrom(&runnerList.Items[0])) Expect(err).NotTo(HaveOccurred(), "failed to update EphemeralRunner") updatedRunner = runnerList.Items[1].DeepCopy() - updatedRunner.Status.Phase = corev1.PodRunning + updatedRunner.Status.Phase = v1alpha1.EphemeralRunnerPhaseRunning err = k8sClient.Status().Patch(ctx, updatedRunner, client.MergeFrom(&runnerList.Items[1])) Expect(err).NotTo(HaveOccurred(), "failed to update EphemeralRunner") @@ -547,7 +547,7 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() { } for _, runner := range runnerList.Items { - if runner.Status.Phase == corev1.PodSucceeded { + if runner.Status.Phase == v1alpha1.EphemeralRunnerPhaseSucceeded { return fmt.Errorf("Runner %s is in Succeeded phase", runner.Name) } } @@ -586,12 +586,12 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() { ).Should(BeEquivalentTo(2), "2 EphemeralRunner should be created") updatedRunner := runnerList.Items[0].DeepCopy() - updatedRunner.Status.Phase = corev1.PodSucceeded + updatedRunner.Status.Phase = v1alpha1.EphemeralRunnerPhaseSucceeded err = k8sClient.Status().Patch(ctx, updatedRunner, client.MergeFrom(&runnerList.Items[0])) Expect(err).NotTo(HaveOccurred(), "failed to update EphemeralRunner") updatedRunner = runnerList.Items[1].DeepCopy() - updatedRunner.Status.Phase = corev1.PodPending + updatedRunner.Status.Phase = v1alpha1.EphemeralRunnerPhasePending err = k8sClient.Status().Patch(ctx, updatedRunner, client.MergeFrom(&runnerList.Items[1])) Expect(err).NotTo(HaveOccurred(), "failed to update EphemeralRunner") @@ -607,9 +607,9 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() { succeeded := 0 for _, runner := range runnerList.Items { switch runner.Status.Phase { - case corev1.PodSucceeded: + case v1alpha1.EphemeralRunnerPhaseSucceeded: succeeded++ - case corev1.PodPending: + case v1alpha1.EphemeralRunnerPhasePending: pending++ } } @@ -649,7 +649,7 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() { return fmt.Errorf("Expected 1 runner, got %d", len(runnerList.Items)) } - if runnerList.Items[0].Status.Phase != corev1.PodPending { + if runnerList.Items[0].Status.Phase != v1alpha1.EphemeralRunnerPhasePending { return fmt.Errorf("Expected runner to be in Pending, got %s", runnerList.Items[0].Status.Phase) } @@ -661,7 +661,7 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() { // Now, the ephemeral runner finally is done and we can scale down to 0 updatedRunner = runnerList.Items[0].DeepCopy() - updatedRunner.Status.Phase = corev1.PodSucceeded + updatedRunner.Status.Phase = v1alpha1.EphemeralRunnerPhaseSucceeded err = k8sClient.Status().Patch(ctx, updatedRunner, client.MergeFrom(&runnerList.Items[0])) Expect(err).NotTo(HaveOccurred(), "failed to update EphemeralRunner") @@ -707,12 +707,12 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() { // Put one runner in Pending and one in Running updatedRunner := runnerList.Items[0].DeepCopy() - updatedRunner.Status.Phase = corev1.PodPending + updatedRunner.Status.Phase = v1alpha1.EphemeralRunnerPhasePending err = k8sClient.Status().Patch(ctx, updatedRunner, client.MergeFrom(&runnerList.Items[0])) Expect(err).NotTo(HaveOccurred(), "failed to update EphemeralRunner") updatedRunner = runnerList.Items[1].DeepCopy() - updatedRunner.Status.Phase = corev1.PodRunning + updatedRunner.Status.Phase = v1alpha1.EphemeralRunnerPhaseRunning err = k8sClient.Status().Patch(ctx, updatedRunner, client.MergeFrom(&runnerList.Items[1])) Expect(err).NotTo(HaveOccurred(), "failed to update EphemeralRunner") @@ -730,9 +730,9 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() { for _, runner := range runnerList.Items { switch runner.Status.Phase { - case corev1.PodPending: + case v1alpha1.EphemeralRunnerPhasePending: pending++ - case corev1.PodRunning: + case v1alpha1.EphemeralRunnerPhaseRunning: running++ } @@ -777,12 +777,12 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() { // Now, let's say ephemeral runner controller patched these ephemeral runners with the registration. updatedRunner = runnerList.Items[0].DeepCopy() - updatedRunner.Status.RunnerId = 1 + updatedRunner.Status.RunnerID = 1 err = k8sClient.Status().Patch(ctx, updatedRunner, client.MergeFrom(&runnerList.Items[0])) Expect(err).NotTo(HaveOccurred(), "failed to update EphemeralRunner") updatedRunner = runnerList.Items[1].DeepCopy() - updatedRunner.Status.RunnerId = 2 + updatedRunner.Status.RunnerID = 2 err = k8sClient.Status().Patch(ctx, updatedRunner, client.MergeFrom(&runnerList.Items[1])) Expect(err).NotTo(HaveOccurred(), "failed to update EphemeralRunner") @@ -830,12 +830,12 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() { // Put one runner in Succeeded and one in Running updatedRunner := runnerList.Items[0].DeepCopy() - updatedRunner.Status.Phase = corev1.PodSucceeded + updatedRunner.Status.Phase = v1alpha1.EphemeralRunnerPhaseSucceeded err = k8sClient.Status().Patch(ctx, updatedRunner, client.MergeFrom(&runnerList.Items[0])) Expect(err).NotTo(HaveOccurred(), "failed to update EphemeralRunner") updatedRunner = runnerList.Items[1].DeepCopy() - updatedRunner.Status.Phase = corev1.PodRunning + updatedRunner.Status.Phase = v1alpha1.EphemeralRunnerPhaseRunning err = k8sClient.Status().Patch(ctx, updatedRunner, client.MergeFrom(&runnerList.Items[1])) Expect(err).NotTo(HaveOccurred(), "failed to update EphemeralRunner") @@ -854,9 +854,9 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() { for _, runner := range runnerList.Items { switch runner.Status.Phase { - case corev1.PodSucceeded: + case v1alpha1.EphemeralRunnerPhaseSucceeded: succeeded++ - case corev1.PodRunning: + case v1alpha1.EphemeralRunnerPhaseRunning: running++ } } @@ -898,7 +898,7 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() { } for _, runner := range runnerList.Items { - if runner.Status.Phase == corev1.PodSucceeded { + if runner.Status.Phase == v1alpha1.EphemeralRunnerPhaseSucceeded { return fmt.Errorf("Expected no runners in Succeeded phase, got one") } } @@ -943,7 +943,7 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() { var failedOriginal *v1alpha1.EphemeralRunner var empty []*v1alpha1.EphemeralRunner for _, runner := range runnerList.Items { - switch runner.Status.RunnerId { + switch runner.Status.RunnerID { case 101: pendingOriginal = runner.DeepCopy() case 102: @@ -962,8 +962,8 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() { empty = empty[1:] pending := pendingOriginal.DeepCopy() - pending.Status.RunnerId = 101 - pending.Status.Phase = corev1.PodPending + pending.Status.RunnerID = 101 + pending.Status.Phase = v1alpha1.EphemeralRunnerPhasePending err = k8sClient.Status().Patch(ctx, pending, client.MergeFrom(pendingOriginal)) if err != nil { @@ -976,8 +976,8 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() { runningOriginal = empty[0] empty = empty[1:] running := runningOriginal.DeepCopy() - running.Status.RunnerId = 102 - running.Status.Phase = corev1.PodRunning + running.Status.RunnerID = 102 + running.Status.Phase = v1alpha1.EphemeralRunnerPhaseRunning err = k8sClient.Status().Patch(ctx, running, client.MergeFrom(runningOriginal)) if err != nil { @@ -990,8 +990,8 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() { failedOriginal = empty[0] failed := pendingOriginal.DeepCopy() - failed.Status.RunnerId = 103 - failed.Status.Phase = corev1.PodFailed + failed.Status.RunnerID = 103 + failed.Status.Phase = v1alpha1.EphemeralRunnerPhaseFailed err = k8sClient.Status().Patch(ctx, failed, client.MergeFrom(failedOriginal)) if err != nil { @@ -1006,6 +1006,7 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() { ).Should(BeTrue(), "Failed to eventually update to one pending, one running and one failed") desiredStatus := v1alpha1.EphemeralRunnerSetStatus{ + Phase: v1alpha1.EphemeralRunnerSetPhaseRunning, CurrentReplicas: 3, PendingEphemeralRunners: 1, RunningEphemeralRunners: 1, @@ -1052,6 +1053,7 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() { PendingEphemeralRunners: 0, RunningEphemeralRunners: 0, FailedEphemeralRunners: 1, + Phase: v1alpha1.EphemeralRunnerSetPhaseRunning, } Eventually( @@ -1070,7 +1072,13 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() { err = k8sClient.Delete(ctx, &runnerList.Items[0]) Expect(err).To(BeNil(), "Failed to delete failed ephemeral runner") - desiredStatus = v1alpha1.EphemeralRunnerSetStatus{} // empty + desiredStatus = v1alpha1.EphemeralRunnerSetStatus{ + CurrentReplicas: 0, + PendingEphemeralRunners: 0, + RunningEphemeralRunners: 0, + FailedEphemeralRunners: 0, + Phase: v1alpha1.EphemeralRunnerSetPhaseRunning, + } Eventually( func() (v1alpha1.EphemeralRunnerSetStatus, error) { updated := new(v1alpha1.EphemeralRunnerSet) @@ -1222,10 +1230,10 @@ var _ = Describe("Test EphemeralRunnerSet controller with proxy settings", func( // Set status to simulate a configured EphemeralRunner refetch := false for i, runner := range runnerList.Items { - if runner.Status.RunnerId == 0 { + if runner.Status.RunnerID == 0 { updatedRunner := runner.DeepCopy() - updatedRunner.Status.Phase = corev1.PodSucceeded - updatedRunner.Status.RunnerId = i + 100 + updatedRunner.Status.Phase = v1alpha1.EphemeralRunnerPhaseSucceeded + updatedRunner.Status.RunnerID = i + 100 err = k8sClient.Status().Patch(ctx, updatedRunner, client.MergeFrom(&runner)) Expect(err).NotTo(HaveOccurred(), "failed to update EphemeralRunner") refetch = true @@ -1355,8 +1363,8 @@ var _ = Describe("Test EphemeralRunnerSet controller with proxy settings", func( ).Should(BeEquivalentTo(1), "failed to create ephemeral runner") runner := runnerList.Items[0].DeepCopy() - runner.Status.Phase = corev1.PodRunning - runner.Status.RunnerId = 100 + runner.Status.Phase = v1alpha1.EphemeralRunnerPhaseRunning + runner.Status.RunnerID = 100 err = k8sClient.Status().Patch(ctx, runner, client.MergeFrom(&runnerList.Items[0])) Expect(err).NotTo(HaveOccurred(), "failed to update ephemeral runner status") diff --git a/controllers/actions.github.com/resourcebuilder.go b/controllers/actions.github.com/resourcebuilder.go index be0f8986ea..14a89cbaa1 100644 --- a/controllers/actions.github.com/resourcebuilder.go +++ b/controllers/actions.github.com/resourcebuilder.go @@ -716,6 +716,10 @@ func (b *ResourceBuilder) newEphemeralRunnerPod(runner *v1alpha1.EphemeralRunner Name: EnvVarRunnerExtraUserAgent, Value: fmt.Sprintf("actions-runner-controller/%s", build.Version), }, + corev1.EnvVar{ + Name: EnvVarRunnerDeprecatedExitCode, + Value: "1", + }, ) c.Env = append(c.Env, envs...) }