/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controllers

import (
    "context"
    "fmt"
    "strings"

    "github.com/go-logr/logr"
    "github.com/pkg/errors"
    apierrors "k8s.io/apimachinery/pkg/api/errors"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/client-go/tools/record"
    "sigs.k8s.io/cluster-api/util/patch"
    ctrl "sigs.k8s.io/controller-runtime"
    "sigs.k8s.io/controller-runtime/pkg/client"
    "sigs.k8s.io/controller-runtime/pkg/handler"
    "sigs.k8s.io/controller-runtime/pkg/reconcile"
    "sigs.k8s.io/controller-runtime/pkg/source"

    operatorv1 "k8s.io/kubeadm/operator/api/v1alpha1"
    operatorerrors "k8s.io/kubeadm/operator/errors"
)

// RuntimeTaskGroupReconciler reconciles a RuntimeTaskGroup object
type RuntimeTaskGroupReconciler struct {
    client.Client
    recorder record.EventRecorder
    Log      logr.Logger
}

// +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch
// +kubebuilder:rbac:groups=operator.kubeadm.x-k8s.io,resources=runtimetaskgroups,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=operator.kubeadm.x-k8s.io,resources=runtimetaskgroups/status,verbs=get;update;patch

// SetupWithManager configures the controller for calling the reconciler
func (r *RuntimeTaskGroupReconciler) SetupWithManager(mgr ctrl.Manager) error {
    var mapFunc handler.ToRequestsFunc = func(o handler.MapObject) []reconcile.Request {
        return operationToTaskGroupRequests(r.Client, o)
    }

    err := ctrl.NewControllerManagedBy(mgr).
        For(&operatorv1.RuntimeTaskGroup{}).
        Owns(&operatorv1.RuntimeTask{}). // force reconcile TaskGroup every time one of the owned Tasks changes
        Watches( // force reconcile TaskGroup every time the parent Operation changes
            &source.Kind{Type: &operatorv1.Operation{}},
            &handler.EnqueueRequestsFromMapFunc{ToRequests: mapFunc},
        ).
        Complete(r)

    r.recorder = mgr.GetEventRecorderFor("runtime-taskgroup-controller")

    return err
}

// Reconcile a runtimetaskgroup
func (r *RuntimeTaskGroupReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, rerr error) {
    ctx := context.Background()
    log := r.Log.WithValues("task-group", req.NamespacedName)

    // Fetch the TaskGroup instance
    taskgroup := &operatorv1.RuntimeTaskGroup{}
    if err := r.Client.Get(ctx, req.NamespacedName, taskgroup); err != nil {
        if apierrors.IsNotFound(err) {
            return ctrl.Result{}, nil
        }
        return ctrl.Result{}, err
    }

    // Ignore the TaskGroup if it is already completed or failed
    if taskgroup.Status.CompletionTime != nil {
        return ctrl.Result{}, nil
    }

    // Fetch the Operation instance
    operation, err := getOwnerOperation(ctx, r.Client, taskgroup.ObjectMeta)
    if err != nil {
        return ctrl.Result{}, err
    }

    // Initialize the patch helper
    patchHelper, err := patch.NewHelper(taskgroup, r)
    if err != nil {
        return ctrl.Result{}, err
    }

    // Always attempt to Patch the TaskGroup object and status after each reconciliation.
    defer func() {
        if err := patchHelper.Patch(ctx, taskgroup); err != nil {
            log.Error(err, "failed to patch TaskGroup")
            if rerr == nil {
                rerr = err
            }
        }
    }()

    // Reconcile the TaskGroup
    if err := r.reconcileTaskGroup(operation, taskgroup, log); err != nil {
        return ctrl.Result{}, err
    }

    return ctrl.Result{}, nil
}
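
// reconcileTaskGroup runs a single reconciliation pass for a TaskGroup: it propagates
// the pause override from the parent Operation, handles deletion, computes the current
// vs. desired set of RuntimeTasks, creates missing Tasks, and finally derives the phase.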
func (r *RuntimeTaskGroupReconciler) reconcileTaskGroup(operation *operatorv1.Operation, taskgroup *operatorv1.RuntimeTaskGroup, log logr.Logger) (err error) {
    // gets relevant settings from top level objects
    executionMode := operation.Spec.GetTypedOperationExecutionMode()
    operationPaused := operation.Status.Paused

    // Reconcile paused override from top level objects
    r.reconcilePauseOverride(operationPaused, taskgroup)

    // Handle deleted TaskGroup
    if !taskgroup.DeletionTimestamp.IsZero() {
        err = r.reconcileDelete(taskgroup)
        if err != nil {
            return err
        }
    }

    // Handle non-deleted TaskGroup

    // gets controlled tasks items (desired vs actual)
    tasks, err := r.reconcileTasks(executionMode, taskgroup, log)
    if err != nil {
        return err
    }

    err = r.reconcileNormal(executionMode, taskgroup, tasks, log)
    if err != nil {
        return err
    }

    // Always reconcile Phase at the end
    r.reconcilePhase(taskgroup)

    return nil
}
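
// reconcilePauseOverride propagates the paused state of the parent Operation to the
// TaskGroup status, recording the state change, if any, through the event recorder.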
func (r *RuntimeTaskGroupReconciler) reconcilePauseOverride(operationPaused bool, taskgroup *operatorv1.RuntimeTaskGroup) {
    // record paused override state change, if any
    taskgrouppaused := operationPaused
    recordPausedChange(r.recorder, taskgroup, taskgroup.Status.Paused, taskgrouppaused, "by top level objects")

    // update status with paused override setting from top level objects
    taskgroup.Status.Paused = taskgrouppaused
}
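
// reconcileTasks computes the desired Tasks (one per Node matching the TaskGroup node
// selector and filter) against the current Tasks selected by taskgroup.Spec.Selector,
// and updates the corresponding replica counters in the TaskGroup status.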
func (r *RuntimeTaskGroupReconciler) reconcileTasks(executionMode operatorv1.OperationExecutionMode, taskgroup *operatorv1.RuntimeTaskGroup, log logr.Logger) (*taskReconcileList, error) {
    // gets all the Node objects matching taskgroup.Spec.NodeSelector;
    // those are the Nodes where the task taskgroup.Spec.Template should be replicated (desired tasks)
    nodes, err := listNodesBySelector(r.Client, &taskgroup.Spec.NodeSelector)
    if err != nil {
        return nil, errors.Wrap(err, "failed to list nodes")
    }
    desired := filterNodes(nodes, taskgroup.Spec.GetTypedTaskGroupNodeFilter())

    // gets all the Task objects matching taskgroup.Spec.Selector;
    // those are the current Task objects controlled by this deployment
    current, err := listTasksBySelector(r.Client, &taskgroup.Spec.Selector)
    if err != nil {
        return nil, errors.Wrap(err, "failed to list tasks")
    }

    log.Info("reconciling", "Nodes", len(desired), "Tasks", len(current.Items))

    // match current and desired state, so the controller can determine what is necessary to do next
    tasks := reconcileTasks(desired, current)

    // update replica counters
    taskgroup.Status.Nodes = int32(len(tasks.all))
    taskgroup.Status.RunningNodes = int32(len(tasks.running))
    taskgroup.Status.SucceededNodes = int32(len(tasks.completed))
    taskgroup.Status.FailedNodes = int32(len(tasks.failed))
    taskgroup.Status.InvalidNodes = int32(len(tasks.invalid))

    return tasks, nil
}
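
// reconcileNormal drives a non-deleted TaskGroup towards completion: it resets or sets
// error conditions based on the observed Tasks, records start/completion times, honours
// the paused flag, and creates at most one new Task at a time following the ordered
// node sequence.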
func (r *RuntimeTaskGroupReconciler) reconcileNormal(executionMode operatorv1.OperationExecutionMode, taskgroup *operatorv1.RuntimeTaskGroup, tasks *taskReconcileList, log logr.Logger) error {
    // If the TaskGroup doesn't have finalizer, add it.
    //if !util.Contains(taskgroup.Finalizers, operatorv1alpha1.TaskGroupFinalizer) {
    //	taskgroup.Finalizers = append(taskgroup.Finalizers, operatorv1alpha1.TaskGroupFinalizer)
    //}

    // If there are Tasks not yet completed (pending or running), clean up error messages (required e.g. after recovery)
    // NB. It is necessary to give priority to running vs errors so the operation controller keeps alive/restarts
    // the DaemonSet for processing tasks
    if tasks.activeTasks() > 0 {
        taskgroup.Status.ResetError()
    } else {
        // if there are invalid combinations (e.g. a Node with more than one Task, or a Task without a Node),
        // set the error and stop creating new Tasks
        if len(tasks.invalid) > 0 {
            taskgroup.Status.SetError(
                operatorerrors.NewRuntimeTaskGroupReconciliationError("something invalid"),
            )
            return nil
        }

        // if there are failed tasks,
        // set the error and stop creating new Tasks
        if len(tasks.failed) > 0 {
            taskgroup.Status.SetError(
                operatorerrors.NewRuntimeTaskGroupReplicaError("something failed"),
            )
            return nil
        }
    }

    // TODO: manage adopt tasks/tasks to be orphaned

    // if nil, set the TaskGroup start time
    if taskgroup.Status.StartTime == nil {
        taskgroup.Status.SetStartTime()

        //TODO: add a signature so we can detect if someone/something changes the taskgroup while it is processed
        return nil
    }

    // if the completed Tasks have reached the number of expected Tasks, the TaskGroup is completed
    // NB. we are doing this before checking pause because if everything is completed, it does not make sense to pause
    if len(tasks.completed) == len(tasks.all) {
        // NB. we are setting this condition explicitly in order to avoid that the taskGroup accidentally
        // restarts to create tasks
        taskgroup.Status.SetCompletionTime()
        return nil
    }

    // if the TaskGroup is paused, return
    if taskgroup.Status.Paused {
        return nil
    }

    // otherwise, proceed creating tasks

    // if there are still Tasks to be created
    if len(tasks.tobeCreated) > 0 {
        //TODO: manage different deployment strategies, e.g. parallel

        // if there are no existing Tasks not yet completed (pending or running)
        if tasks.activeTasks() == 0 {
            // create a Task for the next node in the ordered sequence
            nextNode := tasks.tobeCreated[0].node.Name
            log.WithValues("node-name", nextNode).Info("creating task")

            err := r.createTasksReplica(executionMode, taskgroup, nextNode)
            if err != nil {
                if strings.Contains(err.Error(), "already exists") {
                    log.WithValues("node-name", nextNode).Info("task already exists")
                    return nil
                }
                return errors.Wrap(err, "Failed to create Task replica")
            }
        }
    }

    return nil
}
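
// createTasksReplica creates the RuntimeTask for the given node, copying commands,
// labels, and annotations from the TaskGroup template. When the Operation runs in
// controlled execution mode, the Task is created in paused state.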
func (r *RuntimeTaskGroupReconciler) createTasksReplica(executionMode operatorv1.OperationExecutionMode, taskgroup *operatorv1.RuntimeTaskGroup, nodeName string) error {
    r.Log.Info("Creating task replica", "node", nodeName)

    gv := operatorv1.GroupVersion

    paused := false
    if executionMode == operatorv1.OperationExecutionModeControlled {
        paused = true
    }

    // TODO: use a template instead of a new object; currently the template labels are not set successfully
    r.Log.Info("template logs", "taskgroup", taskgroup, "labels", taskgroup.Spec.Template.GetObjectMeta().GetLabels())
    labels := taskgroup.Spec.Template.Labels
    if len(labels) == 0 {
        labels = taskgroup.Labels
    }

    task := &operatorv1.RuntimeTask{
        TypeMeta: metav1.TypeMeta{
            Kind:       "RuntimeTask",
            APIVersion: gv.String(),
        },
        ObjectMeta: metav1.ObjectMeta{
            Name:      fmt.Sprintf("%s-%s", taskgroup.Name, nodeName), //TODO: GenerateName?
            Namespace: taskgroup.Namespace,
            // we should use the same labels as the taskgroup template
            Labels:          labels,
            Annotations:     taskgroup.Spec.Template.Annotations,
            OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(taskgroup, taskgroup.GroupVersionKind())},
        },
        Spec: operatorv1.RuntimeTaskSpec{
            NodeName: nodeName,
            Commands: taskgroup.Spec.Template.Spec.Commands,
        },
        Status: operatorv1.RuntimeTaskStatus{
            Phase:  string(operatorv1.RuntimeTaskPhasePending),
            Paused: paused,
        },
    }

    return r.Client.Create(context.Background(), task)
}
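
// reconcileDelete handles a TaskGroup that is being deleted; finalizer handling is
// currently disabled, so there is nothing left to clean up.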
func (r *RuntimeTaskGroupReconciler) reconcileDelete(taskgroup *operatorv1.RuntimeTaskGroup) error {
    // TaskGroup is deleted so remove the finalizer.
    //taskgroup.Finalizers = util.Filter(taskgroup.Finalizers, operatorv1alpha1.TaskGroupFinalizer)
    return nil
}
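
// reconcilePhase derives Status.Phase from the other status fields, evaluated in
// priority order: deleted, failed, succeeded, paused, running, and finally pending.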
func (r *RuntimeTaskGroupReconciler) reconcilePhase(taskgroup *operatorv1.RuntimeTaskGroup) {
    // Set the phase to "deleted" if the deletion timestamp is set.
    if !taskgroup.DeletionTimestamp.IsZero() {
        taskgroup.Status.SetTypedPhase(operatorv1.RuntimeTaskGroupPhaseDeleted)
        return
    }

    // Set the phase to "failed" if any of Status.ErrorReason or Status.ErrorMessage is not nil.
    if taskgroup.Status.ErrorReason != nil || taskgroup.Status.ErrorMessage != nil {
        taskgroup.Status.SetTypedPhase(operatorv1.RuntimeTaskGroupPhaseFailed)
        return
    }

    // Set the phase to "succeeded" if the completion time is set.
    if taskgroup.Status.CompletionTime != nil {
        taskgroup.Status.SetTypedPhase(operatorv1.RuntimeTaskGroupPhaseSucceeded)
        return
    }

    // Set the phase to "paused" if paused is set.
    if taskgroup.Status.Paused {
        taskgroup.Status.SetTypedPhase(operatorv1.RuntimeTaskGroupPhasePaused)
        return
    }

    // Set the phase to "running" if the start time is set.
    if taskgroup.Status.StartTime != nil {
        taskgroup.Status.SetTypedPhase(operatorv1.RuntimeTaskGroupPhaseRunning)
        return
    }

    // Otherwise, set the phase to "pending".
    taskgroup.Status.SetTypedPhase(operatorv1.RuntimeTaskGroupPhasePending)
}