/*
Copyright 2020 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sidecarcontrol
import (
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
)
type SidecarControl interface {
//*****common*****//
// get sidecarset
GetSidecarset() *appsv1alpha1.SidecarSet
// When the sidecarSet is not active, it will not perform injection or upgrade processes.
// You can re-implement IsActiveSidecarSet to indicate that this sidecarSet is no longer working by checking some sidecarSet flags,
// for example: sidecarSet.Annotations[sidecarset.kruise.io/disabled] = "true"
IsActiveSidecarSet() bool
//*****inject portion*****//
// whether the volumeMount needs to be injected into the sidecar container
// when ShareVolumePolicy is enabled, the sidecar container will share the other containers' VolumeMounts in the pod (not including the injected sidecar containers).
// You can re-implement NeedToInjectVolumeMount to filter out volumes that don't need to be shared
NeedToInjectVolumeMount(volumeMount v1.VolumeMount) bool
// when updating a pod, judge whether the sidecar container should be injected into it
// one can customize validation to allow sidecar addition after pod creation, and re-implement NeedToInjectInUpdatedPod to enable such injection in the sidecarSet
NeedToInjectInUpdatedPod(pod, oldPod *v1.Pod, sidecarContainer *appsv1alpha1.SidecarContainer, injectedEnvs []v1.EnvVar,
injectedMounts []v1.VolumeMount) (needInject bool, existSidecars []*appsv1alpha1.SidecarContainer, existVolumes []v1.Volume)
// IsPodAvailabilityChanged checks whether the pod changed on update in a way that triggers re-injection of the sidecar container
// For the scenario of injecting sidecar containers on pod update, this method can filter out many irrelevant update events, thus improving overall webhook performance.
IsPodAvailabilityChanged(pod, oldPod *v1.Pod) bool
//*****upgrade portion*****//
// IsPodStateConsistent indicates whether pod.spec and pod.status are consistent after updating the sidecar containers
IsPodStateConsistent(pod *v1.Pod, sidecarContainers sets.String) bool
// IsPodReady indicates whether the pod is fully ready:
// 1. pod.Status.Phase == v1.PodRunning
// 2. pod condition PodReady == true
// 3. every empty (hot-standby) sidecar container's image is the HotUpgradeEmptyImage
IsPodReady(pod *v1.Pod) bool
// UpgradeSidecarContainer upgrades the pod's sidecar container to the latest sidecarSet version
// a nil return value means there is no change and no update is needed; otherwise the returned container needs to be updated
UpgradeSidecarContainer(sidecarContainer *appsv1alpha1.SidecarContainer, pod *v1.Pod) *v1.Container
// When upgrading the pod sidecar container, you need to record some in-place upgrade information in pod annotations,
// which is needed by the sidecarset controller to determine whether the upgrade is completed.
UpdatePodAnnotationsInUpgrade(changedContainers []string, pod *v1.Pod)
// IsSidecarSetUpgradable returns whether the sidecarSet can upgrade the pod.
// In Kubernetes native scenarios, only container image upgrades are allowed.
// When other container fields are modified, e.g. volumeMounts, the sidecarSet will not trigger the in-place sidecar upgrade logic,
// and the upgrade needs to be done by rebuilding the pod.
// consistent indicates whether pod.spec and pod.status are consistent:
// when pod.spec.image is v2 and pod.status.image is v1, they are inconsistent.
IsSidecarSetUpgradable(pod *v1.Pod) (canUpgrade, consistent bool)
}
func New(cs *appsv1alpha1.SidecarSet) SidecarControl {
return &commonControl{SidecarSet: cs}
}
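// Illustrative usage sketch (not part of the original source): a caller holding a SidecarSet
// would typically obtain a SidecarControl through New and gate injection/upgrade on it.
// The sidecarSet and pod variables below are assumed to exist.
//
//	control := New(sidecarSet)
//	if !control.IsActiveSidecarSet() {
//		return // skip injection and upgrade for an inactive sidecarSet
//	}
//	if canUpgrade, consistent := control.IsSidecarSetUpgradable(pod); canUpgrade && consistent {
//		// safe to upgrade this pod's sidecar containers in place
//	}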
/*
Copyright 2019 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sidecarcontrol
import (
"crypto/sha256"
"encoding/json"
"fmt"
"k8s.io/apimachinery/pkg/util/rand"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
)
// SidecarSetHash returns a hash of the SidecarSet.
// The containers and sidecar-style initContainers are taken into account.
func SidecarSetHash(sidecarSet *appsv1alpha1.SidecarSet) (string, error) {
encoded, err := encodeSidecarSet(sidecarSet)
if err != nil {
return "", err
}
h := rand.SafeEncodeString(hash(encoded))
return h, nil
}
// SidecarSetHashWithoutImage calculates the sidecar containers' hash without their images
// we use this to determine whether the sidecar reconcile can update the pod by changing images only
func SidecarSetHashWithoutImage(sidecarSet *appsv1alpha1.SidecarSet) (string, error) {
ss := sidecarSet.DeepCopy()
for i := range ss.Spec.Containers {
ss.Spec.Containers[i].Image = ""
}
for i := range ss.Spec.InitContainers {
ss.Spec.InitContainers[i].Image = ""
}
encoded, err := encodeSidecarSet(ss)
if err != nil {
return "", err
}
return rand.SafeEncodeString(hash(encoded)), nil
}
func encodeSidecarSet(sidecarSet *appsv1alpha1.SidecarSet) (string, error) {
// json.Marshal sorts the keys in a stable order in the encoding
m := map[string]interface{}{"containers": sidecarSet.Spec.Containers}
// since K8s 1.28, an initContainer with restartPolicy = Always is a sidecar container, so the hash needs to include it.
initContainer := make([]appsv1alpha1.SidecarContainer, 0)
for i := range sidecarSet.Spec.InitContainers {
container := &sidecarSet.Spec.InitContainers[i]
if IsSidecarContainer(container.Container) {
initContainer = append(initContainer, *container)
}
}
if len(initContainer) > 0 {
m["initContainers"] = sidecarSet.Spec.InitContainers
}
data, err := json.Marshal(m)
if err != nil {
return "", err
}
return string(data), nil
}
// hash hashes `data` with sha256 and returns the hex string
func hash(data string) string {
return fmt.Sprintf("%x", sha256.Sum256([]byte(data)))
}
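// Illustrative sketch (not part of the original source): the two hashes are typically computed
// together and stored in the sidecarSet annotations under SidecarSetHashAnnotation and
// SidecarSetHashWithoutImageAnnotation (defined elsewhere in this package).
//
//	hashWithImage, err := SidecarSetHash(sidecarSet)
//	if err != nil {
//		return err
//	}
//	hashWithoutImage, err := SidecarSetHashWithoutImage(sidecarSet)
//	if err != nil {
//		return err
//	}
//	sidecarSet.Annotations[SidecarSetHashAnnotation] = hashWithImage
//	sidecarSet.Annotations[SidecarSetHashWithoutImageAnnotation] = hashWithoutImage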
/*
Copyright 2021 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sidecarcontrol
import (
"bytes"
"context"
"encoding/json"
"fmt"
apps "k8s.io/api/apps/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/strategicpatch"
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/controller/history"
"sigs.k8s.io/controller-runtime/pkg/client"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
"github.com/openkruise/kruise/pkg/util"
webhookutil "github.com/openkruise/kruise/pkg/webhook/util"
)
var (
patchCodec = scheme.Codecs.LegacyCodec(appsv1alpha1.SchemeGroupVersion)
)
type HistoryControl interface {
CreateControllerRevision(parent metav1.Object, revision *apps.ControllerRevision, collisionCount *int32) (*apps.ControllerRevision, error)
NewRevision(s *appsv1alpha1.SidecarSet, namespace string, revision int64, collisionCount *int32) (*apps.ControllerRevision, error)
NextRevision(revisions []*apps.ControllerRevision) int64
GetRevisionSelector(s *appsv1alpha1.SidecarSet) labels.Selector
GetHistorySidecarSet(sidecarSet *appsv1alpha1.SidecarSet, revisionInfo *appsv1alpha1.SidecarSetInjectRevision) (*appsv1alpha1.SidecarSet, error)
}
type realControl struct {
Client client.Client
}
func NewHistoryControl(client client.Client) HistoryControl {
return &realControl{
Client: client,
}
}
func (r *realControl) NewRevision(s *appsv1alpha1.SidecarSet, namespace string, revision int64, collisionCount *int32) (
*apps.ControllerRevision, error,
) {
patch, err := r.getPatch(s)
if err != nil {
return nil, err
}
cr, err := history.NewControllerRevision(s,
s.GetObjectKind().GroupVersionKind(),
s.Labels,
runtime.RawExtension{Raw: patch},
revision,
collisionCount)
if err != nil {
return nil, err
}
cr.SetNamespace(namespace)
if cr.Labels == nil {
cr.Labels = make(map[string]string)
}
if cr.ObjectMeta.Annotations == nil {
cr.ObjectMeta.Annotations = make(map[string]string)
}
if s.Annotations[SidecarSetHashAnnotation] != "" {
cr.Annotations[SidecarSetHashAnnotation] = s.Annotations[SidecarSetHashAnnotation]
}
if s.Annotations[SidecarSetHashWithoutImageAnnotation] != "" {
cr.Annotations[SidecarSetHashWithoutImageAnnotation] = s.Annotations[SidecarSetHashWithoutImageAnnotation]
}
if s.Labels[appsv1alpha1.SidecarSetCustomVersionLabel] != "" {
cr.Labels[appsv1alpha1.SidecarSetCustomVersionLabel] = s.Labels[appsv1alpha1.SidecarSetCustomVersionLabel]
}
cr.Labels[SidecarSetKindName] = s.Name
for key, value := range s.Annotations {
cr.ObjectMeta.Annotations[key] = value
}
return cr, nil
}
// getPatch returns a strategic merge patch that can be applied to restore a SidecarSet to a
// previous version. If the returned error is nil the patch is valid. The current state that we save is only the
// spec fields recorded by copySidecarSetSpecRevision. We can modify this later to encompass more state (or less) and remain compatible with previously
// recorded patches.
func (r *realControl) getPatch(s *appsv1alpha1.SidecarSet) ([]byte, error) {
str, err := runtime.Encode(patchCodec, s)
if err != nil {
return nil, err
}
var raw map[string]interface{}
_ = json.Unmarshal(str, &raw)
objCopy := make(map[string]interface{})
specCopy := make(map[string]interface{})
// only copy some specified fields of s.Spec to specCopy
spec := raw["spec"].(map[string]interface{})
copySidecarSetSpecRevision(specCopy, spec)
objCopy["spec"] = specCopy
return json.Marshal(objCopy)
}
// NextRevision finds the next valid revision number based on revisions. If the length of revisions
// is 0 this is 1. Otherwise, it is 1 greater than the largest revision's Revision. This method
// assumes that revisions has been sorted by Revision.
func (r *realControl) NextRevision(revisions []*apps.ControllerRevision) int64 {
count := len(revisions)
if count <= 0 {
return 1
}
return revisions[count-1].Revision + 1
}
func (r *realControl) GetRevisionSelector(s *appsv1alpha1.SidecarSet) labels.Selector {
labelSelector := &metav1.LabelSelector{
MatchLabels: map[string]string{
SidecarSetKindName: s.GetName(),
},
}
selector, err := util.ValidatedLabelSelectorAsSelector(labelSelector)
if err != nil {
// static error, just panic
panic("Incorrect label selector for ControllerRevision of SidecarSet.")
}
return selector
}
func (r *realControl) CreateControllerRevision(parent metav1.Object, revision *apps.ControllerRevision, collisionCount *int32) (*apps.ControllerRevision, error) {
if collisionCount == nil {
return nil, fmt.Errorf("collisionCount should not be nil")
}
// Clone the input
clone := revision.DeepCopy()
// Continue to attempt to create the revision updating the name with a new hash on each iteration
for {
hash := history.HashControllerRevision(revision, collisionCount)
// Update the revisions name
clone.Name = history.ControllerRevisionName(parent.GetName(), hash)
err := r.Client.Create(context.TODO(), clone)
if errors.IsAlreadyExists(err) {
exists := &apps.ControllerRevision{}
key := types.NamespacedName{
Namespace: clone.Namespace,
Name: clone.Name,
}
err = r.Client.Get(context.TODO(), key, exists)
if err != nil {
return nil, err
}
if bytes.Equal(exists.Data.Raw, clone.Data.Raw) {
return exists, nil
}
*collisionCount++
continue
}
return clone, err
}
}
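// Illustrative sketch (not part of the original source): a controller would typically combine
// NewRevision, NextRevision and CreateControllerRevision roughly as follows. The mgrClient,
// sidecarSet, sorted revisions slice and collisionCount variables are assumed.
//
//	hc := NewHistoryControl(mgrClient)
//	revision, err := hc.NewRevision(sidecarSet, webhookutil.GetNamespace(), hc.NextRevision(revisions), &collisionCount)
//	if err != nil {
//		return err
//	}
//	if _, err := hc.CreateControllerRevision(MockSidecarSetForRevision(sidecarSet), revision, &collisionCount); err != nil {
//		return err
//	}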
func (r *realControl) GetHistorySidecarSet(sidecarSet *appsv1alpha1.SidecarSet, revisionInfo *appsv1alpha1.SidecarSetInjectRevision) (*appsv1alpha1.SidecarSet, error) {
revision, err := r.getControllerRevision(sidecarSet, revisionInfo)
if err != nil || revision == nil {
return nil, err
}
clone := sidecarSet.DeepCopy()
cloneBytes, err := runtime.Encode(patchCodec, clone)
if err != nil {
klog.ErrorS(err, "Failed to encode sidecarSet", "sidecarSet", klog.KRef("", sidecarSet.Name))
return nil, err
}
patched, err := strategicpatch.StrategicMergePatch(cloneBytes, revision.Data.Raw, clone)
if err != nil {
klog.ErrorS(err, "Failed to merge sidecarSet and controllerRevision", "sidecarSet", klog.KRef("", sidecarSet.Name), "controllerRevision", klog.KRef("", revision.Name))
return nil, err
}
// restore history from patch
restoredSidecarSet := &appsv1alpha1.SidecarSet{}
if err := json.Unmarshal(patched, restoredSidecarSet); err != nil {
return nil, err
}
// restore hash annotation and revision info
if err := restoreRevisionInfo(restoredSidecarSet, revision); err != nil {
return nil, err
}
return restoredSidecarSet, nil
}
func (r *realControl) getControllerRevision(set *appsv1alpha1.SidecarSet, revisionInfo *appsv1alpha1.SidecarSetInjectRevision) (*apps.ControllerRevision, error) {
if revisionInfo == nil {
return nil, nil
}
switch {
case revisionInfo.RevisionName != nil:
revision := &apps.ControllerRevision{}
revisionKey := types.NamespacedName{
Namespace: webhookutil.GetNamespace(),
Name: *revisionInfo.RevisionName,
}
if err := r.Client.Get(context.TODO(), revisionKey, revision); err != nil {
klog.ErrorS(err, "Failed to get controllerRevision for sidecarSet", "controllerRevision", klog.KRef("", *revisionInfo.RevisionName), "sidecarSet", klog.KRef("", set.Name))
return nil, err
}
return revision, nil
case revisionInfo.CustomVersion != nil:
listOpts := []client.ListOption{
client.InNamespace(webhookutil.GetNamespace()),
&client.ListOptions{LabelSelector: r.GetRevisionSelector(set)},
client.MatchingLabels{appsv1alpha1.SidecarSetCustomVersionLabel: *revisionInfo.CustomVersion},
}
revisionList := &apps.ControllerRevisionList{}
if err := r.Client.List(context.TODO(), revisionList, listOpts...); err != nil {
klog.ErrorS(err, "Failed to get controllerRevision for sidecarSet", "controllerRevision", klog.KRef("", *revisionInfo.CustomVersion),
"sidecarSet", klog.KRef("", set.Name), "customVersion", *revisionInfo.CustomVersion)
return nil, err
}
var revisions []*apps.ControllerRevision
for i := range revisionList.Items {
revisions = append(revisions, &revisionList.Items[i])
}
if len(revisions) == 0 {
return nil, generateNotFoundError(set)
}
history.SortControllerRevisions(revisions)
return revisions[len(revisions)-1], nil
}
klog.ErrorS(fmt.Errorf("Failed to get controllerRevision due to both empty revisionName and customVersion"), "Failed to get controllerRevision")
return nil, nil
}
func copySidecarSetSpecRevision(dst, src map[string]interface{}) {
// we will use patch instead of update operation to update pods in the future
dst["$patch"] = "replace"
// only record these spec fields in the revision
dst["volumes"] = src["volumes"]
dst["containers"] = src["containers"]
dst["initContainers"] = src["initContainers"]
dst["imagePullSecrets"] = src["imagePullSecrets"]
dst["patchPodMetadata"] = src["patchPodMetadata"]
}
func restoreRevisionInfo(sidecarSet *appsv1alpha1.SidecarSet, revision *apps.ControllerRevision) error {
if sidecarSet.Annotations == nil {
sidecarSet.Annotations = map[string]string{}
}
if revision.Annotations[SidecarSetHashAnnotation] != "" {
sidecarSet.Annotations[SidecarSetHashAnnotation] = revision.Annotations[SidecarSetHashAnnotation]
} else {
hashCodeWithImage, err := SidecarSetHash(sidecarSet)
if err != nil {
return err
}
sidecarSet.Annotations[SidecarSetHashAnnotation] = hashCodeWithImage
}
if revision.Annotations[SidecarSetHashWithoutImageAnnotation] != "" {
sidecarSet.Annotations[SidecarSetHashWithoutImageAnnotation] = revision.Annotations[SidecarSetHashWithoutImageAnnotation]
} else {
hashCodeWithoutImage, err := SidecarSetHashWithoutImage(sidecarSet)
if err != nil {
return err
}
sidecarSet.Annotations[SidecarSetHashWithoutImageAnnotation] = hashCodeWithoutImage
}
sidecarSet.Status.LatestRevision = revision.Name
return nil
}
func MockSidecarSetForRevision(set *appsv1alpha1.SidecarSet) metav1.Object {
return &metav1.ObjectMeta{
UID: set.UID,
Name: set.Name,
Namespace: webhookutil.GetNamespace(),
}
}
func generateNotFoundError(set *appsv1alpha1.SidecarSet) error {
return errors.NewNotFound(schema.GroupResource{
Group: apps.GroupName,
Resource: "ControllerRevision",
}, set.Name)
}
/*
Copyright 2021 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sidecarcontrol
import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
var (
RevisionAdapterImpl = &revisionAdapterImpl{}
)
type revisionAdapterImpl struct{}
func (r *revisionAdapterImpl) EqualToRevisionHash(sidecarSetName string, obj metav1.Object, hash string) bool {
return GetPodSidecarSetRevision(sidecarSetName, obj) == hash
}
func (r *revisionAdapterImpl) WriteRevisionHash(obj metav1.Object, hash string) {
// No need to implement yet.
}
/*
Copyright 2020 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sidecarcontrol
import (
"encoding/json"
"github.com/openkruise/kruise/apis/apps/pub"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
"github.com/openkruise/kruise/pkg/util"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/klog/v2"
)
type commonControl struct {
*appsv1alpha1.SidecarSet
}
func (c *commonControl) GetSidecarset() *appsv1alpha1.SidecarSet {
return c.SidecarSet
}
func (c *commonControl) IsActiveSidecarSet() bool {
return true
}
func (c *commonControl) UpgradeSidecarContainer(sidecarContainer *appsv1alpha1.SidecarContainer, pod *v1.Pod) *v1.Container {
var nameToUpgrade, otherContainer, oldImage string
if IsHotUpgradeContainer(sidecarContainer) {
nameToUpgrade, otherContainer = findContainerToHotUpgrade(sidecarContainer, pod, c)
oldImage = util.GetContainer(otherContainer, pod).Image
} else {
nameToUpgrade = sidecarContainer.Name
oldImage = util.GetContainer(nameToUpgrade, pod).Image
}
// native (community) Kubernetes in-place upgrades are only allowed to update the container image
if sidecarContainer.Image == oldImage {
return nil
}
container := util.GetContainer(nameToUpgrade, pod)
container.Image = sidecarContainer.Image
klog.V(3).InfoS("Upgraded pod container image", "pod", klog.KObj(pod), "containerName", nameToUpgrade,
"oldImage", oldImage, "newImage", container.Image)
return container
}
func (c *commonControl) NeedToInjectVolumeMount(volumeMount v1.VolumeMount) bool {
return true
}
func (c *commonControl) NeedToInjectInUpdatedPod(pod, oldPod *v1.Pod, sidecarContainer *appsv1alpha1.SidecarContainer,
injectedEnvs []v1.EnvVar, injectedMounts []v1.VolumeMount) (needInject bool, existSidecars []*appsv1alpha1.SidecarContainer, existVolumes []v1.Volume) {
return false, nil, nil
}
func (c *commonControl) IsPodReady(pod *v1.Pod) bool {
sidecarSet := c.GetSidecarset()
// check whether hot upgrade is complete
// map[string]string: {empty container name}->{sidecarSet.spec.containers[x].upgradeStrategy.HotUpgradeEmptyImage}
emptyContainers := map[string]string{}
for _, sidecarContainer := range sidecarSet.Spec.Containers {
if IsHotUpgradeContainer(&sidecarContainer) {
_, emptyContainer := GetPodHotUpgradeContainers(sidecarContainer.Name, pod)
emptyContainers[emptyContainer] = sidecarContainer.UpgradeStrategy.HotUpgradeEmptyImage
}
}
for _, container := range pod.Spec.Containers {
// If container is empty container, then its image must be empty image
if emptyImage := emptyContainers[container.Name]; emptyImage != "" && container.Image != emptyImage {
klog.V(5).InfoS("Pod sidecar empty container image wasn't empty image", "pod", klog.KObj(pod),
"containerName", container.Name, "containerImage", container.Image, "emptyImage", emptyImage)
return false
}
}
// 1. pod.Status.Phase == v1.PodRunning
// 2. pod.condition PodReady == true
return util.IsRunningAndReady(pod)
}
func (c *commonControl) UpdatePodAnnotationsInUpgrade(changedContainers []string, pod *v1.Pod) {
sidecarSet := c.GetSidecarset()
// record the ImageID before updating the pod sidecar container;
// if it changes afterwards, the update is complete.
// format: sidecarset.name -> pub.InPlaceUpdateState
sidecarUpdateStates := make(map[string]*pub.InPlaceUpdateState)
if stateStr := pod.Annotations[SidecarsetInplaceUpdateStateKey]; len(stateStr) > 0 {
if err := json.Unmarshal([]byte(stateStr), &sidecarUpdateStates); err != nil {
klog.ErrorS(err, "Failed to parse pod annotations value", "pod", klog.KObj(pod),
"annotation", SidecarsetInplaceUpdateStateKey, "value", stateStr)
}
}
inPlaceUpdateState, ok := sidecarUpdateStates[sidecarSet.Name]
if !ok {
inPlaceUpdateState = &pub.InPlaceUpdateState{
Revision: GetSidecarSetRevision(sidecarSet),
UpdateTimestamp: metav1.Now(),
}
}
// format: container.name -> pod.status.containers[container.name].ImageID
if inPlaceUpdateState.LastContainerStatuses == nil {
inPlaceUpdateState.LastContainerStatuses = make(map[string]pub.InPlaceUpdateContainerStatus)
}
cStatus := make(map[string]string, len(pod.Status.ContainerStatuses))
for i := range pod.Status.ContainerStatuses {
c := &pod.Status.ContainerStatuses[i]
cStatus[c.Name] = c.ImageID
}
for _, cName := range changedContainers {
updateStatus := pub.InPlaceUpdateContainerStatus{
ImageID: cStatus[cName],
}
// record status.ImageID before the pod is updated
inPlaceUpdateState.LastContainerStatuses[cName] = updateStatus
}
// record sidecar container status information in pod's annotations
sidecarUpdateStates[sidecarSet.Name] = inPlaceUpdateState
by, _ := json.Marshal(sidecarUpdateStates)
pod.Annotations[SidecarsetInplaceUpdateStateKey] = string(by)
}
// IsPodStateConsistent only checks whether the sidecar containers are consistent
func (c *commonControl) IsPodStateConsistent(pod *v1.Pod, sidecarContainers sets.String) bool {
if len(pod.Spec.Containers) != len(pod.Status.ContainerStatuses) {
return false
}
sidecarset := c.GetSidecarset()
if sidecarContainers.Len() == 0 {
sidecarContainers = GetSidecarContainersInPod(sidecarset)
}
allDigestImage := true
cImageIDs := util.GetPodContainerImageIDs(pod)
for _, container := range pod.Spec.Containers {
// only check whether sidecar container is consistent
if !sidecarContainers.Has(container.Name) {
continue
}
// whether image is digest format,
// for example: docker.io/busybox@sha256:a9286defaba7b3a519d585ba0e37d0b2cbee74ebfe590960b0b1d6a5e97d1e1d
if !util.IsImageDigest(container.Image) {
allDigestImage = false
break
}
imageID, ok := cImageIDs[container.Name]
if !ok {
return false
}
if !util.IsContainerImageEqual(container.Image, imageID) {
return false
}
}
// If all spec.container[x].image is digest format, only check digest imageId
if allDigestImage {
return true
}
// check container InplaceUpdate status
return IsSidecarContainerUpdateCompleted(pod, sets.NewString(sidecarset.Name), sidecarContainers)
}
func (c *commonControl) IsSidecarSetUpgradable(pod *v1.Pod) (canUpgrade, consistent bool) {
sidecarSet := c.GetSidecarset()
// Kubernetes only allows modifying pod.spec.containers[x].image,
// so the sidecarSet can upgrade pods only when annotations[SidecarSetHashWithoutImageAnnotation] is unchanged
if GetPodSidecarSetWithoutImageRevision(sidecarSet.Name, pod) != GetSidecarSetWithoutImageRevision(sidecarSet) {
return false, false
}
// cStatus: container.name -> containerStatus.Ready
cStatus := map[string]bool{}
for _, status := range pod.Status.ContainerStatuses {
cStatus[status.Name] = status.Ready
}
sidecarContainerList := GetSidecarContainersInPod(sidecarSet)
for _, sidecar := range sidecarContainerList.List() {
// when containerStatus.Ready == true and the container is not consistent,
// the sidecar container is still in the process of being upgraded;
// wait for the previous upgrade to complete before performing this one
if cStatus[sidecar] && !c.IsPodStateConsistent(pod, sets.NewString(sidecar)) {
return true, false
}
}
return true, true
}
func (c *commonControl) IsPodAvailabilityChanged(pod, oldPod *v1.Pod) bool {
return false
}
// IsSidecarContainerUpdateCompleted checks whether the imageID in the container status has changed since the in-place update.
// If the imageID in containerStatuses has not changed, we assume that kubelet has not yet updated the containers in the Pod.
func IsSidecarContainerUpdateCompleted(pod *v1.Pod, sidecarSets, containers sets.String) bool {
// format: sidecarset.name -> pub.InPlaceUpdateState
sidecarUpdateStates := make(map[string]*pub.InPlaceUpdateState)
// when the pod annotation is not found, it indicates the pod only had the sidecar container injected and was never in-place updated,
// so it is always considered update-complete
if stateStr, ok := pod.Annotations[SidecarsetInplaceUpdateStateKey]; !ok {
return true
// this won't happen in practice, unless someone manually edits the pod annotations
} else if err := json.Unmarshal([]byte(stateStr), &sidecarUpdateStates); err != nil {
klog.V(5).InfoS("Failed to parse pod annotations value", "pod", klog.KObj(pod),
"annotation", SidecarsetInplaceUpdateStateKey, "value", stateStr, "error", err)
return false
}
// The container imageIDs recorded before the in-place sidecar upgrade.
// When a container imageID is not found, it indicates the pod only had the sidecar container injected
// and the sidecar was never in-place updated, so it is always considered update-complete.
lastContainerStatus := make(map[string]pub.InPlaceUpdateContainerStatus)
for _, sidecarSetName := range sidecarSets.List() {
if inPlaceUpdateState, ok := sidecarUpdateStates[sidecarSetName]; ok {
for name, status := range inPlaceUpdateState.LastContainerStatuses {
lastContainerStatus[name] = status
}
}
}
containerImages := make(map[string]string, len(pod.Spec.Containers))
for i := range pod.Spec.Containers {
c := &pod.Spec.Containers[i]
containerImages[c.Name] = c.Image
}
for _, cs := range pod.Status.ContainerStatuses {
// only check containers set
if !containers.Has(cs.Name) {
continue
}
if oldStatus, ok := lastContainerStatus[cs.Name]; ok {
// we assume that users should not update workload template with new image
// which actually has the same imageID as the old image
if oldStatus.ImageID == cs.ImageID && containerImages[cs.Name] != cs.Image {
klog.V(5).InfoS("Pod container status imageID not changed, then inconsistent",
"pod", klog.KObj(pod), "containerStatusName", cs.Name)
return false
}
}
// If the sidecar container status.ImageID changed, or no old ImageID was recorded for it,
// the sidecar container update is considered complete
}
return true
}
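// Illustrative example (not from the original source) of the annotation layout parsed above;
// the sidecarSet name, revision and imageID values are hypothetical, and the field names are
// assumed to follow the JSON tags of pub.InPlaceUpdateState:
//
//	pod.Annotations["kruise.io/sidecarset-inplace-update-state"] =
//		`{"test-sidecarset":{"revision":"bbb2c9bd6c","updateTimestamp":"2024-01-01T00:00:00Z",
//		  "lastContainerStatuses":{"sidecar":{"imageID":"docker.io/library/nginx@sha256:..."}}}}`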
/*
Copyright 2020 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sidecarcontrol
import (
"context"
"encoding/json"
"fmt"
"reflect"
"regexp"
"strings"
jsonpatch "github.com/evanphx/json-patch"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/validation"
"k8s.io/klog/v2"
kubecontroller "k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/fieldpath"
"sigs.k8s.io/controller-runtime/pkg/client"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
"github.com/openkruise/kruise/pkg/features"
"github.com/openkruise/kruise/pkg/util"
utilclient "github.com/openkruise/kruise/pkg/util/client"
"github.com/openkruise/kruise/pkg/util/configuration"
"github.com/openkruise/kruise/pkg/util/expectations"
utilfeature "github.com/openkruise/kruise/pkg/util/feature"
)
const (
SidecarSetKindName = "kruise.io/sidecarset-name"
// SidecarSetHashAnnotation represents the key of a sidecarSet hash
SidecarSetHashAnnotation = "kruise.io/sidecarset-hash"
// SidecarSetHashWithoutImageAnnotation represents the key of the sidecarSet hash computed without sidecar images
SidecarSetHashWithoutImageAnnotation = "kruise.io/sidecarset-hash-without-image"
// SidecarSetListAnnotation records the list of sidecarSets that have been injected into the pod
SidecarSetListAnnotation = "kruise.io/sidecarset-injected-list"
// SidecarEnvKey specifies the environment variable that marks a container as injected
SidecarEnvKey = "IS_INJECTED"
// SidecarsetInplaceUpdateStateKey records the state of in-place update.
// The annotation value is a serialized map of sidecarSet name to pub.InPlaceUpdateState.
SidecarsetInplaceUpdateStateKey string = "kruise.io/sidecarset-inplace-update-state"
// SidecarSetUpgradable is a pod condition to indicate whether the pod's sidecarset is upgradable
SidecarSetUpgradable corev1.PodConditionType = "SidecarSetUpgradable"
)
var (
// SidecarIgnoredNamespaces specifies the namespaces where Pods won't get injected
// SidecarIgnoredNamespaces = []string{"kube-system", "kube-public"}
// SubPathExprEnvReg format: $(ODD_NAME), $(POD_NAME)...
SubPathExprEnvReg, _ = regexp.Compile(`\$\(([-._a-zA-Z][-._a-zA-Z0-9]*)\)`)
UpdateExpectations = expectations.NewUpdateExpectations(RevisionAdapterImpl)
)
type SidecarSetUpgradeSpec struct {
UpdateTimestamp metav1.Time `json:"updateTimestamp"`
SidecarSetHash string `json:"hash"`
SidecarSetName string `json:"sidecarSetName"`
SidecarList []string `json:"sidecarList"` // sidecarSet container list
SidecarSetControllerRevision string `json:"controllerRevision,omitempty"` // sidecarSet controllerRevision name
}
// PodMatchedSidecarSet determines whether the pod matches the sidecarSet's selector.
func PodMatchedSidecarSet(c client.Client, pod *corev1.Pod, sidecarSet *appsv1alpha1.SidecarSet) (bool, error) {
podNamespace := pod.Namespace
if podNamespace == "" {
podNamespace = "default"
}
//If Namespace is not empty, the sidecarSet will only match pods in that namespace
if sidecarSet.Spec.Namespace != "" && sidecarSet.Spec.Namespace != podNamespace {
return false, nil
}
if sidecarSet.Spec.NamespaceSelector != nil &&
!IsSelectorNamespace(c, podNamespace, sidecarSet.Spec.NamespaceSelector) {
return false, nil
}
// finally, check whether the pod labels match the sidecarSet selector
selector, err := util.ValidatedLabelSelectorAsSelector(sidecarSet.Spec.Selector)
if err != nil {
return false, err
}
if !selector.Empty() && selector.Matches(labels.Set(pod.Labels)) {
return true, nil
}
return false, nil
}
func IsSelectorNamespace(c client.Client, ns string, nsSelector *metav1.LabelSelector) bool {
selector, err := util.ValidatedLabelSelectorAsSelector(nsSelector)
if err != nil {
return false
}
nsObj := &corev1.Namespace{}
err = c.Get(context.TODO(), client.ObjectKey{Name: ns}, nsObj)
if err != nil {
return false
}
return selector.Matches(labels.Set(nsObj.Labels))
}
// FetchSidecarSetMatchedNamespace fetches the namespaces matched by the sidecarSet
func FetchSidecarSetMatchedNamespace(c client.Client, sidecarSet *appsv1alpha1.SidecarSet) (sets.String, error) {
ns := sets.NewString()
//If Namespace is not empty, the sidecarSet will only match pods in that namespace
if sidecarSet.Spec.Namespace != "" {
return ns.Insert(sidecarSet.Spec.Namespace), nil
}
// build the validated namespace label selector
selector, err := util.ValidatedLabelSelectorAsSelector(sidecarSet.Spec.NamespaceSelector)
if err != nil {
return nil, err
}
nsList := &corev1.NamespaceList{}
if err = c.List(context.TODO(), nsList, &client.ListOptions{LabelSelector: selector}, utilclient.DisableDeepCopy); err != nil {
return nil, err
}
for _, obj := range nsList.Items {
ns.Insert(obj.Name)
}
return ns, nil
}
// IsActivePod determines whether the pod needs to be injected and updated
func IsActivePod(pod *corev1.Pod) bool {
/*for _, namespace := range SidecarIgnoredNamespaces {
if pod.Namespace == namespace {
return false
}
}*/
return kubecontroller.IsPodActive(pod)
}
func GetSidecarSetRevision(sidecarSet *appsv1alpha1.SidecarSet) string {
return sidecarSet.Annotations[SidecarSetHashAnnotation]
}
func GetSidecarSetWithoutImageRevision(sidecarSet *appsv1alpha1.SidecarSet) string {
return sidecarSet.Annotations[SidecarSetHashWithoutImageAnnotation]
}
func GetPodSidecarSetRevision(sidecarSetName string, pod metav1.Object) string {
upgradeSpec := GetPodSidecarSetUpgradeSpecInAnnotations(sidecarSetName, SidecarSetHashAnnotation, pod)
return upgradeSpec.SidecarSetHash
}
func GetPodSidecarSetControllerRevision(sidecarSetName string, pod metav1.Object) string {
upgradeSpec := GetPodSidecarSetUpgradeSpecInAnnotations(sidecarSetName, SidecarSetHashAnnotation, pod)
return upgradeSpec.SidecarSetControllerRevision
}
func GetPodSidecarSetUpgradeSpecInAnnotations(sidecarSetName, annotationKey string, pod metav1.Object) SidecarSetUpgradeSpec {
annotations := pod.GetAnnotations()
hashKey := annotationKey
if annotations[hashKey] == "" {
return SidecarSetUpgradeSpec{}
}
sidecarSetHash := make(map[string]SidecarSetUpgradeSpec)
if err := json.Unmarshal([]byte(annotations[hashKey]), &sidecarSetHash); err != nil {
klog.ErrorS(err, "Failed to parse pod annotations value", "pod", klog.KObj(pod),
"annotations", hashKey, "value", annotations[hashKey])
// to be compatible with older sidecarSet hash struct, map[string]string
olderSidecarSetHash := make(map[string]string)
if err = json.Unmarshal([]byte(annotations[hashKey]), &olderSidecarSetHash); err != nil {
return SidecarSetUpgradeSpec{}
}
for k, v := range olderSidecarSetHash {
sidecarSetHash[k] = SidecarSetUpgradeSpec{
SidecarSetHash: v,
}
}
}
return sidecarSetHash[sidecarSetName]
}
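// Illustrative example (not from the original source) of the annotation value parsed above;
// the sidecarSet name and hash are hypothetical, and the field names follow the JSON tags of
// SidecarSetUpgradeSpec:
//
//	pod.Annotations["kruise.io/sidecarset-hash"] =
//		`{"test-sidecarset":{"updateTimestamp":"2024-01-01T00:00:00Z","hash":"xyz",
//		  "sidecarSetName":"test-sidecarset","sidecarList":["sidecar"]}}`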
func GetPodSidecarSetWithoutImageRevision(sidecarSetName string, pod metav1.Object) string {
upgradeSpec := GetPodSidecarSetUpgradeSpecInAnnotations(sidecarSetName, SidecarSetHashWithoutImageAnnotation, pod)
return upgradeSpec.SidecarSetHash
}
// IsPodSidecarUpdated returns whether this pod has been updated to the latest sidecarSet revision
func IsPodSidecarUpdated(sidecarSet *appsv1alpha1.SidecarSet, pod *corev1.Pod) bool {
return GetSidecarSetRevision(sidecarSet) == GetPodSidecarSetRevision(sidecarSet.Name, pod)
}
// UpdatePodSidecarSetHash updates the sidecarSet hash in the pod annotations[kruise.io/sidecarset-hash] when the sidecarSet in-place updates a sidecar container
func UpdatePodSidecarSetHash(pod *corev1.Pod, sidecarSet *appsv1alpha1.SidecarSet) {
hashKey := SidecarSetHashAnnotation
sidecarSetHash := make(map[string]SidecarSetUpgradeSpec)
if err := json.Unmarshal([]byte(pod.Annotations[hashKey]), &sidecarSetHash); err != nil {
klog.ErrorS(err, "Failed to unmarshal pod annotations", "pod", klog.KObj(pod), "annotations", hashKey)
// to be compatible with older sidecarSet hash struct, map[string]string
olderSidecarSetHash := make(map[string]string)
if err = json.Unmarshal([]byte(pod.Annotations[hashKey]), &olderSidecarSetHash); err == nil {
for k, v := range olderSidecarSetHash {
sidecarSetHash[k] = SidecarSetUpgradeSpec{
SidecarSetHash: v,
UpdateTimestamp: metav1.Now(),
SidecarSetName: sidecarSet.Name,
}
}
}
withoutImageHash := make(map[string]SidecarSetUpgradeSpec)
if err = json.Unmarshal([]byte(pod.Annotations[SidecarSetHashWithoutImageAnnotation]), &olderSidecarSetHash); err == nil {
for k, v := range olderSidecarSetHash {
withoutImageHash[k] = SidecarSetUpgradeSpec{
SidecarSetHash: v,
UpdateTimestamp: metav1.Now(),
SidecarSetName: sidecarSet.Name,
}
}
newWithoutImageHash, _ := json.Marshal(withoutImageHash)
pod.Annotations[SidecarSetHashWithoutImageAnnotation] = string(newWithoutImageHash)
}
// compatible done
}
sidecarList := listSidecarNameInSidecarSet(sidecarSet)
sidecarSetHash[sidecarSet.Name] = SidecarSetUpgradeSpec{
UpdateTimestamp: metav1.Now(),
SidecarSetHash: GetSidecarSetRevision(sidecarSet),
SidecarSetName: sidecarSet.Name,
SidecarList: sidecarList.List(),
SidecarSetControllerRevision: sidecarSet.Status.LatestRevision,
}
newHash, _ := json.Marshal(sidecarSetHash)
pod.Annotations[hashKey] = string(newHash)
}
func GetSidecarContainersInPod(sidecarSet *appsv1alpha1.SidecarSet) sets.String {
names := sets.NewString()
for _, sidecarContainer := range sidecarSet.Spec.Containers {
if IsHotUpgradeContainer(&sidecarContainer) {
name1, name2 := GetHotUpgradeContainerName(sidecarContainer.Name)
names.Insert(name2)
names.Insert(name1)
} else {
names.Insert(sidecarContainer.Name)
}
}
return names
}
func GetPodsSortFunc(pods []*corev1.Pod, waitUpdateIndexes []int) func(i, j int) bool {
// not-ready < ready, unscheduled < scheduled, and pending < running
return func(i, j int) bool {
return kubecontroller.ActivePods(pods).Less(waitUpdateIndexes[i], waitUpdateIndexes[j])
}
}
func IsPodInjectedSidecarSet(pod *corev1.Pod, sidecarSet *appsv1alpha1.SidecarSet) bool {
sidecarSetNameStr, ok := pod.Annotations[SidecarSetListAnnotation]
if !ok || len(sidecarSetNameStr) == 0 {
return false
}
sidecarSetNames := sets.NewString(strings.Split(sidecarSetNameStr, ",")...)
return sidecarSetNames.Has(sidecarSet.Name)
}
func IsPodConsistentWithSidecarSet(pod *corev1.Pod, sidecarSet *appsv1alpha1.SidecarSet) bool {
for i := range sidecarSet.Spec.Containers {
container := &sidecarSet.Spec.Containers[i]
switch container.UpgradeStrategy.UpgradeType {
case appsv1alpha1.SidecarContainerHotUpgrade:
_, exist := GetPodHotUpgradeInfoInAnnotations(pod)[container.Name]
if !exist || util.GetContainer(fmt.Sprintf("%v-1", container.Name), pod) == nil ||
util.GetContainer(fmt.Sprintf("%v-2", container.Name), pod) == nil {
return false
}
default:
if util.GetContainer(container.Name, pod) == nil {
return false
}
}
}
return true
}
func IsInjectedSidecarContainerInPod(container *corev1.Container) bool {
return util.GetContainerEnvValue(container, SidecarEnvKey) == "true"
}
func IsSharePodVolumeMounts(container *appsv1alpha1.SidecarContainer) bool {
return container.ShareVolumePolicy.Type == appsv1alpha1.ShareVolumePolicyEnabled
}
// TODO:
// If you share volume, the volume path of the business container may conflict with the volume path of the sidecar container,
// resulting in a failed pod creation.
// For example, if the user's main container volumeDevice has devicePath /var/log and the sidecar container has volumeMounts path /var/log,
// the path will conflict and the creation will fail.
func GetInjectedVolumeMountsAndEnvs(control SidecarControl, sidecarContainer *appsv1alpha1.SidecarContainer, pod *corev1.Pod) ([]corev1.VolumeMount, []corev1.EnvVar) {
if !IsSharePodVolumeMounts(sidecarContainer) {
return nil, nil
}
// injected volumeMounts
var injectedMounts []corev1.VolumeMount
// injected EnvVar
var injectedEnvs []corev1.EnvVar
for _, appContainer := range pod.Spec.Containers {
// ignore the injected sidecar container
if IsInjectedSidecarContainerInPod(&appContainer) {
continue
}
for _, volumeMount := range appContainer.VolumeMounts {
if !control.NeedToInjectVolumeMount(volumeMount) {
continue
}
injectedMounts = append(injectedMounts, volumeMount)
//If volumeMounts.SubPathExpr contains expansions, copy the environment variables it references
//for example: SubPathExpr=foo/$(ODD_NAME)/$(POD_NAME), we need to copy the environment variables ODD_NAME, POD_NAME
//envs = [$(ODD_NAME) $(POD_NAME)]
envs := SubPathExprEnvReg.FindAllString(volumeMount.SubPathExpr, -1)
for _, env := range envs {
// $(ODD_NAME) -> ODD_NAME
envName := env[2 : len(env)-1]
// get envVar in container
eVar := util.GetContainerEnvVar(&appContainer, envName)
if eVar == nil {
klog.InfoS("Pod container got nil env", "pod", klog.KObj(pod), "containerName", appContainer.Name, "env", envName)
continue
}
injectedEnvs = append(injectedEnvs, *eVar)
}
}
}
// TODO: share pod.spec.initContainers[*].volumeMounts
return injectedMounts, injectedEnvs
}
func IsSharePodVolumeDevices(container *appsv1alpha1.SidecarContainer) bool {
if container.ShareVolumeDevicePolicy == nil {
return false
}
return container.ShareVolumeDevicePolicy.Type == appsv1alpha1.ShareVolumePolicyEnabled
}
func GetInjectedVolumeDevices(sidecarContainer *appsv1alpha1.SidecarContainer, pod *corev1.Pod) []corev1.VolumeDevice {
if !IsSharePodVolumeDevices(sidecarContainer) {
return nil
}
// injected volumeDevices
var volumeDevices []corev1.VolumeDevice
for _, appContainer := range pod.Spec.Containers {
// ignore the injected sidecar container
if IsInjectedSidecarContainerInPod(&appContainer) {
continue
}
for _, volumeDevice := range appContainer.VolumeDevices {
volumeDevices = append(volumeDevices, volumeDevice)
}
}
// TODO: share pod.spec.initContainers[*].volumeDevices
return volumeDevices
}
func GetSidecarTransferEnvs(sidecarContainer *appsv1alpha1.SidecarContainer, pod *corev1.Pod) (injectedEnvs []corev1.EnvVar) {
// pre-process envs in pod, format: container.name/env.name -> container.env
// if SourceContainerName is set, use it as source container name
// if SourceContainerNameFrom.FieldRef, use the fieldref value as source container name
envsInPod := make(map[string]corev1.EnvVar)
for _, container := range pod.Spec.Containers {
for _, env := range container.Env {
key := fmt.Sprintf("%v/%v", container.Name, env.Name)
envsInPod[key] = env
}
}
for _, tEnv := range sidecarContainer.TransferEnv {
envs := sets.NewString()
if tEnv.EnvName != "" {
envs.Insert(tEnv.EnvName)
}
for _, e := range tEnv.EnvNames {
envs.Insert(e)
}
sourceContainerName := tEnv.SourceContainerName
if tEnv.SourceContainerNameFrom != nil && tEnv.SourceContainerNameFrom.FieldRef != nil {
containerName, err := ExtractContainerNameFromFieldPath(tEnv.SourceContainerNameFrom.FieldRef, pod)
if err != nil {
klog.ErrorS(err, "Failed to get containerName from pod annotations or labels",
"pod", klog.KObj(pod), "annotationsOrLabels", tEnv.SourceContainerNameFrom.FieldRef)
continue
}
sourceContainerName = containerName
}
for _, envName := range envs.List() {
key := fmt.Sprintf("%v/%v", sourceContainerName, envName)
env, ok := envsInPod[key]
if !ok {
// if sourceContainerName is empty or not found in pod.spec.containers
klog.InfoS("There was no env in container", "envName", tEnv.EnvName, "containerName", tEnv.SourceContainerName)
continue
}
injectedEnvs = append(injectedEnvs, env)
}
}
return
}
func ExtractContainerNameFromFieldPath(fs *corev1.ObjectFieldSelector, pod *corev1.Pod) (string, error) {
fieldPath := fs.FieldPath
accessor, err := meta.Accessor(pod)
if err != nil {
return "", err
}
path, subscript, ok := fieldpath.SplitMaybeSubscriptedPath(fieldPath)
if ok {
switch path {
case "metadata.annotations":
if errs := validation.IsQualifiedName(strings.ToLower(subscript)); len(errs) != 0 {
return "", fmt.Errorf("invalid key subscript in %s: %s", fieldPath, strings.Join(errs, ";"))
}
return accessor.GetAnnotations()[subscript], nil
case "metadata.labels":
if errs := validation.IsQualifiedName(subscript); len(errs) != 0 {
return "", fmt.Errorf("invalid key subscript in %s: %s", fieldPath, strings.Join(errs, ";"))
}
return accessor.GetLabels()[subscript], nil
default:
return "", fmt.Errorf("fieldPath %q does not support subscript", fieldPath)
}
}
return "", fmt.Errorf("unsupported fieldPath: %v", fieldPath)
}
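// Illustrative sketch (not from the original source): a fieldRef of the form
// metadata.annotations['<key>'] resolves to that pod annotation's value; the annotation key
// below is hypothetical.
//
//	fs := &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['app.kruise.io/source-container']"}
//	sourceContainer, err := ExtractContainerNameFromFieldPath(fs, pod)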
// code lifted from https://github.com/kubernetes/kubernetes/blob/master/pkg/apis/core/pods/helpers.go
// ConvertDownwardAPIFieldLabel converts the specified downward API field label
// and its value in the pod of the specified version to the internal version,
// and returns the converted label and value. This function returns an error if
// the conversion fails.
func ConvertDownwardAPIFieldLabel(version, label, value string) (string, string, error) {
if version != "v1" {
return "", "", fmt.Errorf("unsupported pod version: %s", version)
}
path, _, ok := fieldpath.SplitMaybeSubscriptedPath(label)
if ok {
switch path {
case "metadata.annotations", "metadata.labels":
return label, value, nil
default:
return "", "", fmt.Errorf("field path not supported: %s", path)
}
}
return "", "", fmt.Errorf("field label not supported: %s", label)
}
// PatchPodMetadata patches pod metadata (currently annotations) according to the configured patch policies
func PatchPodMetadata(originMetadata *metav1.ObjectMeta, patches []appsv1alpha1.SidecarSetPatchPodMetadata) (skip bool, err error) {
defer func() {
if r := recover(); r != nil {
err = fmt.Errorf("%v", r)
}
}()
if originMetadata.Annotations == nil {
originMetadata.Annotations = map[string]string{}
}
oldData := originMetadata.DeepCopy()
for _, patch := range patches {
switch patch.PatchPolicy {
case appsv1alpha1.SidecarSetRetainPatchPolicy, "":
retainPatchPodMetadata(originMetadata, patch)
case appsv1alpha1.SidecarSetOverwritePatchPolicy:
overwritePatchPodMetadata(originMetadata, patch)
case appsv1alpha1.SidecarSetMergePatchJsonPatchPolicy:
if err = mergePatchJsonPodMetadata(originMetadata, patch); err != nil {
return
}
}
}
if reflect.DeepEqual(oldData.Annotations, originMetadata.Annotations) {
skip = true
}
return
}
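// Illustrative sketch (not from the original source): callers typically patch the pod's
// ObjectMeta with the sidecarSet's patchPodMetadata entries and skip the pod update when
// nothing changed. The pod and sidecarSet variables are assumed.
//
//	skip, err := PatchPodMetadata(&pod.ObjectMeta, sidecarSet.Spec.PatchPodMetadata)
//	if err != nil {
//		return err
//	} else if skip {
//		// annotations already up to date; no pod update required
//	}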
func retainPatchPodMetadata(originMetadata *metav1.ObjectMeta, patchPodField appsv1alpha1.SidecarSetPatchPodMetadata) {
for k, v := range patchPodField.Annotations {
if _, ok := originMetadata.Annotations[k]; !ok {
originMetadata.Annotations[k] = v
}
}
}
func overwritePatchPodMetadata(originMetadata *metav1.ObjectMeta, patchPodField appsv1alpha1.SidecarSetPatchPodMetadata) {
for k, v := range patchPodField.Annotations {
originMetadata.Annotations[k] = v
}
}
func mergePatchJsonPodMetadata(originMetadata *metav1.ObjectMeta, patchPodField appsv1alpha1.SidecarSetPatchPodMetadata) error {
for key, patchJSON := range patchPodField.Annotations {
if origin, ok := originMetadata.Annotations[key]; ok && origin != "" {
modified, err := jsonpatch.MergePatch([]byte(origin), []byte(patchJSON))
if err != nil {
return err
}
originMetadata.Annotations[key] = string(modified)
} else {
originMetadata.Annotations[key] = patchJSON
}
}
return nil
}
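// Illustrative example (not from the original source) of the merge-patch policy above, using a
// hypothetical annotation whose value is a JSON document:
//
//	origin: {"log-level":"info"}
//	patch:  {"log-agent":"fluentd"}
//	result: {"log-agent":"fluentd","log-level":"info"}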
func ValidateSidecarSetPatchMetadataWhitelist(c client.Client, sidecarSet *appsv1alpha1.SidecarSet) error {
if len(sidecarSet.Spec.PatchPodMetadata) == 0 {
return nil
}
regAnnotations := make([]*regexp.Regexp, 0)
whitelist, err := configuration.GetSidecarSetPatchMetadataWhiteList(c)
if err != nil {
return err
} else if whitelist == nil {
if utilfeature.DefaultFeatureGate.Enabled(features.SidecarSetPatchPodMetadataDefaultsAllowed) {
return nil
}
return fmt.Errorf("SidecarSet patch metadata whitelist not found")
}
for _, rule := range whitelist.Rules {
if rule.Selector != nil {
selector, err := util.ValidatedLabelSelectorAsSelector(rule.Selector)
if err != nil {
return err
}
if !selector.Matches(labels.Set(sidecarSet.Labels)) {
continue
}
}
for _, key := range rule.AllowedAnnotationKeyExprs {
reg, err := regexp.Compile(key)
if err != nil {
return err
}
regAnnotations = append(regAnnotations, reg)
}
}
if len(regAnnotations) == 0 {
if utilfeature.DefaultFeatureGate.Enabled(features.SidecarSetPatchPodMetadataDefaultsAllowed) {
return nil
}
return fmt.Errorf("sidecarSet patch metadata annotation is not allowed")
}
for _, patch := range sidecarSet.Spec.PatchPodMetadata {
for key := range patch.Annotations {
if !matchRegKey(key, regAnnotations) {
return fmt.Errorf("sidecarSet patch metadata annotation(%s) is not allowed", key)
}
}
}
return nil
}
func matchRegKey(key string, regs []*regexp.Regexp) bool {
for _, reg := range regs {
if reg.MatchString(key) {
return true
}
}
return false
}
// IsSidecarContainer checks whether an initContainer is a sidecar container (K8s >= 1.28, restartPolicy = Always).
func IsSidecarContainer(container corev1.Container) bool {
if container.RestartPolicy != nil && *container.RestartPolicy == corev1.ContainerRestartPolicyAlways {
return true
}
return false
}
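// Illustrative sketch (not from the original source): an initContainer counts as a sidecar
// only when its restartPolicy is Always (supported since K8s 1.28).
//
//	always := corev1.ContainerRestartPolicyAlways
//	sidecar := corev1.Container{Name: "log-agent", RestartPolicy: &always}
//	_ = IsSidecarContainer(sidecar)                            // true
//	_ = IsSidecarContainer(corev1.Container{Name: "init-db"})  // false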
// listSidecarNameInSidecarSet lists the names of restartPolicy=Always init containers and sidecar containers
func listSidecarNameInSidecarSet(sidecarSet *appsv1alpha1.SidecarSet) sets.String {
sidecarList := sets.NewString()
for _, sidecar := range sidecarSet.Spec.InitContainers {
if IsSidecarContainer(sidecar.Container) {
sidecarList.Insert(sidecar.Name)
}
}
for _, sidecar := range sidecarSet.Spec.Containers {
sidecarList.Insert(sidecar.Name)
}
return sidecarList
}
/*
Copyright 2020 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sidecarcontrol
import (
"encoding/json"
"fmt"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/klog/v2"
)
const (
// SidecarSetWorkingHotUpgradeContainer records which hot upgrade container is working currently
SidecarSetWorkingHotUpgradeContainer = "kruise.io/sidecarset-working-hotupgrade-container"
// hotUpgrade container name suffix
hotUpgradeNameSuffix1 = "-1"
hotUpgradeNameSuffix2 = "-2"
// SidecarSetVersionEnvKey is the env key that records the sidecar container version (SIDECARSET_VERSION)
SidecarSetVersionEnvKey = "SIDECARSET_VERSION"
// SidecarSetVersionAltEnvKey is the env key that records the version of the other container of the same hot-upgrade sidecar (SIDECARSET_VERSION_ALT)
SidecarSetVersionAltEnvKey = "SIDECARSET_VERSION_ALT"
)
// GetHotUpgradeContainerName returns format: mesh-1, mesh-2
func GetHotUpgradeContainerName(name string) (string, string) {
return name + hotUpgradeNameSuffix1, name + hotUpgradeNameSuffix2
}
// GetPodSidecarSetVersionAnnotation is only used in hot upgrade container
// cName format: mesh-1, mesh-2
func GetPodSidecarSetVersionAnnotation(cName string) string {
return fmt.Sprintf("version.sidecarset.kruise.io/%s", cName)
}
func GetPodSidecarSetVersionAltAnnotation(cName string) string {
return fmt.Sprintf("versionalt.sidecarset.kruise.io/%s", cName)
}
// IsHotUpgradeContainer indicates whether the sidecar container's upgrade strategy is HotUpgrade
func IsHotUpgradeContainer(sidecarContainer *appsv1alpha1.SidecarContainer) bool {
return sidecarContainer.UpgradeStrategy.UpgradeType == appsv1alpha1.SidecarContainerHotUpgrade
}
// GetPodHotUpgradeInfoInAnnotations checks which hot upgrade sidecar container is working now
// format: sidecarset.spec.container[x].name -> pod.spec.container[x].name
// for example: mesh -> mesh-1, envoy -> envoy-2
func GetPodHotUpgradeInfoInAnnotations(pod *corev1.Pod) map[string]string {
hotUpgradeWorkContainer := make(map[string]string)
currentStr, ok := pod.Annotations[SidecarSetWorkingHotUpgradeContainer]
if !ok {
klog.V(6).InfoS("Pod annotations was not found", "pod", klog.KObj(pod), "annotations", SidecarSetWorkingHotUpgradeContainer)
return hotUpgradeWorkContainer
}
if err := json.Unmarshal([]byte(currentStr), &hotUpgradeWorkContainer); err != nil {
klog.ErrorS(err, "Failed to parse pod annotations value failed", "pod", klog.KObj(pod),
"annotations", SidecarSetWorkingHotUpgradeContainer, "value", currentStr)
return hotUpgradeWorkContainer
}
return hotUpgradeWorkContainer
}
// GetPodHotUpgradeContainers returns the two hot-upgrade sidecar containers
// workContainer: the currently working sidecar container, recorded in pod annotations[kruise.io/sidecarset-working-hotupgrade-container]
// otherContainer:
// 1. empty container
// 2. when in hot upgrading process, the older sidecar container
func GetPodHotUpgradeContainers(sidecarName string, pod *corev1.Pod) (workContainer, otherContainer string) {
hotUpgradeWorkContainer := GetPodHotUpgradeInfoInAnnotations(pod)
name1, name2 := GetHotUpgradeContainerName(sidecarName)
if hotUpgradeWorkContainer[sidecarName] == name1 {
otherContainer = name2
workContainer = name1
} else {
otherContainer = name1
workContainer = name2
}
return
}
// findContainerToHotUpgrade returns (nameToUpgrade, otherContainer)
func findContainerToHotUpgrade(sidecarContainer *appsv1alpha1.SidecarContainer, pod *corev1.Pod, control SidecarControl) (string, string) {
containerInPods := make(map[string]corev1.Container)
for _, containerInPod := range pod.Spec.Containers {
containerInPods[containerInPod.Name] = containerInPod
}
name1, name2 := GetHotUpgradeContainerName(sidecarContainer.Name)
c1, c2 := containerInPods[name1], containerInPods[name2]
// First, the empty hot-upgrade sidecar container will be upgraded to the latest sidecarSet specification
if c1.Image == sidecarContainer.UpgradeStrategy.HotUpgradeEmptyImage {
return c1.Name, c2.Name
}
if c2.Image == sidecarContainer.UpgradeStrategy.HotUpgradeEmptyImage {
return c2.Name, c1.Name
}
// Second, the not-ready sidecar container will be upgraded
c1Ready := podutil.GetExistingContainerStatus(pod.Status.ContainerStatuses, c1.Name).Ready && control.IsPodStateConsistent(pod, sets.NewString(c1.Name))
c2Ready := podutil.GetExistingContainerStatus(pod.Status.ContainerStatuses, c2.Name).Ready && control.IsPodStateConsistent(pod, sets.NewString(c2.Name))
klog.V(3).InfoS("Pod container ready", "pod", klog.KObj(pod), "container1Name", c1.Name, "container1Ready",
c1Ready, "container2Name", c2.Name, "container2Ready", c2Ready)
if c1Ready && !c2Ready {
return c2.Name, c1.Name
}
if !c1Ready && c2Ready {
return c1.Name, c2.Name
}
// Third, the older sidecar container will be upgraded
workContainer, olderContainer := GetPodHotUpgradeContainers(sidecarContainer.Name, pod)
return olderContainer, workContainer
}
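// Illustrative example (not from the original source): for a hot-upgrade sidecar named "mesh",
// the pod carries containers mesh-1 and mesh-2 plus the working-container annotation; the
// values below are hypothetical.
//
//	pod.Annotations["kruise.io/sidecarset-working-hotupgrade-container"] = `{"mesh":"mesh-1"}`
//
// findContainerToHotUpgrade then prefers, in order:
//  1. the container still running the HotUpgradeEmptyImage,
//  2. a not-ready container (when exactly one of the pair is ready),
//  3. the non-working (older) container.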
/*
Copyright 2021 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package resourcedistribution
import (
"context"
"flag"
"fmt"
"reflect"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/validation/field"
"k8s.io/client-go/util/retry"
"k8s.io/klog/v2"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/source"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
"github.com/openkruise/kruise/pkg/features"
"github.com/openkruise/kruise/pkg/util"
utilclient "github.com/openkruise/kruise/pkg/util/client"
utildiscovery "github.com/openkruise/kruise/pkg/util/discovery"
utilfeature "github.com/openkruise/kruise/pkg/util/feature"
"github.com/openkruise/kruise/pkg/util/ratelimiter"
utils "github.com/openkruise/kruise/pkg/webhook/resourcedistribution/validating"
)
func init() {
flag.IntVar(&concurrentReconciles, "resourcedistribution-workers", concurrentReconciles, "Max concurrent workers for ResourceDistribution controller.")
}
var (
concurrentReconciles = 3
controllerKind = appsv1alpha1.SchemeGroupVersion.WithKind("ResourceDistribution")
)
// Add creates a new ResourceDistribution Controller and adds it to the Manager with default RBAC. The Manager will set fields on the Controller
// and Start it when the Manager is Started.
func Add(mgr manager.Manager) error {
if !utildiscovery.DiscoverGVK(controllerKind) || !utilfeature.DefaultFeatureGate.Enabled(features.ResourceDistributionGate) {
return nil
}
return add(mgr, newReconciler(mgr))
}
// newReconciler returns a new reconcile.Reconciler
func newReconciler(mgr manager.Manager) reconcile.Reconciler {
cli := utilclient.NewClientFromManager(mgr, "resourcedistribution-controller")
return &ReconcileResourceDistribution{
Client: cli,
scheme: mgr.GetScheme(),
}
}
// add adds a new Controller to mgr with r as the reconcile.Reconciler
func add(mgr manager.Manager, r reconcile.Reconciler) error {
// Create a new controller
c, err := controller.New("resourcedistribution-controller", mgr, controller.Options{
Reconciler: r, MaxConcurrentReconciles: concurrentReconciles, CacheSyncTimeout: util.GetControllerCacheSyncTimeout(),
RateLimiter: ratelimiter.DefaultControllerRateLimiter()})
if err != nil {
return err
}
// Watch for changes to ResourceDistribution
err = c.Watch(source.Kind(mgr.GetCache(), &appsv1alpha1.ResourceDistribution{},
&handler.TypedEnqueueRequestForObject[*appsv1alpha1.ResourceDistribution]{},
predicate.TypedFuncs[*appsv1alpha1.ResourceDistribution]{
UpdateFunc: func(e event.TypedUpdateEvent[*appsv1alpha1.ResourceDistribution]) bool {
oldObj := e.ObjectOld
newObj := e.ObjectNew
if !reflect.DeepEqual(oldObj.Spec, newObj.Spec) {
klog.V(3).InfoS("Observed updated Spec for ResourceDistribution", "resourceDistribution", klog.KObj(oldObj))
return true
}
return false
},
}))
if err != nil {
return err
}
// Watch for changes to all namespaces
err = c.Watch(source.Kind(mgr.GetCache(), &corev1.Namespace{}, &enqueueRequestForNamespace{reader: mgr.GetCache()}))
if err != nil {
return err
}
// Watch for changes to Secrets
secret := unstructured.Unstructured{}
secret.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("Secret"))
err = c.Watch(source.Kind(mgr.GetCache(), client.Object(&secret), handler.EnqueueRequestForOwner(
mgr.GetScheme(), mgr.GetRESTMapper(), &appsv1alpha1.ResourceDistribution{}, handler.OnlyControllerOwner()),
predicate.Funcs{
CreateFunc: func(createEvent event.CreateEvent) bool {
return false
},
GenericFunc: func(genericEvent event.GenericEvent) bool {
return false
},
}))
if err != nil {
return err
}
// Watch for changes to ConfigMap
configMap := unstructured.Unstructured{}
configMap.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("ConfigMap"))
err = c.Watch(source.Kind(mgr.GetCache(), client.Object(&configMap), handler.EnqueueRequestForOwner(
mgr.GetScheme(), mgr.GetRESTMapper(), &appsv1alpha1.ResourceDistribution{}, handler.OnlyControllerOwner()),
predicate.Funcs{
CreateFunc: func(createEvent event.CreateEvent) bool {
return false
},
GenericFunc: func(genericEvent event.GenericEvent) bool {
return false
},
}))
if err != nil {
return err
}
return nil
}
var _ reconcile.Reconciler = &ReconcileResourceDistribution{}
// ReconcileResourceDistribution reconciles a ResourceDistribution object
type ReconcileResourceDistribution struct {
client.Client
scheme *runtime.Scheme
}
//+kubebuilder:rbac:groups=apps.kruise.io,resources=resourcedistributions,verbs=get;list;watch;
//+kubebuilder:rbac:groups=apps.kruise.io,resources=resourcedistributions/status,verbs=get;update;patch
//+kubebuilder:rbac:groups=apps.kruise.io,resources=resourcedistributions/finalizers,verbs=update
//+kubebuilder:rbac:groups="core",resources=namespaces,verbs=get;list;watch;
//+kubebuilder:rbac:groups="core",resources=configmaps,verbs=get;list;watch;create;update;delete
//+kubebuilder:rbac:groups="core",resources=secrets,verbs=get;list;watch;create;update;delete
// Reconcile is part of the main kubernetes reconciliation loop which aims to
// move the current state of the cluster closer to the desired state.
// TODO(user): Modify the Reconcile function to compare the state specified by
// the ResourceDistribution object against the actual cluster state, and then
// perform operations to make the cluster state reflect the state specified by
// the user.
//
// For more details, check Reconcile and its Result here:
// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.6.4/pkg/reconcile
func (r *ReconcileResourceDistribution) Reconcile(_ context.Context, req ctrl.Request) (ctrl.Result, error) {
klog.V(3).InfoS("ResourceDistribution begin to reconcile", "resourceDistribution", req)
// fetch resourcedistribution instance as distributor
distributor := &appsv1alpha1.ResourceDistribution{}
if err := r.Client.Get(context.TODO(), req.NamespacedName, distributor); err != nil {
if errors.IsNotFound(err) {
// Object not found, return. Created objects are automatically garbage collected.
// For additional cleanup logic use finalizers.
return reconcile.Result{}, nil
}
// Error reading the object - requeue the request.
return reconcile.Result{}, err
}
return r.doReconcile(distributor)
}
// doReconcile distributes resources to matched namespaces and cleans them up from unmatched ones
func (r *ReconcileResourceDistribution) doReconcile(distributor *appsv1alpha1.ResourceDistribution) (ctrl.Result, error) {
resource, errs := utils.DeserializeResource(&distributor.Spec.Resource, field.NewPath("resource"))
if len(errs) != 0 || resource == nil {
klog.ErrorS(errs.ToAggregate(), "DeserializeResource error", "resourceDistribution", klog.KObj(distributor))
return reconcile.Result{}, nil // no need to retry
}
matchedNamespaces, unmatchedNamespaces, err := listNamespacesForDistributor(r.Client, &distributor.Spec.Targets)
if err != nil {
klog.ErrorS(err, "Failed to list namespace for ResourceDistributor", "resourceDistribution", klog.KObj(distributor))
return reconcile.Result{}, err
}
// 1. distribute resource to matched namespaces
succeeded, distributeErrList := r.distributeResource(distributor, matchedNamespaces, resource)
// 2. clean its owned resources in unmatched namespaces
_, cleanErrList := r.cleanResource(distributor, unmatchedNamespaces, resource)
// 3. process all errors about resource distribution and cleanup
conditions, errList := r.handleErrors(distributeErrList, cleanErrList)
// 4. update distributor status
newStatus := calculateNewStatus(distributor, conditions, int32(len(matchedNamespaces)), succeeded)
if err := r.updateDistributorStatus(distributor, newStatus); err != nil {
errList = append(errList, field.InternalError(field.NewPath("updateStatus"), err))
}
return ctrl.Result{}, errList.ToAggregate()
}
func (r *ReconcileResourceDistribution) distributeResource(distributor *appsv1alpha1.ResourceDistribution,
matchedNamespaces []string, resource runtime.Object) (int32, []*UnexpectedError) {
resourceName := utils.ConvertToUnstructured(resource).GetName()
resourceKind := resource.GetObjectKind().GroupVersionKind().Kind
resourceHashCode := hashResource(distributor.Spec.Resource)
return syncItSlowly(matchedNamespaces, 1, func(namespace string) *UnexpectedError {
ns := &corev1.Namespace{}
getNSErr := r.Client.Get(context.TODO(), types.NamespacedName{Name: namespace}, ns)
if errors.IsNotFound(getNSErr) || (getNSErr == nil && ns.DeletionTimestamp != nil) {
return &UnexpectedError{
err: fmt.Errorf("namespace not found or is terminating"),
namespace: namespace,
conditionID: NotExistConditionID,
}
}
// 1. try to fetch existing old resource
oldResource := &unstructured.Unstructured{}
oldResource.SetGroupVersionKind(resource.GetObjectKind().GroupVersionKind())
getErr := r.Client.Get(context.TODO(), types.NamespacedName{Namespace: namespace, Name: resourceName}, oldResource)
if getErr != nil && !errors.IsNotFound(getErr) {
klog.ErrorS(getErr, "Error occurred when getting resource in namespace", "namespace", namespace, "resourceDistribution", klog.KObj(distributor))
return &UnexpectedError{
err: getErr,
namespace: namespace,
conditionID: GetConditionID,
}
}
// 2. if resource doesn't exist, create resource;
if getErr != nil && errors.IsNotFound(getErr) {
newResource := makeResourceObject(distributor, namespace, resource, resourceHashCode, nil)
if createErr := r.Client.Create(context.TODO(), newResource.(client.Object)); createErr != nil {
klog.ErrorS(createErr, "Error occurred when creating resource in namespace", "namespace", namespace, "resourceDistribution", klog.KObj(distributor))
return &UnexpectedError{
err: createErr,
namespace: namespace,
conditionID: CreateConditionID,
}
}
klog.V(3).InfoS("ResourceDistribution created resource in namespace", "resourceDistribution", klog.KObj(distributor), "resourceKind", resourceKind, "resourceName", resourceName, "namespace", namespace)
return nil
}
// 3. check conflict
if !isControlledByDistributor(oldResource, distributor) {
klog.InfoS("Conflict with existing resource in namespace", "resourceKind", resourceKind, "resourceName", resourceName, "namespace", namespace, "resourceDistribution", klog.KObj(distributor))
return &UnexpectedError{
err: fmt.Errorf("conflict with existing resources because of the same namespace, group, version, kind and name"),
namespace: namespace,
conditionID: ConflictConditionID,
}
}
// 4. check whether resource need to update
if needToUpdate(oldResource, utils.ConvertToUnstructured(resource)) {
newResource := makeResourceObject(distributor, namespace, resource, resourceHashCode, oldResource)
if updateErr := r.Client.Update(context.TODO(), newResource.(client.Object)); updateErr != nil {
klog.ErrorS(updateErr, "Error occurred when updating resource in namespace", "namespace", namespace, "resourceDistribution", klog.KObj(distributor))
return &UnexpectedError{
err: updateErr,
namespace: namespace,
conditionID: UpdateConditionID,
}
}
klog.V(3).InfoS("ResourceDistribution updated for namespaces", "resourceDistribution", klog.KObj(distributor), "resourceKind", resourceKind, "resourceName", resourceName, "namespace", namespace)
}
return nil
})
}
func (r *ReconcileResourceDistribution) cleanResource(distributor *appsv1alpha1.ResourceDistribution,
unmatchedNamespaces []string, resource runtime.Object) (int32, []*UnexpectedError) {
resourceName := utils.ConvertToUnstructured(resource).GetName()
resourceKind := resource.GetObjectKind().GroupVersionKind().Kind
return syncItSlowly(unmatchedNamespaces, 1, func(namespace string) *UnexpectedError {
ns := &corev1.Namespace{}
getNSErr := r.Client.Get(context.TODO(), types.NamespacedName{Name: namespace}, ns)
if errors.IsNotFound(getNSErr) || (getNSErr == nil && ns.DeletionTimestamp != nil) {
return nil
}
// 1. try to fetch existing old resource
oldResource := &unstructured.Unstructured{}
oldResource.SetGroupVersionKind(resource.GetObjectKind().GroupVersionKind())
if getErr := r.Client.Get(context.TODO(), types.NamespacedName{Namespace: namespace, Name: resourceName}, oldResource); getErr != nil {
if errors.IsNotFound(getErr) {
return nil
}
klog.ErrorS(getErr, "Error occurred when getting resource in namespace", "namespace", namespace, "resourceDistribution", klog.KObj(distributor))
return &UnexpectedError{
err: getErr,
namespace: namespace,
conditionID: GetConditionID,
}
}
// 2. if the owner of the oldResource is not this distributor, just return
if !isControlledByDistributor(oldResource, distributor) {
return nil
}
// 3. else clean the resource
if deleteErr := r.Client.Delete(context.TODO(), oldResource); deleteErr != nil && !errors.IsNotFound(deleteErr) {
klog.ErrorS(deleteErr, "Error occurred when deleting resource in namespace from client", "namespace", namespace, "resourceDistribution", klog.KObj(distributor))
return &UnexpectedError{
err: deleteErr,
namespace: namespace,
conditionID: DeleteConditionID,
}
}
klog.V(3).InfoS("ResourceDistribution deleted in namespace", "resourceDistribution", klog.KObj(distributor), "resourceKind", resourceKind, "resourceName", resourceName, "namespace", namespace)
return nil
})
}
// handleErrors processes all errors from resource distribution and cleanup, and records them into conditions
func (r *ReconcileResourceDistribution) handleErrors(errLists ...[]*UnexpectedError) ([]appsv1alpha1.ResourceDistributionCondition, field.ErrorList) {
// init a status.conditions
conditions := make([]appsv1alpha1.ResourceDistributionCondition, NumberOfConditionTypes)
initConditionType(conditions)
// 1. build status.conditions
numberOfErr := 0
for i := range errLists {
numberOfErr += len(errLists[i])
for _, unexpected := range errLists[i] {
setCondition(&conditions[unexpected.conditionID], unexpected.err, unexpected.namespace)
}
}
// 2. build error list
errList := field.ErrorList{}
for i := range conditions {
if len(conditions[i].FailedNamespaces) == 0 {
continue
}
switch conditions[i].Type {
case appsv1alpha1.ResourceDistributionConflictOccurred, appsv1alpha1.ResourceDistributionNamespaceNotExists:
default:
errList = append(errList, field.InternalError(field.NewPath(string(conditions[i].Type)), fmt.Errorf("%s", conditions[i].Reason)))
}
}
return conditions, errList
}
// updateDistributorStatus updates the distributor status after reconciliation
func (r *ReconcileResourceDistribution) updateDistributorStatus(distributor *appsv1alpha1.ResourceDistribution, newStatus *appsv1alpha1.ResourceDistributionStatus) error {
if reflect.DeepEqual(distributor.Status, *newStatus) {
return nil
}
return retry.RetryOnConflict(retry.DefaultBackoff, func() error {
object := &appsv1alpha1.ResourceDistribution{}
if err := r.Client.Get(context.TODO(), types.NamespacedName{Name: distributor.Name}, object); err != nil {
return err
}
object.Status = *newStatus
return r.Client.Status().Update(context.TODO(), object)
})
}
// SetupWithManager sets up the controller with the Manager.
func (r *ReconcileResourceDistribution) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
// Uncomment the following line adding a pointer to an instance of the controlled resource as an argument
// For().
Complete(r)
}
/*
Copyright 2021 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package resourcedistribution
import (
"context"
"reflect"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/util/workqueue"
"k8s.io/klog/v2"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/handler"
)
var _ handler.TypedEventHandler[*corev1.Namespace] = &enqueueRequestForNamespace{}
type matchFunc func(*corev1.Namespace, *appsv1alpha1.ResourceDistribution) (bool, error)
type enqueueRequestForNamespace struct {
reader client.Reader
}
func (p *enqueueRequestForNamespace) Create(ctx context.Context, evt event.TypedCreateEvent[*corev1.Namespace], q workqueue.RateLimitingInterface) {
p.addNamespace(q, evt.Object, matchViaTargets)
}
func (p *enqueueRequestForNamespace) Delete(ctx context.Context, evt event.TypedDeleteEvent[*corev1.Namespace], q workqueue.RateLimitingInterface) {
p.addNamespace(q, evt.Object, matchViaIncludedNamespaces)
}
func (p *enqueueRequestForNamespace) Generic(ctx context.Context, evt event.TypedGenericEvent[*corev1.Namespace], q workqueue.RateLimitingInterface) {
}
func (p *enqueueRequestForNamespace) Update(ctx context.Context, evt event.TypedUpdateEvent[*corev1.Namespace], q workqueue.RateLimitingInterface) {
p.updateNamespace(q, evt.ObjectOld, evt.ObjectNew)
}
// addNamespace figures out, when a Namespace is created or deleted, which ResourceDistributions work on it and enqueues them.
// obj must be of type *corev1.Namespace.
func (p *enqueueRequestForNamespace) addNamespace(q workqueue.RateLimitingInterface, obj runtime.Object, fn matchFunc) {
namespace, ok := obj.(*corev1.Namespace)
if !ok {
return
}
resourceDistributions, err := p.getNamespaceMatchedResourceDistributions(namespace, fn)
if err != nil {
klog.ErrorS(err, "Unable to get the ResourceDistributions related with namespace", "namespace", namespace.Name)
return
}
addMatchedResourceDistributionToWorkQueue(q, resourceDistributions)
}
// updateNamespace figures out, when the labels of a Namespace are updated, which ResourceDistributions work on it and enqueues them.
// objOld and objNew must be of type *corev1.Namespace.
func (p *enqueueRequestForNamespace) updateNamespace(q workqueue.RateLimitingInterface, objOld, objNew runtime.Object) {
namespaceOld, okOld := objOld.(*corev1.Namespace)
namespaceNew, okNew := objNew.(*corev1.Namespace)
if !okOld || !okNew || reflect.DeepEqual(namespaceNew.ObjectMeta.Labels, namespaceOld.ObjectMeta.Labels) {
return
}
p.addNamespace(q, objNew, matchViaLabelSelector)
p.addNamespace(q, objOld, matchViaLabelSelector)
}
// getNamespaceMatchedResourceDistributions returns all ResourceDistributions matched by the given match function
func (p *enqueueRequestForNamespace) getNamespaceMatchedResourceDistributions(namespace *corev1.Namespace, match matchFunc) ([]*appsv1alpha1.ResourceDistribution, error) {
var matchedResourceDistributions []*appsv1alpha1.ResourceDistribution
ResourceDistributions := &appsv1alpha1.ResourceDistributionList{}
if err := p.reader.List(context.TODO(), ResourceDistributions); err != nil {
return nil, err
}
for i := range ResourceDistributions.Items {
resourceDistribution := &ResourceDistributions.Items[i]
matched, err := match(namespace, resourceDistribution)
if err != nil {
return nil, err
}
if matched {
matchedResourceDistributions = append(matchedResourceDistributions, resourceDistribution)
}
}
return matchedResourceDistributions, nil
}
/*
Copyright 2021 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package resourcedistribution
import (
"context"
"crypto/sha256"
"encoding/hex"
"reflect"
"sync"
"time"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
"github.com/openkruise/kruise/pkg/util"
utils "github.com/openkruise/kruise/pkg/webhook/resourcedistribution/validating"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/util/workqueue"
"k8s.io/utils/integer"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
)
const (
GetConditionID = 0
CreateConditionID = 1
UpdateConditionID = 2
DeleteConditionID = 3
ConflictConditionID = 4
NotExistConditionID = 5
NumberOfConditionTypes = 6
OperationSucceeded = "Succeeded"
)
// UnexpectedError is designed to store the information about .status.conditions when error occurs
type UnexpectedError struct {
err error
namespace string
conditionID int
}
// isInList returns true if namespaceName is in namespaceList, otherwise false
func isInList(namespaceName string, namespaceList []appsv1alpha1.ResourceDistributionNamespace) bool {
for _, namespace := range namespaceList {
if namespaceName == namespace.Name {
return true
}
}
return false
}
// matchViaIncludedNamespaces returns true if the namespace is in targets.IncludedNamespaces
func matchViaIncludedNamespaces(namespace *corev1.Namespace, distributor *appsv1alpha1.ResourceDistribution) (bool, error) {
if isInList(namespace.Name, distributor.Spec.Targets.IncludedNamespaces.List) {
return true, nil
}
return false, nil
}
// matchViaLabelSelector returns true if the namespace matches targets.NamespaceLabelSelector
func matchViaLabelSelector(namespace *corev1.Namespace, distributor *appsv1alpha1.ResourceDistribution) (bool, error) {
selector, err := util.ValidatedLabelSelectorAsSelector(&distributor.Spec.Targets.NamespaceLabelSelector)
if err != nil {
return false, err
}
if !selector.Empty() && selector.Matches(labels.Set(namespace.Labels)) {
return true, nil
}
return false, nil
}
// matchViaTargets checks whether the Namespace matches the ResourceDistribution via spec.targets
func matchViaTargets(namespace *corev1.Namespace, distributor *appsv1alpha1.ResourceDistribution) (bool, error) {
targets := &distributor.Spec.Targets
if isInList(namespace.Name, targets.ExcludedNamespaces.List) {
return false, nil
}
if targets.AllNamespaces {
return true, nil
}
if isInList(namespace.Name, targets.IncludedNamespaces.List) {
return true, nil
}
return matchViaLabelSelector(namespace, distributor)
}
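// Worked example (assumed targets): with ExcludedNamespaces=["kube-system"], AllNamespaces=false,
// IncludedNamespaces=["ns-a"] and a NamespaceLabelSelector of matchLabels {"group":"one"},
// "kube-system" never matches, "ns-a" matches via the included list, and any other namespace
// labeled group=one matches via the label selector.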
// addMatchedResourceDistributionToWorkQueue adds rds into q
func addMatchedResourceDistributionToWorkQueue(q workqueue.RateLimitingInterface, rds []*appsv1alpha1.ResourceDistribution) {
for _, rd := range rds {
q.Add(reconcile.Request{
NamespacedName: types.NamespacedName{
Name: rd.Name,
},
})
}
}
// hashResource hashes the resource yaml, producing its version string, using SHA256
func hashResource(resourceYaml runtime.RawExtension) string {
hash := sha256.Sum256(resourceYaml.Raw)
return hex.EncodeToString(hash[:])
}
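// A minimal sketch of how the version hash behaves (illustrative only; exampleHashStability is a
// hypothetical helper, not part of the controller): byte-identical raw payloads produce the same
// version string, so an unchanged spec.resource keeps the hash annotation stable across reconciles.
func exampleHashStability() bool {
	a := hashResource(runtime.RawExtension{Raw: []byte(`{"kind":"ConfigMap"}`)})
	b := hashResource(runtime.RawExtension{Raw: []byte(`{"kind":"ConfigMap"}`)})
	return a == b // true
}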
// setCondition sets the condition's .Reason and appends the given namespaces to .FailedNamespaces
func setCondition(condition *appsv1alpha1.ResourceDistributionCondition, err error, namespaces ...string) {
if condition == nil || err == nil {
return
}
condition.Reason = err.Error()
condition.FailedNamespaces = append(condition.FailedNamespaces, namespaces...)
}
// initConditionType sets the condition types for .status.conditions
func initConditionType(conditions []appsv1alpha1.ResourceDistributionCondition) {
if len(conditions) < NumberOfConditionTypes {
return
}
conditions[GetConditionID].Type = appsv1alpha1.ResourceDistributionGetResourceFailed
conditions[CreateConditionID].Type = appsv1alpha1.ResourceDistributionCreateResourceFailed
conditions[UpdateConditionID].Type = appsv1alpha1.ResourceDistributionUpdateResourceFailed
conditions[DeleteConditionID].Type = appsv1alpha1.ResourceDistributionDeleteResourceFailed
conditions[ConflictConditionID].Type = appsv1alpha1.ResourceDistributionConflictOccurred
conditions[NotExistConditionID].Type = appsv1alpha1.ResourceDistributionNamespaceNotExists
}
// calculateNewStatus returns a complete new status to update distributor.status
func calculateNewStatus(distributor *appsv1alpha1.ResourceDistribution, newConditions []appsv1alpha1.ResourceDistributionCondition, desired, succeeded int32) *appsv1alpha1.ResourceDistributionStatus {
status := &appsv1alpha1.ResourceDistributionStatus{}
if distributor == nil || len(newConditions) < NumberOfConditionTypes {
return status
}
// set .Succeeded, .Failed, .ObservedGeneration
status.Desired = desired
status.Succeeded = succeeded
status.Failed = desired - succeeded
status.ObservedGeneration = distributor.Generation
// set .Conditions
oldConditions := distributor.Status.Conditions
for i := 0; i < NumberOfConditionTypes; i++ {
if len(newConditions[i].FailedNamespaces) == 0 {
// if no error occurred
newConditions[i].Reason = OperationSucceeded
newConditions[i].Status = appsv1alpha1.ResourceDistributionConditionFalse
} else {
newConditions[i].Status = appsv1alpha1.ResourceDistributionConditionTrue
}
if len(oldConditions) == 0 || oldConditions[i].Status != newConditions[i].Status {
// if .conditions.status changed
newConditions[i].LastTransitionTime = metav1.Time{Time: time.Now()}
} else {
newConditions[i].LastTransitionTime = oldConditions[i].LastTransitionTime
}
}
status.Conditions = newConditions
return status
}
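// Worked example (assumed numbers): with desired=5 matched namespaces and succeeded=4, the new
// status reports Desired=5, Succeeded=4, Failed=1; every condition whose FailedNamespaces stayed
// empty gets Reason=OperationSucceeded with Status=False, and LastTransitionTime is refreshed only
// for conditions whose Status actually changed.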
// mergeMetadata will merge labels/annotations/finalizers
func mergeMetadata(newResource, oldResource *unstructured.Unstructured) {
if newResource.GetLabels() == nil {
newResource.SetLabels(make(map[string]string))
}
if newResource.GetAnnotations() == nil {
newResource.SetAnnotations(make(map[string]string))
}
for k, v := range oldResource.GetLabels() {
newLabels := newResource.GetLabels()
if _, ok := newLabels[k]; !ok {
newLabels[k] = v
}
newResource.SetLabels(newLabels)
}
for k, v := range oldResource.GetAnnotations() {
newAnnotations := newResource.GetAnnotations()
if _, ok := newAnnotations[k]; !ok {
newAnnotations[k] = v
}
newResource.SetAnnotations(newAnnotations)
}
newResource.SetFinalizers(sets.NewString(newResource.GetFinalizers()...).
Union(sets.NewString(oldResource.GetFinalizers()...)).List())
}
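// A minimal sketch of the merge semantics (illustrative only; exampleMergeMetadata is a hypothetical
// helper, not part of the controller): keys already set on the new resource win, keys only present
// on the old resource are preserved, and finalizers are unioned.
func exampleMergeMetadata() map[string]string {
	oldRes := &unstructured.Unstructured{Object: map[string]interface{}{}}
	oldRes.SetLabels(map[string]string{"a": "old", "b": "old"})
	newRes := &unstructured.Unstructured{Object: map[string]interface{}{}}
	newRes.SetLabels(map[string]string{"a": "new"})
	mergeMetadata(newRes, oldRes)
	return newRes.GetLabels() // {"a":"new", "b":"old"}
}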
// makeResourceObject sets the necessary metadata on the resource before creating or updating it
func makeResourceObject(distributor *appsv1alpha1.ResourceDistribution, namespace string, resource runtime.Object, hashCode string, oldResource *unstructured.Unstructured) runtime.Object {
// convert to unstructured
newResource := utils.ConvertToUnstructured(resource.DeepCopyObject())
if oldResource != nil {
mergeMetadata(newResource, oldResource)
}
// 1. set namespace
newResource.SetNamespace(namespace)
// 2. set ownerReference for cascading deletion
found := false
owners := newResource.GetOwnerReferences()
for i := range owners {
if owners[i].UID == distributor.UID {
found = true
break
}
}
if !found {
newResource.SetOwnerReferences(append(owners, *metav1.NewControllerRef(distributor, distributor.GroupVersionKind())))
}
// 3. set resource annotations
annotations := newResource.GetAnnotations()
if annotations == nil {
annotations = make(map[string]string)
}
annotations[utils.ResourceHashCodeAnnotation] = hashCode
annotations[utils.SourceResourceDistributionOfResource] = distributor.Name
newResource.SetAnnotations(annotations)
return newResource
}
func syncItSlowly(namespaces []string, initialBatchSize int, fn func(namespace string) *UnexpectedError) (int32, []*UnexpectedError) {
successes := int32(0)
remaining := len(namespaces)
errList := make([]*UnexpectedError, 0)
for batchSize := integer.IntMin(remaining, initialBatchSize); batchSize > 0; batchSize = integer.IntMin(2*batchSize, remaining) {
errCh := make(chan *UnexpectedError, batchSize)
var wg sync.WaitGroup
wg.Add(batchSize)
for i := 0; i < batchSize; i++ {
namespace := namespaces[int(successes)+len(errList)+i]
go func() {
defer wg.Done()
if err := fn(namespace); err != nil {
errCh <- err
}
}()
}
wg.Wait()
errCount := len(errCh)
curSuccesses := batchSize - errCount
successes += int32(curSuccesses)
for i := 0; i < errCount; i++ {
errList = append(errList, <-errCh)
}
remaining -= batchSize
}
return successes, errList
}
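// Worked example (assumed input): syncing 7 namespaces with initialBatchSize=1 and no failures runs
// slow-start batches of 1, 2 and then 4 namespaces (doubling each round, capped by the remainder),
// returning successes=7 and an empty error list. Failures only reduce the success count; the
// remaining namespaces are still attempted in the following batches.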
// listNamespacesForDistributor returns two slices: one with all matched namespaces, the other with all unmatched ones.
// First, .AllNamespaces, .IncludedNamespaces, and .NamespaceLabelSelector from Spec.Targets are evaluated and their
// union is taken; finally, ExcludedNamespaces removes the designated namespaces from that union.
func listNamespacesForDistributor(handlerClient client.Client, targets *appsv1alpha1.ResourceDistributionTargets) ([]string, []string, error) {
matchedSet := sets.NewString()
unmatchedSet := sets.NewString()
namespacesList := &corev1.NamespaceList{}
if err := handlerClient.List(context.TODO(), namespacesList); err != nil {
return nil, nil, err
}
for _, namespace := range namespacesList.Items {
unmatchedSet.Insert(namespace.Name)
}
if targets.AllNamespaces {
// 1. select all namespaces via targets.AllNamespace
for _, namespace := range namespacesList.Items {
matchedSet.Insert(namespace.Name)
}
} else {
// 2. select the namespaces via targets.IncludedNamespaces
for _, namespace := range targets.IncludedNamespaces.List {
matchedSet.Insert(namespace.Name)
}
}
if !targets.AllNamespaces && (len(targets.NamespaceLabelSelector.MatchLabels) != 0 || len(targets.NamespaceLabelSelector.MatchExpressions) != 0) {
// 3. select the namespaces via targets.NamespaceLabelSelector
selectors, err := util.ValidatedLabelSelectorAsSelector(&targets.NamespaceLabelSelector)
if err != nil {
return nil, nil, err
}
namespaces := &corev1.NamespaceList{}
if err := handlerClient.List(context.TODO(), namespaces, &client.ListOptions{LabelSelector: selectors}); err != nil {
return nil, nil, err
}
for _, namespace := range namespaces.Items {
matchedSet.Insert(namespace.Name)
}
}
// 4. exclude the namespaces via target.ExcludedNamespaces
for _, namespace := range targets.ExcludedNamespaces.List {
matchedSet.Delete(namespace.Name)
}
// 5. remove matched namespaces from unmatched namespace set
unmatchedSet = unmatchedSet.Difference(matchedSet)
return matchedSet.List(), unmatchedSet.List(), nil
}
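// Worked example (assumed cluster): with namespaces {ns-a, ns-b, ns-c, kube-system},
// targets.AllNamespaces=true and ExcludedNamespaces=["kube-system"], the result is
// matched=[ns-a ns-b ns-c] and unmatched=[kube-system]; doReconcile then distributes the resource
// to the matched set and cleans its owned copies from the unmatched set.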
func needToUpdate(old, new *unstructured.Unstructured) bool {
oldObject := old.DeepCopy().Object
newObject := new.DeepCopy().Object
oldObject["metadata"] = nil
newObject["metadata"] = nil
oldObject["status"] = nil
newObject["status"] = nil
return !reflect.DeepEqual(oldObject, newObject)
}
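// A minimal sketch of the comparison (illustrative only; exampleNeedToUpdate is a hypothetical
// helper, not part of the controller): metadata and status are blanked on deep copies before
// comparing, so a labels-only change does not trigger an update, while a data/spec change does.
func exampleNeedToUpdate() bool {
	oldObj := &unstructured.Unstructured{Object: map[string]interface{}{
		"metadata": map[string]interface{}{"labels": map[string]interface{}{"x": "1"}},
		"data":     map[string]interface{}{"k": "v"},
	}}
	newObj := oldObj.DeepCopy()
	newObj.Object["metadata"] = map[string]interface{}{"labels": map[string]interface{}{"x": "2"}}
	return needToUpdate(oldObj, newObj) // false: only metadata differs
}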
func isControlledByDistributor(resource metav1.Object, distributor *appsv1alpha1.ResourceDistribution) bool {
controller := metav1.GetControllerOf(resource)
if controller != nil && distributor != nil &&
distributor.APIVersion == controller.APIVersion &&
distributor.Kind == controller.Kind &&
distributor.Name == controller.Name {
return true
}
return false
}
/*
Copyright 2021 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package workloadspread
import (
"context"
"time"
corev1 "k8s.io/api/core/v1"
"k8s.io/klog/v2"
"github.com/openkruise/kruise/pkg/controller/util"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
wsutil "github.com/openkruise/kruise/pkg/webhook/workloadspread/validating"
)
// rescheduleSubset will delete unschedulable Pods that are still pending. A subset without sufficient
// resources can cause its Pods to fail scheduling. Since a WorkloadSpread has multiple subsets, such
// unschedulable Pods should be rescheduled to other subsets.
// The controller marks a subset that contains unschedulable Pods as unschedulable, so that the webhook,
// by checking the subset's status, stops injecting Pods into it. The unschedulable status is kept for
// 5 minutes and is then recovered to schedulable so that scheduling Pods can be retried.
// TODO: optimize the unschedulable duration of a subset.
// Returns the unschedulable Pods belonging to this subset.
func (r *ReconcileWorkloadSpread) rescheduleSubset(ws *appsv1alpha1.WorkloadSpread,
pods []*corev1.Pod,
subsetStatus, oldSubsetStatus *appsv1alpha1.WorkloadSpreadSubsetStatus) []*corev1.Pod {
scheduleFailedPods := make([]*corev1.Pod, 0)
for i := range pods {
if PodUnscheduledTimeout(ws, pods[i]) {
scheduleFailedPods = append(scheduleFailedPods, pods[i])
}
}
unschedulable := len(scheduleFailedPods) > 0
if unschedulable {
klog.V(3).InfoS("Subset of WorkloadSpread is unschedulable", "subsetName", subsetStatus.Name, "workloadSpread", klog.KObj(ws))
}
oldCondition := GetWorkloadSpreadSubsetCondition(oldSubsetStatus, appsv1alpha1.SubsetSchedulable)
if oldCondition == nil {
if unschedulable {
setWorkloadSpreadSubsetCondition(subsetStatus, NewWorkloadSpreadSubsetCondition(appsv1alpha1.SubsetSchedulable, corev1.ConditionFalse, "", ""))
} else {
setWorkloadSpreadSubsetCondition(subsetStatus, NewWorkloadSpreadSubsetCondition(appsv1alpha1.SubsetSchedulable, corev1.ConditionTrue, "", ""))
}
return scheduleFailedPods
}
// copy old condition to avoid unnecessary update.
setWorkloadSpreadSubsetCondition(subsetStatus, oldCondition.DeepCopy())
if unschedulable {
setWorkloadSpreadSubsetCondition(subsetStatus, NewWorkloadSpreadSubsetCondition(appsv1alpha1.SubsetSchedulable, corev1.ConditionFalse, "", ""))
} else {
// consider to recover
if oldCondition.Status == corev1.ConditionFalse {
expectReschedule := oldCondition.LastTransitionTime.Add(wsutil.MaxScheduledFailedDuration)
currentTime := time.Now()
// the subset has been unschedulable for more than 5 minutes, recover it to schedulable.
if expectReschedule.Before(currentTime) {
r.recorder.Eventf(ws, corev1.EventTypeNormal,
"RecoverSchedulable", "Subset %s of WorkloadSpread %s/%s is recovered from unschedulable to schedulable",
subsetStatus.Name, ws.Namespace, ws.Name)
setWorkloadSpreadSubsetCondition(subsetStatus, NewWorkloadSpreadSubsetCondition(appsv1alpha1.SubsetSchedulable, corev1.ConditionTrue, "", ""))
} else {
// less than 5 minutes, keep it unschedulable.
durationStore.Push(getWorkloadSpreadKey(ws), expectReschedule.Sub(currentTime))
}
}
}
return scheduleFailedPods
}
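// Illustrative timeline (assumed values): a pending Pod in subset-a exceeds
// RescheduleCriticalSeconds, so subset-a's SubsetSchedulable condition turns False and the webhook
// stops injecting new Pods into it; once more than wsutil.MaxScheduledFailedDuration (the 5 minutes
// mentioned above) has passed since the condition's LastTransitionTime, the condition flips back to
// True and scheduling into subset-a is retried.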
func (r *ReconcileWorkloadSpread) cleanupUnscheduledPods(ws *appsv1alpha1.WorkloadSpread,
scheduleFailedPodsMap map[string][]*corev1.Pod) error {
for subsetName, pods := range scheduleFailedPodsMap {
if err := r.deletePodsForSubset(ws, pods, subsetName); err != nil {
return err
}
}
return nil
}
func (r *ReconcileWorkloadSpread) deletePodsForSubset(ws *appsv1alpha1.WorkloadSpread,
pods []*corev1.Pod, subsetName string) error {
for _, pod := range pods {
if err := r.Client.Delete(context.TODO(), pod); err != nil {
r.recorder.Eventf(ws, corev1.EventTypeWarning,
"DeletePodFailed",
"Failed to delete unschedulabe Pod %s/%s in Subset %s of WorkloadSpread %s/%s",
pod.Namespace, pod.Name, subsetName, ws.Namespace, ws.Name)
return err
}
klog.V(3).InfoS("WorkloadSpread deleted unschedulabe Pod in Subset successfully", "workloadSpread", klog.KObj(ws), "pod", klog.KObj(pod), "subsetName", subsetName)
}
return nil
}
// PodUnscheduledTimeout returns true when the Pod failed to be scheduled and has timed out.
func PodUnscheduledTimeout(ws *appsv1alpha1.WorkloadSpread, pod *corev1.Pod) bool {
timeouted, nextCheckAfter := util.GetTimeBeforePendingTimeout(pod, time.Second*time.Duration(*ws.Spec.ScheduleStrategy.Adaptive.RescheduleCriticalSeconds))
if nextCheckAfter > 0 {
durationStore.Push(getWorkloadSpreadKey(ws), nextCheckAfter)
}
return timeouted
}
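// For example (assumed spec): with RescheduleCriticalSeconds=30, a Pod that has already been pending
// and unschedulable for 45s makes PodUnscheduledTimeout return true; a Pod pending for only 10s
// returns false, and the remaining wait reported by GetTimeBeforePendingTimeout is pushed into
// durationStore so the WorkloadSpread is requeued in time to re-check it.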
/*
Copyright 2021 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package workloadspread
import (
"context"
"fmt"
"sort"
"strconv"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/klog/v2"
kubecontroller "k8s.io/kubernetes/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/client"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
wsutil "github.com/openkruise/kruise/pkg/util/workloadspread"
)
const (
// RevisionAnnotation is the revision annotation of a deployment's replica sets which records its rollout sequence
RevisionAnnotation = "deployment.kubernetes.io/revision"
)
func (r *ReconcileWorkloadSpread) getWorkloadLatestVersion(ws *appsv1alpha1.WorkloadSpread) (string, error) {
targetRef := ws.Spec.TargetReference
if targetRef == nil {
return "", nil
}
gvk := schema.FromAPIVersionAndKind(targetRef.APIVersion, targetRef.Kind)
key := types.NamespacedName{Namespace: ws.Namespace, Name: targetRef.Name}
object := wsutil.GenerateEmptyWorkloadObject(gvk, key)
if err := r.Get(context.TODO(), key, object); err != nil {
return "", client.IgnoreNotFound(err)
}
return wsutil.GetWorkloadVersion(r.Client, object)
}
func (r *ReconcileWorkloadSpread) updateDeletionCost(ws *appsv1alpha1.WorkloadSpread,
versionedPodMap map[string]map[string][]*corev1.Pod,
workloadReplicas int32) error {
targetRef := ws.Spec.TargetReference
if targetRef == nil || !isEffectiveKindForDeletionCost(targetRef) {
return nil
}
latestVersion, err := r.getWorkloadLatestVersion(ws)
if err != nil {
klog.ErrorS(err, "Failed to get the latest version for workload in workloadSpread", "workloadSpread", klog.KObj(ws))
return err
}
// To try our best to keep the distribution described by the WorkloadSpread during workload rolling:
// - for the latest version, we prefer to scale down the last subset first;
// - for older versions, we prefer to scale down the first subset first;
for version, podMap := range versionedPodMap {
err = r.updateDeletionCostBySubset(ws, podMap, workloadReplicas, version != latestVersion)
if err != nil {
return err
}
}
return nil
}
func (r *ReconcileWorkloadSpread) updateDeletionCostBySubset(ws *appsv1alpha1.WorkloadSpread,
podMap map[string][]*corev1.Pod, workloadReplicas int32, reverseOrder bool) error {
subsetNum := len(ws.Spec.Subsets)
subsetIndex := func(index int) int {
if reverseOrder {
return subsetNum - index - 1
}
return index
}
// update Pod's deletion-cost annotation in each subset
for idx, subset := range ws.Spec.Subsets {
if err := r.syncSubsetPodDeletionCost(ws, &subset, subsetIndex(idx), podMap[subset.Name], workloadReplicas); err != nil {
return err
}
}
// update the deletion-cost annotation for the pods that do not match any real subset.
// these pods get the minimum deletion-cost and will be deleted preferentially.
if len(podMap[FakeSubsetName]) > 0 {
if err := r.syncSubsetPodDeletionCost(ws, nil, len(ws.Spec.Subsets), podMap[FakeSubsetName], workloadReplicas); err != nil {
return err
}
}
return nil
}
// syncSubsetPodDeletionCost calculates the deletion-cost for the Pods belonging to the subset and updates the deletion-cost annotation.
// We have two cases for a subset's Pod deletion-cost
// 1. the number of active Pods in this subset <= maxReplicas or maxReplicas = nil, deletion-cost = 100 * (subsets.length - subsetIndex).
// name subset-a subset-b subset-c
// maxReplicas 10 10 nil
// pods number 10 10 10
// deletion-cost 300 200 100
// We delete Pods from back subset to front subset. The deletion order is: c -> b -> a.
// 2. the number of active Pods in this subset > maxReplicas
// two classes:
// (a) the extra Pods more than maxReplicas: deletion-cost = -100 * (subsetIndex + 1) [Priority Deletion],
// (b) deletion-cost = 100 * (subsets.length - subsetIndex) [Reserve].
// name subset-a subset-b subset-c
// maxReplicas 10 10 nil
// pods number 20 20 20
// deletion-cost (300,-100) (200,-200) 100
func (r *ReconcileWorkloadSpread) syncSubsetPodDeletionCost(
ws *appsv1alpha1.WorkloadSpread,
subset *appsv1alpha1.WorkloadSpreadSubset,
subsetIndex int,
pods []*corev1.Pod,
workloadReplicas int32) error {
var err error
// slice that will contain all Pods whose deletion-cost should be set to a positive value.
var positivePods []*corev1.Pod
// slice that will contain all Pods whose deletion-cost should be set to a negative value.
var negativePods []*corev1.Pod
// count active Pods
activePods := make([]*corev1.Pod, 0, len(pods))
for i := range pods {
if kubecontroller.IsPodActive(pods[i]) {
activePods = append(activePods, pods[i])
}
}
replicas := len(activePods)
// First we partition Pods into two lists: positive, negative list.
if subset == nil {
// for the FakeSubsetName case, where the pods don't match any subset and will be deleted preferentially.
negativePods = activePods
} else if subset.MaxReplicas == nil {
// maxReplicas is nil, which means there is no limit to the number of Pods in this subset.
positivePods = activePods
} else {
subsetMaxReplicas, err := intstr.GetValueFromIntOrPercent(subset.MaxReplicas, int(workloadReplicas), true)
if err != nil || subsetMaxReplicas < 0 {
klog.ErrorS(err, "Failed to get maxReplicas value from subset of WorkloadSpread", "subsetName", subset.Name, "workloadSpread", klog.KObj(ws))
return nil
}
if replicas <= subsetMaxReplicas {
positivePods = activePods
} else {
// Pods are partitioned into two classes: the healthier ones, of size subsetMaxReplicas, get a
// positive deletion-cost; the remaining Pods, of size replicas - subsetMaxReplicas, are preferred
// for deletion and get a negative deletion-cost.
positivePods = make([]*corev1.Pod, 0, subsetMaxReplicas)
negativePods = make([]*corev1.Pod, 0, replicas-subsetMaxReplicas)
// sort Pods according to Pod's condition.
indexes := sortDeleteIndexes(activePods)
// partition Pods into negativePods and positivePods by sorted indexes.
for i := range indexes {
if i < (replicas - subsetMaxReplicas) {
negativePods = append(negativePods, activePods[indexes[i]])
} else {
positivePods = append(positivePods, activePods[indexes[i]])
}
}
}
}
err = r.updateDeletionCostForSubsetPods(ws, subset, positivePods, strconv.Itoa(wsutil.PodDeletionCostPositive*(len(ws.Spec.Subsets)-subsetIndex)))
if err != nil {
return err
}
return r.updateDeletionCostForSubsetPods(ws, subset, negativePods, strconv.Itoa(wsutil.PodDeletionCostNegative*(subsetIndex+1)))
}
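// Worked arithmetic (assumed values, mirroring the table above): with 3 subsets and subsetIndex=1
// (subset-b), Pods in the positive class get deletion-cost 100*(3-1)=200, and when subset-b exceeds
// its maxReplicas the overflow Pods get -100*(1+1)=-200, so the workload controller removes
// subset-b's overflow first during scale-down while its reserved Pods are kept longer.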
func (r *ReconcileWorkloadSpread) updateDeletionCostForSubsetPods(ws *appsv1alpha1.WorkloadSpread,
subset *appsv1alpha1.WorkloadSpreadSubset, pods []*corev1.Pod, deletionCostStr string) error {
for _, pod := range pods {
if err := r.patchPodDeletionCost(ws, pod, deletionCostStr); err != nil {
subsetName := FakeSubsetName
if subset != nil {
subsetName = subset.Name
}
r.recorder.Eventf(ws, corev1.EventTypeWarning,
"PatchPodDeletionCostFailed",
"WorkloadSpread %s/%s failed to patch deletion-cost annotation to %d for Pod %s/%s in subset %s",
ws.Namespace, ws.Name, deletionCostStr, pod.Namespace, pod.Name, subsetName)
return err
}
}
return nil
}
func (r *ReconcileWorkloadSpread) patchPodDeletionCost(ws *appsv1alpha1.WorkloadSpread,
pod *corev1.Pod, deletionCostStr string) error {
clone := pod.DeepCopy()
annotationKey := wsutil.PodDeletionCostAnnotation
annotationValue := deletionCostStr
podAnnotation := pod.GetAnnotations()
oldValue, exist := podAnnotation[annotationKey]
// annotation has been set
if exist && annotationValue == oldValue {
return nil
}
// keep the original setting.
if !exist && annotationValue == "0" {
return nil
}
body := fmt.Sprintf(`{"metadata":{"annotations":{"%s":"%s"}}}`, annotationKey, annotationValue)
if err := r.Patch(context.TODO(), clone, client.RawPatch(types.StrategicMergePatchType, []byte(body))); err != nil {
return err
}
klog.V(3).InfoS("WorkloadSpread patched deletion-cost annotation for Pod successfully", "workloadSpread", klog.KObj(ws), "deletionCost", deletionCostStr, "pod", klog.KObj(pod))
return nil
}
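// For example, patching a deletion cost of "200" sends a strategic-merge patch body like
//   {"metadata":{"annotations":{"controller.kubernetes.io/pod-deletion-cost":"200"}}}
// (the annotation key shown is an assumption about wsutil.PodDeletionCostAnnotation); patches are
// skipped when the annotation already holds the same value, or when it is absent and the new value
// is "0", to avoid no-op API calls.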
func sortDeleteIndexes(pods []*corev1.Pod) []int {
waitDeleteIndexes := make([]int, 0, len(pods))
for i := 0; i < len(pods); i++ {
waitDeleteIndexes = append(waitDeleteIndexes, i)
}
// Sort Pods with default sequence
// - Unassigned < assigned
// - PodPending < PodUnknown < PodRunning
// - Not ready < ready
// - Been ready for empty time < less time < more time
// - Pods with containers with higher restart counts < lower restart counts
// - Empty creation time pods < newer pods < older pods
// Using SliceStable to keep equal elements in their original order, which avoids frequent updates.
sort.SliceStable(waitDeleteIndexes, func(i, j int) bool {
return kubecontroller.ActivePods(pods).Less(waitDeleteIndexes[i], waitDeleteIndexes[j])
})
return waitDeleteIndexes
}
func isEffectiveKindForDeletionCost(targetRef *appsv1alpha1.TargetReference) bool {
switch targetRef.Kind {
case controllerKindRS.Kind, controllerKindDep.Kind, controllerKruiseKindCS.Kind:
return true
}
return false
}
/*
Copyright 2021 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package workloadspread
import (
"context"
"encoding/json"
"flag"
"fmt"
"math"
"strings"
"time"
appsv1 "k8s.io/api/apps/v1"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
apiequality "k8s.io/apimachinery/pkg/api/equality"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/client-go/tools/record"
"k8s.io/klog/v2"
kubecontroller "k8s.io/kubernetes/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/source"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
ctrlUtil "github.com/openkruise/kruise/pkg/controller/util"
"github.com/openkruise/kruise/pkg/features"
"github.com/openkruise/kruise/pkg/util"
utilclient "github.com/openkruise/kruise/pkg/util/client"
"github.com/openkruise/kruise/pkg/util/configuration"
"github.com/openkruise/kruise/pkg/util/controllerfinder"
utildiscovery "github.com/openkruise/kruise/pkg/util/discovery"
utilfeature "github.com/openkruise/kruise/pkg/util/feature"
"github.com/openkruise/kruise/pkg/util/fieldindex"
"github.com/openkruise/kruise/pkg/util/ratelimiter"
"github.com/openkruise/kruise/pkg/util/requeueduration"
wsutil "github.com/openkruise/kruise/pkg/util/workloadspread"
)
func init() {
flag.IntVar(&concurrentReconciles, "workloadspread-workers", concurrentReconciles, "Max concurrent workers for WorkloadSpread controller.")
}
var (
concurrentReconciles = 3
)
const (
controllerName = "workloadspread-controller"
// CreatPodTimeout sets the maximum time from the moment a pod is added to CreatePods in WorkloadSpread.Status by
// the webhook to the time when the pod is expected to be seen by the controller. If the controller has not found
// the pod within that time, it is assumed the pod will not be created at all, and the corresponding record can be
// removed from WorkloadSpread.Status. The pod/ws apiserver-to-controller latency is assumed to be relatively small
// (1-2 seconds), so the value below should be more than enough.
CreatPodTimeout = 30 * time.Second
// DeletePodTimeout is similar to CreatPodTimeout and is the time allowed for deleting a Pod.
DeletePodTimeout = 15 * time.Second
// FakeSubsetName is a fake subset name for such pods that do not match any subsets
FakeSubsetName = "kruise.io/workloadspread-fake-subset-name"
// IgnorePatchExistingPodsAnnotation ignore ws.Spec.Subsets[x].Patch for existing pods
IgnorePatchExistingPodsAnnotation = "workloadspread.kruise.io/ignore-patch-existing-pods-metadata"
)
var (
controllerKruiseKindWS = appsv1alpha1.SchemeGroupVersion.WithKind("WorkloadSpread")
controllerKruiseKindCS = appsv1alpha1.SchemeGroupVersion.WithKind("CloneSet")
controllerKruiseKindSts = appsv1alpha1.SchemeGroupVersion.WithKind("StatefulSet")
controllerKindSts = appsv1.SchemeGroupVersion.WithKind("StatefulSet")
controllerKindRS = appsv1.SchemeGroupVersion.WithKind("ReplicaSet")
controllerKindDep = appsv1.SchemeGroupVersion.WithKind("Deployment")
controllerKindJob = batchv1.SchemeGroupVersion.WithKind("Job")
)
// durationStore is a shortcut for sub-functions to tell the reconciler how long to wait before requeueing
var durationStore = requeueduration.DurationStore{}
// Add creates a new WorkloadSpread Controller and adds it to the Manager with default RBAC. The Manager will set fields on the Controller
// and Start it when the Manager is Started.
func Add(mgr manager.Manager) error {
if !utildiscovery.DiscoverGVK(controllerKruiseKindWS) || !utilfeature.DefaultFeatureGate.Enabled(features.WorkloadSpread) {
return nil
}
return add(mgr, newReconciler(mgr))
}
// add adds a new Controller to mgr with r as the reconcile.Reconciler
func add(mgr manager.Manager, r reconcile.Reconciler) error {
// Create a new controller
c, err := controller.New(controllerName, mgr, controller.Options{
Reconciler: r, MaxConcurrentReconciles: concurrentReconciles, CacheSyncTimeout: util.GetControllerCacheSyncTimeout(),
RateLimiter: ratelimiter.DefaultControllerRateLimiter()})
if err != nil {
return err
}
// Watch WorkloadSpread
err = c.Watch(source.Kind(mgr.GetCache(), &appsv1alpha1.WorkloadSpread{}, &handler.TypedEnqueueRequestForObject[*appsv1alpha1.WorkloadSpread]{}))
if err != nil {
return err
}
// Watch for changes to Pods that have a specific annotation
err = c.Watch(source.Kind(mgr.GetCache(), &corev1.Pod{}, &podEventHandler{}))
if err != nil {
return err
}
// Watch for replica changes to CloneSet
err = c.Watch(source.Kind(mgr.GetCache(), client.Object(&appsv1alpha1.CloneSet{}), &workloadEventHandler{Reader: mgr.GetCache()}))
if err != nil {
return err
}
// Watch for replica changes to Deployment
err = c.Watch(source.Kind(mgr.GetCache(), client.Object(&appsv1.Deployment{}), &workloadEventHandler{Reader: mgr.GetCache()}))
if err != nil {
return err
}
// Watch for replica changes to ReplicaSet
err = c.Watch(source.Kind(mgr.GetCache(), client.Object(&appsv1.ReplicaSet{}), &workloadEventHandler{Reader: mgr.GetCache()}))
if err != nil {
return err
}
// Watch for parallelism changes to Job
err = c.Watch(source.Kind(mgr.GetCache(), client.Object(&batchv1.Job{}), &workloadEventHandler{Reader: mgr.GetCache()}))
if err != nil {
return err
}
// Watch for replicas changes to other CRD
whiteList, err := configuration.GetWSWatchCustomWorkloadWhiteList(mgr.GetClient())
if err != nil {
return err
}
if len(whiteList.Workloads) > 0 {
workloadHandler := &workloadEventHandler{Reader: mgr.GetClient()}
for _, workload := range whiteList.Workloads {
if _, err := ctrlUtil.AddWatcherDynamically(mgr, c, workloadHandler, workload.GroupVersionKind, "WorkloadSpread"); err != nil {
return err
}
}
}
return nil
}
// newReconciler returns a new reconcile.Reconciler
func newReconciler(mgr manager.Manager) reconcile.Reconciler {
cli := utilclient.NewClientFromManager(mgr, controllerName)
return &ReconcileWorkloadSpread{
Client: cli,
scheme: mgr.GetScheme(),
recorder: mgr.GetEventRecorderFor(controllerName),
controllerFinder: controllerfinder.Finder,
}
}
var _ reconcile.Reconciler = &ReconcileWorkloadSpread{}
// ReconcileWorkloadSpread reconciles a WorkloadSpread object
type ReconcileWorkloadSpread struct {
client.Client
scheme *runtime.Scheme
recorder record.EventRecorder
controllerFinder *controllerfinder.ControllerFinder
}
// +kubebuilder:rbac:groups=apps.kruise.io,resources=workloadspreads,verbs=get;list;watch;update;patch
// +kubebuilder:rbac:groups=apps.kruise.io,resources=workloadspreads/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=apps.kruise.io,resources=workloadspreads/finalizers,verbs=update
// +kubebuilder:rbac:groups=apps.kruise.io,resources=clonesets,verbs=get;list;watch
// +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch
// +kubebuilder:rbac:groups=apps,resources=replicasets,verbs=get;list;watch
// +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;watch
// +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch;update;patch;delete
func (r *ReconcileWorkloadSpread) Reconcile(_ context.Context, req reconcile.Request) (reconcile.Result, error) {
ws := &appsv1alpha1.WorkloadSpread{}
err := r.Get(context.TODO(), req.NamespacedName, ws)
if (err != nil && errors.IsNotFound(err)) || (err == nil && !ws.DeletionTimestamp.IsZero()) {
// delete cache if this workloadSpread has been deleted
if cacheErr := util.GlobalCache.Delete(&appsv1alpha1.WorkloadSpread{
TypeMeta: metav1.TypeMeta{
APIVersion: "apps.kruise.io/v1alpha1",
Kind: "WorkloadSpread",
},
ObjectMeta: metav1.ObjectMeta{
Namespace: req.Namespace,
Name: req.Name,
},
}); cacheErr != nil {
klog.ErrorS(cacheErr, "Failed to delete workloadSpread cache after deletion", "workloadSpread", req)
}
return reconcile.Result{}, nil
} else if err != nil {
// Error reading the object - requeue the request.
return reconcile.Result{}, err
}
startTime := time.Now()
klog.V(3).InfoS("Began to process WorkloadSpread", "workloadSpread", klog.KObj(ws))
err = r.syncWorkloadSpread(ws)
klog.V(3).InfoS("Finished syncing WorkloadSpread", "workloadSpread", klog.KObj(ws), "cost", time.Since(startTime))
return reconcile.Result{RequeueAfter: durationStore.Pop(getWorkloadSpreadKey(ws))}, err
}
func (r *ReconcileWorkloadSpread) getPodJob(ref *appsv1alpha1.TargetReference, namespace string) ([]*corev1.Pod, int32, error) {
ok, err := wsutil.VerifyGroupKind(ref, controllerKindJob.Kind, []string{controllerKindJob.Group})
if err != nil || !ok {
return nil, 0, err
}
job := &batchv1.Job{}
err = r.Get(context.TODO(), client.ObjectKey{Namespace: namespace, Name: ref.Name}, job)
if err != nil {
// when error is NotFound, it is ok here.
if errors.IsNotFound(err) {
klog.V(3).InfoS("Could not find Job", "job", klog.KRef(namespace, ref.Name))
return nil, 0, nil
}
return nil, 0, err
}
labelSelector, err := util.ValidatedLabelSelectorAsSelector(job.Spec.Selector)
if err != nil {
klog.ErrorS(err, "Failed to get labelSelector")
return nil, 0, err
}
podList := &corev1.PodList{}
listOption := &client.ListOptions{
Namespace: namespace,
LabelSelector: labelSelector,
FieldSelector: fields.SelectorFromSet(fields.Set{fieldindex.IndexNameForOwnerRefUID: string(job.UID)}),
}
err = r.List(context.TODO(), podList, listOption)
if err != nil {
return nil, 0, err
}
matchedPods := make([]*corev1.Pod, 0, len(podList.Items))
for i := range podList.Items {
matchedPods = append(matchedPods, &podList.Items[i])
}
return matchedPods, *(job.Spec.Parallelism), nil
}
func (r *ReconcileWorkloadSpread) getReplicasPathList(ws *appsv1alpha1.WorkloadSpread) ([]string, error) {
if ws.Spec.TargetReference == nil {
return nil, nil
}
if ws.Spec.TargetFilter != nil && len(ws.Spec.TargetFilter.ReplicasPathList) > 0 {
return ws.Spec.TargetFilter.ReplicasPathList, nil
}
whiteList, err := configuration.GetWSWatchCustomWorkloadWhiteList(r.Client)
if err != nil {
return nil, err
}
gv, err := schema.ParseGroupVersion(ws.Spec.TargetReference.APIVersion)
if err != nil {
return nil, err
}
for _, wl := range whiteList.Workloads {
if wl.GroupVersion() != gv || wl.GroupVersionKind.Kind != ws.Spec.TargetReference.Kind {
continue
}
klog.V(5).InfoS("found replicas path in whitelist", "path", wl.ReplicasPath, "workloadSpread", klog.KObj(ws))
return []string{wl.ReplicasPath}, nil
}
return nil, nil
}
// getPodsForWorkloadSpread returns Pods managed by the WorkloadSpread object.
// return two parameters
// 1. podList for workloadSpread
// 2. workloadReplicas
func (r *ReconcileWorkloadSpread) getPodsForWorkloadSpread(ws *appsv1alpha1.WorkloadSpread) ([]*corev1.Pod, int32, error) {
if ws.Spec.TargetReference == nil {
return nil, 0, nil
}
var pods []*corev1.Pod
var workloadReplicas int32
var err error
targetRef := ws.Spec.TargetReference
switch targetRef.Kind {
case controllerKindJob.Kind:
pods, workloadReplicas, err = r.getPodJob(targetRef, ws.Namespace)
default:
pods, workloadReplicas, err = r.controllerFinder.GetPodsForRef(targetRef.APIVersion, targetRef.Kind, ws.Namespace, targetRef.Name, false)
}
if err != nil {
klog.ErrorS(err, "WorkloadSpread handled targetReference failed", "workloadSpread", klog.KObj(ws))
return nil, 0, err
}
workloadReplicas, pods, err = r.filterWorkload(ws, pods, workloadReplicas)
if err != nil {
klog.ErrorS(err, "Filter workload failed", "workloadSpread", klog.KObj(ws))
return nil, 0, err
}
return pods, workloadReplicas, err
}
func (r *ReconcileWorkloadSpread) filterWorkload(ws *appsv1alpha1.WorkloadSpread, pods []*corev1.Pod, replicas int32) (int32, []*corev1.Pod, error) {
klog.V(5).InfoS("before workload filtering", "pods", len(pods), "replicas", replicas, "workloadSpread", klog.KObj(ws))
replicasPathList, err := r.getReplicasPathList(ws)
if err != nil {
return replicas, pods, err
}
var filteredReplicas int32
if len(replicasPathList) > 0 {
// a replicas path list is configured somewhere, so it should overwrite the replicas value
targetRef := ws.Spec.TargetReference
wl, err := r.controllerFinder.GetControllerAsUnstructured(controllerfinder.ControllerReference{
APIVersion: targetRef.APIVersion,
Kind: targetRef.Kind,
Name: targetRef.Name,
}, ws.Namespace)
if err != nil {
return replicas, pods, client.IgnoreNotFound(err)
}
for _, replicasPath := range replicasPathList {
n, err := wsutil.GetReplicasFromObject(wl, replicasPath)
if err != nil {
return replicas, pods, err
}
filteredReplicas += n
}
klog.V(4).InfoS("replicas after filtering", "replicas", filteredReplicas,
"replicasPathList", replicasPathList, "workloadSpread", klog.KObj(ws))
} else {
filteredReplicas = replicas
klog.V(4).InfoS("replicas not filtered", "workloadSpread", klog.KObj(ws))
}
var filteredPods []*corev1.Pod
if ws.Spec.TargetFilter != nil && ws.Spec.TargetFilter.Selector != nil {
for _, pod := range pods {
selected, err := wsutil.IsPodSelected(ws.Spec.TargetFilter, pod.Labels)
if err != nil {
return replicas, pods, err
}
if selected {
filteredPods = append(filteredPods, pod)
}
}
klog.V(4).InfoS("pods after filtering", "pods", len(filteredPods), "selector", ws.Spec.TargetFilter.Selector)
} else {
filteredPods = pods
}
return filteredReplicas, filteredPods, nil
}
// syncWorkloadSpread is the main logic of the WorkloadSpread controller. First, we get the Pods of the workload managed by
// the WorkloadSpread and classify them into their corresponding subsets. Second, we set the Pod deletion-cost annotation
// by comparing the number of Pods in each subset with the subset's maxReplicas, and then consider rescheduling schedule-failed Pods.
// Lastly, we update the WorkloadSpread's status and clean up schedule-failed Pods. The controller collaborates with the webhook
// to maintain the WorkloadSpread status together: the controller is responsible for calculating the real status, while the webhook
// mainly counts missingReplicas and records Pod creation or deletion entries into the maps.
func (r *ReconcileWorkloadSpread) syncWorkloadSpread(ws *appsv1alpha1.WorkloadSpread) error {
if ws.Spec.TargetReference == nil {
klog.InfoS("WorkloadSpread has no target reference", "workloadSpread", klog.KObj(ws))
return nil
}
pods, workloadReplicas, err := r.getPodsForWorkloadSpread(ws)
if err != nil {
klog.ErrorS(err, "WorkloadSpread got matched pods failed", "workloadSpread", klog.KObj(ws))
return err
}
if len(pods) == 0 {
klog.InfoS("WorkloadSpread had no matched pods", "workloadSpread", klog.KObj(ws), "targetWorkloadReplicas", workloadReplicas)
}
// group Pods by pod-revision and subset
versionedPodMap, subsetPodMap, err := r.groupVersionedPods(ws, pods, workloadReplicas)
if err != nil {
return err
}
// update deletion-cost for each subset
err = r.updateDeletionCost(ws, versionedPodMap, workloadReplicas)
if err != nil {
return err
}
// calculate status and reschedule
status, scheduleFailedPodMap := r.calculateWorkloadSpreadStatus(ws, versionedPodMap, subsetPodMap, workloadReplicas)
if status == nil {
return nil
}
// update status
err = r.UpdateWorkloadSpreadStatus(ws, status)
if err != nil {
return err
}
// clean up unschedulable Pods
return r.cleanupUnscheduledPods(ws, scheduleFailedPodMap)
}
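// getInjectWorkloadSpreadFromPod parses the matched-workloadspread annotation of the pod,
// returning nil if the annotation is absent or cannot be unmarshalled.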
func getInjectWorkloadSpreadFromPod(pod *corev1.Pod) *wsutil.InjectWorkloadSpread {
injectStr, exist := pod.GetAnnotations()[wsutil.MatchedWorkloadSpreadSubsetAnnotations]
if !exist {
return nil
}
injectWS := &wsutil.InjectWorkloadSpread{}
err := json.Unmarshal([]byte(injectStr), injectWS)
if err != nil {
klog.ErrorS(err, "Failed to unmarshal JSON from Pod", "JSON", injectStr, "pod", klog.KObj(pod))
return nil
}
return injectWS
}
// groupVersionedPods will group pods by pod version and subset
func (r *ReconcileWorkloadSpread) groupVersionedPods(ws *appsv1alpha1.WorkloadSpread, allPods []*corev1.Pod, replicas int32) (map[string]map[string][]*corev1.Pod, map[string][]*corev1.Pod, error) {
versionedPods := map[string][]*corev1.Pod{}
for _, pod := range allPods {
version := wsutil.GetPodVersion(pod)
versionedPods[version] = append(versionedPods[version], pod)
}
subsetPodMap := map[string][]*corev1.Pod{}
versionedPodMap := map[string]map[string][]*corev1.Pod{}
// group pods by version
for version, pods := range versionedPods {
// group pods by subset
podMap, err := r.groupPodBySubset(ws, pods, replicas)
if err != nil {
return nil, nil, err
}
for subset, ps := range podMap {
subsetPodMap[subset] = append(subsetPodMap[subset], ps...)
}
versionedPodMap[version] = podMap
}
return versionedPodMap, subsetPodMap, nil
}
// groupPodBySubset returns a map whose key is the subset name and whose value is the Pods belonging to that subset.
func (r *ReconcileWorkloadSpread) groupPodBySubset(ws *appsv1alpha1.WorkloadSpread, pods []*corev1.Pod, replicas int32) (map[string][]*corev1.Pod, error) {
podMap := make(map[string][]*corev1.Pod, len(ws.Spec.Subsets)+1)
podMap[FakeSubsetName] = []*corev1.Pod{}
subsetMissingReplicas := make(map[string]int)
for _, subset := range ws.Spec.Subsets {
podMap[subset.Name] = []*corev1.Pod{}
subsetMissingReplicas[subset.Name], _ = intstr.GetScaledValueFromIntOrPercent(
intstr.ValueOrDefault(subset.MaxReplicas, intstr.FromInt32(math.MaxInt32)), int(replicas), true)
}
// count managed pods for each subset
for i := range pods {
injectWS := getInjectWorkloadSpreadFromPod(pods[i])
if isNotMatchedWS(injectWS, ws) {
continue
}
if _, exist := podMap[injectWS.Subset]; !exist {
continue
}
subsetMissingReplicas[injectWS.Subset]--
}
for i := range pods {
subsetName, err := r.getSuitableSubsetNameForPod(ws, pods[i], subsetMissingReplicas)
if err != nil {
return nil, err
}
if _, exist := podMap[subsetName]; exist {
podMap[subsetName] = append(podMap[subsetName], pods[i])
} else {
// for the case where the original subset of the pod was deleted.
podMap[FakeSubsetName] = append(podMap[FakeSubsetName], pods[i])
}
}
return podMap, nil
}
// getSuitableSubsetNameForPod returns (FakeSubsetName, nil) if no suitable subset is found for the pod
func (r *ReconcileWorkloadSpread) getSuitableSubsetNameForPod(ws *appsv1alpha1.WorkloadSpread, pod *corev1.Pod, subsetMissingReplicas map[string]int) (string, error) {
injectWS := getInjectWorkloadSpreadFromPod(pod)
if isNotMatchedWS(injectWS, ws) {
// process the pods that were created before workloadSpread
matchedSubset, err := r.getAndUpdateSuitableSubsetName(ws, pod, subsetMissingReplicas)
klog.V(3).InfoS("no subset injected to pod, find a suitable one", "pod", klog.KObj(pod), "workloadSpread", klog.KObj(ws), "matchedSubset", matchedSubset)
if err != nil {
return "", err
} else if matchedSubset == nil {
return FakeSubsetName, nil
}
return matchedSubset.Name, nil
}
return injectWS.Subset, nil
}
// getAndUpdateSuitableSubsetName returns a suitable subset for a pod which was created before the workloadSpread,
// and records the match on the pod. It returns (nil, nil) if there is no suitable subset for the pod.
func (r *ReconcileWorkloadSpread) getAndUpdateSuitableSubsetName(ws *appsv1alpha1.WorkloadSpread, pod *corev1.Pod, subsetMissingReplicas map[string]int) (*appsv1alpha1.WorkloadSpreadSubset, error) {
if len(pod.Spec.NodeName) == 0 {
return nil, nil
}
node := &corev1.Node{}
if err := r.Get(context.TODO(), types.NamespacedName{Name: pod.Spec.NodeName}, node); err != nil {
if errors.IsNotFound(err) {
return nil, nil
}
return nil, err
}
var maxPreferredScore int64 = -1
var favoriteSubset *appsv1alpha1.WorkloadSpreadSubset
for i := range ws.Spec.Subsets {
subset := &ws.Spec.Subsets[i]
// handle the case where this pod was scheduled to a node that matches a subset of the workloadSpread
matched, preferredScore, err := matchesSubset(pod, node, subset, subsetMissingReplicas[subset.Name])
if err != nil {
// the requiredSelectorTerm field was validated at the webhook stage, so this error should not occur
// the error is not returned, because it is non-transient and retrying would not help
klog.ErrorS(err, "Unexpected error occurred when matching pod with subset, please check requiredSelectorTerm field of subset in WorkloadSpread",
"pod", klog.KObj(pod), "subsetName", subset.Name, "workloadSpread", klog.KObj(ws))
}
klog.V(4).InfoS("preferred score for subset", "pod", klog.KObj(pod), "subsetName", subset.Name, "workloadSpread", klog.KObj(ws), "preferredScore", preferredScore, "node", node.Name)
// select the most favorite subsets for the pod by subset.PreferredNodeSelectorTerms
if matched && preferredScore > maxPreferredScore {
favoriteSubset = subset
maxPreferredScore = preferredScore
}
}
if favoriteSubset != nil {
if err := r.patchFavoriteSubsetMetadataToPod(pod, ws, favoriteSubset); err != nil {
return nil, err
}
subsetMissingReplicas[favoriteSubset.Name]--
return favoriteSubset, nil
}
return nil, nil
}
// patchFavoriteSubsetMetadataToPod patches the MatchedWorkloadSpreadSubsetAnnotations to the pod,
// and selects labels/annotations from favoriteSubset.patch, then patches them to the pod.
func (r *ReconcileWorkloadSpread) patchFavoriteSubsetMetadataToPod(pod *corev1.Pod, ws *appsv1alpha1.WorkloadSpread, favoriteSubset *appsv1alpha1.WorkloadSpreadSubset) error {
patchMetadata := make(map[string]interface{})
// decode favoriteSubset.patch.raw and add its labels and annotations to the patch
if favoriteSubset.Patch.Raw != nil && !strings.EqualFold(ws.Annotations[IgnorePatchExistingPodsAnnotation], "true") {
patchField := map[string]interface{}{}
if err := json.Unmarshal(favoriteSubset.Patch.Raw, &patchField); err == nil {
if metadata, ok := patchField["metadata"].(map[string]interface{}); ok && metadata != nil {
patchMetadata = metadata
}
}
}
injectWS, _ := json.Marshal(&wsutil.InjectWorkloadSpread{
Name: ws.Name,
Subset: favoriteSubset.Name,
})
if annotations, ok := patchMetadata["annotations"].(map[string]interface{}); ok && annotations != nil {
annotations[wsutil.MatchedWorkloadSpreadSubsetAnnotations] = string(injectWS)
} else {
patchMetadata["annotations"] = map[string]interface{}{
wsutil.MatchedWorkloadSpreadSubsetAnnotations: string(injectWS),
}
}
patch, _ := json.Marshal(map[string]interface{}{
"metadata": patchMetadata,
})
if err := r.Patch(context.TODO(), pod, client.RawPatch(types.StrategicMergePatchType, patch)); err != nil {
klog.ErrorS(err, `Failed to patch "matched-workloadspread" annotation for pod`,
"pod", klog.KObj(pod), "annotationValue", fmt.Sprintf("{Name: %s, Subset: %s}", ws.Name, favoriteSubset.Name))
return err
}
return nil
}
// return two parameters
// 1. current WorkloadSpreadStatus
// 2. a map whose key is the subsetName and whose value is the schedule-failed Pods belonging to the subset.
func (r *ReconcileWorkloadSpread) calculateWorkloadSpreadStatus(ws *appsv1alpha1.WorkloadSpread,
versionedPodMap map[string]map[string][]*corev1.Pod, subsetPodMap map[string][]*corev1.Pod,
workloadReplicas int32) (*appsv1alpha1.WorkloadSpreadStatus, map[string][]*corev1.Pod) {
status := appsv1alpha1.WorkloadSpreadStatus{}
// set the generation in the returned status
status.ObservedGeneration = ws.Generation
// status.ObservedWorkloadReplicas = workloadReplicas
status.VersionedSubsetStatuses = make(map[string][]appsv1alpha1.WorkloadSpreadSubsetStatus, len(versionedPodMap))
// overall subset statuses
var scheduleFailedPodMap map[string][]*corev1.Pod
status.SubsetStatuses, scheduleFailedPodMap = r.calculateWorkloadSpreadSubsetStatuses(ws, ws.Status.SubsetStatuses, subsetPodMap, workloadReplicas)
// versioned subset statuses calculated by observed pods
for version, podMap := range versionedPodMap {
status.VersionedSubsetStatuses[version], _ = r.calculateWorkloadSpreadSubsetStatuses(ws, ws.Status.VersionedSubsetStatuses[version], podMap, workloadReplicas)
}
// Consider this case:
// A Pod has been created and processed by webhook, but the Pod is not cached by controller.
// We have to keep the subsetStatus for this version even though there is no Pod belonging to it.
for version := range ws.Status.VersionedSubsetStatuses {
if _, exist := versionedPodMap[version]; exist {
continue
}
versionSubsetStatuses, _ := r.calculateWorkloadSpreadSubsetStatuses(ws, ws.Status.VersionedSubsetStatuses[version], nil, workloadReplicas)
if !isEmptySubsetStatuses(versionSubsetStatuses) {
status.VersionedSubsetStatuses[version] = versionSubsetStatuses
}
}
return &status, scheduleFailedPodMap
}
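// isEmptySubsetStatuses reports whether the given subset statuses record no replicas,
// no creating pods and no deleting pods.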
func isEmptySubsetStatuses(statuses []appsv1alpha1.WorkloadSpreadSubsetStatus) bool {
replicas, creating, deleting := 0, 0, 0
for _, subset := range statuses {
replicas += int(subset.Replicas)
creating += len(subset.CreatingPods)
deleting += len(subset.DeletingPods)
}
return replicas+creating+deleting == 0
}
func (r *ReconcileWorkloadSpread) calculateWorkloadSpreadSubsetStatuses(ws *appsv1alpha1.WorkloadSpread,
oldSubsetStatuses []appsv1alpha1.WorkloadSpreadSubsetStatus, podMap map[string][]*corev1.Pod, workloadReplicas int32,
) ([]appsv1alpha1.WorkloadSpreadSubsetStatus, map[string][]*corev1.Pod) {
subsetStatuses := make([]appsv1alpha1.WorkloadSpreadSubsetStatus, len(ws.Spec.Subsets))
scheduleFailedPodMap := make(map[string][]*corev1.Pod)
// Use a map keyed by subset name to store the old status of each subset, because the user could reorder the
// spec's subsets to change their priority. We support that operation by using the subset name to match each
// subset with its old status.
oldSubsetStatusMap := make(map[string]*appsv1alpha1.WorkloadSpreadSubsetStatus, len(oldSubsetStatuses))
for i := range oldSubsetStatuses {
oldSubsetStatusMap[oldSubsetStatuses[i].Name] = &oldSubsetStatuses[i]
}
var rescheduleCriticalSeconds int32
if ws.Spec.ScheduleStrategy.Type == appsv1alpha1.AdaptiveWorkloadSpreadScheduleStrategyType &&
ws.Spec.ScheduleStrategy.Adaptive != nil &&
ws.Spec.ScheduleStrategy.Adaptive.RescheduleCriticalSeconds != nil {
rescheduleCriticalSeconds = *ws.Spec.ScheduleStrategy.Adaptive.RescheduleCriticalSeconds
}
for i := 0; i < len(ws.Spec.Subsets); i++ {
subset := &ws.Spec.Subsets[i]
// calculate subset status
subsetStatus := r.calculateWorkloadSpreadSubsetStatus(ws, podMap[subset.Name], subset,
oldSubsetStatusMap[subset.Name], workloadReplicas)
if subsetStatus == nil {
return nil, nil
}
// don't reschedule the last subset.
if rescheduleCriticalSeconds > 0 {
if i != len(ws.Spec.Subsets)-1 {
pods := r.rescheduleSubset(ws, podMap[subset.Name], subsetStatus, oldSubsetStatusMap[subset.Name])
scheduleFailedPodMap[subset.Name] = pods
} else {
oldCondition := GetWorkloadSpreadSubsetCondition(oldSubsetStatusMap[subset.Name], appsv1alpha1.SubsetSchedulable)
if oldCondition != nil {
setWorkloadSpreadSubsetCondition(subsetStatus, oldCondition.DeepCopy())
}
setWorkloadSpreadSubsetCondition(subsetStatus, NewWorkloadSpreadSubsetCondition(appsv1alpha1.SubsetSchedulable, corev1.ConditionTrue, "", ""))
}
} else {
removeWorkloadSpreadSubsetCondition(subsetStatus, appsv1alpha1.SubsetSchedulable)
}
subsetStatuses[i] = *subsetStatus
}
return subsetStatuses, scheduleFailedPodMap
}
// calculateWorkloadSpreadSubsetStatus returns the current subsetStatus for subset.
func (r *ReconcileWorkloadSpread) calculateWorkloadSpreadSubsetStatus(ws *appsv1alpha1.WorkloadSpread,
pods []*corev1.Pod,
subset *appsv1alpha1.WorkloadSpreadSubset,
oldSubsetStatus *appsv1alpha1.WorkloadSpreadSubsetStatus,
workloadReplicas int32) *appsv1alpha1.WorkloadSpreadSubsetStatus {
// current subsetStatus in this reconcile
subsetStatus := &appsv1alpha1.WorkloadSpreadSubsetStatus{}
subsetStatus.Name = subset.Name
subsetStatus.CreatingPods = make(map[string]metav1.Time)
subsetStatus.DeletingPods = make(map[string]metav1.Time)
var err error
var subsetMaxReplicas int
if subset.MaxReplicas == nil {
// MaxReplicas is nil, which means there is no limit for subset replicas, using -1 to represent it.
subsetMaxReplicas = -1
} else {
subsetMaxReplicas, err = intstr.GetScaledValueFromIntOrPercent(subset.MaxReplicas, int(workloadReplicas), true)
if err != nil || subsetMaxReplicas < 0 {
klog.ErrorS(err, "Failed to get maxReplicas value from subset of WorkloadSpread", "subsetName", subset.Name, "workloadSpread", klog.KObj(ws))
return nil
}
}
// initialize missingReplicas to subsetMaxReplicas
subsetStatus.MissingReplicas = int32(subsetMaxReplicas)
currentTime := time.Now()
var oldCreatingPods map[string]metav1.Time
var oldDeletingPods map[string]metav1.Time
if oldSubsetStatus != nil {
// make a deep copy because we may need to remove some element later and compare old status with current status.
oldCreatingPods = make(map[string]metav1.Time, len(oldSubsetStatus.CreatingPods))
for k, v := range oldSubsetStatus.CreatingPods {
oldCreatingPods[k] = v
}
oldDeletingPods = oldSubsetStatus.DeletingPods
}
var active int32
for _, pod := range pods {
// remove this Pod from creatingPods map because this Pod has been created.
injectWS := getInjectWorkloadSpreadFromPod(pod)
if injectWS != nil && injectWS.UID != "" {
// A Deployment or other native k8s workload has not generated the full pod.Name when the webhook mutates the Pod.
// So the webhook generates a UID to identify the Pod and stores it in the creatingPods map. The generated
// UID serves the same purpose as pod.Name.
delete(oldCreatingPods, injectWS.UID)
} else {
delete(oldCreatingPods, pod.Name)
}
// not active
if !kubecontroller.IsPodActive(pod) {
continue
}
active++
// count missingReplicas
if subsetStatus.MissingReplicas > 0 {
subsetStatus.MissingReplicas--
}
// some Pods may be in the oldDeletingPods map, which records the Pods the webhook intends to delete.
if deleteTime, exist := oldDeletingPods[pod.Name]; exist {
expectedDeletion := deleteTime.Time.Add(DeletePodTimeout)
// deleting this Pod has timed out, so we drop it from the deletingPods map, which means the deletion failed.
if expectedDeletion.Before(currentTime) {
r.recorder.Eventf(ws, corev1.EventTypeWarning,
"DeletePodFailed", "Pod %s/%s was expected to be deleted but it wasn't", ws.Namespace, pod.Name)
} else {
// not timed out yet, there may be some latency, so keep it in the deletingPods map.
subsetStatus.DeletingPods[pod.Name] = deleteTime
// missingReplicas + 1, suppose it has been deleted
if subsetStatus.MissingReplicas < int32(subsetMaxReplicas) {
subsetStatus.MissingReplicas++
}
// requeue the key so the entry is cleaned from the map once expectedDeletion reaches currentTime.
durationStore.Push(getWorkloadSpreadKey(ws), expectedDeletion.Sub(currentTime))
}
}
}
// record active replicas number
subsetStatus.Replicas = active
// oldCreatingPods holds the remaining Pods that have not yet been observed by the controller.
for podID, createTime := range oldCreatingPods {
expectedCreation := createTime.Time.Add(CreatPodTimeout)
// creating this Pod has timed out
if expectedCreation.Before(currentTime) {
r.recorder.Eventf(ws, corev1.EventTypeWarning,
"CreatePodFailed", "Pod %s/%s was expected to be created but it wasn't", ws.Namespace, podID)
} else {
// not timed out yet, so keep it in the creatingPods map.
subsetStatus.CreatingPods[podID] = createTime
// missingReplicas - 1, suppose it has been created
if subsetStatus.MissingReplicas > 0 {
subsetStatus.MissingReplicas--
}
// requeue the key so the entry is cleaned from the map once expectedCreation reaches currentTime.
durationStore.Push(getWorkloadSpreadKey(ws), expectedCreation.Sub(currentTime))
}
}
return subsetStatus
}
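// UpdateWorkloadSpreadStatus writes the calculated status back to the WorkloadSpread
// if it differs from the current status.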
func (r *ReconcileWorkloadSpread) UpdateWorkloadSpreadStatus(ws *appsv1alpha1.WorkloadSpread,
status *appsv1alpha1.WorkloadSpreadStatus) error {
if apiequality.Semantic.DeepEqual(status, ws.Status) {
return nil
}
clone := ws.DeepCopy()
clone.Status = *status
err := r.writeWorkloadSpreadStatus(clone)
logStatusChanges(ws, status, err)
return err
}
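// logStatusChanges logs the per-subset differences between the old and new status after an update attempt.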
func logStatusChanges(ws *appsv1alpha1.WorkloadSpread, status *appsv1alpha1.WorkloadSpreadStatus, err error) {
if err != nil {
klog.ErrorS(err, "Failed to update WorkloadSpread status", "workloadSpread", klog.KObj(ws), "status", status)
return
}
oldSubsetStatuses := ws.Status.SubsetStatuses
oldSubsetStatusMap := make(map[string]*appsv1alpha1.WorkloadSpreadSubsetStatus, len(oldSubsetStatuses))
for i := range oldSubsetStatuses {
oldSubsetStatusMap[oldSubsetStatuses[i].Name] = &oldSubsetStatuses[i]
}
var log string
for i, subset := range ws.Spec.Subsets {
oldStatus, ok := oldSubsetStatusMap[subset.Name]
if !ok {
oldStatus = &appsv1alpha1.WorkloadSpreadSubsetStatus{
Name: subset.Name,
}
}
newStatus := status.SubsetStatuses[i]
log = fmt.Sprintf(" (<subset name: %s>", subset.Name)
if oldStatus.Replicas != newStatus.Replicas {
log += fmt.Sprintf(" <Replicas: %d -> %d>", oldStatus.Replicas, newStatus.Replicas)
} else {
log += fmt.Sprintf(" <Replicas: %d>", newStatus.Replicas)
}
if oldStatus.MissingReplicas != newStatus.MissingReplicas {
log += fmt.Sprintf(" <missingReplicas: %d -> %d>", oldStatus.MissingReplicas, newStatus.MissingReplicas)
} else {
log += fmt.Sprintf(" <missingReplicas: %d>", newStatus.MissingReplicas)
}
if len(oldStatus.CreatingPods) != len(newStatus.CreatingPods) {
log += fmt.Sprintf(" <creatingPods length: %d -> %d>", len(oldStatus.CreatingPods), len(newStatus.CreatingPods))
} else {
log += fmt.Sprintf(" <creatingPods length: %d>", len(newStatus.CreatingPods))
}
if len(oldStatus.DeletingPods) != len(newStatus.DeletingPods) {
log += fmt.Sprintf(" <deletingPods length: %d -> %d>", len(oldStatus.DeletingPods), len(newStatus.DeletingPods))
} else {
log += fmt.Sprintf(" <deletingPods length: %d>", len(newStatus.DeletingPods))
}
log += ")"
}
klog.V(3).InfoS("WorkloadSpread status changed", "workloadSpread", klog.KObj(ws), "details", log)
}
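// writeWorkloadSpreadStatus updates the WorkloadSpread status through the API server and,
// on success, refreshes the global cache shared with the webhook.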
func (r *ReconcileWorkloadSpread) writeWorkloadSpreadStatus(ws *appsv1alpha1.WorkloadSpread) error {
unlock := util.GlobalKeyedMutex.Lock(string(ws.GetUID()))
defer unlock()
// If this update fails, don't retry it. Allow the failure to get handled &
// retried in `processNextWorkItem()`.
err := r.Status().Update(context.TODO(), ws)
if err == nil {
if cacheErr := util.GlobalCache.Add(ws); cacheErr != nil {
klog.ErrorS(cacheErr, "Failed to update WorkloadSpread cache after update status", "workloadSpread", klog.KObj(ws))
}
}
return err
}
func getWorkloadSpreadKey(o metav1.Object) string {
return o.GetNamespace() + "/" + o.GetName()
}
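// isNotMatchedWS reports whether the pod's inject annotation is missing, refers to a different
// WorkloadSpread, or has no subset recorded.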
func isNotMatchedWS(injectWS *wsutil.InjectWorkloadSpread, ws *appsv1alpha1.WorkloadSpread) bool {
if injectWS == nil || injectWS.Name != ws.Name || injectWS.Subset == "" {
return true
}
return false
}
/*
Copyright 2021 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package workloadspread
import (
"encoding/json"
"reflect"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/strategicpatch"
schedulecorev1 "k8s.io/component-helpers/scheduling/corev1"
"k8s.io/component-helpers/scheduling/corev1/nodeaffinity"
"k8s.io/klog/v2"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
)
// NewWorkloadSpreadSubsetCondition creates a new WorkloadSpreadSubset condition.
func NewWorkloadSpreadSubsetCondition(condType appsv1alpha1.WorkloadSpreadSubsetConditionType, status corev1.ConditionStatus, reason, message string) *appsv1alpha1.WorkloadSpreadSubsetCondition {
return &appsv1alpha1.WorkloadSpreadSubsetCondition{
Type: condType,
Status: status,
LastTransitionTime: metav1.Now(),
Reason: reason,
Message: message,
}
}
// GetWorkloadSpreadSubsetCondition returns the condition with the provided type.
func GetWorkloadSpreadSubsetCondition(status *appsv1alpha1.WorkloadSpreadSubsetStatus, condType appsv1alpha1.WorkloadSpreadSubsetConditionType) *appsv1alpha1.WorkloadSpreadSubsetCondition {
if status == nil {
return nil
}
for i := range status.Conditions {
c := status.Conditions[i]
if c.Type == condType {
return &c
}
}
return nil
}
// setWorkloadSpreadSubsetCondition updates the WorkloadSpreadSubset to include the provided condition. If the condition that
// we are about to add already exists and has the same status, reason and message then we are not going to update.
func setWorkloadSpreadSubsetCondition(status *appsv1alpha1.WorkloadSpreadSubsetStatus, condition *appsv1alpha1.WorkloadSpreadSubsetCondition) {
if condition == nil {
return
}
currentCond := GetWorkloadSpreadSubsetCondition(status, condition.Type)
if currentCond != nil && currentCond.Status == condition.Status && currentCond.Reason == condition.Reason {
return
}
if currentCond != nil && currentCond.Status == condition.Status {
condition.LastTransitionTime = currentCond.LastTransitionTime
}
newConditions := filterOutCondition(status.Conditions, condition.Type)
status.Conditions = append(newConditions, *condition)
}
// removeWorkloadSpreadSubsetCondition removes the WorkloadSpreadSubset condition with the provided type.
func removeWorkloadSpreadSubsetCondition(status *appsv1alpha1.WorkloadSpreadSubsetStatus, condType appsv1alpha1.WorkloadSpreadSubsetConditionType) {
status.Conditions = filterOutCondition(status.Conditions, condType)
}
func filterOutCondition(conditions []appsv1alpha1.WorkloadSpreadSubsetCondition, condType appsv1alpha1.WorkloadSpreadSubsetConditionType) []appsv1alpha1.WorkloadSpreadSubsetCondition {
var newConditions []appsv1alpha1.WorkloadSpreadSubsetCondition
for _, c := range conditions {
if c.Type == condType {
continue
}
newConditions = append(newConditions, c)
}
return newConditions
}
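// matchesSubset reports whether the pod's node satisfies the subset's required terms and tolerations,
// and returns a preferred score used to pick the most suitable subset for the pod.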
func matchesSubset(pod *corev1.Pod, node *corev1.Node, subset *appsv1alpha1.WorkloadSpreadSubset, missingReplicas int) (bool, int64, error) {
// necessary condition
matched, err := matchesSubsetRequiredAndToleration(pod, node, subset)
if err != nil || !matched {
return false, -1, err
}
// preferredNodeScore is in [0, total_prefer_weight]
preferredNodeScore := int64(0)
if subset.PreferredNodeSelectorTerms != nil {
nodePreferredTerms, _ := nodeaffinity.NewPreferredSchedulingTerms(subset.PreferredNodeSelectorTerms)
preferredNodeScore = nodePreferredTerms.Score(node)
}
// preferredPodScore is in [0, 1]
preferredPodScore := int64(0)
if subset.Patch.Raw != nil {
preferredPodScore = podPreferredScore(subset, pod)
}
// we prefer the subset that still has room for more replicas
quotaScore := int64(0)
if missingReplicas > 0 {
quotaScore = int64(1)
}
// preferredPodScore is in [0, 1], so it cannot affect preferredNodeScore in the following expression
preferredScore := preferredNodeScore*100 + preferredPodScore*10 + quotaScore
return matched, preferredScore, nil
}
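// podPreferredScore returns 1 if applying the subset's patch to the pod (ignoring annotations)
// would not change it, i.e. the pod already carries the subset's patch, and 0 otherwise.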
func podPreferredScore(subset *appsv1alpha1.WorkloadSpreadSubset, pod *corev1.Pod) int64 {
podBytes, _ := json.Marshal(pod)
modified, err := strategicpatch.StrategicMergePatch(podBytes, subset.Patch.Raw, &corev1.Pod{})
if err != nil {
klog.ErrorS(err, "Failed to merge patch raw for pod and subset", "pod", klog.KObj(pod), "subsetName", subset.Name)
return 0
}
patchedPod := &corev1.Pod{}
err = json.Unmarshal(modified, patchedPod)
if err != nil {
klog.ErrorS(err, "Failed to unmarshal for pod and subset", "pod", klog.KObj(pod), "subsetName", subset.Name)
return 0
}
// TODO: consider json annotation just like `{"json_key": ["value1", "value2"]}`.
// Currently, we exclude the annotations field because annotations may contain some fields we cannot handle.
// For example, we cannot tell whether the following two annotations are equal via the DeepEqual method:
// example.com/list: '["a", "b", "c"]'
// example.com/list: '["b", "a", "c"]'
patchedPod.Annotations = pod.Annotations
if reflect.DeepEqual(pod, patchedPod) {
return 1
}
return 0
}
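// matchesSubsetRequiredAndToleration reports whether the node's taints are tolerated and whether
// the node matches the subset's requiredNodeSelectorTerm combined with the pod's required node affinity terms.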
func matchesSubsetRequiredAndToleration(pod *corev1.Pod, node *corev1.Node, subset *appsv1alpha1.WorkloadSpreadSubset) (bool, error) {
// check toleration
tolerations := append(pod.Spec.Tolerations, subset.Tolerations...)
if _, hasUntoleratedTaint := schedulecorev1.FindMatchingUntoleratedTaint(node.Spec.Taints, tolerations, nil); hasUntoleratedTaint {
return false, nil
}
if subset.RequiredNodeSelectorTerm == nil {
return true, nil
}
// check nodeSelectorTerm
var nodeSelectorTerms []corev1.NodeSelectorTerm
if pod.Spec.Affinity != nil {
if pod.Spec.Affinity.NodeAffinity != nil {
if pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil {
nodeSelectorTerms = append(nodeSelectorTerms, pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms...)
}
}
}
if subset.RequiredNodeSelectorTerm != nil {
if len(nodeSelectorTerms) == 0 {
nodeSelectorTerms = []corev1.NodeSelectorTerm{
*subset.RequiredNodeSelectorTerm,
}
} else {
for i := range nodeSelectorTerms {
selectorTerm := &nodeSelectorTerms[i]
selectorTerm.MatchExpressions = append(selectorTerm.MatchExpressions, subset.RequiredNodeSelectorTerm.MatchExpressions...)
selectorTerm.MatchFields = append(selectorTerm.MatchFields, subset.RequiredNodeSelectorTerm.MatchFields...)
}
}
}
return schedulecorev1.MatchNodeSelectorTerms(node, &corev1.NodeSelector{
NodeSelectorTerms: nodeSelectorTerms,
})
}
/*
Copyright 2021 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package workloadspread
import (
"context"
"encoding/json"
"reflect"
appsv1 "k8s.io/api/apps/v1"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/util/workqueue"
"k8s.io/klog/v2"
kubecontroller "k8s.io/kubernetes/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
appsv1beta1 "github.com/openkruise/kruise/apis/apps/v1beta1"
wsutil "github.com/openkruise/kruise/pkg/util/workloadspread"
)
type EventAction string
const (
CreateEventAction EventAction = "Create"
UpdateEventAction EventAction = "Update"
DeleteEventAction EventAction = "Delete"
DeploymentRevisionAnnotation = "deployment.kubernetes.io/revision"
)
var _ handler.TypedEventHandler[*corev1.Pod] = &podEventHandler{}
type podEventHandler struct{}
func (p *podEventHandler) Create(ctx context.Context, evt event.TypedCreateEvent[*corev1.Pod], q workqueue.RateLimitingInterface) {
p.handlePod(q, evt.Object, CreateEventAction)
}
func (p *podEventHandler) Update(ctx context.Context, evt event.TypedUpdateEvent[*corev1.Pod], q workqueue.RateLimitingInterface) {
oldPod := evt.ObjectOld
newPod := evt.ObjectNew
if kubecontroller.IsPodActive(oldPod) && !kubecontroller.IsPodActive(newPod) || wsutil.GetPodVersion(oldPod) != wsutil.GetPodVersion(newPod) {
p.handlePod(q, newPod, UpdateEventAction)
}
}
func (p *podEventHandler) Delete(ctx context.Context, evt event.TypedDeleteEvent[*corev1.Pod], q workqueue.RateLimitingInterface) {
p.handlePod(q, evt.Object, DeleteEventAction)
}
func (p *podEventHandler) Generic(ctx context.Context, evt event.TypedGenericEvent[*corev1.Pod], q workqueue.RateLimitingInterface) {
}
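// handlePod enqueues the WorkloadSpread referenced by the pod's matched-workloadspread annotation, if present.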
func (p *podEventHandler) handlePod(q workqueue.RateLimitingInterface, obj runtime.Object, action EventAction) {
pod := obj.(*corev1.Pod)
if value, exist := pod.GetAnnotations()[wsutil.MatchedWorkloadSpreadSubsetAnnotations]; exist {
injectWorkloadSpread := &wsutil.InjectWorkloadSpread{}
if err := json.Unmarshal([]byte(value), injectWorkloadSpread); err != nil {
klog.ErrorS(err, "Failed to unmarshal JSON to WorkloadSpread", "JSON", value)
return
}
nsn := types.NamespacedName{Namespace: pod.GetNamespace(), Name: injectWorkloadSpread.Name}
klog.V(5).InfoS("Handle Pod and reconcile WorkloadSpread",
"action", action, "pod", klog.KObj(pod), "workloadSpread", nsn)
q.Add(reconcile.Request{NamespacedName: nsn})
}
}
var _ handler.EventHandler = &workloadEventHandler{}
type workloadEventHandler struct {
client.Reader
}
func (w workloadEventHandler) Create(ctx context.Context, evt event.CreateEvent, q workqueue.RateLimitingInterface) {
w.handleWorkload(q, evt.Object, CreateEventAction)
}
func (w workloadEventHandler) Update(ctx context.Context, evt event.UpdateEvent, q workqueue.RateLimitingInterface) {
var gvk schema.GroupVersionKind
var oldReplicas int32
var newReplicas int32
var otherChanges bool
switch evt.ObjectNew.(type) {
case *appsv1alpha1.CloneSet:
oldObject := evt.ObjectOld.(*appsv1alpha1.CloneSet)
newObject := evt.ObjectNew.(*appsv1alpha1.CloneSet)
oldReplicas = *oldObject.Spec.Replicas
newReplicas = *newObject.Spec.Replicas
otherChanges = newObject.Status.UpdateRevision != oldObject.Status.CurrentRevision
gvk = controllerKruiseKindCS
case *appsv1.Deployment:
oldObject := evt.ObjectOld.(*appsv1.Deployment)
newObject := evt.ObjectNew.(*appsv1.Deployment)
oldReplicas = *oldObject.Spec.Replicas
newReplicas = *newObject.Spec.Replicas
otherChanges = newObject.Annotations[DeploymentRevisionAnnotation] != oldObject.Annotations[DeploymentRevisionAnnotation]
gvk = controllerKindDep
case *appsv1.ReplicaSet:
oldReplicas = *evt.ObjectOld.(*appsv1.ReplicaSet).Spec.Replicas
newReplicas = *evt.ObjectNew.(*appsv1.ReplicaSet).Spec.Replicas
gvk = controllerKindRS
case *batchv1.Job:
oldReplicas = *evt.ObjectOld.(*batchv1.Job).Spec.Parallelism
newReplicas = *evt.ObjectNew.(*batchv1.Job).Spec.Parallelism
gvk = controllerKindJob
case *appsv1.StatefulSet:
oldReplicas = *evt.ObjectOld.(*appsv1.StatefulSet).Spec.Replicas
newReplicas = *evt.ObjectNew.(*appsv1.StatefulSet).Spec.Replicas
gvk = controllerKindSts
case *appsv1beta1.StatefulSet:
oldReplicas = *evt.ObjectOld.(*appsv1beta1.StatefulSet).Spec.Replicas
newReplicas = *evt.ObjectNew.(*appsv1beta1.StatefulSet).Spec.Replicas
gvk = controllerKruiseKindSts
case *unstructured.Unstructured:
oldReplicas = wsutil.GetReplicasFromCustomWorkload(w.Reader, evt.ObjectOld.(*unstructured.Unstructured))
newReplicas = wsutil.GetReplicasFromCustomWorkload(w.Reader, evt.ObjectNew.(*unstructured.Unstructured))
gvk = evt.ObjectNew.(*unstructured.Unstructured).GroupVersionKind()
default:
return
}
// workload replicas changed, and reconcile corresponding WorkloadSpread
if oldReplicas != newReplicas || otherChanges {
workloadNsn := types.NamespacedName{
Namespace: evt.ObjectNew.GetNamespace(),
Name: evt.ObjectNew.GetName(),
}
owner := metav1.GetControllerOfNoCopy(evt.ObjectNew)
ws, err := w.getWorkloadSpreadForWorkload(workloadNsn, gvk, owner)
if err != nil {
klog.ErrorS(err, "Unable to get WorkloadSpread related with resource kind",
"kind", gvk.Kind, "workload", workloadNsn)
return
}
if ws != nil {
klog.V(3).InfoS("Workload changed replicas managed by WorkloadSpread",
"kind", gvk.Kind, "workload", workloadNsn, "oldReplicas", oldReplicas, "newReplicas", newReplicas, "workloadSpread", klog.KObj(ws))
nsn := types.NamespacedName{Namespace: ws.GetNamespace(), Name: ws.GetName()}
q.Add(reconcile.Request{NamespacedName: nsn})
}
}
}
func (w workloadEventHandler) Delete(ctx context.Context, evt event.DeleteEvent, q workqueue.RateLimitingInterface) {
w.handleWorkload(q, evt.Object, DeleteEventAction)
}
func (w workloadEventHandler) Generic(ctx context.Context, evt event.GenericEvent, q workqueue.RateLimitingInterface) {
}
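// handleWorkload enqueues the WorkloadSpread (if any) that targets the given workload.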
func (w *workloadEventHandler) handleWorkload(q workqueue.RateLimitingInterface,
obj client.Object, action EventAction) {
var gvk schema.GroupVersionKind
switch obj.(type) {
case *appsv1alpha1.CloneSet:
gvk = controllerKruiseKindCS
case *appsv1.Deployment:
gvk = controllerKindDep
case *appsv1.ReplicaSet:
gvk = controllerKindRS
case *batchv1.Job:
gvk = controllerKindJob
case *appsv1.StatefulSet:
gvk = controllerKindSts
case *appsv1beta1.StatefulSet:
gvk = controllerKruiseKindSts
default:
return
}
workloadNsn := types.NamespacedName{
Namespace: obj.GetNamespace(),
Name: obj.GetName(),
}
owner := metav1.GetControllerOfNoCopy(obj)
ws, err := w.getWorkloadSpreadForWorkload(workloadNsn, gvk, owner)
if err != nil {
klog.ErrorS(err, "Unable to get WorkloadSpread related with workload",
"kind", gvk.Kind, "workload", workloadNsn)
return
}
if ws != nil {
klog.V(5).InfoS("Handle workload and reconcile WorkloadSpread",
"action", action, "kind", gvk.Kind, "workload", workloadNsn, "workloadSpread", klog.KObj(ws))
nsn := types.NamespacedName{Namespace: ws.GetNamespace(), Name: ws.GetName()}
q.Add(reconcile.Request{NamespacedName: nsn})
}
}
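// getWorkloadSpreadForWorkload lists the WorkloadSpreads in the workload's namespace and returns the one
// whose targetReference matches the workload, or its owning Deployment when the workload is a ReplicaSet.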
func (w *workloadEventHandler) getWorkloadSpreadForWorkload(
workloadNamespaceName types.NamespacedName,
gvk schema.GroupVersionKind, ownerRef *metav1.OwnerReference) (*appsv1alpha1.WorkloadSpread, error) {
wsList := &appsv1alpha1.WorkloadSpreadList{}
listOptions := &client.ListOptions{Namespace: workloadNamespaceName.Namespace}
if err := w.List(context.TODO(), wsList, listOptions); err != nil {
klog.ErrorS(err, "Failed to list WorkloadSpread", "namespace", workloadNamespaceName.Namespace)
return nil, err
}
// In the case of a ReplicaSet owned by a Deployment, we should consider whether the
// Deployment is referenced by the workloadSpread.
var ownerKey *types.NamespacedName
var ownerGvk schema.GroupVersionKind
if ownerRef != nil && reflect.DeepEqual(gvk, controllerKindRS) {
ownerGvk = schema.FromAPIVersionAndKind(ownerRef.APIVersion, ownerRef.Kind)
if reflect.DeepEqual(ownerGvk, controllerKindDep) {
ownerKey = &types.NamespacedName{Namespace: workloadNamespaceName.Namespace, Name: ownerRef.Name}
}
}
for _, ws := range wsList.Items {
if ws.DeletionTimestamp != nil {
continue
}
targetRef := ws.Spec.TargetReference
if targetRef == nil {
continue
}
// Ignore version
targetGk := schema.FromAPIVersionAndKind(targetRef.APIVersion, targetRef.Kind).GroupKind()
if reflect.DeepEqual(targetGk, gvk.GroupKind()) && targetRef.Name == workloadNamespaceName.Name {
return &ws, nil
}
if ownerKey != nil && reflect.DeepEqual(targetGk, ownerGvk.GroupKind()) && targetRef.Name == ownerKey.Name {
return &ws, nil
}
}
return nil, nil
}
/*
Copyright 2024 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package workloadspread
import (
"errors"
"fmt"
"strconv"
"strings"
"k8s.io/apimachinery/pkg/labels"
intstrutil "k8s.io/apimachinery/pkg/util/intstr"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
"github.com/openkruise/kruise/pkg/util"
)
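// hasPercentSubset reports whether any subset of the WorkloadSpread defines maxReplicas as a percentage.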
func hasPercentSubset(ws *appsv1alpha1.WorkloadSpread) (has bool) {
if ws == nil {
return false
}
for _, subset := range ws.Spec.Subsets {
if subset.MaxReplicas != nil && subset.MaxReplicas.Type == intstrutil.String &&
strings.HasSuffix(subset.MaxReplicas.StrVal, "%") {
return true
}
}
return false
}
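// NestedField descends into obj along the given paths (map keys or slice indices) and returns the value
// converted to T, e.g. NestedField[int64](workloadMap, "spec", "replicas") reads workloadMap["spec"]["replicas"].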
func NestedField[T any](obj any, paths ...string) (T, bool, error) {
if len(paths) == 0 {
val, ok := obj.(T)
if !ok {
return *new(T), false, errors.New("object type error")
}
return val, true, nil
}
if o, ok := obj.(map[string]any); ok {
return nestedMap[T](o, paths...)
}
if o, ok := obj.([]any); ok {
return nestedSlice[T](o, paths...)
}
return *new(T), false, errors.New("object is not deep enough")
}
func nestedSlice[T any](obj []any, paths ...string) (T, bool, error) {
idx, err := strconv.Atoi(paths[0])
if err != nil {
return *new(T), false, err
}
if idx < 0 || len(obj) <= idx {
return *new(T), false, fmt.Errorf("index %d out of range", idx)
}
return NestedField[T](obj[idx], paths[1:]...)
}
func nestedMap[T any](obj map[string]any, paths ...string) (T, bool, error) {
if val, ok := obj[paths[0]]; ok {
return NestedField[T](val, paths[1:]...)
} else {
return *new(T), false, fmt.Errorf("path \"%s\" not exists", paths[0])
}
}
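// IsPodSelected reports whether the pod labels match the filter's label selector; a nil filter selects all pods.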
func IsPodSelected(filter *appsv1alpha1.TargetFilter, podLabels map[string]string) (bool, error) {
if filter == nil {
return true, nil
}
selector, err := util.ValidatedLabelSelectorAsSelector(filter.Selector)
if err != nil {
return false, err
}
return selector.Matches(labels.Set(podLabels)), nil
}
/*
Copyright 2021 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package workloadspread
import (
"context"
"encoding/json"
"flag"
"fmt"
"math"
"regexp"
"strconv"
"strings"
"time"
appsv1 "k8s.io/api/apps/v1"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
intstrutil "k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/strategicpatch"
"k8s.io/apimachinery/pkg/util/uuid"
"k8s.io/client-go/util/retry"
"k8s.io/klog/v2"
kubecontroller "k8s.io/kubernetes/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/client"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
appsv1beta1 "github.com/openkruise/kruise/apis/apps/v1beta1"
kubeClient "github.com/openkruise/kruise/pkg/client"
"github.com/openkruise/kruise/pkg/controller/cloneset/utils"
"github.com/openkruise/kruise/pkg/util"
utilclient "github.com/openkruise/kruise/pkg/util/client"
"github.com/openkruise/kruise/pkg/util/configuration"
)
const (
// MatchedWorkloadSpreadSubsetAnnotations is the Pod annotation key recording the matched WorkloadSpread and subset
MatchedWorkloadSpreadSubsetAnnotations = "apps.kruise.io/matched-workloadspread"
PodDeletionCostAnnotation = "controller.kubernetes.io/pod-deletion-cost"
PodDeletionCostPositive = 100
PodDeletionCostNegative = -100
// VersionIgnored means all Pods should be regarded as a single universal version,
// i.e., ignore the Pod/Workload version.
VersionIgnored = "versionIgnored"
)
var (
controllerKruiseKindCS = appsv1alpha1.SchemeGroupVersion.WithKind("CloneSet")
controllerKruiseKindAlphaSts = appsv1alpha1.SchemeGroupVersion.WithKind("StatefulSet")
controllerKruiseKindBetaSts = appsv1beta1.SchemeGroupVersion.WithKind("StatefulSet")
controllerKindJob = batchv1.SchemeGroupVersion.WithKind("Job")
controllerKindRS = appsv1.SchemeGroupVersion.WithKind("ReplicaSet")
controllerKindDep = appsv1.SchemeGroupVersion.WithKind("Deployment")
controllerKindSts = appsv1.SchemeGroupVersion.WithKind("StatefulSet")
enabledWorkloadStrForVersionedStatus = "deployment,replicaset"
EnabledWorkloadSetForVersionedStatus sets.String
GenerateNotFoundError = func(object client.Object, msg string) error {
objectGroupKind := object.GetObjectKind().GroupVersionKind().GroupKind()
return errors.NewNotFound(schema.GroupResource{Group: objectGroupKind.Group, Resource: objectGroupKind.Kind}, msg)
}
)
func init() {
flag.StringVar(&enabledWorkloadStrForVersionedStatus, "ws-enabled-versioned-status", enabledWorkloadStrForVersionedStatus, "Enabled workload that uses versioned subset status of WorkloadSpread.")
enabledWorkloadStrForVersionedStatus = strings.ToLower(enabledWorkloadStrForVersionedStatus)
EnabledWorkloadSetForVersionedStatus = sets.NewString(strings.Split(enabledWorkloadStrForVersionedStatus, ",")...)
if EnabledWorkloadSetForVersionedStatus.Has("deployment") {
EnabledWorkloadSetForVersionedStatus.Insert("replicaset")
}
}
type Operation string
const (
CreateOperation Operation = "Create"
DeleteOperation Operation = "Delete"
EvictionOperation Operation = "Eviction"
)
type workload struct {
Kind string
Groups []string
}
var (
workloads = []workload{
{Kind: controllerKindDep.Kind, Groups: []string{controllerKindDep.Group}},
{Kind: controllerKruiseKindCS.Kind, Groups: []string{controllerKruiseKindCS.Group}},
{Kind: controllerKindRS.Kind, Groups: []string{controllerKindRS.Group}},
{Kind: controllerKindJob.Kind, Groups: []string{controllerKindJob.Group}},
{Kind: controllerKindSts.Kind, Groups: []string{controllerKindSts.Group, controllerKruiseKindAlphaSts.Group, controllerKruiseKindBetaSts.Group}},
}
workloadsInWhiteListInitialized = false
)
type Handler struct {
client.Client
}
func NewWorkloadSpreadHandler(c client.Client) *Handler {
return &Handler{Client: c}
}
type InjectWorkloadSpread struct {
// matched WorkloadSpread.Name
Name string `json:"name"`
// Subset.Name
Subset string `json:"subset"`
// generated id used to identify the Pod when its name is empty.
UID string `json:"uid,omitempty"`
}
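// VerifyGroupKind reports whether the given TargetReference or OwnerReference matches the expected kind
// and one of the expected API groups.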
func VerifyGroupKind(ref interface{}, expectedKind string, expectedGroups []string) (bool, error) {
var gv schema.GroupVersion
var kind string
var err error
switch ref.(type) {
case *appsv1alpha1.TargetReference:
gv, err = schema.ParseGroupVersion(ref.(*appsv1alpha1.TargetReference).APIVersion)
if err != nil {
klog.ErrorS(err, "failed to parse GroupVersion for apiVersion", "apiVersion", ref.(*appsv1alpha1.TargetReference).APIVersion)
return false, err
}
kind = ref.(*appsv1alpha1.TargetReference).Kind
case *metav1.OwnerReference:
gv, err = schema.ParseGroupVersion(ref.(*metav1.OwnerReference).APIVersion)
if err != nil {
klog.ErrorS(err, "failed to parse GroupVersion for apiVersion", "apiVersion", ref.(*metav1.OwnerReference).APIVersion)
return false, err
}
kind = ref.(*metav1.OwnerReference).Kind
default:
return false, nil
}
if kind != expectedKind {
return false, nil
}
for _, group := range expectedGroups {
if group == gv.Group {
return true, nil
}
}
return false, nil
}
// matchReference returns true if the Pod's ownerReference matches one of the supported workloads.
func matchReference(ref *metav1.OwnerReference) (bool, error) {
if ref == nil {
return false, nil
}
for _, wl := range workloads {
matched, err := VerifyGroupKind(ref, wl.Kind, wl.Groups)
if err != nil {
return false, err
}
if matched {
return true, nil
}
}
return false, nil
}
// TODO consider pod/status update operation
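// HandlePodCreation finds the WorkloadSpread whose targetReference matches the pod's owner and mutates
// the pod accordingly; it returns skip=true when no WorkloadSpread applies to the pod.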
func (h *Handler) HandlePodCreation(pod *corev1.Pod) (skip bool, err error) {
start := time.Now()
// filter out pods, including the following:
// 1. Pods being deleted
// 2. Pod.Status.Phase = Succeeded or Failed
// 3. Pod.OwnerReference is nil
// 4. Pod.OwnerReference is not one of the supported workloads, such as CloneSet, Deployment, ReplicaSet.
if !kubecontroller.IsPodActive(pod) {
return true, nil
}
ref := metav1.GetControllerOf(pod)
if ref == nil {
return true, nil
}
initializeWorkloadsInWhiteList(h.Client)
matched, err := matchReference(ref)
if err != nil || !matched {
return true, nil
}
var matchedWS *appsv1alpha1.WorkloadSpread
workloadSpreadList := &appsv1alpha1.WorkloadSpreadList{}
if err = h.Client.List(context.TODO(), workloadSpreadList, &client.ListOptions{Namespace: pod.Namespace}); err != nil {
return false, err
}
for _, ws := range workloadSpreadList.Items {
if ws.Spec.TargetReference == nil || !ws.DeletionTimestamp.IsZero() {
continue
}
// determine if the reference of workloadSpread and pod is equal
referenceEqual, err := h.isReferenceEqual(ws.Spec.TargetReference, ref, pod.GetNamespace())
if err != nil {
klog.ErrorS(err, "failed to determine whether workloadspread refers pod's owner",
"pod", klog.KObj(pod), "workloadspread", klog.KObj(&ws))
if errors.IsNotFound(err) {
return true, err
}
continue
}
selected, err := IsPodSelected(ws.Spec.TargetFilter, pod.GetLabels())
if err != nil {
klog.ErrorS(err, "failed to determine whether workloadspread selects pod",
"pod", klog.KObj(pod), "workloadspread", klog.KObj(&ws))
continue
}
if referenceEqual && selected {
matchedWS = &ws
// pod has at most one matched workloadSpread
break
}
}
// no matched workloadSpread found
if matchedWS == nil {
return true, nil
}
defer func() {
klog.V(3).InfoS("Cost of handling pod creation by WorkloadSpread",
"namespace", matchedWS.Namespace, "name", matchedWS.Name, "cost", time.Since(start))
}()
return false, h.mutatingPod(matchedWS, pod, nil, CreateOperation)
}
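// HandlePodDeletion records a pending deletion or eviction of the pod into the status of its matched WorkloadSpread.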
func (h *Handler) HandlePodDeletion(pod *corev1.Pod, operation Operation) error {
start := time.Now()
var injectWS *InjectWorkloadSpread
str, ok := pod.Annotations[MatchedWorkloadSpreadSubsetAnnotations]
if !ok || str == "" {
return nil
}
err := json.Unmarshal([]byte(str), &injectWS)
if err != nil {
klog.ErrorS(err, "parse Pod annotations failed", "namespace", pod.Namespace, "name", pod.Name,
"key", MatchedWorkloadSpreadSubsetAnnotations, "value", str)
return nil
}
// filter out pods, including the following:
// 1. DeletionTimestamp is not nil
// 2. Pod.Status.Phase = Succeeded or Failed
// 3. Pod.OwnerReference is nil
if injectWS == nil || !kubecontroller.IsPodActive(pod) || metav1.GetControllerOf(pod) == nil {
return nil
}
matchedWS := &appsv1alpha1.WorkloadSpread{}
err = h.Client.Get(context.TODO(), client.ObjectKey{Namespace: pod.Namespace, Name: injectWS.Name}, matchedWS)
if err != nil {
if errors.IsNotFound(err) {
klog.InfoS("Pod matched WorkloadSpread Not Found", "namespace", pod.Namespace, "name", pod.Name, "workloadSpread", injectWS.Name)
return nil
}
klog.ErrorS(err, "get pod matched workloadSpread failed", "namespace", pod.Namespace, "name", pod.Name, "workloadSpread", injectWS.Name)
return err
}
defer func() {
klog.V(3).InfoS("Cost of handling pod deletion by WorkloadSpread",
"namespace", matchedWS.Namespace, "name", matchedWS.Name, "cost", time.Since(start))
}()
return h.mutatingPod(matchedWS, pod, injectWS, operation)
}
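// mutatingPod picks a suitable subset for the pod and, for create operations, injects the subset's
// affinity, tolerations and metadata into the pod.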
func (h *Handler) mutatingPod(matchedWS *appsv1alpha1.WorkloadSpread,
pod *corev1.Pod,
injectWS *InjectWorkloadSpread,
operation Operation) error {
podName := pod.Name
if podName == "" {
podName = pod.GetGenerateName()
}
klog.V(3).InfoS("Operation Pod matched WorkloadSpread", "operation", operation, "podNs", pod.Namespace, "podName", podName, "wsNs", matchedWS.Namespace, "wsName", matchedWS.Name)
suitableSubsetName, generatedUID, err := h.acquireSuitableSubset(matchedWS, pod, injectWS, operation)
if err != nil {
return err
}
var injectErr error
// when creating a pod, inject affinity, tolerations, and metadata into the pod object
if operation == CreateOperation && len(suitableSubsetName) > 0 {
if _, injectErr = injectWorkloadSpreadIntoPod(matchedWS, pod, suitableSubsetName, generatedUID); injectErr != nil {
klog.InfoS("failed to inject Pod subset data for WorkloadSpread",
"podNs", pod.Namespace, "podName", podName, "suitableSubsetName", suitableSubsetName, "wsNs", matchedWS.Namespace, "wsName", matchedWS.Name)
return injectErr
}
klog.V(3).InfoS("inject Pod subset data for WorkloadSpread",
"podNs", pod.Namespace, "podName", podName, "suitableSubsetName", suitableSubsetName, "wsNs", matchedWS.Namespace, "wsName", matchedWS.Name)
}
klog.V(3).InfoS("handler operation Pod generatedUID for WorkloadSpread done",
"operation", operation, "podNs", pod.Namespace, "podName", podName, "generatedUID", generatedUID, "wsNs", matchedWS.Namespace, "wsName", matchedWS.Name)
return injectErr
}
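// acquireSuitableSubset picks a subset for the pod: a single subset without maxReplicas is returned directly,
// StatefulSet pods are assigned by ordinal ranges, and other workloads reserve a slot by updating the
// WorkloadSpread status with conflict retries.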
func (h *Handler) acquireSuitableSubset(matchedWS *appsv1alpha1.WorkloadSpread,
pod *corev1.Pod,
injectWS *InjectWorkloadSpread,
operation Operation) (string, string, error) {
if len(matchedWS.Spec.Subsets) == 1 &&
matchedWS.Spec.Subsets[0].MaxReplicas == nil {
return matchedWS.Spec.Subsets[0].Name, "", nil
}
var refresh, changed bool
var wsClone *appsv1alpha1.WorkloadSpread
var suitableSubset *appsv1alpha1.WorkloadSpreadSubsetStatus
var generatedUID, suitableSubsetName string
// for debug
var conflictTimes int
var costOfGet, costOfUpdate time.Duration
switch matchedWS.Spec.TargetReference.Kind {
case controllerKindSts.Kind:
// StatefulSet has special logic about pod assignment for subsets.
// For example, suppose that we have the following subsets config:
// - name: subset-a
// maxReplicas: 5
// - name: subset-b
// maxReplicas: 5
// - name: subset-c
// the pods with order within [0, 5) will be assigned to subset-a;
// the pods with order within [5, 10) will be assigned to subset-b;
// the pods with order within [10, inf) will be assigned to subset-c.
currentThresholdID := int64(0)
for _, subset := range matchedWS.Spec.Subsets {
cond := getSubsetCondition(matchedWS, subset.Name, appsv1alpha1.SubsetSchedulable)
if cond != nil && cond.Status == corev1.ConditionFalse {
continue
}
subsetReplicasLimit := math.MaxInt32
if subset.MaxReplicas != nil {
subsetReplicasLimit = subset.MaxReplicas.IntValue()
}
// currently, we do not support reserveOrdinals feature for advanced statefulSet
currentThresholdID += int64(subsetReplicasLimit)
_, orderID := getParentNameAndOrdinal(pod)
if int64(orderID) < currentThresholdID {
suitableSubsetName = subset.Name
break
}
}
default:
if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
unlock := util.GlobalKeyedMutex.Lock(string(matchedWS.GetUID()))
defer unlock()
var err error
startGet := time.Now()
// try our best to get the latest revision of matchedWS at low cost:
// 1. get two copies of matchedWS, one cached by this webhook process and
// one cached by the informer, compare them and keep the newer one;
// 2. if 1. failed, fetch matchedWS directly from the APIServer;
wsClone, err = h.tryToGetTheLatestMatchedWS(matchedWS, refresh)
costOfGet += time.Since(startGet)
if err != nil {
return err
} else if wsClone == nil {
return nil
}
// check whether WorkloadSpread has suitable subset for the pod
// 1. changed indicates whether workloadSpread status changed
// 2. suitableSubset is matched subset for the pod
changed, suitableSubset, generatedUID, err = h.updateSubsetForPod(wsClone, pod, injectWS, operation)
if !changed || err != nil {
return err
}
// update WorkloadSpread status
start := time.Now()
if err = h.Client.Status().Update(context.TODO(), wsClone); err != nil {
refresh = true
conflictTimes++
} else {
klog.V(3).InfoS("update WorkloadSpread success",
"namespace", wsClone.Namespace, "name", wsClone.Name, "subsetStatus", suitableSubset.Name,
"missingReplicas", suitableSubset.MissingReplicas, "creatingPods", len(suitableSubset.CreatingPods), "deletingPods", len(suitableSubset.DeletingPods))
if cacheErr := util.GlobalCache.Add(wsClone); cacheErr != nil {
klog.ErrorS(cacheErr, "Failed to update workloadSpread cache after update status", "namespace", wsClone.Namespace, "name", wsClone.Name)
}
}
costOfUpdate += time.Since(start)
return err
}); err != nil {
klog.ErrorS(err, "update WorkloadSpread error", "namespace", matchedWS.Namespace, "name", matchedWS.Name)
return "", "", err
}
}
if suitableSubset != nil {
suitableSubsetName = suitableSubset.Name
}
klog.V(5).InfoS("Cost of assigning suitable subset of WorkloadSpread for pod",
"namespace", matchedWS.Namespace, "name", matchedWS.Name, "conflictTimes", conflictTimes, "costOfGet", costOfGet, "costOfUpdate", costOfUpdate)
return suitableSubsetName, generatedUID, nil
}
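// tryToGetTheLatestMatchedWS returns the freshest available copy of the WorkloadSpread, either from the
// local/informer caches or, when refresh is true, directly from the APIServer.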
func (h *Handler) tryToGetTheLatestMatchedWS(matchedWS *appsv1alpha1.WorkloadSpread, refresh bool) (
*appsv1alpha1.WorkloadSpread, error) {
var err error
var wsClone *appsv1alpha1.WorkloadSpread
if refresh {
// TODO: shall we set metav1.GetOptions{resourceVersion="0"} so that we get the cached object in apiServer memory instead of etcd?
wsClone, err = kubeClient.GetGenericClient().KruiseClient.AppsV1alpha1().
WorkloadSpreads(matchedWS.Namespace).Get(context.TODO(), matchedWS.Name, metav1.GetOptions{})
if err != nil {
if errors.IsNotFound(err) {
return nil, nil
}
klog.ErrorS(err, "error getting updated WorkloadSpread from APIServer", "namespace", matchedWS.Namespace, "name", matchedWS.Name)
return nil, err
}
} else {
item, _, cacheErr := util.GlobalCache.Get(matchedWS)
if cacheErr != nil {
klog.ErrorS(cacheErr, "Failed to get cached WorkloadSpread from GlobalCache", "namespace", matchedWS.Namespace, "name", matchedWS.Name)
}
if localCachedWS, ok := item.(*appsv1alpha1.WorkloadSpread); ok {
wsClone = localCachedWS.DeepCopy()
} else {
wsClone = matchedWS.DeepCopy()
}
// compare and use the newer version
informerCachedWS := &appsv1alpha1.WorkloadSpread{}
if err = h.Get(context.TODO(), types.NamespacedName{Namespace: matchedWS.Namespace,
Name: matchedWS.Name}, informerCachedWS); err == nil {
// TODO: shall we process the case of that the ResourceVersion exceeds MaxInt64?
var localRV, informerRV int64
_ = runtime.Convert_string_To_int64(&wsClone.ResourceVersion, &localRV, nil)
_ = runtime.Convert_string_To_int64(&informerCachedWS.ResourceVersion, &informerRV, nil)
if localRV < informerRV {
wsClone = informerCachedWS
}
}
}
return wsClone, nil
}
// return three parameters:
// 1. changed(bool) indicates whether workloadSpread.Status has changed
// 2. suitableSubset(*WorkloadSpreadSubsetStatus) indicates which workloadSpread subset this pod matches
// 3. generatedUID(string) is the UID generated by the workloadSpread to identify a Pod that does not yet have a full name.
func (h *Handler) updateSubsetForPod(ws *appsv1alpha1.WorkloadSpread,
pod *corev1.Pod, injectWS *InjectWorkloadSpread, operation Operation) (
bool, *appsv1alpha1.WorkloadSpreadSubsetStatus, string, error) {
var suitableSubset *appsv1alpha1.WorkloadSpreadSubsetStatus
var generatedUID string
// We only care about the corresponding versioned subset status.
var err error
version := GetPodVersion(pod)
subsetStatuses := ws.Status.VersionedSubsetStatuses[version]
if len(subsetStatuses) == 0 {
subsetStatuses, err = h.initializedSubsetStatuses(ws)
if err != nil {
return false, nil, "", err
}
if ws.Status.VersionedSubsetStatuses == nil {
ws.Status.VersionedSubsetStatuses = map[string][]appsv1alpha1.WorkloadSpreadSubsetStatus{}
}
ws.Status.VersionedSubsetStatuses[version] = subsetStatuses
}
switch operation {
case CreateOperation:
if pod.Name != "" {
// pod is already in CreatingPods/DeletingPods List, then return
if isRecord, subset := isPodRecordedInSubset(subsetStatuses, pod.Name); isRecord {
return false, subset, "", nil
}
}
suitableSubset = h.getSuitableSubset(subsetStatuses)
if suitableSubset == nil {
klog.InfoS("WorkloadSpread doesn't have a suitable subset for Pod when creating",
"namespace", ws.Namespace, "wsName", ws.Name, "podName", pod.GetGenerateName())
return false, nil, "", nil
}
// no need to update WorkloadSpread status if MaxReplicas == nil
if suitableSubset.MissingReplicas == -1 {
return false, suitableSubset, "", nil
}
if suitableSubset.CreatingPods == nil {
suitableSubset.CreatingPods = map[string]metav1.Time{}
}
if pod.Name != "" {
suitableSubset.CreatingPods[pod.Name] = metav1.Time{Time: time.Now()}
} else {
// pod.Name is "" means that the Pod does not have a full name, but has a generated name during the mutating phase.
// We generate a uid to identify this Pod.
generatedUID = string(uuid.NewUUID())
suitableSubset.CreatingPods[generatedUID] = metav1.Time{Time: time.Now()}
}
if suitableSubset.MissingReplicas > 0 {
suitableSubset.MissingReplicas--
}
case DeleteOperation, EvictionOperation:
// pod is already in DeletingPods/CreatingPods List, then return
if isRecord, _ := isPodRecordedInSubset(subsetStatuses, pod.Name); isRecord {
return false, nil, "", nil
}
suitableSubset = getSpecificSubset(subsetStatuses, injectWS.Subset)
if suitableSubset == nil {
klog.V(5).InfoS("Pod matched WorkloadSpread not found Subset when deleting",
"namespace", ws.Namespace, "podName", pod.Name, "wsName", ws.Name, "subset", injectWS.Subset)
return false, nil, "", nil
}
if suitableSubset.MissingReplicas == -1 {
return false, suitableSubset, "", nil
}
if suitableSubset.DeletingPods == nil {
suitableSubset.DeletingPods = map[string]metav1.Time{}
}
suitableSubset.DeletingPods[pod.Name] = metav1.Time{Time: time.Now()}
if suitableSubset.MissingReplicas >= 0 {
suitableSubset.MissingReplicas++
}
default:
return false, nil, "", nil
}
// update subset status
for i := range ws.Status.VersionedSubsetStatuses[version] {
if ws.Status.VersionedSubsetStatuses[version][i].Name == suitableSubset.Name {
ws.Status.VersionedSubsetStatuses[version][i] = *suitableSubset
break
}
}
return true, suitableSubset, generatedUID, nil
}
// return two parameters
// 1. isRecord(bool) 2. SubsetStatuses
func isPodRecordedInSubset(subsetStatuses []appsv1alpha1.WorkloadSpreadSubsetStatus, podName string) (bool, *appsv1alpha1.WorkloadSpreadSubsetStatus) {
for _, subset := range subsetStatuses {
if _, ok := subset.CreatingPods[podName]; ok {
return true, &subset
}
if _, ok := subset.DeletingPods[podName]; ok {
return true, &subset
}
}
return false, nil
}
func injectWorkloadSpreadIntoPod(ws *appsv1alpha1.WorkloadSpread, pod *corev1.Pod, subsetName string, generatedUID string) (bool, error) {
var subset *appsv1alpha1.WorkloadSpreadSubset
for _, object := range ws.Spec.Subsets {
if subsetName == object.Name {
subset = &object
break
}
}
if subset == nil {
return false, nil
}
// inject toleration
if len(subset.Tolerations) > 0 {
pod.Spec.Tolerations = append(pod.Spec.Tolerations, subset.Tolerations...)
}
if pod.Spec.Affinity == nil {
pod.Spec.Affinity = &corev1.Affinity{}
}
if pod.Spec.Affinity.NodeAffinity == nil {
pod.Spec.Affinity.NodeAffinity = &corev1.NodeAffinity{}
}
if len(subset.PreferredNodeSelectorTerms) > 0 {
pod.Spec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution = append(pod.Spec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution,
subset.PreferredNodeSelectorTerms...)
}
if subset.RequiredNodeSelectorTerm != nil {
if pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil {
pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution = &corev1.NodeSelector{}
}
if len(pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms) == 0 {
pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms = []corev1.NodeSelectorTerm{
*subset.RequiredNodeSelectorTerm,
}
} else {
for i := range pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms {
selectorTerm := &pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[i]
selectorTerm.MatchExpressions = append(selectorTerm.MatchExpressions, subset.RequiredNodeSelectorTerm.MatchExpressions...)
selectorTerm.MatchFields = append(selectorTerm.MatchFields, subset.RequiredNodeSelectorTerm.MatchFields...)
}
}
}
if subset.Patch.Raw != nil {
cloneBytes, _ := json.Marshal(pod)
modified, err := strategicpatch.StrategicMergePatch(cloneBytes, subset.Patch.Raw, &corev1.Pod{})
if err != nil {
klog.ErrorS(err, "failed to merge patch raw", "raw", subset.Patch.Raw)
return false, err
}
newPod := &corev1.Pod{}
if err = json.Unmarshal(modified, newPod); err != nil {
klog.ErrorS(err, "failed to unmarshal to Pod", "pod", modified)
return false, err
}
if newPod.Spec.PriorityClassName != pod.Spec.PriorityClassName {
// Workloadspread webhook is called after builtin admission plugin,
// which means the priority is already set before priorityClassName being patched.
// Mismatched priorityClassName and priority value will be rejected by apiserver.
// we have to clear priority to avoid the problem, and the builtin admission plugin
// will be reinvoked after kruise webhook to setting the correct priority value.
newPod.Spec.Priority = nil
}
*pod = *newPod
}
if pod.Labels == nil {
pod.Labels = map[string]string{}
}
if pod.Annotations == nil {
pod.Annotations = map[string]string{}
}
injectWS := &InjectWorkloadSpread{
Name: ws.Name,
Subset: subsetName,
UID: generatedUID,
}
by, _ := json.Marshal(injectWS)
pod.Annotations[MatchedWorkloadSpreadSubsetAnnotations] = string(by)
return true, nil
}
func getSpecificSubset(subsetStatuses []appsv1alpha1.WorkloadSpreadSubsetStatus, specifySubset string) *appsv1alpha1.WorkloadSpreadSubsetStatus {
for _, subset := range subsetStatuses {
if specifySubset == subset.Name {
return &subset
}
}
return nil
}
func (h *Handler) getSuitableSubset(subsetStatuses []appsv1alpha1.WorkloadSpreadSubsetStatus) *appsv1alpha1.WorkloadSpreadSubsetStatus {
for i := range subsetStatuses {
subset := &subsetStatuses[i]
canSchedule := true
for _, condition := range subset.Conditions {
if condition.Type == appsv1alpha1.SubsetSchedulable && condition.Status == corev1.ConditionFalse {
canSchedule = false
break
}
}
if canSchedule && (subset.MissingReplicas > 0 || subset.MissingReplicas == -1) {
// TODO simulation schedule
// scheduleStrategy.Type = Adaptive
// Webhook will simulate a schedule in order to check whether Pod can run in this subset,
// which does a generic predicates by the cache of nodes and pods in kruise manager.
// There may be some errors between simulation schedule and kubernetes scheduler with small probability.
return subset
}
}
return nil
}
func (h *Handler) isReferenceEqual(target *appsv1alpha1.TargetReference, owner *metav1.OwnerReference, namespace string) (bool, error) {
if owner == nil {
return false, nil
}
targetGv, err := schema.ParseGroupVersion(target.APIVersion)
if err != nil {
klog.ErrorS(err, "parse TargetReference apiVersion failed", "apiVersion", target.APIVersion)
return false, err
}
ownerGv, err := schema.ParseGroupVersion(owner.APIVersion)
if err != nil {
klog.ErrorS(err, "parse OwnerReference apiVersion failed", "apiVersion", owner.APIVersion)
return false, err
}
if targetGv.Group == ownerGv.Group && target.Kind == owner.Kind && target.Name == owner.Name {
return true, nil
}
if match, err := matchReference(owner); err != nil || !match {
return false, err
}
ownerObject, err := h.getObjectOf(owner, namespace)
if err != nil {
return false, err
}
return h.isReferenceEqual(target, metav1.GetControllerOfNoCopy(ownerObject), namespace)
}
// statefulPodRegex is a regular expression that extracts the parent StatefulSet and ordinal from the Name of a Pod
var statefulPodRegex = regexp.MustCompile("(.*)-([0-9]+)$")
// getParentNameAndOrdinal gets the name of pod's parent StatefulSet and pod's ordinal as extracted from its Name. If
// the Pod was not created by a StatefulSet, its parent is considered to be empty string, and its ordinal is considered
// to be -1.
func getParentNameAndOrdinal(pod *corev1.Pod) (string, int) {
parent := ""
ordinal := -1
subMatches := statefulPodRegex.FindStringSubmatch(pod.Name)
if len(subMatches) < 3 {
return parent, ordinal
}
parent = subMatches[1]
if i, err := strconv.ParseInt(subMatches[2], 10, 32); err == nil {
ordinal = int(i)
}
return parent, ordinal
}
func getSubsetCondition(ws *appsv1alpha1.WorkloadSpread, subsetName string, condType appsv1alpha1.WorkloadSpreadSubsetConditionType) *appsv1alpha1.WorkloadSpreadSubsetCondition {
for i := range ws.Status.SubsetStatuses {
subset := &ws.Status.SubsetStatuses[i]
if subset.Name != subsetName {
continue
}
for _, condition := range subset.Conditions {
if condition.Type == condType {
return &condition
}
}
}
return nil
}
func (h *Handler) getObjectOf(owner *metav1.OwnerReference, namespace string) (client.Object, error) {
objectKey := types.NamespacedName{Namespace: namespace, Name: owner.Name}
objectGvk := schema.FromAPIVersionAndKind(owner.APIVersion, owner.Kind)
object := GenerateEmptyWorkloadObject(objectGvk, objectKey)
if err := h.Get(context.TODO(), objectKey, object); err != nil {
return nil, err
}
return object, nil
}
func initializeWorkloadsInWhiteList(c client.Client) {
if workloadsInWhiteListInitialized {
return
}
whiteList, err := configuration.GetWSWatchCustomWorkloadWhiteList(c)
if err != nil {
return
}
for _, wl := range whiteList.Workloads {
workloads = append(workloads, workload{
Groups: []string{wl.Group},
Kind: wl.Kind,
})
for _, subWl := range wl.SubResources {
workloads = append(workloads, workload{
Groups: []string{subWl.Group},
Kind: subWl.Kind,
})
}
}
klog.InfoS("initialized workload list", "workloads", workloads)
workloadsInWhiteListInitialized = true
}
func (h *Handler) initializedSubsetStatuses(ws *appsv1alpha1.WorkloadSpread) ([]appsv1alpha1.WorkloadSpreadSubsetStatus, error) {
replicas, err := h.getWorkloadReplicas(ws)
klog.V(5).InfoS("get workload replicas", "replicas", replicas, "err", err, "workloadSpread", klog.KObj(ws))
if err != nil {
return nil, err
}
var subsetStatuses []appsv1alpha1.WorkloadSpreadSubsetStatus
for i := range ws.Spec.Subsets {
subset := ws.Spec.Subsets[i]
subsetStatus := appsv1alpha1.WorkloadSpreadSubsetStatus{Name: subset.Name}
if subset.MaxReplicas == nil {
subsetStatus.MissingReplicas = -1
} else {
missingReplicas, _ := intstrutil.GetScaledValueFromIntOrPercent(subset.MaxReplicas, int(replicas), true)
subsetStatus.MissingReplicas = int32(missingReplicas)
}
subsetStatuses = append(subsetStatuses, subsetStatus)
}
return subsetStatuses, nil
}
func (h *Handler) getWorkloadReplicas(ws *appsv1alpha1.WorkloadSpread) (int32, error) {
if ws.Spec.TargetReference == nil || !hasPercentSubset(ws) {
return 0, nil
}
gvk := schema.FromAPIVersionAndKind(ws.Spec.TargetReference.APIVersion, ws.Spec.TargetReference.Kind)
key := types.NamespacedName{Namespace: ws.Namespace, Name: ws.Spec.TargetReference.Name}
object := GenerateEmptyWorkloadObject(gvk, key)
// TODO: fetch workload from API Server directly to avoid latency of informer if using Percentage settings of subset[x].maxReplicas.
err := h.Get(context.TODO(), key, object)
if err != nil {
return 0, client.IgnoreNotFound(err)
}
if ws.Spec.TargetFilter != nil && len(ws.Spec.TargetFilter.ReplicasPathList) > 0 {
return GetReplicasFromWorkloadWithTargetFilter(object, ws.Spec.TargetFilter)
}
switch o := object.(type) {
case *appsv1.Deployment:
return *o.Spec.Replicas, nil
case *appsv1.ReplicaSet:
return *o.Spec.Replicas, nil
case *appsv1.StatefulSet:
return *o.Spec.Replicas, nil
case *batchv1.Job:
return *o.Spec.Parallelism, nil
case *appsv1alpha1.CloneSet:
return *o.Spec.Replicas, nil
case *appsv1beta1.StatefulSet:
return *o.Spec.Replicas, nil
case *unstructured.Unstructured:
return GetReplicasFromCustomWorkload(h.Client, o), nil
}
return 0, fmt.Errorf("got unexpected workload type for workloadspread %s/%s", ws.Namespace, ws.Name)
}
func GenerateEmptyWorkloadObject(gvk schema.GroupVersionKind, key types.NamespacedName) (object client.Object) {
switch gvk {
case controllerKindRS:
object = &appsv1.ReplicaSet{}
case controllerKindDep:
object = &appsv1.Deployment{}
case controllerKindSts:
object = &appsv1.StatefulSet{}
case controllerKindJob:
object = &batchv1.Job{}
case controllerKruiseKindCS:
object = &appsv1alpha1.CloneSet{}
case controllerKruiseKindAlphaSts, controllerKruiseKindBetaSts:
object = &appsv1beta1.StatefulSet{}
default:
unstructuredObject := &unstructured.Unstructured{}
unstructuredObject.SetGroupVersionKind(gvk)
object = unstructuredObject
}
object.SetName(key.Name)
object.SetNamespace(key.Namespace)
return
}
func GetReplicasFromObject(object *unstructured.Unstructured, replicasPath string) (int32, error) {
if replicasPath == "" {
return 0, nil
}
var exists bool
var replicas int64
var err error
path := strings.Split(replicasPath, ".")
replicas, exists, err = NestedField[int64](object.Object, path...)
if err != nil || !exists {
return 0, err
}
return int32(replicas), nil
}
func GetReplicasFromCustomWorkload(reader client.Reader, object *unstructured.Unstructured) int32 {
if object == nil {
return 0
}
whiteList, err := configuration.GetWSWatchCustomWorkloadWhiteList(reader)
if err != nil {
klog.Error("Failed to get workloadSpread custom workload white list from kruise config map")
return 0
}
gvk := object.GroupVersionKind()
for _, wl := range whiteList.Workloads {
if wl.GroupVersionKind.GroupKind() != gvk.GroupKind() {
continue
}
replicas, err := GetReplicasFromObject(object, wl.ReplicasPath)
if err != nil {
klog.ErrorS(err, "Failed to get replicas from custom workload", "gvk", gvk, "object", klog.KObj(object), "replicasPath", wl.ReplicasPath)
}
return replicas
}
return 0
}
func GetReplicasFromWorkloadWithTargetFilter(object client.Object, targetFilter *appsv1alpha1.TargetFilter) (int32, error) {
objMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(object)
if err != nil {
return 0, err
}
obj := &unstructured.Unstructured{Object: objMap}
var replicas int32 = 0
for _, path := range targetFilter.ReplicasPathList {
r, err := GetReplicasFromObject(obj, path)
if err != nil {
return 0, err
}
replicas += r
}
return replicas, nil
}
func GetPodVersion(pod *corev1.Pod) string {
if !enableVersionedStatus(pod) {
return VersionIgnored
}
if version, exists := pod.Labels[appsv1.DefaultDeploymentUniqueLabelKey]; exists {
return version
}
if version, exists := pod.Labels[appsv1.ControllerRevisionHashLabelKey]; exists {
return utils.GetShortHash(version)
}
return VersionIgnored
}
func GetWorkloadVersion(reader client.Reader, object client.Object) (string, error) {
if !enableVersionedStatus(object) {
return VersionIgnored, nil
}
switch o := object.(type) {
case *appsv1.ReplicaSet:
return o.Labels[appsv1.DefaultDeploymentUniqueLabelKey], nil
case *appsv1alpha1.CloneSet:
if o.Generation > o.Status.ObservedGeneration {
return "", GenerateNotFoundError(o, fmt.Sprintf("%s latest version", klog.KObj(o)))
}
return utils.GetShortHash(o.Status.UpdateRevision), nil
case *appsv1.Deployment:
rsLister := &appsv1.ReplicaSetList{}
selector, _ := metav1.LabelSelectorAsSelector(o.Spec.Selector)
err := reader.List(context.TODO(), rsLister, &client.ListOptions{LabelSelector: selector, Namespace: o.Namespace}, utilclient.DisableDeepCopy)
if err != nil {
return "", err
}
for i := range rsLister.Items {
rs := &rsLister.Items[i]
owner := metav1.GetControllerOfNoCopy(rs)
if owner == nil || owner.UID != o.UID || !rs.DeletionTimestamp.IsZero() {
continue
}
if util.EqualIgnoreHash(&o.Spec.Template, &rs.Spec.Template) {
return rs.Labels[appsv1.DefaultDeploymentUniqueLabelKey], nil
}
}
return "", GenerateNotFoundError(o, fmt.Sprintf("%s latest version", klog.KObj(o)))
}
return VersionIgnored, nil
}
func enableVersionedStatus(object client.Object) bool {
objectKind := object.GetObjectKind().GroupVersionKind().Kind
if EnabledWorkloadSetForVersionedStatus.Has(strings.ToLower(objectKind)) {
return true
}
owner := metav1.GetControllerOfNoCopy(object)
if owner != nil && EnabledWorkloadSetForVersionedStatus.Has(strings.ToLower(owner.Kind)) {
return true
}
return false
}
package mutating
import (
"context"
"strconv"
admissionv1 "k8s.io/api/admission/v1"
corev1 "k8s.io/api/core/v1"
storagenames "k8s.io/apiserver/pkg/storage/names"
"k8s.io/klog/v2"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
appspub "github.com/openkruise/kruise/apis/apps/pub"
utilcontainerlaunchpriority "github.com/openkruise/kruise/pkg/util/containerlaunchpriority"
)
// start containers based on priority order
func (h *PodCreateHandler) containerLaunchPriorityInitialization(_ context.Context, req admission.Request, pod *corev1.Pod) (skip bool, err error) {
if len(req.AdmissionRequest.SubResource) > 0 ||
req.AdmissionRequest.Operation != admissionv1.Create ||
req.AdmissionRequest.Resource.Resource != "pods" {
return true, nil
}
if len(pod.Spec.Containers) == 1 {
return true, nil
}
// if ordered flag has been set, then just process ordered logic and skip check for priority
if pod.Annotations[appspub.ContainerLaunchPriorityKey] == appspub.ContainerLaunchOrdered {
priority := make([]int, len(pod.Spec.Containers))
for i := range priority {
priority[i] = 0 - i
}
h.setPodEnv(priority, pod)
klog.V(3).InfoS("Injected ordered container launch priority for Pod", "namespace", pod.Namespace, "name", pod.Name)
return false, nil
}
// check whether containers have KRUISE_CONTAINER_PRIORITY key value pairs
priority, priorityFlag, err := h.getPriority(pod)
if err != nil {
return false, err
}
if !priorityFlag {
return true, nil
}
h.setPodEnv(priority, pod)
klog.V(3).InfoS("Injected customized container launch priority for Pod", "namespace", pod.Namespace, "name", pod.Name)
return false, nil
}
// the return []int is priority for each container in the pod, ordered as container
// order list in pod spec.
// the priorityFlag indicates whether this pod needs to launch containers with priority.
// return error is there is any (e.g. priority value less than minimum possible int value)
func (h *PodCreateHandler) getPriority(pod *corev1.Pod) ([]int, bool, error) {
var priorityFlag bool
var priority = make([]int, len(pod.Spec.Containers))
for i, c := range pod.Spec.Containers {
for _, e := range c.Env {
if e.Name == appspub.ContainerLaunchPriorityEnvName {
p, err := strconv.Atoi(e.Value)
if err != nil {
return nil, false, err
}
priority[i] = p
if p != 0 {
priorityFlag = true
}
}
}
}
// if all priorities are same, than no priority is needed
if priorityFlag {
var equityFlag = true
for _, v := range priority {
if v != priority[0] {
equityFlag = false
}
}
priorityFlag = !equityFlag
}
return priority, priorityFlag, nil
}
func (h *PodCreateHandler) setPodEnv(priority []int, pod *corev1.Pod) {
// Generate name for pods that only have generateName field
if len(pod.Name) == 0 && len(pod.GenerateName) > 0 {
pod.Name = storagenames.SimpleNameGenerator.GenerateName(pod.GenerateName)
}
for i := range priority {
pod.Spec.Containers[i].Env = append(pod.Spec.Containers[i].Env, utilcontainerlaunchpriority.GeneratePriorityEnv(priority[i], pod.Name))
}
}
package mutating
import (
"context"
"encoding/json"
"fmt"
admissionv1 "k8s.io/api/admission/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/klog/v2"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
"github.com/openkruise/kruise/pkg/util"
)
type containerLivenessProbe struct {
Name string `json:"name"`
LivenessProbe v1.Probe `json:"livenessProbe"`
}
func (h *PodCreateHandler) enhancedLivenessProbeWhenPodCreate(ctx context.Context, req admission.Request, pod *v1.Pod) (skip bool, err error) {
if len(req.AdmissionRequest.SubResource) > 0 ||
req.AdmissionRequest.Operation != admissionv1.Create ||
req.AdmissionRequest.Resource.Resource != "pods" {
return true, nil
}
if !util.IsPodOwnedByKruise(pod) {
return true, nil
}
if !usingEnhancedLivenessProbe(pod) {
return true, nil
}
context, err := removeAndBackUpPodContainerLivenessProbe(pod)
if err != nil {
klog.ErrorS(err, "Remove pod container livenessProbe config and backup error", "namespace", pod.Namespace, "name", pod.Name)
return false, err
}
if context == "" {
return true, nil
}
klog.V(3).InfoS("Mutating add pod annotation", "namespace", pod.Namespace, "name", pod.Name, "key", alpha1.AnnotationNativeContainerProbeContext, "value", context)
return false, nil
}
// return two parameters:
// 1. the json string of the pod containers native livenessProbe configurations.
// 2. the error reason of the function.
func removeAndBackUpPodContainerLivenessProbe(pod *v1.Pod) (string, error) {
containersLivenessProbe := []containerLivenessProbe{}
for index := range pod.Spec.Containers {
getContainer := &pod.Spec.Containers[index]
if getContainer.LivenessProbe == nil {
continue
}
containersLivenessProbe = append(containersLivenessProbe, containerLivenessProbe{
Name: getContainer.Name,
LivenessProbe: *getContainer.LivenessProbe,
})
getContainer.LivenessProbe = nil
}
if len(containersLivenessProbe) == 0 {
return "", nil
}
containersLivenessProbeRaw, err := json.Marshal(containersLivenessProbe)
if err != nil {
klog.ErrorS(err, "Failed to json marshal liveness probe for pod",
"probe", containersLivenessProbe, "namespace", pod.Namespace, "name", pod.Name)
return "", fmt.Errorf("Failed to json marshal %v for pod: %v/%v, err: %v",
containersLivenessProbe, pod.Namespace, pod.Name, err)
}
if pod.Annotations == nil {
pod.Annotations = map[string]string{}
}
pod.Annotations[alpha1.AnnotationNativeContainerProbeContext] = string(containersLivenessProbeRaw)
return pod.Annotations[alpha1.AnnotationNativeContainerProbeContext], nil
}
// return one parameter:
// 1. the native container livenessprobe is enabled when the alpha1.AnnotationUsingEnhancedLiveness is true.
func usingEnhancedLivenessProbe(pod *v1.Pod) bool {
return pod.Annotations[alpha1.AnnotationUsingEnhancedLiveness] == "true"
}
/*
Copyright 2022 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package mutating
import (
"context"
admissionv1 "k8s.io/api/admission/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/klog/v2"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
"github.com/openkruise/kruise/pkg/util"
utilclient "github.com/openkruise/kruise/pkg/util/client"
"github.com/openkruise/kruise/pkg/util/configuration"
)
const (
InjectedPersistentPodStateKey = "kruise.io/injected-persistent-pod-state"
)
// mutate pod based on static ip
func (h *PodCreateHandler) persistentPodStateMutatingPod(ctx context.Context, req admission.Request, pod *corev1.Pod) (skip bool, err error) {
// only handler Create Pod Object Request
if len(req.AdmissionRequest.SubResource) > 0 || req.AdmissionRequest.Operation != admissionv1.Create ||
req.AdmissionRequest.Resource.Resource != "pods" {
return true, nil
}
whiteList, err := configuration.GetPPSWatchCustomWorkloadWhiteList(h.Client)
if err != nil {
return false, err
}
ref := metav1.GetControllerOf(pod)
if ref == nil || !whiteList.ValidateAPIVersionAndKind(ref.APIVersion, ref.Kind) {
return true, nil
}
// selector persistentPodState
persistentPodState := SelectorPersistentPodState(h.Client, appsv1alpha1.TargetReference{
APIVersion: ref.APIVersion,
Kind: ref.Kind,
Name: ref.Name,
}, pod.Namespace)
if persistentPodState == nil || len(persistentPodState.Status.PodStates) == 0 {
return true, nil
}
// when data is NotFound, indicates that the pod is created for the first time and the scenario does not require persistent pod state
podState, ok := persistentPodState.Status.PodStates[pod.Name]
if !ok || len(podState.NodeTopologyLabels) == 0 {
return true, nil
}
// inject PersistentPodState node affinity in pod
nodeSelector, preference := createNodeAffinity(persistentPodState.Spec, podState)
if len(nodeSelector) == 0 && len(preference) == 0 {
return true, nil
}
klog.V(3).InfoS("inject node affinity in pod for PersistentPodState",
"required", util.DumpJSON(nodeSelector), "preferred", util.DumpJSON(preference), "namespace", pod.Namespace, "name", pod.Name)
// inject persistentPodState annotation in pod
if pod.Annotations == nil {
pod.Annotations = map[string]string{}
}
pod.Annotations[InjectedPersistentPodStateKey] = persistentPodState.Name
// nodeSelector
if len(nodeSelector) != 0 {
if pod.Spec.NodeSelector == nil {
pod.Spec.NodeSelector = nodeSelector
} else {
for k, v := range nodeSelector {
pod.Spec.NodeSelector[k] = v
}
}
}
// preferences
if len(preference) > 0 {
if pod.Spec.Affinity == nil {
pod.Spec.Affinity = &corev1.Affinity{}
}
if pod.Spec.Affinity.NodeAffinity == nil {
pod.Spec.Affinity.NodeAffinity = &corev1.NodeAffinity{}
}
pod.Spec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution = append(pod.Spec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution, preference...)
}
return false, nil
}
// return two parameters:
// 1. required nodeSelector
// 2. preferred []PreferredSchedulingTerm
func createNodeAffinity(spec appsv1alpha1.PersistentPodStateSpec, podState appsv1alpha1.PodState) (map[string]string, []corev1.PreferredSchedulingTerm) {
// required
var nodeSelector map[string]string
if spec.RequiredPersistentTopology != nil {
nodeSelector = map[string]string{}
for _, key := range spec.RequiredPersistentTopology.NodeTopologyKeys {
if value, ok := podState.NodeTopologyLabels[key]; ok {
nodeSelector[key] = value
}
}
}
// preferred
var preferences []corev1.PreferredSchedulingTerm
for _, item := range spec.PreferredPersistentTopology {
preference := corev1.PreferredSchedulingTerm{
Weight: item.Weight,
}
for _, key := range item.Preference.NodeTopologyKeys {
if value, ok := podState.NodeTopologyLabels[key]; ok {
requirement := corev1.NodeSelectorRequirement{
Key: key,
Operator: corev1.NodeSelectorOpIn,
Values: []string{value},
}
preference.Preference.MatchExpressions = append(preference.Preference.MatchExpressions, requirement)
}
}
preferences = append(preferences, preference)
}
return nodeSelector, preferences
}
func SelectorPersistentPodState(reader client.Reader, ref appsv1alpha1.TargetReference, ns string) *appsv1alpha1.PersistentPodState {
ppsList := &appsv1alpha1.PersistentPodStateList{}
if err := reader.List(context.TODO(), ppsList, &client.ListOptions{Namespace: ns}, utilclient.DisableDeepCopy); err != nil {
klog.ErrorS(err, "List PersistentPodStateList failed")
return nil
}
for i := range ppsList.Items {
pps := &ppsList.Items[i]
if !pps.DeletionTimestamp.IsZero() {
continue
}
// belongs the same workload
if util.IsReferenceEqual(ref, pps.Spec.TargetReference) {
return pps
}
}
return nil
}
/*
Copyright 2019 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package mutating
import (
"context"
"encoding/json"
"net/http"
corev1 "k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
"github.com/openkruise/kruise/pkg/features"
utilfeature "github.com/openkruise/kruise/pkg/util/feature"
)
// PodCreateHandler handles Pod
type PodCreateHandler struct {
// To use the client, you need to do the following:
// - uncomment it
// - import sigs.k8s.io/controller-runtime/pkg/client
// - uncomment the InjectClient method at the bottom of this file.
Client client.Client
// Decoder decodes objects
Decoder admission.Decoder
}
var _ admission.Handler = &PodCreateHandler{}
// Handle handles admission requests.
func (h *PodCreateHandler) Handle(ctx context.Context, req admission.Request) admission.Response {
obj := &corev1.Pod{}
err := h.Decoder.Decode(req, obj)
if err != nil {
return admission.Errored(http.StatusBadRequest, err)
}
// when pod.namespace is empty, using req.namespace
if obj.Namespace == "" {
obj.Namespace = req.Namespace
}
oriObj := obj.DeepCopy()
var changed bool
if skip := injectPodReadinessGate(req, obj); !skip {
changed = true
}
if utilfeature.DefaultFeatureGate.Enabled(features.WorkloadSpread) {
if skip, err := h.workloadSpreadMutatingPod(ctx, req, obj); err != nil {
return admission.Errored(http.StatusInternalServerError, err)
} else if !skip {
changed = true
}
}
if skip, err := h.sidecarsetMutatingPod(ctx, req, obj); err != nil {
return admission.Errored(http.StatusInternalServerError, err)
} else if !skip {
changed = true
}
// "the order matters and sidecarsetMutatingPod must precede containerLaunchPriorityInitialization"
if skip, err := h.containerLaunchPriorityInitialization(ctx, req, obj); err != nil {
return admission.Errored(http.StatusInternalServerError, err)
} else if !skip {
changed = true
}
// patch related-pub annotation in pod
if utilfeature.DefaultFeatureGate.Enabled(features.PodUnavailableBudgetUpdateGate) ||
utilfeature.DefaultFeatureGate.Enabled(features.PodUnavailableBudgetDeleteGate) {
if skip, err := h.pubMutatingPod(ctx, req, obj); err != nil {
return admission.Errored(http.StatusInternalServerError, err)
} else if !skip {
changed = true
}
}
// persistent pod state
if skip, err := h.persistentPodStateMutatingPod(ctx, req, obj); err != nil {
return admission.Errored(http.StatusInternalServerError, err)
} else if !skip {
changed = true
}
// EnhancedLivenessProbe enabled
if utilfeature.DefaultFeatureGate.Enabled(features.EnhancedLivenessProbeGate) {
if skip, err := h.enhancedLivenessProbeWhenPodCreate(ctx, req, obj); err != nil {
return admission.Errored(http.StatusInternalServerError, err)
} else if !skip {
changed = true
}
}
if utilfeature.DefaultFeatureGate.Enabled(features.EnablePodProbeMarkerOnServerless) {
if skip, err := h.podProbeMarkerMutatingPod(ctx, req, obj); err != nil {
return admission.Errored(http.StatusInternalServerError, err)
} else if !skip {
changed = true
}
}
if !changed {
return admission.Allowed("")
}
marshaled, err := json.Marshal(obj)
if err != nil {
return admission.Errored(http.StatusInternalServerError, err)
}
original, err := json.Marshal(oriObj)
if err != nil {
return admission.Errored(http.StatusInternalServerError, err)
}
return admission.PatchResponseFromRaw(original, marshaled)
}
/*
Copyright 2024 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package mutating
import (
"context"
"fmt"
"strings"
admissionv1 "k8s.io/api/admission/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/klog/v2"
"k8s.io/kube-openapi/pkg/util/sets"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
"github.com/openkruise/kruise/pkg/util"
"github.com/openkruise/kruise/pkg/util/podprobemarker"
)
// mutating relate-pub annotation in pod
func (h *PodCreateHandler) podProbeMarkerMutatingPod(ctx context.Context, req admission.Request, pod *corev1.Pod) (skip bool, err error) {
if len(req.AdmissionRequest.SubResource) > 0 || req.AdmissionRequest.Operation != admissionv1.Create ||
req.AdmissionRequest.Resource.Resource != "pods" {
return true, nil
}
ppms, err := podprobemarker.GetPodProbeMarkerForPod(h.Client, pod)
if err != nil {
return false, err
} else if len(ppms) == 0 {
return true, nil
}
containers := sets.NewString()
for _, c := range pod.Spec.Containers {
containers.Insert(c.Name)
}
for _, c := range pod.Spec.InitContainers {
if util.IsRestartableInitContainer(&c) {
containers.Insert(c.Name)
}
}
matchedPodProbeMarkerName := sets.NewString()
matchedProbeKey := sets.NewString()
matchedConditions := sets.NewString()
matchedProbes := make([]appsv1alpha1.PodContainerProbe, 0)
for i := range ppms {
obj := ppms[i]
for i := range obj.Spec.Probes {
probe := obj.Spec.Probes[i]
key := fmt.Sprintf("%s/%s", probe.ContainerName, probe.Name)
if matchedConditions.Has(probe.PodConditionType) || matchedProbeKey.Has(key) || !containers.Has(probe.ContainerName) || probe.PodConditionType == "" {
continue
}
// No need to pass in marker related fields
probe.MarkerPolicy = nil
matchedProbes = append(matchedProbes, probe)
matchedProbeKey.Insert(key)
matchedConditions.Insert(probe.PodConditionType)
matchedPodProbeMarkerName.Insert(obj.Name)
}
}
if len(matchedProbes) == 0 {
return true, nil
}
if pod.Annotations == nil {
pod.Annotations = map[string]string{}
}
body := util.DumpJSON(matchedProbes)
pod.Annotations[appsv1alpha1.PodProbeMarkerAnnotationKey] = body
pod.Annotations[appsv1alpha1.PodProbeMarkerListAnnotationKey] = strings.Join(matchedPodProbeMarkerName.List(), ",")
klog.V(3).InfoS("mutating add pod annotation", "namespace", pod.Namespace, "name", pod.Name, "key", appsv1alpha1.PodProbeMarkerAnnotationKey, "value", body)
return false, nil
}
/*
Copyright 2021 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package mutating
import (
admissionv1 "k8s.io/api/admission/v1"
v1 "k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
appspub "github.com/openkruise/kruise/apis/apps/pub"
"github.com/openkruise/kruise/pkg/features"
"github.com/openkruise/kruise/pkg/util"
utilfeature "github.com/openkruise/kruise/pkg/util/feature"
)
func injectPodReadinessGate(req admission.Request, pod *v1.Pod) (skip bool) {
if req.Operation != admissionv1.Create {
return true
}
if !util.IsPodOwnedByKruise(pod) && !utilfeature.DefaultFeatureGate.Enabled(features.KruisePodReadinessGate) {
return true
}
util.InjectReadinessGateToPod(pod, appspub.KruisePodReadyConditionType)
return false
}
/*
Copyright 2022 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package mutating
import (
"context"
admissionv1 "k8s.io/api/admission/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/klog/v2"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
"github.com/openkruise/kruise/pkg/control/pubcontrol"
"github.com/openkruise/kruise/pkg/controller/podunavailablebudget"
)
// mutating relate-pub annotation in pod
func (h *PodCreateHandler) pubMutatingPod(ctx context.Context, req admission.Request, pod *corev1.Pod) (skip bool, err error) {
if len(req.AdmissionRequest.SubResource) > 0 || req.AdmissionRequest.Operation != admissionv1.Create ||
req.AdmissionRequest.Resource.Resource != "pods" {
return true, nil
}
pub, err := podunavailablebudget.GetPubForPod(h.Client, pod)
if err != nil {
klog.ErrorS(err, "Failed to get pub for pod", "pod", klog.KObj(pod))
return false, nil
} else if pub == nil {
return true, nil
}
if pod.Annotations == nil {
pod.Annotations = map[string]string{}
}
pod.Annotations[pubcontrol.PodRelatedPubAnnotation] = pub.Name
klog.V(3).InfoS("mutating add pod annotation", "namespace", pod.Namespace, "name", pod.Name, "key", pubcontrol.PodRelatedPubAnnotation, "value", pub.Name)
return false, nil
}
/*
Copyright 2020 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package mutating
import (
"context"
"encoding/json"
"fmt"
"math/rand"
"sort"
"strings"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/intstr"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
"github.com/openkruise/kruise/pkg/control/sidecarcontrol"
"github.com/openkruise/kruise/pkg/features"
"github.com/openkruise/kruise/pkg/util"
utilclient "github.com/openkruise/kruise/pkg/util/client"
utilfeature "github.com/openkruise/kruise/pkg/util/feature"
"github.com/openkruise/kruise/pkg/util/fieldindex"
"github.com/openkruise/kruise/pkg/util/history"
admissionv1 "k8s.io/api/admission/v1"
apps "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/klog/v2"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
)
// mutate pod based on SidecarSet Object
func (h *PodCreateHandler) sidecarsetMutatingPod(ctx context.Context, req admission.Request, pod *corev1.Pod) (skip bool, err error) {
if len(req.AdmissionRequest.SubResource) > 0 ||
(req.AdmissionRequest.Operation != admissionv1.Create && req.AdmissionRequest.Operation != admissionv1.Update) ||
req.AdmissionRequest.Resource.Resource != "pods" {
return true, nil
}
// filter out pods that don't require inject
if !sidecarcontrol.IsActivePod(pod) {
return true, nil
}
var oldPod *corev1.Pod
var isUpdated bool
//when Operation is update, decode older object
if req.AdmissionRequest.Operation == admissionv1.Update {
isUpdated = true
oldPod = new(corev1.Pod)
if err = h.Decoder.Decode(
admission.Request{AdmissionRequest: admissionv1.AdmissionRequest{Object: req.AdmissionRequest.OldObject}},
oldPod); err != nil {
return false, err
}
}
// DisableDeepCopy:true, indicates must be deep copy before update sidecarSet objection
sidecarSetList := &appsv1alpha1.SidecarSetList{}
sidecarSetList2 := &appsv1alpha1.SidecarSetList{}
podNamespace := pod.Namespace
if podNamespace == "" {
podNamespace = "default"
}
if err := h.Client.List(ctx, sidecarSetList, client.MatchingFields{fieldindex.IndexNameForSidecarSetNamespace: podNamespace}, utilclient.DisableDeepCopy); err != nil {
return false, err
}
if err := h.Client.List(ctx, sidecarSetList2, client.MatchingFields{fieldindex.IndexNameForSidecarSetNamespace: fieldindex.IndexValueSidecarSetClusterScope}, utilclient.DisableDeepCopy); err != nil {
return false, err
}
matchedSidecarSets := make([]sidecarcontrol.SidecarControl, 0)
for _, sidecarSet := range append(sidecarSetList.Items, sidecarSetList2.Items...) {
if sidecarSet.Spec.InjectionStrategy.Paused {
continue
}
if matched, err := sidecarcontrol.PodMatchedSidecarSet(h.Client, pod, &sidecarSet); err != nil {
return false, err
} else if !matched {
continue
}
// get user-specific revision or the latest revision of SidecarSet
suitableSidecarSet, err := h.getSuitableRevisionSidecarSet(&sidecarSet, oldPod, pod, req.AdmissionRequest.Operation)
if err != nil {
return false, err
}
// check whether sidecarSet is active
// when sidecarSet is not active, it will not perform injections and upgrades process.
control := sidecarcontrol.New(suitableSidecarSet)
if !control.IsActiveSidecarSet() {
continue
}
matchedSidecarSets = append(matchedSidecarSets, control)
}
if len(matchedSidecarSets) == 0 {
return true, nil
}
// check pod
if isUpdated {
if !matchedSidecarSets[0].IsPodAvailabilityChanged(pod, oldPod) {
klog.V(3).InfoS("pod availability unchanged for sidecarSet, and ignore", "namespace", pod.Namespace, "name", pod.Name)
return true, nil
}
}
klog.V(4).InfoS("begin to operate resource", "func", "sidecar inject",
"operation", req.Operation, "namespace", req.Namespace, "name", req.Name, "resource", req.Resource, "subResource", req.SubResource)
// patch pod metadata, annotations & labels
// When the Pod main container is upgraded in place, and the sidecarSet configuration does not change at this time,
// at this point, it can also patch pod metadata
if pod.Annotations == nil {
pod.Annotations = make(map[string]string)
}
skip = true
for _, control := range matchedSidecarSets {
sidecarSet := control.GetSidecarset()
sk, err := sidecarcontrol.PatchPodMetadata(&pod.ObjectMeta, sidecarSet.Spec.PatchPodMetadata)
if err != nil {
klog.ErrorS(err, "sidecarSet update pod metadata failed", "sidecarSet", sidecarSet.Name, "namespace", pod.Namespace, "podName", pod.Name)
return false, err
} else if !sk {
// skip = false
skip = false
}
}
//build sidecar containers, sidecar initContainers, sidecar volumes, annotations to inject into pod object
sidecarContainers, sidecarInitContainers, sidecarSecrets, volumesInSidecar, injectedAnnotations, err := buildSidecars(isUpdated, pod, oldPod, matchedSidecarSets)
if err != nil {
return false, err
} else if len(sidecarContainers) == 0 && len(sidecarInitContainers) == 0 {
klog.V(3).InfoS("pod don't have injected containers", "func", "sidecar inject", "namespace", pod.Namespace, "name", pod.Name)
return skip, nil
}
klog.V(3).InfoS("begin inject into pod", "func", "sidecar inject", "sidecarContainers", sidecarContainers,
"sidecarInitContainers", sidecarInitContainers, "sidecarSecrets", sidecarSecrets,
"volumesInSidecar", volumesInSidecar, "injectedAnnotations", injectedAnnotations,
"namespace", pod.Namespace, "name", pod.Name)
klog.V(4).InfoS("before mutating", "func", "sidecar inject", "pod", klog.KObj(pod))
// apply sidecar set info into pod
// 1. inject init containers, sort by their name, after the original init containers
sort.SliceStable(sidecarInitContainers, func(i, j int) bool {
return sidecarInitContainers[i].Name < sidecarInitContainers[j].Name
})
pod.Spec.InitContainers = mergeSidecarContainers(pod.Spec.InitContainers, sidecarInitContainers)
// 2. inject containers
if utilfeature.DefaultFeatureGate.Enabled(features.EnableSortSidecarContainerByName) {
sort.SliceStable(sidecarContainers, func(i, j int) bool {
return sidecarContainers[i].Name < sidecarContainers[j].Name
})
}
pod.Spec.Containers = mergeSidecarContainers(pod.Spec.Containers, sidecarContainers)
// 3. inject volumes
pod.Spec.Volumes = util.MergeVolumes(pod.Spec.Volumes, volumesInSidecar)
// 4. inject imagePullSecrets
pod.Spec.ImagePullSecrets = mergeSidecarSecrets(pod.Spec.ImagePullSecrets, sidecarSecrets)
// 5. apply annotations
for k, v := range injectedAnnotations {
pod.Annotations[k] = v
}
klog.V(4).InfoS("after mutating", "func", "sidecar inject", "pod", klog.KObj(pod))
return false, nil
}
func (h *PodCreateHandler) getSuitableRevisionSidecarSet(sidecarSet *appsv1alpha1.SidecarSet, oldPod, newPod *corev1.Pod, operation admissionv1.Operation) (*appsv1alpha1.SidecarSet, error) {
switch operation {
case admissionv1.Update:
// optimization: quickly return if newPod matched the latest sidecarSet
if sidecarcontrol.GetPodSidecarSetRevision(sidecarSet.Name, newPod) == sidecarcontrol.GetSidecarSetRevision(sidecarSet) {
return sidecarSet.DeepCopy(), nil
}
hc := sidecarcontrol.NewHistoryControl(h.Client)
revisions, err := history.NewHistory(h.Client).ListControllerRevisions(sidecarcontrol.MockSidecarSetForRevision(sidecarSet), hc.GetRevisionSelector(sidecarSet))
if err != nil {
klog.ErrorS(err, "Failed to list history controllerRevisions", "name", sidecarSet.Name)
return nil, err
}
suitableSidecarSet, err := h.getSpecificRevisionSidecarSetForPod(sidecarSet, revisions, newPod)
if err != nil {
return nil, err
} else if suitableSidecarSet != nil {
return suitableSidecarSet, nil
}
suitableSidecarSet, err = h.getSpecificRevisionSidecarSetForPod(sidecarSet, revisions, oldPod)
if err != nil {
return nil, err
} else if suitableSidecarSet != nil {
return suitableSidecarSet, nil
}
return sidecarSet.DeepCopy(), nil
default:
revisionInfo := sidecarSet.Spec.InjectionStrategy.Revision
if revisionInfo == nil || (revisionInfo.RevisionName == nil && revisionInfo.CustomVersion == nil) {
return sidecarSet.DeepCopy(), nil
}
specificHistory, err := h.getSpecificHistorySidecarSet(sidecarSet, revisionInfo)
if err != nil {
return nil, err
}
if sidecarSet.Spec.UpdateStrategy.Paused {
klog.V(3).InfoS("sidecarset upgrade is paused, will inject specified revision", "sidecarSet", klog.KObj(sidecarSet))
return specificHistory, nil
}
switch sidecarSet.Spec.InjectionStrategy.Revision.Policy {
case appsv1alpha1.PartialSidecarSetInjectRevisionPolicy:
if updateStrategy := sidecarSet.Spec.UpdateStrategy; updateStrategy.Selector != nil {
selector, err := util.ValidatedLabelSelectorAsSelector(updateStrategy.Selector)
if err != nil {
klog.ErrorS(err, "Failed to parse SidecarSet update strategy selector", "sidecarSet", klog.KObj(sidecarSet))
return nil, err
}
if !selector.Matches(labels.Set(newPod.Labels)) {
// Only the Pods that are not selected by the selector will definitely be injected with the specified version of the Sidecar.
klog.V(3).InfoS("New pod is not updated, specified revision will be injected",
"pod", klog.KObj(newPod), "sidecarSet", klog.KObj(sidecarSet), "revisionInfo", revisionInfo)
return specificHistory, nil
}
}
klog.V(3).InfoS("New pod is updated, which has a probability to be injected with the latest sidecar",
"pod", klog.KObj(newPod), "sidecarSet", klog.KObj(sidecarSet), "partition", sidecarSet.Spec.UpdateStrategy.Partition)
return h.selectRevisionRandomly(specificHistory, sidecarSet.DeepCopy(), sidecarSet.Spec.UpdateStrategy.Partition)
default: // Always strategy
return specificHistory, nil
}
}
}
// selectRevisionRandomly selects 'old' according to the probabilities specified by the partition.
func (h *PodCreateHandler) selectRevisionRandomly(old, new *appsv1alpha1.SidecarSet, partition *intstr.IntOrString) (*appsv1alpha1.SidecarSet, error) {
if partition == nil || partition.Type == intstr.Int {
return new, nil
}
probability, err := util.ParsePercentageAsFloat64(partition.StrVal)
if err != nil {
return nil, err
}
if rand.Float64() <= probability {
return old, nil
} else {
return new, nil
}
}
func (h *PodCreateHandler) getSpecificRevisionSidecarSetForPod(sidecarSet *appsv1alpha1.SidecarSet, revisions []*apps.ControllerRevision, pod *corev1.Pod) (*appsv1alpha1.SidecarSet, error) {
var err error
var matchedSidecarSet *appsv1alpha1.SidecarSet
for _, revision := range revisions {
if sidecarcontrol.GetPodSidecarSetControllerRevision(sidecarSet.Name, pod) == revision.Name {
matchedSidecarSet, err = h.getSpecificHistorySidecarSet(sidecarSet, &appsv1alpha1.SidecarSetInjectRevision{RevisionName: &revision.Name})
if err != nil {
return nil, err
}
break
}
}
return matchedSidecarSet, nil
}
func (h *PodCreateHandler) getSpecificHistorySidecarSet(sidecarSet *appsv1alpha1.SidecarSet, revisionInfo *appsv1alpha1.SidecarSetInjectRevision) (*appsv1alpha1.SidecarSet, error) {
// else return its corresponding history revision
hc := sidecarcontrol.NewHistoryControl(h.Client)
historySidecarSet, err := hc.GetHistorySidecarSet(sidecarSet, revisionInfo)
if err != nil {
klog.ErrorS(err, "Failed to restore history revision for SidecarSet",
"name", sidecarSet.Name, "revision", sidecarSet.Spec.InjectionStrategy.Revision)
return nil, err
}
if historySidecarSet == nil {
historySidecarSet = sidecarSet.DeepCopy()
klog.InfoS("Failed to restore history revision for SidecarSet, will use the latest", "name", sidecarSet.Name)
}
return historySidecarSet, nil
}
func mergeSidecarSecrets(secretsInPod, secretsInSidecar []corev1.LocalObjectReference) (allSecrets []corev1.LocalObjectReference) {
secretFilter := make(map[string]bool)
for _, podSecret := range secretsInPod {
if _, ok := secretFilter[podSecret.Name]; !ok {
secretFilter[podSecret.Name] = true
allSecrets = append(allSecrets, podSecret)
}
}
for _, sidecarSecret := range secretsInSidecar {
if _, ok := secretFilter[sidecarSecret.Name]; !ok {
secretFilter[sidecarSecret.Name] = true
allSecrets = append(allSecrets, sidecarSecret)
}
}
return allSecrets
}
func mergeSidecarContainers(origins []corev1.Container, injected []*appsv1alpha1.SidecarContainer) []corev1.Container {
//format: pod.spec.containers[index].name -> index(the index of container in pod)
containersInPod := make(map[string]int)
for index, container := range origins {
containersInPod[container.Name] = index
}
var beforeAppContainers []corev1.Container
var afterAppContainers []corev1.Container
for _, sidecar := range injected {
//sidecar container already exist in pod
//keep the order of pod's original containers unchanged
if index, ok := containersInPod[sidecar.Name]; ok {
origins[index] = sidecar.Container
continue
}
switch sidecar.PodInjectPolicy {
case appsv1alpha1.BeforeAppContainerType:
beforeAppContainers = append(beforeAppContainers, sidecar.Container)
case appsv1alpha1.AfterAppContainerType:
afterAppContainers = append(afterAppContainers, sidecar.Container)
default:
afterAppContainers = append(afterAppContainers, sidecar.Container)
}
}
origins = append(beforeAppContainers, origins...)
origins = append(origins, afterAppContainers...)
return origins
}
func buildSidecars(isUpdated bool, pod *corev1.Pod, oldPod *corev1.Pod, matchedSidecarSets []sidecarcontrol.SidecarControl) (
sidecarContainers, sidecarInitContainers []*appsv1alpha1.SidecarContainer, sidecarSecrets []corev1.LocalObjectReference,
volumesInSidecars []corev1.Volume, injectedAnnotations map[string]string, err error) {
// injected annotations
injectedAnnotations = make(map[string]string)
// get sidecarSet annotations from pods
// sidecarSet.name -> sidecarSet hash struct
sidecarSetHash := make(map[string]sidecarcontrol.SidecarSetUpgradeSpec)
// sidecarSet.name -> sidecarSet hash(without image) struct
sidecarSetHashWithoutImage := make(map[string]sidecarcontrol.SidecarSetUpgradeSpec)
// parse sidecar hash in pod annotations
if oldHashStr := pod.Annotations[sidecarcontrol.SidecarSetHashAnnotation]; len(oldHashStr) > 0 {
if err = json.Unmarshal([]byte(oldHashStr), &sidecarSetHash); err != nil {
// to be compatible with older sidecarSet hash struct, map[string]string
olderSidecarSetHash := make(map[string]string)
if err = json.Unmarshal([]byte(oldHashStr), &olderSidecarSetHash); err != nil {
return nil, nil, nil, nil, nil,
fmt.Errorf("pod(%s/%s) invalid annotations[%s] value %v, unmarshal failed: %v", pod.Namespace, pod.Name, sidecarcontrol.SidecarSetHashAnnotation, oldHashStr, err)
}
for k, v := range olderSidecarSetHash {
sidecarSetHash[k] = sidecarcontrol.SidecarSetUpgradeSpec{
SidecarSetHash: v,
SidecarSetName: k,
}
}
}
}
if oldHashStr := pod.Annotations[sidecarcontrol.SidecarSetHashWithoutImageAnnotation]; len(oldHashStr) > 0 {
if err = json.Unmarshal([]byte(oldHashStr), &sidecarSetHashWithoutImage); err != nil {
// to be compatible with older sidecarSet hash struct, map[string]string
olderSidecarSetHash := make(map[string]string)
if err = json.Unmarshal([]byte(oldHashStr), &olderSidecarSetHash); err != nil {
return nil, nil, nil, nil, nil,
fmt.Errorf("pod(%s/%s) invalid annotations[%s] value %v, unmarshal failed: %v", pod.Namespace, pod.Name, sidecarcontrol.SidecarSetHashWithoutImageAnnotation, oldHashStr, err)
}
for k, v := range olderSidecarSetHash {
sidecarSetHashWithoutImage[k] = sidecarcontrol.SidecarSetUpgradeSpec{
SidecarSetHash: v,
SidecarSetName: k,
}
}
}
}
// hotUpgrade work info, sidecarSet.spec.container[x].name -> pod.spec.container[x].name
// for example: mesh -> mesh-1, envoy -> envoy-2
hotUpgradeWorkInfo := sidecarcontrol.GetPodHotUpgradeInfoInAnnotations(pod)
// SidecarSet Name List, for example: log-sidecarset,envoy-sidecarset
sidecarSetNames := sets.NewString()
if sidecarSetListStr := pod.Annotations[sidecarcontrol.SidecarSetListAnnotation]; sidecarSetListStr != "" {
sidecarSetNames.Insert(strings.Split(sidecarSetListStr, ",")...)
}
for _, control := range matchedSidecarSets {
sidecarSet := control.GetSidecarset()
klog.V(3).InfoS("build pod sidecar containers for sidecarSet", "namespace", pod.Namespace, "podName", pod.Name, "sidecarSet", sidecarSet.Name)
// sidecarSet List
sidecarSetNames.Insert(sidecarSet.Name)
// pre-process volumes only in sidecar
volumesMap := getVolumesMapInSidecarSet(sidecarSet)
// process sidecarset hash
setUpgrade1 := sidecarcontrol.SidecarSetUpgradeSpec{
UpdateTimestamp: metav1.Now(),
SidecarSetHash: sidecarcontrol.GetSidecarSetRevision(sidecarSet),
SidecarSetName: sidecarSet.Name,
SidecarSetControllerRevision: sidecarSet.Status.LatestRevision,
}
setUpgrade2 := sidecarcontrol.SidecarSetUpgradeSpec{
UpdateTimestamp: metav1.Now(),
SidecarSetHash: sidecarcontrol.GetSidecarSetWithoutImageRevision(sidecarSet),
SidecarSetName: sidecarSet.Name,
}
isInjecting := false
sidecarList := sets.NewString()
//process initContainers
//only when created pod, inject initContainer and pullSecrets
if !isUpdated {
for i := range sidecarSet.Spec.InitContainers {
initContainer := &sidecarSet.Spec.InitContainers[i]
// only insert k8s native sidecar container for in-place update
if sidecarcontrol.IsSidecarContainer(initContainer.Container) {
sidecarList.Insert(initContainer.Name)
}
// volumeMounts that injected into sidecar container
// when volumeMounts SubPathExpr contains expansions, then need copy container EnvVars(injectEnvs)
injectedMounts, injectedEnvs := sidecarcontrol.GetInjectedVolumeMountsAndEnvs(control, initContainer, pod)
// get injected env & mounts explicitly so that can be compared with old ones in pod
transferEnvs := sidecarcontrol.GetSidecarTransferEnvs(initContainer, pod)
// append volumeMounts SubPathExpr environments
transferEnvs = util.MergeEnvVar(transferEnvs, injectedEnvs)
// insert volumes that initContainers used
for _, mount := range initContainer.VolumeMounts {
if vol, ok := volumesMap[mount.Name]; ok {
volumesInSidecars = append(volumesInSidecars, *vol)
} else {
klog.Warningf("InitContainer volumeMount %s cannot be found in volumes of sidecarSet %s", mount.Name, sidecarSet.Name)
}
}
for _, mount := range initContainer.VolumeDevices {
if vol, ok := volumesMap[mount.Name]; ok {
volumesInSidecars = append(volumesInSidecars, *vol)
} else {
klog.Warningf("InitContainer volumeDevice %s cannot be found in volumes of sidecarSet %s", mount.Name, sidecarSet.Name)
}
}
// merge VolumeMounts from sidecar.VolumeMounts and shared VolumeMounts
initContainer.VolumeMounts = util.MergeVolumeMounts(initContainer.Container, injectedMounts)
// add "IS_INJECTED" env in initContainer's envs
initContainer.Env = append(initContainer.Env, corev1.EnvVar{Name: sidecarcontrol.SidecarEnvKey, Value: "true"})
// merge Env from sidecar.Env and transferred envs
initContainer.Env = util.MergeEnvVar(initContainer.Env, transferEnvs)
isInjecting = true
// merge volumeDevice
injectedDevices := sidecarcontrol.GetInjectedVolumeDevices(initContainer, pod)
initContainer.VolumeDevices = util.MergeVolumeDevices(initContainer.Container, injectedDevices)
klog.InfoS("try to inject initContainer sidecar",
"containerName", initContainer.Name, "namespace", pod.Namespace, "podName", pod.Name, "envs", transferEnvs, "volumeMounts", injectedMounts, "volumeDevices", injectedDevices)
// when sidecar container UpgradeStrategy is HotUpgrade
if sidecarcontrol.IsSidecarContainer(initContainer.Container) && sidecarcontrol.IsHotUpgradeContainer(initContainer) {
hotContainers, annotations := injectHotUpgradeContainers(hotUpgradeWorkInfo, initContainer)
sidecarInitContainers = append(sidecarInitContainers, hotContainers...)
for k, v := range annotations {
injectedAnnotations[k] = v
}
} else {
sidecarInitContainers = append(sidecarInitContainers, initContainer)
}
}
//process imagePullSecrets
sidecarSecrets = append(sidecarSecrets, sidecarSet.Spec.ImagePullSecrets...)
}
//process containers
for i := range sidecarSet.Spec.Containers {
sidecarContainer := &sidecarSet.Spec.Containers[i]
sidecarList.Insert(sidecarContainer.Name)
// volumeMounts that are injected into the sidecar container
// when a volumeMount's SubPathExpr contains expansions, the container's EnvVars must also be copied (injectedEnvs)
injectedMounts, injectedEnvs := sidecarcontrol.GetInjectedVolumeMountsAndEnvs(control, sidecarContainer, pod)
// get injected envs & mounts explicitly so that they can be compared with the old ones in the pod
transferEnvs := sidecarcontrol.GetSidecarTransferEnvs(sidecarContainer, pod)
// append volumeMounts SubPathExpr environments
transferEnvs = util.MergeEnvVar(transferEnvs, injectedEnvs)
//when updating the pod object
if isUpdated {
// judge whether inject sidecar container into pod
needInject, existSidecars, existVolumes := control.NeedToInjectInUpdatedPod(pod, oldPod, sidecarContainer, transferEnvs, injectedMounts)
if !needInject {
sidecarContainers = append(sidecarContainers, existSidecars...)
volumesInSidecars = append(volumesInSidecars, existVolumes...)
continue
}
klog.V(3).InfoS("upgrade or insert sidecar container during pod upgrade",
"containerName", sidecarContainer.Name, "namespace", pod.Namespace, "podName", pod.Name)
//when the pod object is created, the sidecar container must be injected into the pod
} else {
klog.V(3).InfoS("inject new sidecar container during pod creation",
"containerName", sidecarContainer.Name, "namespace", pod.Namespace, "podName", pod.Name)
}
isInjecting = true
// insert volume that sidecar container used
for _, mount := range sidecarContainer.VolumeMounts {
if vol, ok := volumesMap[mount.Name]; ok {
volumesInSidecars = append(volumesInSidecars, *vol)
} else {
klog.Warningf("Container volumeMount %s cannot be found in volumes of sidecarSet %s", mount.Name, sidecarSet.Name)
}
}
for _, mount := range sidecarContainer.VolumeDevices {
if vol, ok := volumesMap[mount.Name]; ok {
volumesInSidecars = append(volumesInSidecars, *vol)
} else {
klog.Warningf("Container volumeDevice %s cannot be found in volumes of sidecarSet %s", mount.Name, sidecarSet.Name)
}
}
// merge VolumeMounts from sidecar.VolumeMounts and shared VolumeMounts
sidecarContainer.VolumeMounts = util.MergeVolumeMounts(sidecarContainer.Container, injectedMounts)
// add the "Injected" env to the sidecar container
sidecarContainer.Env = append(sidecarContainer.Env, corev1.EnvVar{Name: sidecarcontrol.SidecarEnvKey, Value: "true"})
// merge Env from sidecar.Env and transferred envs
sidecarContainer.Env = util.MergeEnvVar(sidecarContainer.Env, transferEnvs)
// merge volumeDevice
injectedDevices := sidecarcontrol.GetInjectedVolumeDevices(sidecarContainer, pod)
sidecarContainer.VolumeDevices = util.MergeVolumeDevices(sidecarContainer.Container, injectedDevices)
klog.InfoS("try to inject Container sidecar",
"containerName", sidecarContainer.Name, "namespace", pod.Namespace, "podName", pod.Name, "envs", transferEnvs, "volumeMounts", injectedMounts, "volumeDevices", injectedDevices)
// when sidecar container UpgradeStrategy is HotUpgrade
if sidecarcontrol.IsHotUpgradeContainer(sidecarContainer) {
hotContainers, annotations := injectHotUpgradeContainers(hotUpgradeWorkInfo, sidecarContainer)
sidecarContainers = append(sidecarContainers, hotContainers...)
for k, v := range annotations {
injectedAnnotations[k] = v
}
} else {
sidecarContainers = append(sidecarContainers, sidecarContainer)
}
}
// the container was (re)injected and the annotations need to be updated
if isInjecting {
setUpgrade1.SidecarList = sidecarList.List()
setUpgrade2.SidecarList = sidecarList.List()
sidecarSetHash[sidecarSet.Name] = setUpgrade1
sidecarSetHashWithoutImage[sidecarSet.Name] = setUpgrade2
}
}
// store sidecarset hash in pod annotations
by, _ := json.Marshal(sidecarSetHash)
injectedAnnotations[sidecarcontrol.SidecarSetHashAnnotation] = string(by)
by, _ = json.Marshal(sidecarSetHashWithoutImage)
injectedAnnotations[sidecarcontrol.SidecarSetHashWithoutImageAnnotation] = string(by)
sidecarSetNameList := strings.Join(sidecarSetNames.List(), ",")
// store matched sidecarset list in pod annotations
injectedAnnotations[sidecarcontrol.SidecarSetListAnnotation] = sidecarSetNameList
return sidecarContainers, sidecarInitContainers, sidecarSecrets, volumesInSidecars, injectedAnnotations, nil
}
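// getVolumesMapInSidecarSet indexes the SidecarSet's spec.volumes by name, so that the volumes
// referenced by injected containers' volumeMounts/volumeDevices can be looked up directly.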
func getVolumesMapInSidecarSet(sidecarSet *appsv1alpha1.SidecarSet) map[string]*corev1.Volume {
volumesMap := make(map[string]*corev1.Volume)
for idx, volume := range sidecarSet.Spec.Volumes {
volumesMap[volume.Name] = &sidecarSet.Spec.Volumes[idx]
}
return volumesMap
}
/*
Copyright 2020 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package mutating
import (
"encoding/json"
"fmt"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
"github.com/openkruise/kruise/pkg/control/sidecarcontrol"
corev1 "k8s.io/api/core/v1"
)
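// injectHotUpgradeContainers expands a hot-upgrade sidecar into two pod containers and records
// which of the two is currently working in the pod annotations.
// Illustrative example (names assumed for illustration): a sidecar named "envoy" becomes the pod
// containers "envoy-1" (working, with the sidecarSet image) and "envoy-2" (HotUpgradeEmptyImage),
// and the working-container annotation stores the mapping envoy -> envoy-1.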
func injectHotUpgradeContainers(hotUpgradeWorkInfo map[string]string, sidecarContainer *appsv1alpha1.SidecarContainer) (
sidecarContainers []*appsv1alpha1.SidecarContainer, injectedAnnotations map[string]string) {
injectedAnnotations = make(map[string]string)
// container1 is the currently working container
// container2 is the empty (hot-standby) container and does not work for now
container1, container2 := generateHotUpgradeContainers(sidecarContainer)
sidecarContainers = append(sidecarContainers, container1)
sidecarContainers = append(sidecarContainers, container2)
//mark sidecarset.version in annotations
// "1" indicates sidecar container is first injected into pod, and not upgrade process
injectedAnnotations[sidecarcontrol.GetPodSidecarSetVersionAnnotation(container1.Name)] = "1"
injectedAnnotations[sidecarcontrol.GetPodSidecarSetVersionAltAnnotation(container1.Name)] = "0"
// "0" indicates sidecar container is hot upgrade empty container
injectedAnnotations[sidecarcontrol.GetPodSidecarSetVersionAnnotation(container2.Name)] = "0"
injectedAnnotations[sidecarcontrol.GetPodSidecarSetVersionAltAnnotation(container2.Name)] = "1"
// used to mark which container is currently working; initially it is container1
// format: map[container.name] = pod.spec.container[x].name
hotUpgradeWorkInfo[sidecarContainer.Name] = container1.Name
// store working HotUpgrade container in pod annotations
by, _ := json.Marshal(hotUpgradeWorkInfo)
injectedAnnotations[sidecarcontrol.SidecarSetWorkingHotUpgradeContainer] = string(by)
return sidecarContainers, injectedAnnotations
}
func generateHotUpgradeContainers(container *appsv1alpha1.SidecarContainer) (*appsv1alpha1.SidecarContainer, *appsv1alpha1.SidecarContainer) {
name1, name2 := sidecarcontrol.GetHotUpgradeContainerName(container.Name)
container1, container2 := container.DeepCopy(), container.DeepCopy()
container1.Name = name1
container2.Name = name2
// set the non-working hot-upgrade container's image to the empty image; initially it is container2
container2.Container.Image = container.UpgradeStrategy.HotUpgradeEmptyImage
// set sidecarset.version in container env
setSidecarContainerVersionEnv(&container1.Container)
setSidecarContainerVersionEnv(&container2.Container)
return container1, container2
}
// use Sidecarset.ResourceVersion to mark sidecar container version in env(SIDECARSET_VERSION)
// env(SIDECARSET_VERSION) ValueFrom pod.metadata.annotations['sidecarset.kruise.io/{container.name}.version']
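// Illustrative rendering for a container named "envoy-1" (name assumed for illustration):
//   SIDECARSET_VERSION     <- metadata.annotations['sidecarset.kruise.io/envoy-1.version']
//   SIDECARSET_VERSION_ALT <- the corresponding alt-version annotation returned by GetPodSidecarSetVersionAltAnnotation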
func setSidecarContainerVersionEnv(container *corev1.Container) {
// inject SIDECARSET_VERSION
container.Env = append(container.Env, corev1.EnvVar{
Name: sidecarcontrol.SidecarSetVersionEnvKey,
ValueFrom: &corev1.EnvVarSource{
FieldRef: &corev1.ObjectFieldSelector{
FieldPath: fmt.Sprintf("metadata.annotations['%s']", sidecarcontrol.GetPodSidecarSetVersionAnnotation(container.Name)),
},
},
})
// inject SIDECARSET_VERSION_ALT
container.Env = append(container.Env, corev1.EnvVar{
Name: sidecarcontrol.SidecarSetVersionAltEnvKey,
ValueFrom: &corev1.EnvVarSource{
FieldRef: &corev1.ObjectFieldSelector{
FieldPath: fmt.Sprintf("metadata.annotations['%s']", sidecarcontrol.GetPodSidecarSetVersionAltAnnotation(container.Name)),
},
},
})
}
/*
Copyright 2019 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package mutating
import (
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
"github.com/openkruise/kruise/pkg/webhook/types"
)
// +kubebuilder:webhook:path=/mutate-pod,mutating=true,failurePolicy=fail,sideEffects=None,admissionReviewVersions=v1;v1beta1,groups="",resources=pods,verbs=create,versions=v1,name=mpod.kb.io
var (
// HandlerGetterMap contains admission webhook handlers
HandlerGetterMap = map[string]types.HandlerGetter{
"mutate-pod": func(mgr manager.Manager) admission.Handler {
return &PodCreateHandler{
Client: mgr.GetClient(),
Decoder: admission.NewDecoder(mgr.GetScheme()),
}
},
}
)
/*
Copyright 2021 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package mutating
import (
"context"
admissionv1 "k8s.io/api/admission/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apiserver/pkg/util/dryrun"
"k8s.io/klog/v2"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
wsutil "github.com/openkruise/kruise/pkg/util/workloadspread"
)
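// workloadSpreadMutatingPod handles WorkloadSpread mutation on pod creation: it skips sub-resource
// requests, non-pod resources, dry-run requests and non-Create operations, and otherwise delegates
// to the WorkloadSpread handler, which may patch the pod.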
func (h *PodCreateHandler) workloadSpreadMutatingPod(ctx context.Context, req admission.Request, pod *corev1.Pod) (skip bool, err error) {
if len(req.AdmissionRequest.SubResource) > 0 ||
req.AdmissionRequest.Resource.Resource != "pods" {
return true, nil
}
workloadSpreadHandler := wsutil.NewWorkloadSpreadHandler(h.Client)
var dryRun bool
switch req.AdmissionRequest.Operation {
case admissionv1.Create:
options := &metav1.CreateOptions{}
err := h.Decoder.DecodeRaw(req.Options, options)
if err != nil {
return false, err
}
// check dry run
dryRun = dryrun.IsDryRun(options.DryRun)
if dryRun {
klog.V(5).InfoS("Operation is a dry run, then admit", "operation", req.AdmissionRequest.Operation, "namespace", pod.Namespace, "podName", pod.Name)
return true, nil
}
return workloadSpreadHandler.HandlePodCreation(pod)
default:
return true, nil
}
}
/*
Copyright 2021 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package validating
import (
"context"
"fmt"
"net/http"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
"github.com/openkruise/kruise/pkg/features"
utilfeature "github.com/openkruise/kruise/pkg/util/feature"
webhookutil "github.com/openkruise/kruise/pkg/webhook/util"
admissionv1 "k8s.io/api/admission/v1"
"k8s.io/apimachinery/pkg/api/errors"
apimachineryvalidation "k8s.io/apimachinery/pkg/api/validation"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/validation/field"
"k8s.io/klog/v2"
coreval "k8s.io/kubernetes/pkg/apis/core/validation"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
)
// ResourceDistributionCreateUpdateHandler handles ResourceDistribution
type ResourceDistributionCreateUpdateHandler struct {
Client client.Client
// Decoder decodes objects
Decoder admission.Decoder
}
var _ admission.Handler = &ResourceDistributionCreateUpdateHandler{}
// validateResourceDistributionSpec validates Spec when creating and updating
// (1). validate resource itself
// (2). validate targets
func (h *ResourceDistributionCreateUpdateHandler) validateResourceDistributionSpec(obj, oldObj *appsv1alpha1.ResourceDistribution, fldPath *field.Path) (allErrs field.ErrorList) {
spec := &obj.Spec
// deserialize the resource from runtime.RawExtension
resource, errs := DeserializeResource(&spec.Resource, fldPath)
allErrs = append(allErrs, errs...)
if resource == nil {
return
}
// deserialize the old resource if needed
var oldResource runtime.Object
if oldObj != nil {
oldResource, errs = DeserializeResource(&oldObj.Spec.Resource, fldPath)
allErrs = append(allErrs, errs...)
}
// 1. validate resource
allErrs = append(allErrs, h.validateResourceDistributionSpecResource(resource, oldResource, fldPath.Child("resource"))...)
// 2. validate targets
allErrs = append(allErrs, h.validateResourceDistributionSpecTargets(&obj.Spec.Targets, fldPath.Child("targets"))...)
return
}
// validateResourceDistributionSpecResource validates Spec.Resource when creating and updating
// (1). check whether type of the resource is supported
// (2). detect updating conflict, i.e., GK and name cannot be modified
// (3). dry run to check whether resource can be created
func (h *ResourceDistributionCreateUpdateHandler) validateResourceDistributionSpecResource(resource, oldResource runtime.Object, fldPath *field.Path) (allErrs field.ErrorList) {
// 1. check whether the GK of the resource is in supportedGKList
if !isSupportedGK(resource) {
return append(allErrs, field.Invalid(fldPath, resource, fmt.Sprintf("unknown or unsupported resource GroupKind, only support %v", supportedGKList)))
}
// 2. validate resource group, kind and name when updating
if oldResource != nil && !haveSameGVKAndName(resource, oldResource) {
return append(allErrs, field.Invalid(fldPath, nil, "resource apiVersion, kind, and name are immutable"))
}
// 3. dry run to check the resource
mice := resource.DeepCopyObject().(client.Object)
ConvertToUnstructured(mice).SetNamespace(webhookutil.GetNamespace())
err := h.Client.Create(context.TODO(), mice, &client.CreateOptions{DryRun: []string{metav1.DryRunAll}})
if err != nil && !errors.IsAlreadyExists(err) {
return append(allErrs, field.InternalError(fldPath, fmt.Errorf("failed to dry-run to validate spec.resource, error: %v", err)))
}
return
}
// validateResourceDistributionSpecTargets validates Spec.Targets
// (1). validate target namespace names and conflicts between IncludedNamespaces and ExcludedNamespaces
// (2). validate targets.NamespaceLabelSelector
func (h *ResourceDistributionCreateUpdateHandler) validateResourceDistributionSpecTargets(targets *appsv1alpha1.ResourceDistributionTargets, fldPath *field.Path) (allErrs field.ErrorList) {
// 1. validate namespace of IncludedNamespaces.List and ExcludedNamespaces.List
conflicted := make([]string, 0)
includedNS := sets.NewString()
for _, namespace := range targets.IncludedNamespaces.List {
includedNS.Insert(namespace.Name)
// validate namespace name
for _, msg := range coreval.ValidateNamespaceName(namespace.Name, false) {
allErrs = append(allErrs, field.Invalid(fldPath.Child("includedNamespaces"), targets.IncludedNamespaces, msg))
}
}
for _, namespace := range targets.ExcludedNamespaces.List {
// validate namespace name
for _, msg := range coreval.ValidateNamespaceName(namespace.Name, false) {
allErrs = append(allErrs, field.Invalid(fldPath.Child("excludedNamespaces"), targets.ExcludedNamespaces, msg))
}
// validate conflict between IncludedNamespaces and ExcludedNamespaces
if includedNS.Has(namespace.Name) {
conflicted = append(conflicted, namespace.Name)
}
}
if len(conflicted) != 0 {
allErrs = append(allErrs, field.Invalid(fldPath, targets, fmt.Sprintf("ambiguous targets because namespace %v is in both IncludedNamespaces.List and ExcludedNamespaces.List", conflicted)))
}
// 2. validate targets.NamespaceLabelSelector
if _, err := metav1.LabelSelectorAsSelector(&targets.NamespaceLabelSelector); err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Child("namespaceLabelSelector"), targets.NamespaceLabelSelector, fmt.Sprintf("labelSelectorAsSelector error: %v", err)))
}
return
}
// validateResourceDistribution is the entry point for validating ResourceDistribution on create and update
// (1). validate ResourceDistribution ObjectMeta
// (2). validate ResourceDistribution Spec
func (h *ResourceDistributionCreateUpdateHandler) validateResourceDistribution(obj, oldObj *appsv1alpha1.ResourceDistribution) (allErrs field.ErrorList) {
// 1. validate metadata
allErrs = apimachineryvalidation.ValidateObjectMeta(&obj.ObjectMeta, false, apimachineryvalidation.NameIsDNSSubdomain, field.NewPath("metadata"))
// 2. validate spec
return append(allErrs, h.validateResourceDistributionSpec(obj, oldObj, field.NewPath("spec"))...)
}
// Handle handles admission requests.
func (h *ResourceDistributionCreateUpdateHandler) Handle(ctx context.Context, req admission.Request) admission.Response {
obj := &appsv1alpha1.ResourceDistribution{}
if err := h.Decoder.Decode(req, obj); err != nil {
return admission.Errored(http.StatusBadRequest, err)
}
var oldObj *appsv1alpha1.ResourceDistribution
if req.AdmissionRequest.Operation == admissionv1.Update {
oldObj = &appsv1alpha1.ResourceDistribution{}
if err := h.Decoder.DecodeRaw(req.AdmissionRequest.OldObject, oldObj); err != nil {
return admission.Errored(http.StatusBadRequest, err)
}
}
if !utilfeature.DefaultFeatureGate.Enabled(features.ResourceDistributionGate) {
return admission.Errored(http.StatusForbidden, fmt.Errorf("feature-gate %s is not enabled", features.ResourceDistributionGate))
}
if allErrs := h.validateResourceDistribution(obj, oldObj); len(allErrs) != 0 {
klog.V(3).InfoS("all errors of validation", "errors", fmt.Sprintf("%v", allErrs))
return admission.Errored(http.StatusUnprocessableEntity, allErrs.ToAggregate())
}
return admission.ValidationResponse(true, "")
}
/*
Copyright 2021 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package validating
import (
"fmt"
"reflect"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/validation/field"
)
const (
ResourceHashCodeAnnotation = "kruise.io/resourcedistribution.resource.hashcode"
SourceResourceDistributionOfResource = "kruise.io/resourcedistribution.resource.from"
)
var (
// supportedGKList contains all resource GroupKinds supported by ResourceDistribution
// Support CustomResourceDefinition
/* ADD NEW RESOURCE TYPE HERE*/
supportedGKList = []schema.GroupKind{
{Group: "", Kind: "Secret"},
{Group: "", Kind: "ConfigMap"},
}
)
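// To support an additional built-in type, a new GroupKind would be appended to supportedGKList above,
// e.g. {Group: "", Kind: "ServiceAccount"} (illustrative only; not currently supported).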
// isSupportedGK checks whether the object's GroupKind is supported by ResourceDistribution
func isSupportedGK(object runtime.Object) bool {
if object == nil {
return false
}
objGK := object.GetObjectKind().GroupVersionKind().GroupKind()
for _, gk := range supportedGKList {
if reflect.DeepEqual(gk, objGK) {
return true
}
}
return false
}
// haveSameGVKAndName returns true if two resources have the same group, version, kind and name
func haveSameGVKAndName(resource, otherResource runtime.Object) bool {
Name, anotherName := ConvertToUnstructured(resource).GetName(), ConvertToUnstructured(otherResource).GetName()
GVK, anotherGVK := resource.GetObjectKind().GroupVersionKind(), otherResource.GetObjectKind().GroupVersionKind()
return Name == anotherName && reflect.DeepEqual(GVK, anotherGVK)
}
// ConvertToUnstructured receives a runtime.Object and returns it as *unstructured.Unstructured (nil if the object is not unstructured)
// reused by controller
func ConvertToUnstructured(resourceObject runtime.Object) (resource *unstructured.Unstructured) {
switch unstructuredResource := resourceObject.(type) {
case *unstructured.Unstructured:
return unstructuredResource
default:
return nil
}
}
// DeserializeResource receives the raw resource manifest and returns the deserialized runtime.Object
// reused by controller
func DeserializeResource(resourceRawExtension *runtime.RawExtension, fldPath *field.Path) (resource runtime.Object, allErrs field.ErrorList) {
// 1. check whether resource yaml is empty
if len(resourceRawExtension.Raw) == 0 {
return nil, append(allErrs, field.Invalid(fldPath, resource, "empty resource is not allowed"))
}
// 2. deserialize resource
resource, _, err := unstructured.UnstructuredJSONScheme.Decode(resourceRawExtension.Raw, nil, nil)
if err != nil {
allErrs = append(allErrs, field.InternalError(fldPath, fmt.Errorf("failed to deserialize resource, please check your spec.resource, err %v", err)))
}
return
}
/*
Copyright 2021 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package validating
import (
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
"github.com/openkruise/kruise/pkg/webhook/types"
)
//+kubebuilder:webhook:path=/validate-apps-kruise-io-v1alpha1-resourcedistribution,mutating=false,failurePolicy=fail,sideEffects=None,admissionReviewVersions=v1;v1beta1,groups=apps.kruise.io,resources=resourcedistributions,verbs=create;update,versions=v1alpha1,name=vresourcedistribution.kb.io
var (
// HandlerGetterMap contains admission webhook handlers
HandlerGetterMap = map[string]types.HandlerGetter{
"validate-apps-kruise-io-v1alpha1-resourcedistribution": func(mgr manager.Manager) admission.Handler {
return &ResourceDistributionCreateUpdateHandler{
Client: mgr.GetClient(),
Decoder: admission.NewDecoder(mgr.GetScheme()),
}
},
}
)
/*
Copyright 2019 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package validating
import (
"context"
"fmt"
"net/http"
"reflect"
"regexp"
"strings"
"k8s.io/apimachinery/pkg/util/intstr"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
"github.com/openkruise/kruise/pkg/control/sidecarcontrol"
"github.com/openkruise/kruise/pkg/util"
webhookutil "github.com/openkruise/kruise/pkg/webhook/util"
admissionv1 "k8s.io/api/admission/v1"
v1 "k8s.io/api/core/v1"
genericvalidation "k8s.io/apimachinery/pkg/api/validation"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
metavalidation "k8s.io/apimachinery/pkg/apis/meta/v1/validation"
"k8s.io/apimachinery/pkg/util/sets"
validationutil "k8s.io/apimachinery/pkg/util/validation"
"k8s.io/apimachinery/pkg/util/validation/field"
appsvalidation "k8s.io/kubernetes/pkg/apis/apps/validation"
"k8s.io/kubernetes/pkg/apis/core"
corev1 "k8s.io/kubernetes/pkg/apis/core/v1"
corevalidation "k8s.io/kubernetes/pkg/apis/core/validation"
"k8s.io/kubernetes/pkg/fieldpath"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
)
const (
sidecarSetNameMaxLen = 63
)
var validDownwardAPIFieldPathExpressions = sets.NewString(
"metadata.name",
"metadata.labels",
"metadata.annotations")
var (
validateSidecarSetNameMsg = "sidecarset name must consist of alphanumeric characters or '-'"
validateSidecarSetNameRegex = regexp.MustCompile(validSidecarSetNameFmt)
validSidecarSetNameFmt = `^[a-zA-Z0-9\-]+$`
)
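// For example, "logtail-sidecar" matches validSidecarSetNameFmt, while "logtail_sidecar" does not (underscores are rejected).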
// SidecarSetCreateUpdateHandler handles SidecarSet
type SidecarSetCreateUpdateHandler struct {
// Client reads SidecarSets and other objects from the cluster during validation
Client client.Client
// Decoder decodes objects
Decoder admission.Decoder
}
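// validatingSidecarSetFn validates the new SidecarSet against the older one (nil on create) and
// returns whether the request is allowed, a reason, and an error.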
func (h *SidecarSetCreateUpdateHandler) validatingSidecarSetFn(_ context.Context, obj *appsv1alpha1.SidecarSet, older *appsv1alpha1.SidecarSet) (bool, string, error) {
allErrs := h.validateSidecarSet(obj, older)
if len(allErrs) != 0 {
return false, "", allErrs.ToAggregate()
}
return true, "allowed to be admitted", nil
}
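// validateSidecarSet validates the SidecarSet's metadata and spec, checks container immutability
// against the older object, and then checks conflicts with the other SidecarSets in the cluster.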
func (h *SidecarSetCreateUpdateHandler) validateSidecarSet(obj *appsv1alpha1.SidecarSet, older *appsv1alpha1.SidecarSet) field.ErrorList {
// validating ObjectMeta
allErrs := genericvalidation.ValidateObjectMeta(&obj.ObjectMeta, false, validateSidecarSetName, field.NewPath("metadata"))
// validating spec
allErrs = append(allErrs, h.validateSidecarSetSpec(obj, field.NewPath("spec"))...)
// when the operation is update, older isn't nil; validate whether the old and new containers conflict
if older != nil {
allErrs = append(allErrs, validateSidecarContainerConflict(obj.Spec.Containers, older.Spec.Containers, field.NewPath("spec.containers"))...)
}
if len(allErrs) != 0 {
return allErrs
}
// iterate over all containers in other sidecarsets to avoid name duplication
sidecarSets := &appsv1alpha1.SidecarSetList{}
if err := h.Client.List(context.TODO(), sidecarSets, &client.ListOptions{}); err != nil {
allErrs = append(allErrs, field.InternalError(field.NewPath(""), fmt.Errorf("query other sidecarsets failed, err: %v", err)))
}
allErrs = append(allErrs, validateSidecarConflict(h.Client, sidecarSets, obj, field.NewPath("spec"))...)
return allErrs
}
func validateSidecarSetName(name string, _ bool) (allErrs []string) {
if !validateSidecarSetNameRegex.MatchString(name) {
allErrs = append(allErrs, validationutil.RegexError(validateSidecarSetNameMsg, validSidecarSetNameFmt, "example-com"))
}
if len(name) > sidecarSetNameMaxLen {
allErrs = append(allErrs, validationutil.MaxLenError(sidecarSetNameMaxLen))
}
return allErrs
}
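// validateSidecarSetSpec validates the SidecarSet spec: selector, namespace/namespaceSelector,
// injectionStrategy, updateStrategy, volumes, initContainers/containers and patchPodMetadata.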
func (h *SidecarSetCreateUpdateHandler) validateSidecarSetSpec(obj *appsv1alpha1.SidecarSet, fldPath *field.Path) field.ErrorList {
spec := &obj.Spec
allErrs := field.ErrorList{}
// kruise does not currently support in-place update when an initContainer's restartPolicy is Always
for _, c := range obj.Spec.InitContainers {
if sidecarcontrol.IsSidecarContainer(c.Container) && obj.Spec.UpdateStrategy.Type == appsv1alpha1.RollingUpdateSidecarSetStrategyType {
allErrs = append(allErrs, field.Required(fldPath.Child("updateStrategy"), "The initContainer in-place upgrade is not currently supported."))
}
}
//validate spec selector
if spec.Selector == nil {
allErrs = append(allErrs, field.Required(fldPath.Child("selector"), "no selector defined for SidecarSet"))
} else {
allErrs = append(allErrs, validateSelector(spec.Selector, fldPath.Child("selector"))...)
}
if spec.Namespace != "" && spec.NamespaceSelector != nil {
allErrs = append(allErrs, field.Required(fldPath.Child("namespace, namespaceSelector"), "namespace and namespaceSelector are mutually exclusive"))
} else if spec.NamespaceSelector != nil {
allErrs = append(allErrs, validateSelector(spec.NamespaceSelector, fldPath.Child("namespaceSelector"))...)
}
//validating SidecarSetInjectionStrategy
allErrs = append(allErrs, h.validateSidecarSetInjectionStrategy(obj, fldPath.Child("injectionStrategy"))...)
//validating SidecarSetUpdateStrategy
allErrs = append(allErrs, validateSidecarSetUpdateStrategy(&spec.UpdateStrategy, fldPath.Child("updateStrategy"))...)
//validating volumes
vols, vErrs := getCoreVolumes(spec.Volumes, fldPath.Child("volumes"))
allErrs = append(allErrs, vErrs...)
//validating sidecar container
// the SidecarSet must define at least one initContainer or container
if len(spec.InitContainers) == 0 && len(spec.Containers) == 0 {
allErrs = append(allErrs, field.Required(fldPath.Root(), "no initContainer or container defined for SidecarSet"))
} else {
allErrs = append(allErrs, validateContainersForSidecarSet(spec.InitContainers, spec.Containers, vols, fldPath.Root())...)
}
// validating metadata
annotationKeys := sets.NewString()
if err := sidecarcontrol.ValidateSidecarSetPatchMetadataWhitelist(h.Client, obj); err != nil {
allErrs = append(allErrs, field.Required(fldPath.Child("patchPodMetadata"), err.Error()))
}
for _, patch := range spec.PatchPodMetadata {
if len(patch.Annotations) == 0 {
allErrs = append(allErrs, field.Required(fldPath.Child("patchPodMetadata"), "no annotations defined for SidecarSet"))
} else {
metadata := metav1.ObjectMeta{Annotations: patch.Annotations, Name: "fake-name"}
allErrs = append(allErrs, genericvalidation.ValidateObjectMeta(&metadata, false, validateSidecarSetName, field.NewPath("patchPodMetadata"))...)
}
if patch.PatchPolicy == "" {
allErrs = append(allErrs, field.Required(fldPath.Child("patchPodMetadata"), "no patchPolicy defined for patchPodMetadata"))
}
for k := range patch.Annotations {
if annotationKeys.Has(k) {
allErrs = append(allErrs, field.Required(fldPath.Child("patchPodMetadata"), fmt.Sprintf("patch annotation[%s] already exist", k)))
} else {
annotationKeys.Insert(k)
}
}
}
return allErrs
}
func validateSelector(selector *metav1.LabelSelector, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
allErrs = append(allErrs, metavalidation.ValidateLabelSelector(selector,
metavalidation.LabelSelectorValidationOptions{}, fldPath)...)
if len(selector.MatchLabels)+len(selector.MatchExpressions) == 0 {
allErrs = append(allErrs, field.Invalid(fldPath, selector, "empty selector is not valid for sidecarset."))
}
_, err := metav1.LabelSelectorAsSelector(selector)
if err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Child("selector"), selector, ""))
}
return allErrs
}
func (h *SidecarSetCreateUpdateHandler) validateSidecarSetInjectionStrategy(obj *appsv1alpha1.SidecarSet, _ *field.Path) field.ErrorList {
errList := field.ErrorList{}
revisionInfo := obj.Spec.InjectionStrategy.Revision
if revisionInfo != nil {
switch {
case revisionInfo.RevisionName == nil && revisionInfo.CustomVersion == nil:
errList = append(errList, field.Invalid(field.NewPath("revision"), revisionInfo, "revisionName and customVersion cannot be empty simultaneously"))
default:
revision, err := sidecarcontrol.NewHistoryControl(h.Client).GetHistorySidecarSet(obj, revisionInfo)
if err != nil || revision == nil {
errList = append(errList, field.Invalid(field.NewPath("revision"), revision, fmt.Sprintf("Cannot find specific ControllerRevision, err: %v", err)))
}
}
switch revisionInfo.Policy {
case "", appsv1alpha1.AlwaysSidecarSetInjectRevisionPolicy, appsv1alpha1.PartialSidecarSetInjectRevisionPolicy:
default:
errList = append(errList, field.Invalid(field.NewPath("revision").Child("policy"), revisionInfo, fmt.Sprintf("Invalid policy %v, supported: [%s, %s]",
revisionInfo.Policy, appsv1alpha1.AlwaysSidecarSetInjectRevisionPolicy, appsv1alpha1.PartialSidecarSetInjectRevisionPolicy)))
}
}
return errList
}
// intStrIsSet returns true when the intstr is not nil and not the default 0 value.
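// For example, a nil value and intstr.FromInt(0) are "not set", while intstr.FromInt(5) and intstr.FromString("20%") are "set".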
func intStrIsSet(i *intstr.IntOrString) bool {
if i == nil {
return false
}
if i.Type == intstr.String {
return true
}
return i.IntVal != 0
}
func validateSidecarSetUpdateStrategy(strategy *appsv1alpha1.SidecarSetUpdateStrategy, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
// if SidecarSet update strategy is RollingUpdate
if strategy.Type == appsv1alpha1.RollingUpdateSidecarSetStrategyType {
if intStrIsSet(strategy.Partition) && strategy.Selector != nil {
allErrs = append(allErrs, field.Invalid(field.NewPath("updateStrategy"), fmt.Sprintf("%++v", strategy), "Partition and Selector cannot be used together"))
}
if strategy.Selector != nil {
allErrs = append(allErrs, validateSelector(strategy.Selector, fldPath.Child("selector"))...)
}
if strategy.Partition != nil {
allErrs = append(allErrs, appsvalidation.ValidatePositiveIntOrPercent(*(strategy.Partition), fldPath.Child("partition"))...)
}
if strategy.MaxUnavailable != nil {
allErrs = append(allErrs, appsvalidation.ValidatePositiveIntOrPercent(*(strategy.MaxUnavailable), fldPath.Child("maxUnavailable"))...)
}
if err := strategy.PriorityStrategy.FieldsValidation(); err != nil {
allErrs = append(allErrs, field.Required(fldPath.Child("priorityStrategy"), err.Error()))
}
if strategy.ScatterStrategy != nil {
if err := strategy.ScatterStrategy.FieldsValidation(); err != nil {
allErrs = append(allErrs, field.Required(fldPath.Child("scatterStrategy"), err.Error()))
}
}
}
return allErrs
}
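// validateContainersForSidecarSet converts the sidecar initContainers/containers to core types,
// validates SidecarSet-specific fields (podInjectPolicy, shareVolumePolicy, transferEnv), and then
// reuses the core pod validation by assembling a fake pod.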
func validateContainersForSidecarSet(
initContainers, containers []appsv1alpha1.SidecarContainer,
coreVolumes []core.Volume, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
//validating initContainer
var coreInitContainers []core.Container
for _, container := range initContainers {
coreContainer := core.Container{}
if err := corev1.Convert_v1_Container_To_core_Container(&container.Container, &coreContainer, nil); err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Child("initContainer"), container.Container, fmt.Sprintf("Convert_v1_Container_To_core_Container failed: %v", err)))
return allErrs
}
coreInitContainers = append(coreInitContainers, coreContainer)
}
//validating container
var coreContainers []core.Container
for i, container := range containers {
idxPath := fldPath.Index(i)
if container.PodInjectPolicy != appsv1alpha1.BeforeAppContainerType && container.PodInjectPolicy != appsv1alpha1.AfterAppContainerType {
allErrs = append(allErrs, field.Invalid(fldPath.Child("container").Child("podInjectPolicy"), container.PodInjectPolicy, "unsupported pod inject policy"))
}
if container.ShareVolumePolicy.Type != appsv1alpha1.ShareVolumePolicyEnabled && container.ShareVolumePolicy.Type != appsv1alpha1.ShareVolumePolicyDisabled {
allErrs = append(allErrs, field.Invalid(fldPath.Child("container").Child("shareVolumePolicy"), container.ShareVolumePolicy, "unsupported share volume policy"))
}
allErrs = append(allErrs, validateDownwardAPI(container.TransferEnv, idxPath.Child("transferEnv"))...)
coreContainer := core.Container{}
if err := corev1.Convert_v1_Container_To_core_Container(&container.Container, &coreContainer, nil); err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Child("container"), container.Container, fmt.Sprintf("Convert_v1_Container_To_core_Container failed: %v", err)))
return allErrs
}
coreContainers = append(coreContainers, coreContainer)
}
// hack, use fakePod to reuse unexported 'validateContainers' function
var fakePod *core.Pod
if len(coreContainers) == 0 {
// hack, the ValidatePod requires containers, so create a fake coreContainer
coreContainers = []core.Container{
{
Name: "test",
Image: "busybox",
ImagePullPolicy: core.PullIfNotPresent,
TerminationMessagePolicy: core.TerminationMessageReadFile,
},
}
}
fakePod = &core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "default"},
Spec: core.PodSpec{
DNSPolicy: core.DNSClusterFirst,
RestartPolicy: core.RestartPolicyAlways,
InitContainers: coreInitContainers,
Containers: coreContainers,
Volumes: coreVolumes,
ServiceAccountName: "default",
},
}
allErrs = append(allErrs, corevalidation.ValidatePodCreate(fakePod, webhookutil.DefaultPodValidationOptions)...)
return allErrs
}
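// validateSidecarContainerConflict ensures that every container in the new spec also exists in the
// old spec and that its upgradeStrategy.upgradeType is unchanged (the upgradeType is immutable).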
func validateSidecarContainerConflict(newContainers, oldContainers []appsv1alpha1.SidecarContainer, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
oldStrategy := make(map[string]appsv1alpha1.SidecarContainerUpgradeType)
for _, container := range oldContainers {
oldStrategy[container.Name] = container.UpgradeStrategy.UpgradeType
}
for _, container := range newContainers {
if strategy, ok := oldStrategy[container.Name]; ok {
if strategy != "" && container.UpgradeStrategy.UpgradeType != strategy {
allErrs = append(allErrs, field.Invalid(fldPath.Child("upgradeStrategy").Child("upgradeType"),
container.Name, fmt.Sprintf("container %v upgradeType is immutable", container.Name)))
}
} else {
allErrs = append(allErrs, field.Invalid(fldPath.Child("name"),
container.Name, fmt.Sprintf("container %v is not found", container.Name)))
}
}
return allErrs
}
// validate whether the sidecarset's spec.container.name, spec.initContainer.name, and volume.name conflict with other sidecarsets in the cluster
func validateSidecarConflict(c client.Client, sidecarSets *appsv1alpha1.SidecarSetList, sidecarSet *appsv1alpha1.SidecarSet, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
// record initContainer, container, volume name of other sidecarsets in cluster
// container name -> sidecarset
containerInOthers := make(map[string]*appsv1alpha1.SidecarSet)
// volume name -> sidecarset
volumeInOthers := make(map[string]*appsv1alpha1.SidecarSet)
// init container name -> sidecarset
initContainerInOthers := make(map[string]*appsv1alpha1.SidecarSet)
// patch pod annotation key -> sidecarset.Name#patchPolicy
annotationsInOthers := make(map[string]string)
matchedList := make([]*appsv1alpha1.SidecarSet, 0)
for i := range sidecarSets.Items {
obj := &sidecarSets.Items[i]
if isSidecarSetNamespaceOverlapping(c, sidecarSet, obj) && util.IsSelectorOverlapping(sidecarSet.Spec.Selector, obj.Spec.Selector) {
matchedList = append(matchedList, obj)
}
}
for _, set := range matchedList {
//skip the sidecarset that is being validated itself
if set.Name == sidecarSet.Name {
continue
}
for _, container := range set.Spec.InitContainers {
initContainerInOthers[container.Name] = set
}
for _, container := range set.Spec.Containers {
containerInOthers[container.Name] = set
}
for _, volume := range set.Spec.Volumes {
volumeInOthers[volume.Name] = set
}
for _, patch := range set.Spec.PatchPodMetadata {
if patch.PatchPolicy == appsv1alpha1.SidecarSetRetainPatchPolicy {
continue
}
for key := range patch.Annotations {
annotationsInOthers[key] = fmt.Sprintf("%s#%s", set.Name, patch.PatchPolicy)
}
}
}
// whether initContainers conflict
for _, container := range sidecarSet.Spec.InitContainers {
if other, ok := initContainerInOthers[container.Name]; ok {
allErrs = append(allErrs, field.Invalid(fldPath.Child("containers"), container.Name, fmt.Sprintf(
"container %v already exist in %v", container.Name, other.Name)))
}
}
// whether containers conflict
for _, container := range sidecarSet.Spec.Containers {
if other, ok := containerInOthers[container.Name]; ok {
allErrs = append(allErrs, field.Invalid(fldPath.Child("containers"), container.Name, fmt.Sprintf(
"container %v already exist in %v", container.Name, other.Name)))
}
}
// whether volumes conflict
for _, volume := range sidecarSet.Spec.Volumes {
if other, ok := volumeInOthers[volume.Name]; ok {
if !reflect.DeepEqual(&volume, getSidecarsetVolume(volume.Name, other)) {
allErrs = append(allErrs, field.Invalid(fldPath.Child("volumes"), volume.Name, fmt.Sprintf(
"volume %s is in conflict with sidecarset %s", volume.Name, other.Name)))
}
}
}
// whether pod metadata conflict
for _, patch := range sidecarSet.Spec.PatchPodMetadata {
if patch.PatchPolicy == appsv1alpha1.SidecarSetRetainPatchPolicy {
continue
}
for key := range patch.Annotations {
other, ok := annotationsInOthers[key]
if !ok {
continue
}
slice := strings.Split(other, "#")
if patch.PatchPolicy == appsv1alpha1.SidecarSetOverwritePatchPolicy || appsv1alpha1.SidecarSetPatchPolicyType(slice[1]) == appsv1alpha1.SidecarSetOverwritePatchPolicy {
allErrs = append(allErrs, field.Invalid(fldPath.Child("patchPodMetadata"), key, fmt.Sprintf("annotation %s is in conflict with sidecarset %s", key, slice[0])))
}
}
}
return allErrs
}
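// getSidecarsetVolume returns the volume with the given name from the sidecarset's spec.volumes, or nil if it is absent.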
func getSidecarsetVolume(volumeName string, sidecarset *appsv1alpha1.SidecarSet) *v1.Volume {
for _, volume := range sidecarset.Spec.Volumes {
if volume.Name == volumeName {
return &volume
}
}
return nil
}
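// validateDownwardAPI validates each TransferEnvVar's SourceContainerNameFrom.FieldRef against the
// allowed downward-API field path expressions.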
func validateDownwardAPI(envs []appsv1alpha1.TransferEnvVar, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
for _, tEnv := range envs {
if tEnv.SourceContainerNameFrom != nil && tEnv.SourceContainerNameFrom.FieldRef != nil {
allErrs = append(allErrs, validateObjectFieldSelector(tEnv.SourceContainerNameFrom.FieldRef, &validDownwardAPIFieldPathExpressions, fldPath.Child("fieldRef"))...)
}
}
return allErrs
}
func validateObjectFieldSelector(fs *v1.ObjectFieldSelector, expressions *sets.String, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
if len(fs.APIVersion) == 0 {
allErrs = append(allErrs, field.Required(fldPath.Child("apiVersion"), ""))
return allErrs
}
if len(fs.FieldPath) == 0 {
allErrs = append(allErrs, field.Required(fldPath.Child("fieldPath"), ""))
return allErrs
}
internalFieldPath, _, err := sidecarcontrol.ConvertDownwardAPIFieldLabel(fs.APIVersion, fs.FieldPath, "")
if err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Child("fieldPath"), fs.FieldPath, fmt.Sprintf("error converting fieldPath: %v", err)))
return allErrs
}
if path, subscript, ok := fieldpath.SplitMaybeSubscriptedPath(internalFieldPath); ok {
switch path {
case "metadata.annotations":
for _, msg := range validationutil.IsQualifiedName(strings.ToLower(subscript)) {
allErrs = append(allErrs, field.Invalid(fldPath, subscript, msg))
}
case "metadata.labels":
for _, msg := range validationutil.IsQualifiedName(subscript) {
allErrs = append(allErrs, field.Invalid(fldPath, subscript, msg))
}
default:
allErrs = append(allErrs, field.Invalid(fldPath, path, "does not support subscript"))
}
} else if !expressions.Has(path) {
allErrs = append(allErrs, field.NotSupported(fldPath.Child("fieldPath"), path, expressions.List()))
return allErrs
}
return allErrs
}
var _ admission.Handler = &SidecarSetCreateUpdateHandler{}
// Handle handles admission requests.
func (h *SidecarSetCreateUpdateHandler) Handle(ctx context.Context, req admission.Request) admission.Response {
obj := &appsv1alpha1.SidecarSet{}
err := h.Decoder.Decode(req, obj)
if err != nil {
return admission.Errored(http.StatusBadRequest, err)
}
var oldSidecarSet *appsv1alpha1.SidecarSet
//when Operation is update, decode older object
if req.AdmissionRequest.Operation == admissionv1.Update {
oldSidecarSet = new(appsv1alpha1.SidecarSet)
if err := h.Decoder.Decode(
admission.Request{AdmissionRequest: admissionv1.AdmissionRequest{Object: req.AdmissionRequest.OldObject}},
oldSidecarSet); err != nil {
return admission.Errored(http.StatusBadRequest, err)
}
}
allowed, reason, err := h.validatingSidecarSetFn(ctx, obj, oldSidecarSet)
if err != nil {
return admission.Errored(http.StatusInternalServerError, err)
}
return admission.ValidationResponse(allowed, reason)
}
/*
Copyright 2020 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package validating
import (
"fmt"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/validation/field"
"k8s.io/kubernetes/pkg/apis/core"
corev1 "k8s.io/kubernetes/pkg/apis/core/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
"github.com/openkruise/kruise/pkg/control/sidecarcontrol"
"github.com/openkruise/kruise/pkg/util"
)
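// getCoreVolumes converts the v1.Volumes into core.Volumes so that they can be reused by the core pod validation.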
func getCoreVolumes(volumes []v1.Volume, fldPath *field.Path) ([]core.Volume, field.ErrorList) {
allErrs := field.ErrorList{}
var coreVolumes []core.Volume
for _, volume := range volumes {
coreVolume := core.Volume{}
if err := corev1.Convert_v1_Volume_To_core_Volume(&volume, &coreVolume, nil); err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Root(), volume, fmt.Sprintf("Convert_v1_Volume_To_core_Volume failed: %v", err)))
return nil, allErrs
}
coreVolumes = append(coreVolumes, coreVolume)
}
return coreVolumes, allErrs
}
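// isSidecarSetNamespaceOverlapping reports whether the two SidecarSets may match pods in the same
// namespace, based on their spec.namespace and spec.namespaceSelector.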
func isSidecarSetNamespaceOverlapping(c client.Client, origin *appsv1alpha1.SidecarSet, other *appsv1alpha1.SidecarSet) bool {
originNamespace := origin.Spec.Namespace
otherNamespace := other.Spec.Namespace
if originNamespace != "" && otherNamespace != "" && originNamespace != otherNamespace {
return false
}
originSelector := origin.Spec.NamespaceSelector
otherSelector := other.Spec.NamespaceSelector
if originSelector != nil && otherSelector != nil && !util.IsSelectorOverlapping(originSelector, otherSelector) {
return false
}
if originNamespace != "" && otherSelector != nil && !sidecarcontrol.IsSelectorNamespace(c, originNamespace, otherSelector) {
return false
}
if otherNamespace != "" && originSelector != nil && !sidecarcontrol.IsSelectorNamespace(c, otherNamespace, originSelector) {
return false
}
return true
}
/*
Copyright 2019 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package validating
import (
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
"github.com/openkruise/kruise/pkg/webhook/types"
)
// +kubebuilder:webhook:path=/validate-apps-kruise-io-v1alpha1-sidecarset,mutating=false,failurePolicy=fail,sideEffects=None,admissionReviewVersions=v1;v1beta1,groups=apps.kruise.io,resources=sidecarsets,verbs=create;update,versions=v1alpha1,name=vsidecarset.kb.io
var (
// HandlerGetterMap contains admission webhook handlers
HandlerGetterMap = map[string]types.HandlerGetter{
"validate-apps-kruise-io-v1alpha1-sidecarset": func(mgr manager.Manager) admission.Handler {
return &SidecarSetCreateUpdateHandler{
Client: mgr.GetClient(),
Decoder: admission.NewDecoder(mgr.GetScheme()),
}
},
}
)
/*
Copyright 2019 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package validating
import (
"context"
"fmt"
"net/http"
admissionv1 "k8s.io/api/admission/v1"
"k8s.io/klog/v2"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
"github.com/openkruise/kruise/pkg/util"
"github.com/openkruise/kruise/pkg/webhook/util/deletionprotection"
)
// UnitedDeploymentCreateUpdateHandler handles UnitedDeployment
type UnitedDeploymentCreateUpdateHandler struct {
// To use the client, you need to do the following:
// - uncomment it
// - import sigs.k8s.io/controller-runtime/pkg/client
// - uncomment the InjectClient method at the bottom of this file.
// Client client.Client
// Decoder decodes objects
Decoder admission.Decoder
}
var _ admission.Handler = &UnitedDeploymentCreateUpdateHandler{}
// Handle handles admission requests.
func (h *UnitedDeploymentCreateUpdateHandler) Handle(ctx context.Context, req admission.Request) admission.Response {
obj := &appsv1alpha1.UnitedDeployment{}
oldObj := &appsv1alpha1.UnitedDeployment{}
switch req.AdmissionRequest.Operation {
case admissionv1.Create:
if err := h.Decoder.Decode(req, obj); err != nil {
return admission.Errored(http.StatusBadRequest, err)
}
if allErrs := validateUnitedDeployment(obj); len(allErrs) > 0 {
return admission.Errored(http.StatusUnprocessableEntity, allErrs.ToAggregate())
}
case admissionv1.Update:
if err := h.Decoder.Decode(req, obj); err != nil {
return admission.Errored(http.StatusBadRequest, err)
}
if err := h.Decoder.DecodeRaw(req.AdmissionRequest.OldObject, oldObj); err != nil {
return admission.Errored(http.StatusBadRequest, err)
}
validationErrorList := validateUnitedDeployment(obj)
updateErrorList := ValidateUnitedDeploymentUpdate(obj, oldObj)
if allErrs := append(validationErrorList, updateErrorList...); len(allErrs) > 0 {
return admission.Errored(http.StatusUnprocessableEntity, allErrs.ToAggregate())
}
case admissionv1.Delete:
if len(req.OldObject.Raw) == 0 {
klog.InfoS("Skip to validate UnitedDeployment deletion for no old object, maybe because of Kubernetes version < 1.16", "namespace", req.Namespace, "name", req.Name)
return admission.ValidationResponse(true, "")
}
if err := h.Decoder.DecodeRaw(req.AdmissionRequest.OldObject, oldObj); err != nil {
return admission.Errored(http.StatusBadRequest, err)
}
if err := deletionprotection.ValidateWorkloadDeletion(oldObj, oldObj.Spec.Replicas); err != nil {
deletionprotection.WorkloadDeletionProtectionMetrics.WithLabelValues(fmt.Sprintf("%s_%s_%s", req.Kind.Kind, oldObj.GetNamespace(), oldObj.GetName()), req.UserInfo.Username).Add(1)
util.LoggerProtectionInfo(util.ProtectionEventDeletionProtection, req.Kind.Kind, oldObj.GetNamespace(), oldObj.GetName(), req.UserInfo.Username)
return admission.Errored(http.StatusForbidden, err)
}
}
return admission.ValidationResponse(true, "")
}
/*
Copyright 2019 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package validating
import (
"fmt"
"strings"
appsv1 "k8s.io/api/apps/v1"
apiequality "k8s.io/apimachinery/pkg/api/equality"
apimachineryvalidation "k8s.io/apimachinery/pkg/api/validation"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
unversionedvalidation "k8s.io/apimachinery/pkg/apis/meta/v1/validation"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/validation/field"
appsvalidation "k8s.io/kubernetes/pkg/apis/apps/validation"
"k8s.io/kubernetes/pkg/apis/core"
corev1 "k8s.io/kubernetes/pkg/apis/core/v1"
apivalidation "k8s.io/kubernetes/pkg/apis/core/validation"
"k8s.io/utils/pointer"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
udctrl "github.com/openkruise/kruise/pkg/controller/uniteddeployment"
webhookutil "github.com/openkruise/kruise/pkg/webhook/util"
"github.com/openkruise/kruise/pkg/webhook/util/convertor"
)
// validateUnitedDeploymentSpec tests if required fields in the UnitedDeployment spec are set.
func validateUnitedDeploymentSpec(spec *appsv1alpha1.UnitedDeploymentSpec, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
if spec.Replicas != nil {
allErrs = append(allErrs, apivalidation.ValidateNonnegativeField(int64(*spec.Replicas), fldPath.Child("replicas"))...)
}
if spec.Selector == nil {
allErrs = append(allErrs, field.Required(fldPath.Child("selector"), ""))
} else {
allErrs = append(allErrs, unversionedvalidation.ValidateLabelSelector(spec.Selector, unversionedvalidation.LabelSelectorValidationOptions{}, fldPath.Child("selector"))...)
if len(spec.Selector.MatchLabels)+len(spec.Selector.MatchExpressions) == 0 {
allErrs = append(allErrs, field.Invalid(fldPath.Child("selector"), spec.Selector, "empty selector is invalid for statefulset"))
}
}
selector, err := metav1.LabelSelectorAsSelector(spec.Selector)
if err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Child("selector"), spec.Selector, ""))
} else {
allErrs = append(allErrs, validateSubsetTemplate(&spec.Template, selector, fldPath.Child("template"))...)
}
allErrs = append(allErrs, validateSubsetReplicas(spec.Replicas, spec.Topology.Subsets, fldPath.Child("topology", "subsets"))...)
subSetNames := sets.String{}
for i, subset := range spec.Topology.Subsets {
if len(subset.Name) == 0 {
allErrs = append(allErrs, field.Required(fldPath.Child("topology", "subsets").Index(i).Child("name"), ""))
}
if subSetNames.Has(subset.Name) {
allErrs = append(allErrs, field.Invalid(fldPath.Child("topology", "subsets").Index(i).Child("name"), subset.Name, fmt.Sprintf("duplicated subset name %s", subset.Name)))
}
subSetNames.Insert(subset.Name)
if errs := apimachineryvalidation.NameIsDNSLabel(subset.Name, false); len(errs) > 0 {
allErrs = append(allErrs, field.Invalid(fldPath.Child("topology", "subsets").Index(i).Child("name"), subset.Name, fmt.Sprintf("invalid subset name %s", strings.Join(errs, ", "))))
}
coreNodeSelectorTerm := &core.NodeSelectorTerm{}
if err := corev1.Convert_v1_NodeSelectorTerm_To_core_NodeSelectorTerm(subset.NodeSelectorTerm.DeepCopy(), coreNodeSelectorTerm, nil); err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Child("topology", "subsets").Index(i).Child("nodeSelectorTerm"), subset.NodeSelectorTerm, fmt.Sprintf("Convert_v1_NodeSelectorTerm_To_core_NodeSelectorTerm failed: %v", err)))
} else {
allErrs = append(allErrs, apivalidation.ValidateNodeSelectorTerm(*coreNodeSelectorTerm, fldPath.Child("topology", "subsets").Index(i).Child("nodeSelectorTerm"))...)
}
if subset.Tolerations != nil {
var coreTolerations []core.Toleration
for i, toleration := range subset.Tolerations {
coreToleration := &core.Toleration{}
if err := corev1.Convert_v1_Toleration_To_core_Toleration(&toleration, coreToleration, nil); err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Child("topology", "subsets").Index(i).Child("tolerations"), subset.Tolerations, fmt.Sprintf("Convert_v1_Toleration_To_core_Toleration failed: %v", err)))
} else {
coreTolerations = append(coreTolerations, *coreToleration)
}
}
allErrs = append(allErrs, apivalidation.ValidateTolerations(coreTolerations, fldPath.Child("topology", "subsets").Index(i).Child("tolerations"))...)
}
if subset.Replicas == nil {
continue
}
}
if spec.UpdateStrategy.ManualUpdate != nil {
for subset := range spec.UpdateStrategy.ManualUpdate.Partitions {
if !subSetNames.Has(subset) {
allErrs = append(allErrs, field.Invalid(fldPath.Child("updateStrategy", "partitions"), spec.UpdateStrategy.ManualUpdate.Partitions, fmt.Sprintf("subset %s does not exist", subset)))
}
}
}
return allErrs
}
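// examplePartitionKeysExist is a hedged sketch (illustration only, not called by the
// webhook) of the manual-update rule enforced above: every key in
// updateStrategy.manualUpdate.partitions must name a subset declared in
// topology.subsets. The partition value type below is assumed for illustration.
func examplePartitionKeysExist(subsetNames sets.String, partitions map[string]int32) []string {
	var unknown []string
	for name := range partitions {
		if !subsetNames.Has(name) {
			// e.g. partitions {"subset-x": 2} against subsets {"subset-a", "subset-b"}
			unknown = append(unknown, name)
		}
	}
	return unknown
}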
func validateSubsetReplicas(expectedReplicas *int32, subsets []appsv1alpha1.Subset, fldPath *field.Path) field.ErrorList {
var (
sumReplicas = int64(0)
sumMinReplicas = int64(0)
sumMaxReplicas = int64(0)
countReplicas = 0
countMaxReplicas = 0
hasReplicasSettings = false
hasCapacitySettings = false
err error
errList field.ErrorList
)
if expectedReplicas == nil {
expectedReplicas = pointer.Int32(-1)
}
for i, subset := range subsets {
replicas := int32(0)
if subset.Replicas != nil {
countReplicas++
hasReplicasSettings = true
replicas, err = udctrl.ParseSubsetReplicas(*expectedReplicas, *subset.Replicas)
if err != nil {
errList = append(errList, field.Invalid(fldPath.Index(i).Child("replicas"), subset.Replicas, err.Error()))
}
}
sumReplicas += int64(replicas)
minReplicas := int32(0)
if subset.MinReplicas != nil {
hasCapacitySettings = true
minReplicas, err = udctrl.ParseSubsetReplicas(*expectedReplicas, *subset.MinReplicas)
if err != nil {
errList = append(errList, field.Invalid(fldPath.Index(i).Child("minReplicas"), subset.MinReplicas, err.Error()))
}
}
sumMinReplicas += int64(minReplicas)
maxReplicas := int32(1000000)
if subset.MaxReplicas != nil {
countMaxReplicas++
hasCapacitySettings = true
maxReplicas, err = udctrl.ParseSubsetReplicas(*expectedReplicas, *subset.MaxReplicas)
if err != nil {
errList = append(errList, field.Invalid(fldPath.Index(i).Child("maxReplicas"), subset.MaxReplicas, err.Error()))
}
}
sumMaxReplicas += int64(maxReplicas)
if minReplicas > maxReplicas {
errList = append(errList, field.Invalid(fldPath.Index(i).Child("minReplicas"), subset.MinReplicas,
fmt.Sprintf("subset[%d].minReplicas must be less than or equal to maxReplicas", i)))
}
}
if hasReplicasSettings && hasCapacitySettings {
errList = append(errList, field.Invalid(fldPath, subsets, "subset.Replicas and subset.MinReplicas/subset.MaxReplicas are mutually exclusive in a UnitedDeployment"))
return errList
}
if hasCapacitySettings {
if *expectedReplicas == -1 {
errList = append(errList, field.Invalid(fldPath, expectedReplicas, "spec.replicas must not be empty if subset.minReplicas/maxReplicas is set"))
}
if countMaxReplicas >= len(subsets) {
errList = append(errList, field.Invalid(fldPath, countMaxReplicas, "at least one subset.maxReplicas must be empty"))
}
if sumMinReplicas > sumMaxReplicas {
errList = append(errList, field.Invalid(fldPath, sumMinReplicas, "sum of indicated subset.minReplicas should not be greater than sum of indicated subset.maxReplicas"))
}
} else {
if *expectedReplicas != -1 {
// sum of subset replicas may be less than UnitedDeployment replicas
if sumReplicas > int64(*expectedReplicas) {
errList = append(errList, field.Invalid(fldPath, sumReplicas, fmt.Sprintf("sum of indicated subset replicas %d should not be greater than UnitedDeployment replicas %d", sumReplicas, expectedReplicas)))
}
if countReplicas > 0 && countReplicas == len(subsets) && sumReplicas != int64(*expectedReplicas) {
errList = append(errList, field.Invalid(fldPath, sumReplicas, fmt.Sprintf("if replicas of all subsets are provided, the sum of indicated subset replicas %d should equal UnitedDeployment replicas %d", sumReplicas, expectedReplicas)))
}
} else if countReplicas != len(subsets) {
// validate that the replicas of all subsets are provided
errList = append(errList, field.Invalid(fldPath, sumReplicas, "if UnitedDeployment replicas is not provided, replicas of all subsets should be provided"))
}
}
return errList
}
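// exampleSubsetCapacityRules is a hedged sketch (illustration only) that restates the
// capacity checks above with plain integers: each subset's minReplicas may not exceed
// its maxReplicas, and the summed minReplicas may not exceed the summed maxReplicas.
// In the real validation an unset maxReplicas behaves as effectively unbounded.
func exampleSubsetCapacityRules(mins, maxs []int64) error {
	var sumMin, sumMax int64
	for i := range mins {
		if mins[i] > maxs[i] {
			return fmt.Errorf("subset[%d].minReplicas must be less than or equal to maxReplicas", i)
		}
		sumMin += mins[i]
		sumMax += maxs[i]
	}
	if sumMin > sumMax {
		return fmt.Errorf("sum of minReplicas %d must not exceed sum of maxReplicas %d", sumMin, sumMax)
	}
	return nil
}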
// validateUnitedDeployment validates a UnitedDeployment.
func validateUnitedDeployment(unitedDeployment *appsv1alpha1.UnitedDeployment) field.ErrorList {
allErrs := apivalidation.ValidateObjectMeta(&unitedDeployment.ObjectMeta, true, apimachineryvalidation.NameIsDNSSubdomain, field.NewPath("metadata"))
allErrs = append(allErrs, validateUnitedDeploymentSpec(&unitedDeployment.Spec, field.NewPath("spec"))...)
return allErrs
}
// ValidateUnitedDeploymentUpdate tests if required fields in the UnitedDeployment are set.
func ValidateUnitedDeploymentUpdate(unitedDeployment, oldUnitedDeployment *appsv1alpha1.UnitedDeployment) field.ErrorList {
allErrs := apivalidation.ValidateObjectMetaUpdate(&unitedDeployment.ObjectMeta, &oldUnitedDeployment.ObjectMeta, field.NewPath("metadata"))
allErrs = append(allErrs, validateUnitedDeploymentSpecUpdate(&unitedDeployment.Spec, &oldUnitedDeployment.Spec, field.NewPath("spec"))...)
if unitedDeployment.Spec.Replicas != nil {
allErrs = append(allErrs, apivalidation.ValidateNonnegativeField(int64(*unitedDeployment.Spec.Replicas), field.NewPath("spec", "replicas"))...)
}
return allErrs
}
func validateUnitedDeploymentSpecUpdate(spec, oldSpec *appsv1alpha1.UnitedDeploymentSpec, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
allErrs = append(allErrs, validateSubsetTemplateUpdate(&spec.Template, &oldSpec.Template, fldPath.Child("template"))...)
allErrs = append(allErrs, validateUnitedDeploymentTopology(&spec.Topology, &oldSpec.Topology, fldPath.Child("topology"))...)
return allErrs
}
func validateUnitedDeploymentTopology(topology, oldTopology *appsv1alpha1.Topology, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
if topology == nil || oldTopology == nil {
return allErrs
}
oldSubsets := map[string]*appsv1alpha1.Subset{}
for i, subset := range oldTopology.Subsets {
oldSubsets[subset.Name] = &oldTopology.Subsets[i]
}
for i, subset := range topology.Subsets {
if oldSubset, exist := oldSubsets[subset.Name]; exist {
if !apiequality.Semantic.DeepEqual(oldSubset.NodeSelectorTerm, subset.NodeSelectorTerm) {
allErrs = append(allErrs, field.Forbidden(fldPath.Child("subsets").Index(i).Child("nodeSelectorTerm"), "may not be changed in an update"))
}
if !apiequality.Semantic.DeepEqual(oldSubset.Tolerations, subset.Tolerations) {
allErrs = append(allErrs, field.Forbidden(fldPath.Child("subsets").Index(i).Child("tolerations"), "may not be changed in an update"))
}
}
}
return allErrs
}
func validateSubsetTemplateUpdate(template, oldTemplate *appsv1alpha1.SubsetTemplate, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
if template.StatefulSetTemplate != nil && oldTemplate.StatefulSetTemplate != nil {
allErrs = append(allErrs, validateStatefulSetUpdate(template.StatefulSetTemplate, oldTemplate.StatefulSetTemplate, fldPath.Child("statefulSetTemplate"))...)
} else if template.AdvancedStatefulSetTemplate != nil && oldTemplate.AdvancedStatefulSetTemplate != nil {
allErrs = append(allErrs, validateAdvancedStatefulSetUpdate(template.AdvancedStatefulSetTemplate, oldTemplate.AdvancedStatefulSetTemplate, fldPath.Child("advancedStatefulSetTemplate"))...)
} else if template.DeploymentTemplate != nil && oldTemplate.DeploymentTemplate != nil {
allErrs = append(allErrs, validateDeploymentUpdate(template.DeploymentTemplate, oldTemplate.DeploymentTemplate, fldPath.Child("deploymentTemplate"))...)
}
return allErrs
}
func validateSubsetTemplate(template *appsv1alpha1.SubsetTemplate, selector labels.Selector, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
var templateCount int
if template.StatefulSetTemplate != nil {
templateCount++
}
if template.AdvancedStatefulSetTemplate != nil {
templateCount++
}
if template.CloneSetTemplate != nil {
templateCount++
}
if template.DeploymentTemplate != nil {
templateCount++
}
if templateCount < 1 {
allErrs = append(allErrs, field.Required(fldPath, "should provide one of statefulSetTemplate, advancedStatefulSetTemplate, cloneSetTemplate, or deploymentTemplate"))
} else if templateCount > 1 {
allErrs = append(allErrs, field.Invalid(fldPath, template, "should provide only one of statefulSetTemplate, advancedStatefulSetTemplate, cloneSetTemplate, or deploymentTemplate"))
}
if template.StatefulSetTemplate != nil {
labels := labels.Set(template.StatefulSetTemplate.Labels)
if !selector.Matches(labels) {
allErrs = append(allErrs, field.Invalid(fldPath.Child("statefulSetTemplate", "metadata", "labels"), template.StatefulSetTemplate.Labels, "`selector` does not match template `labels`"))
}
allErrs = append(allErrs, validateStatefulSet(template.StatefulSetTemplate, fldPath.Child("statefulSetTemplate"))...)
template := template.StatefulSetTemplate.Spec.Template
coreTemplate, err := convertor.ConvertPodTemplateSpec(&template)
if err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Root(), template, fmt.Sprintf("Convert_v1_PodTemplateSpec_To_core_PodTemplateSpec failed: %v", err)))
return allErrs
}
allErrs = append(allErrs, appsvalidation.ValidatePodTemplateSpecForStatefulSet(coreTemplate, selector, fldPath.Child("statefulSetTemplate", "spec", "template"), webhookutil.DefaultPodValidationOptions)...)
} else if template.AdvancedStatefulSetTemplate != nil {
labels := labels.Set(template.AdvancedStatefulSetTemplate.Labels)
if !selector.Matches(labels) {
allErrs = append(allErrs, field.Invalid(fldPath.Child("statefulSetTemplate", "metadata", "labels"), template.AdvancedStatefulSetTemplate.Labels, "`selector` does not match template `labels`"))
}
allErrs = append(allErrs, validateAdvancedStatefulSet(template.AdvancedStatefulSetTemplate, fldPath.Child("advancedStatefulSetTemplate"))...)
template := template.AdvancedStatefulSetTemplate.Spec.Template
coreTemplate, err := convertor.ConvertPodTemplateSpec(&template)
if err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Root(), template, fmt.Sprintf("Convert_v1_PodTemplateSpec_To_core_PodTemplateSpec failed: %v", err)))
return allErrs
}
allErrs = append(allErrs, appsvalidation.ValidatePodTemplateSpecForStatefulSet(coreTemplate, selector, fldPath.Child("advancedStatefulSetTemplate", "spec", "template"), webhookutil.DefaultPodValidationOptions)...)
} else if template.DeploymentTemplate != nil {
labels := labels.Set(template.DeploymentTemplate.Labels)
if !selector.Matches(labels) {
allErrs = append(allErrs, field.Invalid(fldPath.Child("deploymentTemplate", "metadata", "labels"), template.DeploymentTemplate.Labels, "`selector` does not match template `labels`"))
}
allErrs = append(allErrs, validateDeployment(template.DeploymentTemplate, fldPath.Child("deploymentTemplate"))...)
template := template.DeploymentTemplate.Spec.Template
coreTemplate, err := convertor.ConvertPodTemplateSpec(&template)
if err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Root(), template, fmt.Sprintf("Convert_v1_PodTemplateSpec_To_core_PodTemplateSpec failed: %v", err)))
return allErrs
}
allErrs = append(allErrs, appsvalidation.ValidatePodTemplateSpecForReplicaSet(coreTemplate, nil, selector, 0, fldPath.Child("deploymentTemplate", "spec", "template"), webhookutil.DefaultPodValidationOptions)...)
}
return allErrs
}
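// exampleSelectorMatchesTemplateLabels is a hedged sketch of the label check used in
// validateSubsetTemplate above: spec.selector must match the labels carried by the
// chosen workload template, otherwise the template is rejected. The label values are
// made up for illustration.
func exampleSelectorMatchesTemplateLabels() bool {
	selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{MatchLabels: map[string]string{"app": "demo"}})
	if err != nil {
		return false
	}
	// true only when the template labels include app=demo
	return selector.Matches(labels.Set{"app": "demo"})
}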
func validateStatefulSet(statefulSet *appsv1alpha1.StatefulSetTemplateSpec, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
if statefulSet.Spec.Replicas != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Child("spec", "replicas"), *statefulSet.Spec.Replicas, "replicas in statefulSetTemplate will not be used"))
}
if statefulSet.Spec.UpdateStrategy.Type == appsv1.RollingUpdateStatefulSetStrategyType &&
statefulSet.Spec.UpdateStrategy.RollingUpdate != nil &&
statefulSet.Spec.UpdateStrategy.RollingUpdate.Partition != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Child("spec", "updateStrategy", "rollingUpdate", "partition"), *statefulSet.Spec.UpdateStrategy.RollingUpdate.Partition, "partition in statefulSetTemplate will not be used"))
}
return allErrs
}
func validateAdvancedStatefulSet(statefulSet *appsv1alpha1.AdvancedStatefulSetTemplateSpec, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
if statefulSet.Spec.Replicas != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Child("spec", "replicas"), *statefulSet.Spec.Replicas, "replicas in advancedStatefulSetTemplate will not be used"))
}
if statefulSet.Spec.UpdateStrategy.Type == appsv1.RollingUpdateStatefulSetStrategyType &&
statefulSet.Spec.UpdateStrategy.RollingUpdate != nil &&
statefulSet.Spec.UpdateStrategy.RollingUpdate.Partition != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Child("spec", "updateStrategy", "rollingUpdate", "partition"), *statefulSet.Spec.UpdateStrategy.RollingUpdate.Partition, "partition in advancedStatefulSetTemplate will not be used"))
}
return allErrs
}
func validateDeployment(deployment *appsv1alpha1.DeploymentTemplateSpec, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
if deployment.Spec.Replicas != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Child("spec", "replicas"), *deployment.Spec.Replicas, "replicas in deploymentTemplate will not be used"))
}
return allErrs
}
func validateStatefulSetUpdate(statefulSet, oldStatefulSet *appsv1alpha1.StatefulSetTemplateSpec, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
restoreReplicas := statefulSet.Spec.Replicas
statefulSet.Spec.Replicas = oldStatefulSet.Spec.Replicas
restoreTemplate := statefulSet.Spec.Template
statefulSet.Spec.Template = oldStatefulSet.Spec.Template
restoreStrategy := statefulSet.Spec.UpdateStrategy
statefulSet.Spec.UpdateStrategy = oldStatefulSet.Spec.UpdateStrategy
if !apiequality.Semantic.DeepEqual(statefulSet.Spec, oldStatefulSet.Spec) {
allErrs = append(allErrs, field.Forbidden(fldPath.Child("spec"), "updates to statefulsetTemplate spec for fields other than 'template', and 'updateStrategy' are forbidden"))
}
statefulSet.Spec.Replicas = restoreReplicas
statefulSet.Spec.Template = restoreTemplate
statefulSet.Spec.UpdateStrategy = restoreStrategy
if statefulSet.Spec.Replicas != nil {
allErrs = append(allErrs, apivalidation.ValidateNonnegativeField(int64(*statefulSet.Spec.Replicas), fldPath.Child("spec", "replicas"))...)
}
return allErrs
}
func validateAdvancedStatefulSetUpdate(statefulSet, oldStatefulSet *appsv1alpha1.AdvancedStatefulSetTemplateSpec, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
restoreReplicas := statefulSet.Spec.Replicas
statefulSet.Spec.Replicas = oldStatefulSet.Spec.Replicas
restoreTemplate := statefulSet.Spec.Template
statefulSet.Spec.Template = oldStatefulSet.Spec.Template
restoreStrategy := statefulSet.Spec.UpdateStrategy
statefulSet.Spec.UpdateStrategy = oldStatefulSet.Spec.UpdateStrategy
if !apiequality.Semantic.DeepEqual(statefulSet.Spec, oldStatefulSet.Spec) {
allErrs = append(allErrs, field.Forbidden(fldPath.Child("spec"), "updates to advancedStatefulsetTemplate spec for fields other than 'template', and 'updateStrategy' are forbidden"))
}
statefulSet.Spec.Replicas = restoreReplicas
statefulSet.Spec.Template = restoreTemplate
statefulSet.Spec.UpdateStrategy = restoreStrategy
if statefulSet.Spec.Replicas != nil {
allErrs = append(allErrs, apivalidation.ValidateNonnegativeField(int64(*statefulSet.Spec.Replicas), fldPath.Child("spec", "replicas"))...)
}
return allErrs
}
func validateDeploymentUpdate(deployment, oldDeployment *appsv1alpha1.DeploymentTemplateSpec, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
if deployment.Spec.Replicas != nil {
allErrs = append(allErrs, apivalidation.ValidateNonnegativeField(int64(*deployment.Spec.Replicas), fldPath.Child("spec", "replicas"))...)
}
return allErrs
}
/*
Copyright 2019 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package validating
import (
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
"github.com/openkruise/kruise/pkg/webhook/types"
)
// +kubebuilder:webhook:path=/validate-apps-kruise-io-v1alpha1-uniteddeployment,mutating=false,failurePolicy=fail,sideEffects=None,admissionReviewVersions=v1;v1beta1,groups=apps.kruise.io,resources=uniteddeployments,verbs=create;update;delete,versions=v1alpha1,name=vuniteddeployment.kb.io
var (
// HandlerGetterMap contains admission webhook handlers
HandlerGetterMap = map[string]types.HandlerGetter{
"validate-apps-kruise-io-v1alpha1-uniteddeployment": func(mgr manager.Manager) admission.Handler {
return &UnitedDeploymentCreateUpdateHandler{Decoder: admission.NewDecoder(mgr.GetScheme())}
},
}
)
/*
Copyright 2021 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package validating
import (
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
"github.com/openkruise/kruise/pkg/webhook/types"
)
// +kubebuilder:webhook:path=/validate-apps-kruise-io-v1alpha1-workloadspread,mutating=false,failurePolicy=fail,sideEffects=None,admissionReviewVersions=v1;v1beta1,groups=apps.kruise.io,resources=workloadspreads,verbs=create;update,versions=v1alpha1,name=vworkloadspread.kb.io
var (
// HandlerGetterMap contains admission webhook handlers
HandlerGetterMap = map[string]types.HandlerGetter{
"validate-apps-kruise-io-v1alpha1-workloadspread": func(mgr manager.Manager) admission.Handler {
return &WorkloadSpreadCreateUpdateHandler{
Client: mgr.GetClient(),
Decoder: admission.NewDecoder(mgr.GetScheme()),
}
},
}
)
/*
Copyright 2021 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package validating
import (
"context"
"fmt"
"net/http"
admissionv1 "k8s.io/api/admission/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
"github.com/openkruise/kruise/pkg/features"
utilfeature "github.com/openkruise/kruise/pkg/util/feature"
)
// WorkloadSpreadCreateUpdateHandler handles WorkloadSpread
type WorkloadSpreadCreateUpdateHandler struct {
// Client reads the referenced workloads and other WorkloadSpread objects during validation.
Client client.Client
// Decoder decodes objects
Decoder admission.Decoder
}
var _ admission.Handler = &WorkloadSpreadCreateUpdateHandler{}
// Handle handles admission requests.
func (h *WorkloadSpreadCreateUpdateHandler) Handle(ctx context.Context, req admission.Request) admission.Response {
obj := &appsv1alpha1.WorkloadSpread{}
oldObj := &appsv1alpha1.WorkloadSpread{}
if !utilfeature.DefaultFeatureGate.Enabled(features.WorkloadSpread) {
return admission.Errored(http.StatusForbidden, fmt.Errorf("feature-gate %s is not enabled", features.WorkloadSpread))
}
switch req.AdmissionRequest.Operation {
case admissionv1.Create:
if err := h.Decoder.Decode(req, obj); err != nil {
return admission.Errored(http.StatusBadRequest, err)
}
if allErrs := h.validatingWorkloadSpreadFn(obj); len(allErrs) > 0 {
return admission.Errored(http.StatusBadRequest, allErrs.ToAggregate())
}
case admissionv1.Update:
if err := h.Decoder.Decode(req, obj); err != nil {
return admission.Errored(http.StatusBadRequest, err)
}
if err := h.Decoder.DecodeRaw(req.AdmissionRequest.OldObject, oldObj); err != nil {
return admission.Errored(http.StatusBadRequest, err)
}
validationErrorList := h.validatingWorkloadSpreadFn(obj)
updateErrorList := validateWorkloadSpreadUpdate(obj, oldObj)
if allErrs := append(validationErrorList, updateErrorList...); len(allErrs) > 0 {
return admission.Errored(http.StatusBadRequest, allErrs.ToAggregate())
}
}
return admission.ValidationResponse(true, "")
}
/*
Copyright 2021 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package validating
import (
"context"
"encoding/json"
"fmt"
"math"
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/strategicpatch"
appsv1 "k8s.io/api/apps/v1"
batchv1 "k8s.io/api/batch/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/validation/field"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/apis/core"
corev1 "k8s.io/kubernetes/pkg/apis/core/v1"
corevalidation "k8s.io/kubernetes/pkg/apis/core/validation"
"sigs.k8s.io/controller-runtime/pkg/client"
webhookutil "github.com/openkruise/kruise/pkg/webhook/util"
"github.com/openkruise/kruise/pkg/webhook/util/convertor"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
appsvbeta1 "github.com/openkruise/kruise/apis/apps/v1beta1"
"github.com/openkruise/kruise/pkg/util/configuration"
)
const (
MaxScheduledFailedDuration = 300 * time.Second
)
var (
controllerKruiseKindCS = appsv1alpha1.SchemeGroupVersion.WithKind("CloneSet")
controllerKindSts = appsv1.SchemeGroupVersion.WithKind("StatefulSet")
controllerKindRS = appsv1.SchemeGroupVersion.WithKind("ReplicaSet")
controllerKindDep = appsv1.SchemeGroupVersion.WithKind("Deployment")
controllerKindJob = batchv1.SchemeGroupVersion.WithKind("Job")
controllerKruiseKindBetaSts = appsvbeta1.SchemeGroupVersion.WithKind("StatefulSet")
controllerKruiseKindAlphaSts = appsv1alpha1.SchemeGroupVersion.WithKind("StatefulSet")
)
func verifyGroupKind(ref *appsv1alpha1.TargetReference, expectedKind string, expectedGroups []string) (bool, error) {
gv, err := schema.ParseGroupVersion(ref.APIVersion)
if err != nil {
klog.ErrorS(err, "failed to parse GroupVersion for apiVersion", "apiVersion", ref.APIVersion)
return false, err
}
if ref.Kind != expectedKind {
return false, nil
}
for _, group := range expectedGroups {
if group == gv.Group {
return true, nil
}
}
return false, nil
}
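// exampleVerifyGroupKind is a hedged usage sketch: verifyGroupKind compares only the
// Kind and the API group of the reference and ignores the version, so an "apps/v1"
// Deployment reference matches the expected group "apps". The reference values below
// are made up for illustration.
func exampleVerifyGroupKind() bool {
	ref := &appsv1alpha1.TargetReference{APIVersion: "apps/v1", Kind: "Deployment", Name: "sample"}
	ok, err := verifyGroupKind(ref, controllerKindDep.Kind, []string{controllerKindDep.Group})
	return ok && err == nil
}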
func (h *WorkloadSpreadCreateUpdateHandler) validatingWorkloadSpreadFn(obj *appsv1alpha1.WorkloadSpread) field.ErrorList {
// validate ws.spec.
allErrs := validateWorkloadSpreadSpec(h, obj, field.NewPath("spec"))
// validate whether ws.spec.targetRef is in conflict with others.
wsList := &appsv1alpha1.WorkloadSpreadList{}
if err := h.Client.List(context.TODO(), wsList, &client.ListOptions{Namespace: obj.Namespace}); err != nil {
allErrs = append(allErrs, field.InternalError(field.NewPath(""), fmt.Errorf("query other WorkloadSpread failed, err: %v", err)))
} else {
allErrs = append(allErrs, validateWorkloadSpreadConflict(obj, wsList.Items, field.NewPath("spec"))...)
}
return allErrs
}
func validateWorkloadSpreadSpec(h *WorkloadSpreadCreateUpdateHandler, obj *appsv1alpha1.WorkloadSpread, fldPath *field.Path) field.ErrorList {
spec := &obj.Spec
allErrs := field.ErrorList{}
var workloadTemplate client.Object
// validate targetRef
if spec.TargetReference == nil {
allErrs = append(allErrs, field.Required(fldPath.Child("targetRef"), "no targetRef defined in WorkloadSpread"))
} else {
if spec.TargetReference.APIVersion == "" || spec.TargetReference.Name == "" || spec.TargetReference.Kind == "" {
allErrs = append(allErrs, field.Invalid(fldPath.Child("targetRef"), spec.TargetReference, "empty TargetReference is not valid for WorkloadSpread."))
} else {
switch spec.TargetReference.Kind {
case controllerKruiseKindCS.Kind:
ok, err := verifyGroupKind(spec.TargetReference, controllerKruiseKindCS.Kind, []string{controllerKruiseKindCS.Group})
if !ok || err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Child("targetRef"), spec.TargetReference, "TargetReference is not valid for CloneSet."))
} else {
set := &appsv1alpha1.CloneSet{}
if getErr := h.Client.Get(context.TODO(), client.ObjectKey{Name: spec.TargetReference.Name, Namespace: obj.Namespace}, set); getErr == nil {
workloadTemplate = set
}
}
case controllerKindDep.Kind:
ok, err := verifyGroupKind(spec.TargetReference, controllerKindDep.Kind, []string{controllerKindDep.Group})
if !ok || err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Child("targetRef"), spec.TargetReference, "TargetReference is not valid for Deployment."))
} else {
set := &appsv1.Deployment{}
if getErr := h.Client.Get(context.TODO(), client.ObjectKey{Name: spec.TargetReference.Name, Namespace: obj.Namespace}, set); getErr == nil {
workloadTemplate = set
}
}
case controllerKindRS.Kind:
ok, err := verifyGroupKind(spec.TargetReference, controllerKindRS.Kind, []string{controllerKindRS.Group})
if !ok || err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Child("targetRef"), spec.TargetReference, "TargetReference is not valid for ReplicaSet."))
} else {
set := &appsv1.ReplicaSet{}
if getErr := h.Client.Get(context.TODO(), client.ObjectKey{Name: spec.TargetReference.Name, Namespace: obj.Namespace}, set); getErr == nil {
workloadTemplate = set
}
}
case controllerKindJob.Kind:
ok, err := verifyGroupKind(spec.TargetReference, controllerKindJob.Kind, []string{controllerKindJob.Group})
if !ok || err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Child("targetRef"), spec.TargetReference, "TargetReference is not valid for Job."))
} else {
set := &batchv1.Job{}
if getErr := h.Client.Get(context.TODO(), client.ObjectKey{Name: spec.TargetReference.Name, Namespace: obj.Namespace}, set); getErr == nil {
workloadTemplate = set
}
}
case controllerKindSts.Kind:
ok, err := verifyGroupKind(spec.TargetReference, controllerKindSts.Kind, []string{controllerKindSts.Group, controllerKruiseKindAlphaSts.Group, controllerKruiseKindBetaSts.Group})
if !ok || err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Child("targetRef"), spec.TargetReference, "TargetReference is not valid for StatefulSet."))
} else {
set := &appsv1.StatefulSet{}
if getErr := h.Client.Get(context.TODO(), client.ObjectKey{Name: spec.TargetReference.Name, Namespace: obj.Namespace}, set); getErr == nil {
workloadTemplate = set
}
}
default:
whiteList, err := configuration.GetWSWatchCustomWorkloadWhiteList(h.Client)
if err != nil {
allErrs = append(allErrs, field.InternalError(fldPath.Child("targetRef"), err))
break
}
matched := false
for _, wl := range whiteList.Workloads {
if ok, _ := verifyGroupKind(spec.TargetReference, wl.Kind, []string{wl.Group}); ok {
matched = true
break
}
}
if !matched {
allErrs = append(allErrs, field.Invalid(fldPath.Child("targetRef"), spec.TargetReference, "TargetReference's GroupKind is not permitted."))
}
}
}
}
// validate subsets
allErrs = append(allErrs, validateWorkloadSpreadSubsets(obj, spec.Subsets, workloadTemplate, fldPath.Child("subsets"))...)
// validate scheduleStrategy
if spec.ScheduleStrategy.Type != "" &&
spec.ScheduleStrategy.Type != appsv1alpha1.FixedWorkloadSpreadScheduleStrategyType &&
spec.ScheduleStrategy.Type != appsv1alpha1.AdaptiveWorkloadSpreadScheduleStrategyType {
allErrs = append(allErrs, field.Invalid(fldPath.Child("scheduleStrategy").Child("type"),
spec.ScheduleStrategy.Type, "ScheduleStrategy's type is not valid"))
}
if spec.ScheduleStrategy.Adaptive != nil {
if spec.ScheduleStrategy.Type != appsv1alpha1.AdaptiveWorkloadSpreadScheduleStrategyType {
allErrs = append(allErrs, field.Invalid(fldPath.Child("scheduleStrategy").Child("type"),
spec.ScheduleStrategy.Adaptive.RescheduleCriticalSeconds, "the scheduleStrategy's type must be adaptive when using adaptive scheduleStrategy"))
}
if len(spec.Subsets) > 1 && spec.Subsets[len(spec.Subsets)-1].MaxReplicas != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Child("scheduleStrategy").Child("adaptive"),
spec.ScheduleStrategy.Adaptive.RescheduleCriticalSeconds, "the last subset's maxReplicas must not be specified when using adaptive scheduleStrategy"))
}
allowedMaxSeconds := int32(math.MaxInt32)
if len(spec.Subsets) > 1 {
// This constraint avoids the case where a pod is rescheduled among unschedulable subsets over and over again.
// MaxScheduledFailedDuration is the maximum safe value in theory.
// Deducting 5 seconds accounts for reconcile cost and similar overhead.
allowedMaxSeconds = int32(MaxScheduledFailedDuration.Seconds()-5) / int32(len(spec.Subsets)-1)
}
if spec.ScheduleStrategy.Adaptive.RescheduleCriticalSeconds != nil &&
(*spec.ScheduleStrategy.Adaptive.RescheduleCriticalSeconds < 0 || *spec.ScheduleStrategy.Adaptive.RescheduleCriticalSeconds > allowedMaxSeconds) {
allErrs = append(allErrs, field.Invalid(fldPath.Child("scheduleStrategy").Child("adaptive").Child("rescheduleCriticalSeconds"),
spec.ScheduleStrategy.Adaptive.RescheduleCriticalSeconds, fmt.Sprintf("rescheduleCriticalSeconds < 0 or rescheduleCriticalSeconds > %d is not permitted", allowedMaxSeconds)))
}
}
// validate targetFilter
if spec.TargetFilter != nil {
if _, err := metav1.LabelSelectorAsSelector(spec.TargetFilter.Selector); err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Child("targetFilter"), spec.TargetFilter, err.Error()))
}
}
return allErrs
}
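// exampleRescheduleCriticalSecondsBound is a hedged sketch of the upper bound derived
// above: with MaxScheduledFailedDuration of 300s, 5s deducted for reconcile overhead,
// and 3 subsets, the allowed maximum is (300-5)/(3-1) = 147 seconds; any larger
// rescheduleCriticalSeconds would be rejected.
func exampleRescheduleCriticalSecondsBound(subsetCount int) int32 {
	allowedMaxSeconds := int32(math.MaxInt32)
	if subsetCount > 1 {
		allowedMaxSeconds = int32(MaxScheduledFailedDuration.Seconds()-5) / int32(subsetCount-1)
	}
	return allowedMaxSeconds
}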
func validateWorkloadSpreadSubsets(ws *appsv1alpha1.WorkloadSpread, subsets []appsv1alpha1.WorkloadSpreadSubset, workloadTemplate client.Object, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
//if len(subsets) < 2 {
// allErrs = append(allErrs, field.Required(fldPath, "subsets number must >= 2 in WorkloadSpread"))
// return allErrs
//}
if len(subsets) == 0 {
allErrs = append(allErrs, field.Required(fldPath, "subsets number must >= 1 in WorkloadSpread"))
return allErrs
}
subSetNames := sets.String{}
maxReplicasSum := 0
var firstMaxReplicasType *intstr.Type
for i, subset := range subsets {
subsetName := subset.Name
if subsetName == "" {
allErrs = append(allErrs, field.Invalid(fldPath.Index(i).Child("name"), subsetName, ""))
} else {
if subSetNames.Has(subsetName) {
// Name must be unique among all of the subsets under one WorkloadSpread.
allErrs = append(allErrs, field.Invalid(fldPath.Index(i).Child("name"), subsetName, fmt.Sprintf("duplicated subset name %s", subsetName)))
}
subSetNames.Insert(subsetName)
}
// at least one of requiredNodeSelectorTerm, preferredNodeSelectorTerms, tolerations.
//if subset.RequiredNodeSelectorTerm == nil && subset.PreferredNodeSelectorTerms == nil && subset.Tolerations == nil {
// allErrs = append(allErrs, field.Invalid(fldPath.Index(i).Child("requiredNodeSelectorTerm"), subset.RequiredNodeSelectorTerm, "The requiredNodeSelectorTerm, preferredNodeSelectorTerms and tolerations are empty that is not valid for WorkloadSpread"))
//} else {
if subset.RequiredNodeSelectorTerm != nil {
coreNodeSelectorTerm := &core.NodeSelectorTerm{}
if err := corev1.Convert_v1_NodeSelectorTerm_To_core_NodeSelectorTerm(subset.RequiredNodeSelectorTerm.DeepCopy(), coreNodeSelectorTerm, nil); err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Index(i).Child("requiredNodeSelectorTerm"), subset.RequiredNodeSelectorTerm, fmt.Sprintf("Convert_v1_NodeSelectorTerm_To_core_NodeSelectorTerm failed: %v", err)))
} else {
allErrs = append(allErrs, corevalidation.ValidateNodeSelectorTerm(*coreNodeSelectorTerm, fldPath.Index(i).Child("requiredNodeSelectorTerm"))...)
}
}
if subset.PreferredNodeSelectorTerms != nil {
corePreferredSchedulingTerms := make([]core.PreferredSchedulingTerm, 0, len(subset.PreferredNodeSelectorTerms))
for _, term := range subset.PreferredNodeSelectorTerms {
corePreferredSchedulingTerm := &core.PreferredSchedulingTerm{}
if err := corev1.Convert_v1_PreferredSchedulingTerm_To_core_PreferredSchedulingTerm(term.DeepCopy(), corePreferredSchedulingTerm, nil); err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Index(i).Child("preferredSchedulingTerms"), subset.PreferredNodeSelectorTerms, fmt.Sprintf("Convert_v1_PreferredSchedulingTerm_To_core_PreferredSchedulingTerm failed: %v", err)))
} else {
corePreferredSchedulingTerms = append(corePreferredSchedulingTerms, *corePreferredSchedulingTerm)
}
}
allErrs = append(allErrs, corevalidation.ValidatePreferredSchedulingTerms(corePreferredSchedulingTerms, fldPath.Index(i).Child("preferredSchedulingTerms"))...)
}
//}
if subset.Tolerations != nil {
var coreTolerations []core.Toleration
for _, toleration := range subset.Tolerations {
coreToleration := &core.Toleration{}
if err := corev1.Convert_v1_Toleration_To_core_Toleration(&toleration, coreToleration, nil); err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Index(i).Child("tolerations"), subset.Tolerations, fmt.Sprintf("Convert_v1_Toleration_To_core_Toleration failed: %v", err)))
} else {
coreTolerations = append(coreTolerations, *coreToleration)
}
}
allErrs = append(allErrs, corevalidation.ValidateTolerations(coreTolerations, fldPath.Index(i).Child("tolerations"))...)
}
if subset.Patch.Raw != nil {
// If the WorkloadSpread is created before the workload, no workloadTemplate is obtained; skip the remaining checks.
if workloadTemplate != nil {
// get the PodTemplateSpec from the workload
var podSpec v1.PodTemplateSpec
switch workloadTemplate.GetObjectKind().GroupVersionKind() {
case controllerKruiseKindCS:
cs := workloadTemplate.(*appsv1alpha1.CloneSet)
podSpec = withVolumeClaimTemplates(cs.Spec.Template, cs.Spec.VolumeClaimTemplates)
case controllerKindDep:
podSpec = workloadTemplate.(*appsv1.Deployment).Spec.Template
case controllerKindRS:
podSpec = workloadTemplate.(*appsv1.ReplicaSet).Spec.Template
case controllerKindJob:
podSpec = workloadTemplate.(*batchv1.Job).Spec.Template
case controllerKindSts:
sts := workloadTemplate.(*appsv1.StatefulSet)
podSpec = withVolumeClaimTemplates(sts.Spec.Template, sts.Spec.VolumeClaimTemplates)
}
podBytes, _ := json.Marshal(podSpec)
modified, err := strategicpatch.StrategicMergePatch(podBytes, subset.Patch.Raw, &v1.Pod{})
if err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Index(i).Child("patch"), string(subset.Patch.Raw), fmt.Sprintf("failed to merge patch: %v", err)))
}
newPod := &v1.Pod{}
if err = json.Unmarshal(modified, newPod); err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Index(i).Child("patch"), string(subset.Patch.Raw), fmt.Sprintf("failed to unmarshal: %v", err)))
}
coreNewPod, CovErr := convertor.ConvertPod(newPod)
if CovErr != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Index(i).Child("patch"), newPod, fmt.Sprintf("Convert_v1_Pod_To_core_Pod failed: %v", err)))
}
allErrs = append(allErrs, corevalidation.ValidatePodSpec(&coreNewPod.Spec, &coreNewPod.ObjectMeta, fldPath.Index(i).Child("patch"), webhookutil.DefaultPodValidationOptions)...)
}
}
// 1. The maxReplicas of all subsets must be the same type: int or percent.
// 2. Adaptive: the last subset's maxReplicas must not be specified.
// 3. If all maxReplicas are specified as percent, they must sum to 100%, unless the last subset's maxReplicas is left unspecified.
if subset.MaxReplicas != nil {
if firstMaxReplicasType == nil {
firstMaxReplicasType = &subset.MaxReplicas.Type
} else if subset.MaxReplicas.Type != *firstMaxReplicasType {
allErrs = append(allErrs, field.Invalid(fldPath.Index(i).Child("maxReplicas"), subset.MaxReplicas, "the maxReplicas type of all subsets must be the same"))
return allErrs
}
if ws.Spec.TargetReference != nil && ws.Spec.TargetReference.Kind == controllerKindSts.Kind && subset.MaxReplicas.Type != intstr.Int {
allErrs = append(allErrs, field.Invalid(fldPath.Index(i).Child("maxReplicas"), subset.MaxReplicas, "the maxReplicas type must be Int for StatefulSet"))
return allErrs
}
subsetMaxReplicas, err := intstr.GetValueFromIntOrPercent(subset.MaxReplicas, 100, true)
if err != nil || subsetMaxReplicas < 0 {
allErrs = append(allErrs, field.Invalid(fldPath.Index(i).Child("maxReplicas"), subset.MaxReplicas, "maxReplicas is not valid for subset"))
return allErrs
}
if subset.MaxReplicas.Type == intstr.String {
maxReplicasSum += subsetMaxReplicas
if maxReplicasSum > 100 {
allErrs = append(allErrs, field.Invalid(fldPath.Index(i).Child("maxReplicas"), subset.MaxReplicas, "the sum of all subset's maxReplicas exceeds 100% is no permitted"))
return allErrs
}
}
}
}
if firstMaxReplicasType != nil && *firstMaxReplicasType == intstr.String && maxReplicasSum < 100 && subsets[len(subsets)-1].MaxReplicas != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Index(0).Child("maxReplicas"), subsets[0].MaxReplicas, "maxReplicas sum of all subsets must equal 100% when type is specified as percent"))
}
return allErrs
}
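// exampleMaxReplicasPercentSum is a hedged sketch of the percent rule above: percent
// values are resolved against a total of 100 via intstr.GetValueFromIntOrPercent, so
// subsets declaring "60%" and "40%" sum to exactly 100 and pass, while "60%" twice
// would exceed 100 and be rejected.
func exampleMaxReplicasPercentSum() (int, error) {
	first := intstr.FromString("60%")
	second := intstr.FromString("40%")
	sum := 0
	for _, m := range []*intstr.IntOrString{&first, &second} {
		v, err := intstr.GetValueFromIntOrPercent(m, 100, true)
		if err != nil {
			return 0, err
		}
		sum += v
	}
	return sum, nil // 100
}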
func withVolumeClaimTemplates(pod v1.PodTemplateSpec, claims []v1.PersistentVolumeClaim) v1.PodTemplateSpec {
for _, pvc := range claims {
pod.Spec.Volumes = append(pod.Spec.Volumes, v1.Volume{
Name: pvc.Name,
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: pvc.Name,
},
},
})
}
return pod
}
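// exampleWithVolumeClaimTemplates is a hedged usage sketch: each volumeClaimTemplate
// is mirrored as a pod volume of the same name, so a subset patch that mounts "data"
// can be validated against a StatefulSet or CloneSet claim named "data".
func exampleWithVolumeClaimTemplates() int {
	tmpl := v1.PodTemplateSpec{}
	claims := []v1.PersistentVolumeClaim{{ObjectMeta: metav1.ObjectMeta{Name: "data"}}}
	return len(withVolumeClaimTemplates(tmpl, claims).Spec.Volumes) // 1
}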
func validateWorkloadSpreadConflict(ws *appsv1alpha1.WorkloadSpread, others []appsv1alpha1.WorkloadSpread, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
for _, other := range others {
if other.Name == ws.Name {
continue
}
// TargetReference cannot be managed by multiple ws
if ws.Spec.TargetReference != nil && other.Spec.TargetReference != nil {
targetRef1 := ws.Spec.TargetReference
targetRef2 := other.Spec.TargetReference
gv1, _ := schema.ParseGroupVersion(targetRef1.APIVersion)
gv2, _ := schema.ParseGroupVersion(targetRef2.APIVersion)
if gv1.Group == gv2.Group && targetRef1.Kind == targetRef2.Kind && targetRef1.Name == targetRef2.Name {
allErrs = append(allErrs, field.Invalid(fldPath.Child("targetRef"), ws.Spec.TargetReference, fmt.Sprintf(
"ws.spec.targetRef is in conflict with other WorkloadSpread %s", other.Name)))
return allErrs
}
}
}
return allErrs
}
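// exampleWorkloadSpreadConflict is a hedged sketch (all names are made up) of the
// conflict rule above: two WorkloadSpread objects in the same namespace may not
// target the same group/kind/name, regardless of the apiVersion they use.
func exampleWorkloadSpreadConflict() bool {
	ws := &appsv1alpha1.WorkloadSpread{}
	ws.Name = "ws-a"
	ws.Spec.TargetReference = &appsv1alpha1.TargetReference{APIVersion: "apps/v1", Kind: "Deployment", Name: "web"}
	other := appsv1alpha1.WorkloadSpread{}
	other.Name = "ws-b"
	other.Spec.TargetReference = &appsv1alpha1.TargetReference{APIVersion: "apps/v1beta2", Kind: "Deployment", Name: "web"}
	errs := validateWorkloadSpreadConflict(ws, []appsv1alpha1.WorkloadSpread{other}, field.NewPath("spec"))
	return len(errs) > 0 // true: both spreads would manage the same Deployment
}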
func validateWorkloadSpreadUpdate(new, old *appsv1alpha1.WorkloadSpread) field.ErrorList {
// validate metadata
allErrs := corevalidation.ValidateObjectMetaUpdate(&new.ObjectMeta, &old.ObjectMeta, field.NewPath("metadata"))
// validate targetRef
allErrs = append(allErrs, validateWorkloadSpreadTargetRefUpdate(new.Spec.TargetReference, old.Spec.TargetReference, field.NewPath("spec"))...)
return allErrs
}
func validateWorkloadSpreadTargetRefUpdate(targetRef, oldTargetRef *appsv1alpha1.TargetReference, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
if targetRef != nil && oldTargetRef != nil {
gv1, _ := schema.ParseGroupVersion(targetRef.APIVersion)
gv2, _ := schema.ParseGroupVersion(oldTargetRef.APIVersion)
if gv1.Group != gv2.Group || targetRef.Kind != oldTargetRef.Kind || targetRef.Name != oldTargetRef.Name {
allErrs = append(allErrs, field.Invalid(fldPath.Child("targetRef"), targetRef, "change TargetReference is not permitted for WorkloadSpread"))
}
}
return allErrs
}
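// exampleTargetRefUpdateAllowed is a hedged sketch of the update rule above: only the
// group, kind and name are compared, so switching the same Deployment reference from
// "apps/v1beta2" to "apps/v1" passes, while retargeting a different workload would be
// rejected. The reference values are made up for illustration.
func exampleTargetRefUpdateAllowed() bool {
	oldRef := &appsv1alpha1.TargetReference{APIVersion: "apps/v1beta2", Kind: "Deployment", Name: "web"}
	newRef := &appsv1alpha1.TargetReference{APIVersion: "apps/v1", Kind: "Deployment", Name: "web"}
	return len(validateWorkloadSpreadTargetRefUpdate(newRef, oldRef, field.NewPath("spec"))) == 0
}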