/* Copyright 2014 The Kubernetes Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ package cert import ( "crypto" "crypto/ecdsa" "crypto/elliptic" cryptorand "crypto/rand" "crypto/rsa" "crypto/x509" "crypto/x509/pkix" "errors" "math" "math/big" mathrand "math/rand" "net" "time" ) const ( rsaKeySize = 2048 ) // Config contains the basic fields required for creating a certificate type Config struct { CommonName string Organization []string AltNames AltNames Usages []x509.ExtKeyUsage NotBefore, NotAfter *time.Time } // AltNames contains the domain names and IP addresses that will be added // to the API Server's x509 certificate SubAltNames field. The values will // be passed directly to the x509.Certificate object. type AltNames struct { DNSNames []string IPs []net.IP } // NewRSAPrivateKey creates an RSA private key func NewRSAPrivateKey() (*rsa.PrivateKey, error) { return rsa.GenerateKey(cryptorand.Reader, rsaKeySize) } // NewECDSAPrivateKey creates an ECDSA private key func NewECDSAPrivateKey() (*ecdsa.PrivateKey, error) { return ecdsa.GenerateKey(elliptic.P256(), cryptorand.Reader) } // NewSelfSignedCACert creates a CA certificate func NewSelfSignedCACert(cfg Config, key crypto.Signer, duration time.Duration) (*x509.Certificate, error) { now := time.Now() tmpl := x509.Certificate{ SerialNumber: new(big.Int).SetInt64(randomSerialNumber()), Subject: pkix.Name{ CommonName: cfg.CommonName, Organization: cfg.Organization, }, NotBefore: now.UTC(), NotAfter: now.Add(duration).UTC(), KeyUsage: x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature | x509.KeyUsageCertSign, BasicConstraintsValid: true, IsCA: true, DNSNames: cfg.AltNames.DNSNames, } if cfg.NotBefore != nil { tmpl.NotBefore = *cfg.NotBefore } if cfg.NotAfter != nil { tmpl.NotAfter = *cfg.NotAfter } certDERBytes, err := x509.CreateCertificate(cryptorand.Reader, &tmpl, &tmpl, key.Public(), key) if err != nil { return nil, err } return x509.ParseCertificate(certDERBytes) } // NewSignedCert creates a signed certificate using the given CA certificate and key func NewSignedCert(cfg Config, key crypto.Signer, caCert *x509.Certificate, caKey crypto.Signer, duration time.Duration) (*x509.Certificate, error) { serial, err := cryptorand.Int(cryptorand.Reader, new(big.Int).SetInt64(math.MaxInt64)) if err != nil { return nil, err } if len(cfg.CommonName) == 0 { return nil, errors.New("must specify a CommonName") } if len(cfg.Usages) == 0 { return nil, errors.New("must specify at least one ExtKeyUsage") } certTmpl := x509.Certificate{ Subject: pkix.Name{ CommonName: cfg.CommonName, Organization: cfg.Organization, }, DNSNames: cfg.AltNames.DNSNames, IPAddresses: cfg.AltNames.IPs, SerialNumber: serial, NotBefore: caCert.NotBefore, NotAfter: time.Now().Add(duration).UTC(), KeyUsage: x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature, ExtKeyUsage: cfg.Usages, } if cfg.NotBefore != nil { certTmpl.NotBefore = *cfg.NotBefore } if cfg.NotAfter != nil { certTmpl.NotAfter = *cfg.NotAfter } certDERBytes, err := x509.CreateCertificate(cryptorand.Reader, &certTmpl, caCert, 
key.Public(), caKey) if err != nil { return nil, err } return x509.ParseCertificate(certDERBytes) } // randomSerialNumber returns a random int64 serial number based on // time.Now. It is defined separately from the generator interface so // that the caller doesn't have to worry about an input template or // error - these are unnecessary when creating a random serial. func randomSerialNumber() int64 { r := mathrand.New(mathrand.NewSource(time.Now().UTC().UnixNano())) return r.Int63() }
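// Illustrative sketch (not part of the original file): one way the helpers above
// could be combined to mint a self-signed CA and then issue a serving certificate
// signed by it. The names, SANs, and one-year duration are arbitrary placeholder
// values chosen only for the example.
func exampleIssueServingCert() (*x509.Certificate, error) {
	caKey, err := NewRSAPrivateKey()
	if err != nil {
		return nil, err
	}
	caCert, err := NewSelfSignedCACert(Config{CommonName: "example-ca"}, caKey, 365*24*time.Hour)
	if err != nil {
		return nil, err
	}
	serverKey, err := NewECDSAPrivateKey()
	if err != nil {
		return nil, err
	}
	// The signed certificate carries the DNS SANs and extended key usages from the Config.
	return NewSignedCert(Config{
		CommonName: "example-server",
		AltNames:   AltNames{DNSNames: []string{"example.default.svc"}},
		Usages:     []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth},
	}, serverKey, caCert, caKey, 365*24*time.Hour)
}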
/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cert

import (
	"crypto"
	"crypto/ecdsa"
	"crypto/rsa"
	"crypto/x509"
	"encoding/pem"
	"errors"
	"fmt"
)

const (
	// ECPrivateKeyBlockType is a possible value for pem.Block.Type.
	ECPrivateKeyBlockType = "EC PRIVATE KEY"
	// RSAPrivateKeyBlockType is a possible value for pem.Block.Type.
	RSAPrivateKeyBlockType = "RSA PRIVATE KEY"
	// PrivateKeyBlockType is a possible value for pem.Block.Type.
	PrivateKeyBlockType = "PRIVATE KEY"
	// PublicKeyBlockType is a possible value for pem.Block.Type.
	PublicKeyBlockType = "PUBLIC KEY"
	// CertificateBlockType is a possible value for pem.Block.Type.
	CertificateBlockType = "CERTIFICATE"
	// CertificateRequestBlockType is a possible value for pem.Block.Type.
	CertificateRequestBlockType = "CERTIFICATE REQUEST"
)

// EncodePublicKeyPEM returns PEM-encoded public key data
func EncodePublicKeyPEM(key *rsa.PublicKey) ([]byte, error) {
	der, err := x509.MarshalPKIXPublicKey(key)
	if err != nil {
		return []byte{}, err
	}
	block := pem.Block{
		Type:  PublicKeyBlockType,
		Bytes: der,
	}
	return pem.EncodeToMemory(&block), nil
}

// EncodePrivateKeyPEM returns PEM-encoded private key data
func EncodePrivateKeyPEM(key crypto.PrivateKey) []byte {
	switch t := key.(type) {
	case *ecdsa.PrivateKey:
		derBytes, err := x509.MarshalECPrivateKey(t)
		if err != nil {
			return nil
		}
		block := &pem.Block{
			Type:  ECPrivateKeyBlockType,
			Bytes: derBytes,
		}
		return pem.EncodeToMemory(block)
	case *rsa.PrivateKey:
		block := &pem.Block{
			Type:  RSAPrivateKeyBlockType,
			Bytes: x509.MarshalPKCS1PrivateKey(t),
		}
		return pem.EncodeToMemory(block)
	default:
		return nil
	}
}

// EncodeCertPEM returns PEM-encoded certificate data
func EncodeCertPEM(cert *x509.Certificate) []byte {
	block := pem.Block{
		Type:  CertificateBlockType,
		Bytes: cert.Raw,
	}
	return pem.EncodeToMemory(&block)
}

// ParsePrivateKeyPEM returns a private key parsed from a PEM block in the supplied data.
// Recognizes PEM blocks for "EC PRIVATE KEY", "RSA PRIVATE KEY", or "PRIVATE KEY" func ParsePrivateKeyPEM(keyData []byte) (interface{}, error) { var privateKeyPemBlock *pem.Block for { privateKeyPemBlock, keyData = pem.Decode(keyData) if privateKeyPemBlock == nil { break } switch privateKeyPemBlock.Type { case ECPrivateKeyBlockType: // ECDSA Private Key in ASN.1 format if key, err := x509.ParseECPrivateKey(privateKeyPemBlock.Bytes); err == nil { return key, nil } case RSAPrivateKeyBlockType: // RSA Private Key in PKCS#1 format if key, err := x509.ParsePKCS1PrivateKey(privateKeyPemBlock.Bytes); err == nil { return key, nil } case PrivateKeyBlockType: // RSA or ECDSA Private Key in unencrypted PKCS#8 format if key, err := x509.ParsePKCS8PrivateKey(privateKeyPemBlock.Bytes); err == nil { return key, nil } } // tolerate non-key PEM blocks for compatibility with things like "EC PARAMETERS" blocks // originally, only the first PEM block was parsed and expected to be a key block } // we read all the PEM blocks and didn't recognize one return nil, fmt.Errorf("data does not contain a valid RSA or ECDSA private key") } // ParsePublicKeysPEM is a helper function for reading an array of rsa.PublicKey or ecdsa.PublicKey from a PEM-encoded byte array. // Reads public keys from both public and private key files. func ParsePublicKeysPEM(keyData []byte) ([]interface{}, error) { var block *pem.Block keys := []interface{}{} for { // read the next block block, keyData = pem.Decode(keyData) if block == nil { break } // test block against parsing functions if privateKey, err := parseRSAPrivateKey(block.Bytes); err == nil { keys = append(keys, &privateKey.PublicKey) continue } if publicKey, err := parseRSAPublicKey(block.Bytes); err == nil { keys = append(keys, publicKey) continue } if privateKey, err := parseECPrivateKey(block.Bytes); err == nil { keys = append(keys, &privateKey.PublicKey) continue } if publicKey, err := parseECPublicKey(block.Bytes); err == nil { keys = append(keys, publicKey) continue } // tolerate non-key PEM blocks for backwards compatibility // originally, only the first PEM block was parsed and expected to be a key block } if len(keys) == 0 { return nil, fmt.Errorf("data does not contain any valid RSA or ECDSA public keys") } return keys, nil } // ParseCertsPEM returns the x509.Certificates contained in the given PEM-encoded byte array // Returns an error if a certificate could not be parsed, or if the data does not contain any certificates func ParseCertsPEM(pemCerts []byte) ([]*x509.Certificate, error) { ok := false certs := []*x509.Certificate{} for len(pemCerts) > 0 { var block *pem.Block block, pemCerts = pem.Decode(pemCerts) if block == nil { break } // Only use PEM "CERTIFICATE" blocks without extra headers if block.Type != CertificateBlockType || len(block.Headers) != 0 { continue } cert, err := x509.ParseCertificate(block.Bytes) if err != nil { return certs, err } certs = append(certs, cert) ok = true } if !ok { return certs, errors.New("data does not contain any valid RSA or ECDSA certificates") } return certs, nil } // parseRSAPublicKey parses a single RSA public key from the provided data func parseRSAPublicKey(data []byte) (*rsa.PublicKey, error) { var err error // Parse the key var parsedKey interface{} if parsedKey, err = x509.ParsePKIXPublicKey(data); err != nil { if cert, err := x509.ParseCertificate(data); err == nil { parsedKey = cert.PublicKey } else { return nil, err } } // Test if parsed key is an RSA Public Key var pubKey *rsa.PublicKey var ok bool if pubKey, ok = 
parsedKey.(*rsa.PublicKey); !ok { return nil, fmt.Errorf("data doesn't contain valid RSA Public Key") } return pubKey, nil } // parseRSAPrivateKey parses a single RSA private key from the provided data func parseRSAPrivateKey(data []byte) (*rsa.PrivateKey, error) { var err error // Parse the key var parsedKey interface{} if parsedKey, err = x509.ParsePKCS1PrivateKey(data); err != nil { if parsedKey, err = x509.ParsePKCS8PrivateKey(data); err != nil { return nil, err } } // Test if parsed key is an RSA Private Key var privKey *rsa.PrivateKey var ok bool if privKey, ok = parsedKey.(*rsa.PrivateKey); !ok { return nil, fmt.Errorf("data doesn't contain valid RSA Private Key") } return privKey, nil } // parseECPublicKey parses a single ECDSA public key from the provided data func parseECPublicKey(data []byte) (*ecdsa.PublicKey, error) { var err error // Parse the key var parsedKey interface{} if parsedKey, err = x509.ParsePKIXPublicKey(data); err != nil { if cert, err := x509.ParseCertificate(data); err == nil { parsedKey = cert.PublicKey } else { return nil, err } } // Test if parsed key is an ECDSA Public Key var pubKey *ecdsa.PublicKey var ok bool if pubKey, ok = parsedKey.(*ecdsa.PublicKey); !ok { return nil, fmt.Errorf("data doesn't contain valid ECDSA Public Key") } return pubKey, nil } // parseECPrivateKey parses a single ECDSA private key from the provided data func parseECPrivateKey(data []byte) (*ecdsa.PrivateKey, error) { var err error // Parse the key var parsedKey interface{} if parsedKey, err = x509.ParseECPrivateKey(data); err != nil { return nil, err } // Test if parsed key is an ECDSA Private Key var privKey *ecdsa.PrivateKey var ok bool if privKey, ok = parsedKey.(*ecdsa.PrivateKey); !ok { return nil, fmt.Errorf("data doesn't contain valid ECDSA Private Key") } return privKey, nil }
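// Illustrative sketch (not part of the original file): round-tripping a certificate
// and its private key through the PEM helpers above. The cert and key arguments are
// assumed to come from the generation helpers elsewhere in this package.
func examplePEMRoundTrip(cert *x509.Certificate, key *ecdsa.PrivateKey) error {
	certPEM := EncodeCertPEM(cert)
	keyPEM := EncodePrivateKeyPEM(key)
	if keyPEM == nil {
		return errors.New("unsupported private key type")
	}
	// Parsing the encoded blobs back verifies that both decode to usable objects.
	if _, err := ParseCertsPEM(certPEM); err != nil {
		return err
	}
	_, err := ParsePrivateKeyPEM(keyPEM)
	return err
}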
/* * This file is part of the KubeVirt project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * Copyright 2018 Red Hat, Inc. * */ package mutating_webhook import ( "encoding/json" "net/http" admissionv1 "k8s.io/api/admission/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "kubevirt.io/client-go/kubecli" "kubevirt.io/client-go/log" webhookutils "kubevirt.io/kubevirt/pkg/util/webhooks" "kubevirt.io/kubevirt/pkg/virt-api/webhooks" "kubevirt.io/kubevirt/pkg/virt-api/webhooks/mutating-webhook/mutators" virtconfig "kubevirt.io/kubevirt/pkg/virt-config" ) type mutator interface { Mutate(*admissionv1.AdmissionReview) *admissionv1.AdmissionResponse } func serve(resp http.ResponseWriter, req *http.Request, m mutator) { review, err := webhookutils.GetAdmissionReview(req) if err != nil { resp.WriteHeader(http.StatusBadRequest) return } response := admissionv1.AdmissionReview{ TypeMeta: metav1.TypeMeta{ APIVersion: admissionv1.SchemeGroupVersion.String(), Kind: "AdmissionReview", }, } reviewResponse := m.Mutate(review) if reviewResponse != nil { response.Response = reviewResponse response.Response.UID = review.Request.UID } // reset the Object and OldObject, they are not needed in a response. review.Request.Object = runtime.RawExtension{} review.Request.OldObject = runtime.RawExtension{} responseBytes, err := json.Marshal(response) if err != nil { log.Log.Reason(err).Errorf("failed json encode webhook response") resp.WriteHeader(http.StatusBadRequest) return } if _, err := resp.Write(responseBytes); err != nil { log.Log.Reason(err).Errorf("failed to write webhook response") resp.WriteHeader(http.StatusBadRequest) return } } func ServeVMs(resp http.ResponseWriter, req *http.Request, clusterConfig *virtconfig.ClusterConfig, virtCli kubecli.KubevirtClient) { serve(resp, req, mutators.NewVMsMutator(clusterConfig, virtCli)) } func ServeVMIs(resp http.ResponseWriter, req *http.Request, clusterConfig *virtconfig.ClusterConfig, informers *webhooks.Informers, kubeVirtServiceAccounts map[string]struct{}) { serve(resp, req, &mutators.VMIsMutator{ClusterConfig: clusterConfig, VMIPresetInformer: informers.VMIPresetInformer, KubeVirtServiceAccounts: kubeVirtServiceAccounts}) } func ServeMigrationCreate(resp http.ResponseWriter, req *http.Request) { serve(resp, req, &mutators.MigrationCreateMutator{}) } func ServeClones(resp http.ResponseWriter, req *http.Request) { serve(resp, req, mutators.NewCloneCreateMutator()) }
/* * This file is part of the KubeVirt project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * Copyright 2018 Red Hat, Inc. * */ package validating_webhook import ( "net/http" "kubevirt.io/client-go/kubecli" storageAdmitters "kubevirt.io/kubevirt/pkg/storage/admitters" validating_webhooks "kubevirt.io/kubevirt/pkg/util/webhooks/validating-webhooks" "kubevirt.io/kubevirt/pkg/virt-api/webhooks" "kubevirt.io/kubevirt/pkg/virt-api/webhooks/validating-webhook/admitters" virtconfig "kubevirt.io/kubevirt/pkg/virt-config" ) func ServeVMICreate( resp http.ResponseWriter, req *http.Request, clusterConfig *virtconfig.ClusterConfig, kubeVirtServiceAccounts map[string]struct{}, specValidators ...admitters.SpecValidator, ) { validating_webhooks.Serve(resp, req, &admitters.VMICreateAdmitter{ ClusterConfig: clusterConfig, KubeVirtServiceAccounts: kubeVirtServiceAccounts, SpecValidators: specValidators, }) } func ServeVMIUpdate(resp http.ResponseWriter, req *http.Request, clusterConfig *virtconfig.ClusterConfig, kubeVirtServiceAccounts map[string]struct{}) { validating_webhooks.Serve(resp, req, admitters.NewVMIUpdateAdmitter(clusterConfig, kubeVirtServiceAccounts)) } func ServeVMs( resp http.ResponseWriter, req *http.Request, clusterConfig *virtconfig.ClusterConfig, virtCli kubecli.KubevirtClient, informers *webhooks.Informers, kubeVirtServiceAccounts map[string]struct{}, ) { validating_webhooks.Serve(resp, req, admitters.NewVMsAdmitter(clusterConfig, virtCli, informers, kubeVirtServiceAccounts)) } func ServeVMIRS(resp http.ResponseWriter, req *http.Request, clusterConfig *virtconfig.ClusterConfig) { validating_webhooks.Serve(resp, req, &admitters.VMIRSAdmitter{ClusterConfig: clusterConfig}) } func ServeVMPool(resp http.ResponseWriter, req *http.Request, clusterConfig *virtconfig.ClusterConfig, kubeVirtServiceAccounts map[string]struct{}) { validating_webhooks.Serve(resp, req, &admitters.VMPoolAdmitter{ClusterConfig: clusterConfig, KubeVirtServiceAccounts: kubeVirtServiceAccounts}) } func ServeVMIPreset(resp http.ResponseWriter, req *http.Request) { validating_webhooks.Serve(resp, req, &admitters.VMIPresetAdmitter{}) } func ServeMigrationCreate(resp http.ResponseWriter, req *http.Request, virtCli kubecli.KubevirtClient) { validating_webhooks.Serve(resp, req, admitters.NewMigrationCreateAdmitter(virtCli.GeneratedKubeVirtClient())) } func ServeMigrationUpdate(resp http.ResponseWriter, req *http.Request) { validating_webhooks.Serve(resp, req, &admitters.MigrationUpdateAdmitter{}) } func ServeVMSnapshots(resp http.ResponseWriter, req *http.Request, clusterConfig *virtconfig.ClusterConfig, virtCli kubecli.KubevirtClient) { validating_webhooks.Serve(resp, req, storageAdmitters.NewVMSnapshotAdmitter(clusterConfig, virtCli)) } func ServeVMRestores(resp http.ResponseWriter, req *http.Request, clusterConfig *virtconfig.ClusterConfig, virtCli kubecli.KubevirtClient, informers *webhooks.Informers) { validating_webhooks.Serve(resp, req, storageAdmitters.NewVMRestoreAdmitter(clusterConfig, virtCli, informers.VMRestoreInformer)) } 
func ServeVMExports(resp http.ResponseWriter, req *http.Request, clusterConfig *virtconfig.ClusterConfig) { validating_webhooks.Serve(resp, req, storageAdmitters.NewVMExportAdmitter(clusterConfig)) } func ServeVmInstancetypes(resp http.ResponseWriter, req *http.Request) { validating_webhooks.Serve(resp, req, &admitters.InstancetypeAdmitter{}) } func ServeVmClusterInstancetypes(resp http.ResponseWriter, req *http.Request) { validating_webhooks.Serve(resp, req, &admitters.ClusterInstancetypeAdmitter{}) } func ServeVmPreferences(resp http.ResponseWriter, req *http.Request) { validating_webhooks.Serve(resp, req, &admitters.PreferenceAdmitter{}) } func ServeVmClusterPreferences(resp http.ResponseWriter, req *http.Request) { validating_webhooks.Serve(resp, req, &admitters.ClusterPreferenceAdmitter{}) } func ServeStatusValidation(resp http.ResponseWriter, req *http.Request, clusterConfig *virtconfig.ClusterConfig, virtCli kubecli.KubevirtClient, informers *webhooks.Informers, kubeVirtServiceAccounts map[string]struct{}, ) { validating_webhooks.Serve(resp, req, &admitters.StatusAdmitter{ VmsAdmitter: admitters.NewVMsAdmitter(clusterConfig, virtCli, informers, kubeVirtServiceAccounts), }) } func ServePodEvictionInterceptor(resp http.ResponseWriter, req *http.Request, clusterConfig *virtconfig.ClusterConfig, virtCli kubecli.KubevirtClient) { validating_webhooks.Serve(resp, req, admitters.NewPodEvictionAdmitter(clusterConfig, virtCli, virtCli.GeneratedKubeVirtClient())) } func ServeMigrationPolicies(resp http.ResponseWriter, req *http.Request) { validating_webhooks.Serve(resp, req, admitters.NewMigrationPolicyAdmitter()) } func ServeVirtualMachineClones(resp http.ResponseWriter, req *http.Request, clusterConfig *virtconfig.ClusterConfig, virtCli kubecli.KubevirtClient) { validating_webhooks.Serve(resp, req, admitters.NewVMCloneAdmitter(clusterConfig, virtCli)) }
/* * This file is part of the KubeVirt project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * Copyright 2022 Red Hat, Inc. * */ package clone import ( "context" "errors" "fmt" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" k8serrors "k8s.io/apimachinery/pkg/api/errors" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/tools/cache" clone "kubevirt.io/api/clone/v1beta1" k6tv1 "kubevirt.io/api/core/v1" snapshotv1 "kubevirt.io/api/snapshot/v1beta1" "kubevirt.io/client-go/log" "kubevirt.io/kubevirt/pkg/pointer" backendstorage "kubevirt.io/kubevirt/pkg/storage/backend-storage" virtsnapshot "kubevirt.io/kubevirt/pkg/storage/snapshot" ) type cloneSourceType string const ( sourceTypeVM cloneSourceType = "VirtualMachine" sourceTypeSnapshot cloneSourceType = "VirtualMachineSnapshot" ) type cloneTargetType string const ( targetTypeVM cloneTargetType = "VirtualMachine" defaultType cloneTargetType = targetTypeVM ) type syncInfoType struct { err error snapshotName string snapshotReady bool restoreName string restoreReady bool targetVMName string targetVMCreated bool pvcBound bool event Event reason string isCloneFailing bool isClonePending bool } // vmCloneInfo stores the current vmclone information type vmCloneInfo struct { vmClone *clone.VirtualMachineClone sourceType cloneSourceType snapshot *snapshotv1.VirtualMachineSnapshot snapshotName string sourceVm *k6tv1.VirtualMachine } func (ctrl *VMCloneController) execute(key string) error { logger := log.Log obj, cloneExists, err := ctrl.vmCloneIndexer.GetByKey(key) if err != nil { return err } var vmClone *clone.VirtualMachineClone if cloneExists { vmClone = obj.(*clone.VirtualMachineClone) logger = logger.Object(vmClone) } else { return nil } if vmClone.Status.Phase == clone.Succeeded { _, vmExists, err := ctrl.vmStore.GetByKey(fmt.Sprintf("%s/%s", vmClone.Namespace, *vmClone.Status.TargetName)) if err != nil { return err } if !vmExists { if vmClone.DeletionTimestamp == nil { logger.V(3).Infof("Deleting vm clone for deleted vm %s/%s", vmClone.Namespace, *vmClone.Status.TargetName) return ctrl.client.VirtualMachineClone(vmClone.Namespace).Delete(context.Background(), vmClone.Name, v1.DeleteOptions{}) } // nothing to process for a vm clone that's being deleted return nil } } syncInfo, err := ctrl.sync(vmClone) if err != nil { return fmt.Errorf("sync error: %v", err) } err = ctrl.updateStatus(vmClone, syncInfo) if err != nil { return fmt.Errorf("error updating status: %v", err) } if syncErr := syncInfo.err; syncErr != nil { return fmt.Errorf("sync error: %v", syncErr) } return nil } func (ctrl *VMCloneController) sync(vmClone *clone.VirtualMachineClone) (syncInfoType, error) { cloneInfo, err := ctrl.retrieveCloneInfo(vmClone) if err != nil { switch errors.Unwrap(err) { case ErrSourceDoesntExist: // If source does not exist we will wait for source // to be created and then vmclone will get reconciled again. 
return syncInfoType{ isClonePending: true, event: SourceDoesNotExist, reason: err.Error(), }, nil case ErrSourceWithBackendStorage: return syncInfoType{ isCloneFailing: true, event: SourceWithBackendStorageInvalid, reason: err.Error(), }, nil default: return syncInfoType{}, err } } if ctrl.getTargetType(cloneInfo.vmClone) == targetTypeVM { return ctrl.syncTargetVM(cloneInfo), nil } return syncInfoType{err: fmt.Errorf("target type is unknown: %s", ctrl.getTargetType(cloneInfo.vmClone))}, nil } // retrieveCloneInfo initializes all the snapshot and restore information that can be populated from the vm clone resource func (ctrl *VMCloneController) retrieveCloneInfo(vmClone *clone.VirtualMachineClone) (*vmCloneInfo, error) { sourceInfo := vmClone.Spec.Source cloneInfo := vmCloneInfo{ vmClone: vmClone, sourceType: cloneSourceType(sourceInfo.Kind), } switch cloneSourceType(sourceInfo.Kind) { case sourceTypeVM: sourceVMObj, err := ctrl.getSource(vmClone, sourceInfo.Name, vmClone.Namespace, string(sourceTypeVM), ctrl.vmStore) if err != nil { return nil, err } sourceVM := sourceVMObj.(*k6tv1.VirtualMachine) if backendstorage.IsBackendStorageNeededForVM(sourceVM) { return nil, fmt.Errorf("%w: VM %s/%s", ErrSourceWithBackendStorage, vmClone.Namespace, sourceInfo.Name) } cloneInfo.sourceVm = sourceVM case sourceTypeSnapshot: sourceSnapshotObj, err := ctrl.getSource(vmClone, sourceInfo.Name, vmClone.Namespace, string(sourceTypeSnapshot), ctrl.snapshotStore) if err != nil { return nil, err } sourceSnapshot := sourceSnapshotObj.(*snapshotv1.VirtualMachineSnapshot) cloneInfo.snapshot = sourceSnapshot cloneInfo.snapshotName = sourceSnapshot.Name default: return nil, fmt.Errorf("clone %s is defined with an unknown source type %s", vmClone.Name, sourceInfo.Kind) } if cloneInfo.snapshotName == "" && vmClone.Status.SnapshotName != nil { cloneInfo.snapshotName = *vmClone.Status.SnapshotName } return &cloneInfo, nil } func (ctrl *VMCloneController) syncTargetVM(vmCloneInfo *vmCloneInfo) syncInfoType { vmClone := vmCloneInfo.vmClone syncInfo := syncInfoType{} switch vmClone.Status.Phase { case clone.PhaseUnset, clone.SnapshotInProgress: if vmCloneInfo.sourceType == sourceTypeVM { if vmClone.Status.SnapshotName == nil { syncInfo = ctrl.createSnapshotFromVm(vmClone, vmCloneInfo.sourceVm, syncInfo) return syncInfo } } vmCloneInfo.snapshot, syncInfo = ctrl.verifySnapshotReady(vmClone, vmCloneInfo.snapshotName, vmCloneInfo.vmClone.Namespace, syncInfo) if syncInfo.isFailingOrError() || !syncInfo.snapshotReady { return syncInfo } fallthrough case clone.RestoreInProgress: // Here we have to know the snapshot name if vmCloneInfo.snapshot == nil { vmCloneInfo.snapshot, syncInfo = ctrl.getSnapshot(vmCloneInfo.snapshotName, vmCloneInfo.vmClone.Namespace, syncInfo) if syncInfo.isFailingOrError() { return syncInfo } } if vmClone.Status.RestoreName == nil { vm, err := ctrl.getVmFromSnapshot(vmCloneInfo.snapshot) if err != nil { syncInfo.setError(fmt.Errorf("cannot get VM manifest from snapshot: %v", err)) return syncInfo } syncInfo = ctrl.createRestoreFromVm(vmClone, vm, vmCloneInfo.snapshotName, syncInfo) return syncInfo } syncInfo = ctrl.verifyRestoreReady(vmClone, vmCloneInfo.vmClone.Namespace, syncInfo) if syncInfo.isFailingOrError() || !syncInfo.restoreReady { return syncInfo } fallthrough case clone.CreatingTargetVM: syncInfo = ctrl.verifyVmReady(vmClone, syncInfo) if syncInfo.isFailingOrError() { return syncInfo } fallthrough case clone.Succeeded: if vmClone.Status.RestoreName != nil { syncInfo = 
ctrl.verifyPVCBound(vmClone, syncInfo) if syncInfo.isFailingOrError() || !syncInfo.pvcBound { return syncInfo } syncInfo = ctrl.cleanupRestore(vmClone, syncInfo) if syncInfo.isFailingOrError() { return syncInfo } if vmCloneInfo.sourceType == sourceTypeVM { syncInfo = ctrl.cleanupSnapshot(vmClone, syncInfo) if syncInfo.isFailingOrError() { return syncInfo } } } default: log.Log.Object(vmClone).Infof("clone %s is in phase %s - nothing to do", vmClone.Name, string(vmClone.Status.Phase)) } return syncInfo } func (ctrl *VMCloneController) updateStatus(origClone *clone.VirtualMachineClone, syncInfo syncInfoType) error { vmClone := origClone.DeepCopy() var phaseChanged bool assignPhase := func(phase clone.VirtualMachineClonePhase) { vmClone.Status.Phase = phase phaseChanged = true } switch { case syncInfo.isClonePending: ctrl.logAndRecord(vmClone, syncInfo.event, syncInfo.reason) updateCloneConditions(vmClone, newProgressingCondition(corev1.ConditionFalse, "Pending"), newReadyCondition(corev1.ConditionFalse, syncInfo.reason), ) case syncInfo.isCloneFailing: ctrl.logAndRecord(vmClone, syncInfo.event, syncInfo.reason) assignPhase(clone.Failed) updateCloneConditions(vmClone, newProgressingCondition(corev1.ConditionFalse, "Failed"), newReadyCondition(corev1.ConditionFalse, syncInfo.reason), ) default: updateCloneConditions(vmClone, newProgressingCondition(corev1.ConditionTrue, "Still processing"), newReadyCondition(corev1.ConditionFalse, "Still processing"), ) } if isInPhase(vmClone, clone.PhaseUnset) && !syncInfo.isClonePending { assignPhase(clone.SnapshotInProgress) } if isInPhase(vmClone, clone.SnapshotInProgress) { if snapshotName := syncInfo.snapshotName; snapshotName != "" { vmClone.Status.SnapshotName = pointer.P(snapshotName) } if syncInfo.snapshotReady { assignPhase(clone.RestoreInProgress) } } if isInPhase(vmClone, clone.RestoreInProgress) { if restoreName := syncInfo.restoreName; restoreName != "" { vmClone.Status.RestoreName = pointer.P(restoreName) } if syncInfo.restoreReady { assignPhase(clone.CreatingTargetVM) } } if isInPhase(vmClone, clone.CreatingTargetVM) { if targetVMName := syncInfo.targetVMName; targetVMName != "" { vmClone.Status.TargetName = pointer.P(targetVMName) } if syncInfo.targetVMCreated { assignPhase(clone.Succeeded) } } if isInPhase(vmClone, clone.Succeeded) { updateCloneConditions(vmClone, newProgressingCondition(corev1.ConditionFalse, "Ready"), newReadyCondition(corev1.ConditionTrue, "Ready"), ) } if syncInfo.pvcBound { vmClone.Status.SnapshotName = nil vmClone.Status.RestoreName = nil } if !equality.Semantic.DeepEqual(vmClone.Status, origClone.Status) { if phaseChanged { log.Log.Object(vmClone).Infof("Changing phase to %s", vmClone.Status.Phase) } _, err := ctrl.client.VirtualMachineClone(vmClone.Namespace).UpdateStatus(context.Background(), vmClone, v1.UpdateOptions{}) if err != nil { return err } } return nil } func validateVolumeSnapshotStatus(vm *k6tv1.VirtualMachine) error { var vssErr error for _, v := range vm.Spec.Template.Spec.Volumes { if v.PersistentVolumeClaim != nil || v.DataVolume != nil { found := false for _, vss := range vm.Status.VolumeSnapshotStatuses { if v.Name == vss.Name { if !vss.Enabled { vssErr = errors.Join(vssErr, fmt.Errorf(ErrVolumeNotSnapshotable, v.Name)) } found = true break } } if !found { vssErr = errors.Join(vssErr, fmt.Errorf(ErrVolumeSnapshotSupportUnknown, v.Name)) } } } return vssErr } func (ctrl *VMCloneController) createSnapshotFromVm(vmClone *clone.VirtualMachineClone, vm *k6tv1.VirtualMachine, syncInfo syncInfoType) 
syncInfoType {
	err := validateVolumeSnapshotStatus(vm)
	if err != nil {
		return syncInfoType{
			isClonePending: true,
			event:          VMVolumeSnapshotsInvalid,
			reason:         err.Error(),
		}
	}

	snapshot := generateSnapshot(vmClone, vm)
	log.Log.Object(vmClone).Infof("creating snapshot %s for clone %s", snapshot.Name, vmClone.Name)

	createdSnapshot, err := ctrl.client.VirtualMachineSnapshot(snapshot.Namespace).Create(context.Background(), snapshot, v1.CreateOptions{})
	if err != nil {
		if !k8serrors.IsAlreadyExists(err) {
			syncInfo.setError(fmt.Errorf("failed creating snapshot %s for clone %s: %v", snapshot.Name, vmClone.Name, err))
			return syncInfo
		}
		syncInfo.snapshotName = snapshot.Name
		return syncInfo
	}

	snapshot = createdSnapshot
	ctrl.logAndRecord(vmClone, SnapshotCreated, fmt.Sprintf("created snapshot %s for clone %s", snapshot.Name, vmClone.Name))
	syncInfo.snapshotName = snapshot.Name

	log.Log.Object(vmClone).V(defaultVerbosityLevel).Infof("snapshot %s was just created, re-enqueueing to give the snapshot time to finish", snapshot.Name)
	return syncInfo
}

func (ctrl *VMCloneController) verifySnapshotReady(vmClone *clone.VirtualMachineClone, name, namespace string, syncInfo syncInfoType) (*snapshotv1.VirtualMachineSnapshot, syncInfoType) {
	obj, exists, err := ctrl.snapshotStore.GetByKey(getKey(name, namespace))
	if err != nil {
		syncInfo.setError(fmt.Errorf("error getting snapshot %s from cache for clone %s: %v", name, vmClone.Name, err))
		return nil, syncInfo
	} else if !exists {
		syncInfo.setError(fmt.Errorf("snapshot %s is not created yet for clone %s", name, vmClone.Name))
		return nil, syncInfo
	}

	snapshot := obj.(*snapshotv1.VirtualMachineSnapshot)
	log.Log.Object(vmClone).Infof("found snapshot %s for clone %s", snapshot.Name, vmClone.Name)

	if !virtsnapshot.VmSnapshotReady(snapshot) {
		log.Log.Object(vmClone).V(defaultVerbosityLevel).Infof("snapshot %s for clone %s is not ready to use yet", snapshot.Name, vmClone.Name)
		return snapshot, syncInfo
	}

	if err := ctrl.verifySnapshotContent(snapshot); err != nil {
		// At this point the snapshot has already succeeded and is ready.
// If there is an issue with the snapshot content something is not right // and the clone should fail syncInfo.isCloneFailing = true syncInfo.event = SnapshotContentInvalid syncInfo.reason = err.Error() return nil, syncInfo } ctrl.logAndRecord(vmClone, SnapshotReady, fmt.Sprintf("snapshot %s for clone %s is ready to use", snapshot.Name, vmClone.Name)) syncInfo.snapshotReady = true return snapshot, syncInfo } func (ctrl *VMCloneController) getSnapshotContent(snapshot *snapshotv1.VirtualMachineSnapshot) (*snapshotv1.VirtualMachineSnapshotContent, error) { contentName := virtsnapshot.GetVMSnapshotContentName(snapshot) contentKey := getKey(contentName, snapshot.Namespace) contentObj, exists, err := ctrl.snapshotContentStore.GetByKey(contentKey) if !exists { return nil, fmt.Errorf("snapshot content %s in namespace %s does not exist", contentName, snapshot.Namespace) } else if err != nil { return nil, err } return contentObj.(*snapshotv1.VirtualMachineSnapshotContent), nil } func (ctrl *VMCloneController) verifySnapshotContent(snapshot *snapshotv1.VirtualMachineSnapshot) error { content, err := ctrl.getSnapshotContent(snapshot) if err != nil { return err } if content.Spec.VirtualMachineSnapshotName == nil { return fmt.Errorf("cannot get snapshot name from content %s", content.Name) } snapshotName := *content.Spec.VirtualMachineSnapshotName vm := content.Spec.Source.VirtualMachine if vm.Spec.Template == nil { return nil } if backendstorage.IsBackendStorageNeededForVMI(&vm.Spec.Template.Spec) { return fmt.Errorf("%w: snapshot %s/%s", ErrSourceWithBackendStorage, snapshot.Namespace, snapshot.Name) } var volumesNotBackedUpErr error for _, volume := range vm.Spec.Template.Spec.Volumes { if volume.PersistentVolumeClaim == nil && volume.DataVolume == nil { continue } foundBackup := false for _, volumeBackup := range content.Spec.VolumeBackups { if volume.Name == volumeBackup.VolumeName { foundBackup = true break } } if !foundBackup { volumesNotBackedUpErr = errors.Join(volumesNotBackedUpErr, fmt.Errorf(ErrVolumeNotBackedUp, volume.Name, snapshotName)) } } return volumesNotBackedUpErr } // This method assumes the snapshot exists. If it doesn't - syncInfo is updated accordingly. func (ctrl *VMCloneController) getSnapshot(snapshotName string, sourceNamespace string, syncInfo syncInfoType) (*snapshotv1.VirtualMachineSnapshot, syncInfoType) { obj, exists, err := ctrl.snapshotStore.GetByKey(getKey(snapshotName, sourceNamespace)) if !exists { // At this point the snapshot is already created. 
If it doesn't exist it means that it's deleted for some // reason and the clone should fail syncInfo.isCloneFailing = true syncInfo.event = SnapshotDeleted syncInfo.reason = fmt.Sprintf("snapshot %s does not exist anymore", snapshotName) return nil, syncInfo } if err != nil { syncInfo.setError(fmt.Errorf("error getting snapshot %s from cache: %v", snapshotName, err)) return nil, syncInfo } snapshot := obj.(*snapshotv1.VirtualMachineSnapshot) return snapshot, syncInfo } func (ctrl *VMCloneController) createRestoreFromVm(vmClone *clone.VirtualMachineClone, vm *k6tv1.VirtualMachine, snapshotName string, syncInfo syncInfoType) syncInfoType { patches, err := generatePatches(vm, &vmClone.Spec) if err != nil { retErr := fmt.Errorf("error generating patches for clone %s: %v", vmClone.Name, err) ctrl.recorder.Event(vmClone, corev1.EventTypeWarning, string(RestoreCreationFailed), retErr.Error()) syncInfo.setError(retErr) return syncInfo } restore := generateRestore(vmClone.Spec.Target, vm.Name, vmClone.Namespace, vmClone.Name, snapshotName, vmClone.UID, patches) log.Log.Object(vmClone).Infof("creating restore %s for clone %s", restore.Name, vmClone.Name) createdRestore, err := ctrl.client.VirtualMachineRestore(restore.Namespace).Create(context.Background(), restore, v1.CreateOptions{}) if err != nil { if !k8serrors.IsAlreadyExists(err) { retErr := fmt.Errorf("failed creating restore %s for clone %s: %v", restore.Name, vmClone.Name, err) ctrl.recorder.Event(vmClone, corev1.EventTypeWarning, string(RestoreCreationFailed), retErr.Error()) syncInfo.setError(retErr) return syncInfo } syncInfo.restoreName = restore.Name return syncInfo } restore = createdRestore ctrl.logAndRecord(vmClone, RestoreCreated, fmt.Sprintf("created restore %s for clone %s", restore.Name, vmClone.Name)) syncInfo.restoreName = restore.Name log.Log.Object(vmClone).V(defaultVerbosityLevel).Infof("restore %s was just created, reenqueuing to let snapshot time to finish", restore.Name) return syncInfo } func (ctrl *VMCloneController) verifyRestoreReady(vmClone *clone.VirtualMachineClone, sourceNamespace string, syncInfo syncInfoType) syncInfoType { obj, exists, err := ctrl.restoreStore.GetByKey(getKey(*vmClone.Status.RestoreName, sourceNamespace)) if !exists { syncInfo.setError(fmt.Errorf("restore %s is not created yet for clone %s", *vmClone.Status.RestoreName, vmClone.Name)) return syncInfo } else if err != nil { syncInfo.setError(fmt.Errorf("error getting restore %s from cache for clone %s: %v", *vmClone.Status.RestoreName, vmClone.Name, err)) return syncInfo } restore := obj.(*snapshotv1.VirtualMachineRestore) log.Log.Object(vmClone).Infof("found target restore %s for clone %s", restore.Name, vmClone.Name) if virtsnapshot.VmRestoreProgressing(restore) { log.Log.Object(vmClone).V(defaultVerbosityLevel).Infof("restore %s for clone %s is not ready to use yet", restore.Name, vmClone.Name) return syncInfo } ctrl.logAndRecord(vmClone, RestoreReady, fmt.Sprintf("restore %s for clone %s is ready to use", restore.Name, vmClone.Name)) syncInfo.restoreReady = true syncInfo.targetVMName = restore.Spec.Target.Name return syncInfo } func (ctrl *VMCloneController) verifyVmReady(vmClone *clone.VirtualMachineClone, syncInfo syncInfoType) syncInfoType { targetVMInfo := vmClone.Spec.Target _, exists, err := ctrl.vmStore.GetByKey(getKey(targetVMInfo.Name, vmClone.Namespace)) if !exists { syncInfo.setError(fmt.Errorf("target VM %s is not created yet for clone %s", targetVMInfo.Name, vmClone.Name)) return syncInfo } else if err != nil { 
		syncInfo.setError(fmt.Errorf("error getting VM %s from cache for clone %s: %v", targetVMInfo.Name, vmClone.Name, err))
		return syncInfo
	}

	ctrl.logAndRecord(vmClone, TargetVMCreated, fmt.Sprintf("created target VM %s for clone %s", targetVMInfo.Name, vmClone.Name))
	syncInfo.targetVMCreated = true
	return syncInfo
}

func (ctrl *VMCloneController) verifyPVCBound(vmClone *clone.VirtualMachineClone, syncInfo syncInfoType) syncInfoType {
	obj, exists, err := ctrl.restoreStore.GetByKey(getKey(*vmClone.Status.RestoreName, vmClone.Namespace))
	if !exists {
		syncInfo.setError(fmt.Errorf("restore %s is not created yet for clone %s", *vmClone.Status.RestoreName, vmClone.Name))
		return syncInfo
	} else if err != nil {
		syncInfo.setError(fmt.Errorf("error getting restore %s from cache for clone %s: %v", *vmClone.Status.RestoreName, vmClone.Name, err))
		return syncInfo
	}

	restore := obj.(*snapshotv1.VirtualMachineRestore)
	for _, volumeRestore := range restore.Status.Restores {
		obj, exists, err = ctrl.pvcStore.GetByKey(getKey(volumeRestore.PersistentVolumeClaimName, vmClone.Namespace))
		if !exists {
			syncInfo.setError(fmt.Errorf("PVC %s is not created yet for clone %s", volumeRestore.PersistentVolumeClaimName, vmClone.Name))
			return syncInfo
		} else if err != nil {
			syncInfo.setError(fmt.Errorf("error getting PVC %s from cache for clone %s: %v", volumeRestore.PersistentVolumeClaimName, vmClone.Name, err))
			return syncInfo
		}

		pvc := obj.(*corev1.PersistentVolumeClaim)
		if pvc.Status.Phase != corev1.ClaimBound {
			log.Log.Object(vmClone).V(defaultVerbosityLevel).Infof("pvc %s for clone %s is not bound yet", pvc.Name, vmClone.Name)
			return syncInfo
		}
	}

	ctrl.logAndRecord(vmClone, PVCBound, fmt.Sprintf("all PVCs for clone %s are bound", vmClone.Name))
	syncInfo.pvcBound = true
	return syncInfo
}

func (ctrl *VMCloneController) cleanupSnapshot(vmClone *clone.VirtualMachineClone, syncInfo syncInfoType) syncInfoType {
	err := ctrl.client.VirtualMachineSnapshot(vmClone.Namespace).Delete(context.Background(), *vmClone.Status.SnapshotName, v1.DeleteOptions{})
	if err != nil && !k8serrors.IsNotFound(err) {
		syncInfo.setError(fmt.Errorf("cannot clean up snapshot %s for clone %s", *vmClone.Status.SnapshotName, vmClone.Name))
		return syncInfo
	}
	return syncInfo
}

func (ctrl *VMCloneController) cleanupRestore(vmClone *clone.VirtualMachineClone, syncInfo syncInfoType) syncInfoType {
	err := ctrl.client.VirtualMachineRestore(vmClone.Namespace).Delete(context.Background(), *vmClone.Status.RestoreName, v1.DeleteOptions{})
	if err != nil && !k8serrors.IsNotFound(err) {
		syncInfo.setError(fmt.Errorf("cannot clean up restore %s for clone %s", *vmClone.Status.RestoreName, vmClone.Name))
		return syncInfo
	}
	return syncInfo
}

func (ctrl *VMCloneController) logAndRecord(vmClone *clone.VirtualMachineClone, event Event, msg string) {
	ctrl.recorder.Eventf(vmClone, corev1.EventTypeNormal, string(event), msg)
	log.Log.Object(vmClone).Infof(msg)
}

func (ctrl *VMCloneController) getTargetType(vmClone *clone.VirtualMachineClone) cloneTargetType {
	if vmClone.Spec.Target != nil {
		return cloneTargetType(vmClone.Spec.Target.Kind)
	}
	return defaultType
}

func (ctrl *VMCloneController) getSource(vmClone *clone.VirtualMachineClone, name, namespace, sourceKind string, store cache.Store) (interface{}, error) {
	key := getKey(name, namespace)
	obj, exists, err := store.GetByKey(key)
	if err != nil {
		return nil, fmt.Errorf("error getting %s %s in namespace %s from cache: %v", sourceKind, name, namespace, err)
	}
	if !exists {
		return nil, fmt.Errorf("%w: %s %s/%s",
ErrSourceDoesntExist, sourceKind, namespace, name) } return obj, nil } func (ctrl *VMCloneController) getVmFromSnapshot(snapshot *snapshotv1.VirtualMachineSnapshot) (*k6tv1.VirtualMachine, error) { content, err := ctrl.getSnapshotContent(snapshot) if err != nil { return nil, err } contentVmSpec := content.Spec.Source.VirtualMachine vm := &k6tv1.VirtualMachine{ ObjectMeta: contentVmSpec.ObjectMeta, Spec: contentVmSpec.Spec, Status: contentVmSpec.Status, } return vm, nil } func (s *syncInfoType) setError(err error) { s.err = err } func (s *syncInfoType) isFailingOrError() bool { return s.err != nil || s.isCloneFailing }
package clone

import (
	"errors"
	"fmt"
	"time"

	k8scorev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/equality"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/record"
	"k8s.io/client-go/util/workqueue"

	clonebase "kubevirt.io/api/clone"
	clone "kubevirt.io/api/clone/v1beta1"
	virtv1 "kubevirt.io/api/core/v1"
	snapshotv1 "kubevirt.io/api/snapshot/v1beta1"
	"kubevirt.io/client-go/kubecli"
	"kubevirt.io/client-go/log"

	"kubevirt.io/kubevirt/pkg/storage/snapshot"
)

type Event string

const (
	defaultVerbosityLevel = 2
	unknownTypeErrFmt     = "clone controller expected object of type %s but found object of unknown type"

	SnapshotCreated        Event = "SnapshotCreated"
	SnapshotReady          Event = "SnapshotReady"
	RestoreCreated         Event = "RestoreCreated"
	RestoreCreationFailed  Event = "RestoreCreationFailed"
	RestoreReady           Event = "RestoreReady"
	TargetVMCreated        Event = "TargetVMCreated"
	PVCBound               Event = "PVCBound"
	SnapshotDeleted        Event = "SnapshotDeleted"
	SnapshotContentInvalid Event = "SnapshotContentInvalid"
	SourceDoesNotExist     Event = "SourceDoesNotExist"

	SourceWithBackendStorageInvalid Event = "SourceVMWithBackendStorageInvalid"
	VMVolumeSnapshotsInvalid        Event = "VMVolumeSnapshotsInvalid"
)

var (
	ErrVolumeNotSnapshotable        = "Virtual Machine volume %s does not support snapshots"
	ErrVolumeSnapshotSupportUnknown = "Virtual Machine volume %s snapshot support unknown"
	ErrVolumeNotBackedUp            = "volume %s is not backed up in snapshot %s"

	ErrSourceDoesntExist        = errors.New("source does not exist")
	ErrSourceWithBackendStorage = errors.New("cloning a source with backend storage is not supported")
)

type VMCloneController struct {
	client               kubecli.KubevirtClient
	vmCloneIndexer       cache.Indexer
	snapshotStore        cache.Store
	restoreStore         cache.Store
	vmStore              cache.Store
	snapshotContentStore cache.Store
	pvcStore             cache.Store
	recorder             record.EventRecorder

	vmCloneQueue workqueue.TypedRateLimitingInterface[string]

	hasSynced func() bool
}

func NewVmCloneController(client kubecli.KubevirtClient, vmCloneInformer, snapshotInformer, restoreInformer, vmInformer, snapshotContentInformer, pvcInformer cache.SharedIndexInformer, recorder record.EventRecorder) (*VMCloneController, error) {
	ctrl := VMCloneController{
		client:               client,
		vmCloneIndexer:       vmCloneInformer.GetIndexer(),
		snapshotStore:        snapshotInformer.GetStore(),
		restoreStore:         restoreInformer.GetStore(),
		vmStore:              vmInformer.GetStore(),
		snapshotContentStore: snapshotContentInformer.GetStore(),
		pvcStore:             pvcInformer.GetStore(),
		recorder:             recorder,
		vmCloneQueue: workqueue.NewTypedRateLimitingQueueWithConfig[string](
			workqueue.DefaultTypedControllerRateLimiter[string](),
			workqueue.TypedRateLimitingQueueConfig[string]{Name: "virt-controller-vmclone"},
		),
	}

	ctrl.hasSynced = func() bool {
		return vmCloneInformer.HasSynced() && snapshotInformer.HasSynced() && restoreInformer.HasSynced() &&
			vmInformer.HasSynced() && snapshotContentInformer.HasSynced() && pvcInformer.HasSynced()
	}

	_, err := vmCloneInformer.AddEventHandler(
		cache.ResourceEventHandlerFuncs{
			AddFunc:    ctrl.handleVMClone,
			UpdateFunc: func(oldObj, newObj interface{}) { ctrl.handleVMClone(newObj) },
			DeleteFunc: ctrl.handleVMClone,
		},
	)
	if err != nil {
		return nil, err
	}

	_, err = snapshotInformer.AddEventHandler(
		cache.ResourceEventHandlerFuncs{
			AddFunc:    ctrl.handleSnapshot,
			UpdateFunc: func(oldObj, newObj interface{}) { ctrl.handleSnapshot(newObj) },
			DeleteFunc: ctrl.handleSnapshot,
		},
	)
	if err != nil {
		return nil, err
	}

	_, err = restoreInformer.AddEventHandler(
cache.ResourceEventHandlerFuncs{ AddFunc: ctrl.handleRestore, UpdateFunc: func(oldObj, newObj interface{}) { ctrl.handleRestore(newObj) }, DeleteFunc: ctrl.handleRestore, }, ) if err != nil { return nil, err } _, err = pvcInformer.AddEventHandler( cache.ResourceEventHandlerFuncs{ AddFunc: ctrl.handlePVC, UpdateFunc: func(oldObj, newObj interface{}) { ctrl.handlePVC(newObj) }, DeleteFunc: ctrl.handlePVC, }, ) if err != nil { return nil, err } _, err = vmInformer.AddEventHandler( cache.ResourceEventHandlerFuncs{ AddFunc: ctrl.handleAddedSourceVM, UpdateFunc: func(oldObj, newObj interface{}) { ctrl.handleUpdateSourceVM(oldObj, newObj) }, DeleteFunc: ctrl.handleDeletedTargetVM, }, ) if err != nil { return nil, err } return &ctrl, nil } func (ctrl *VMCloneController) handleVMClone(obj interface{}) { if unknown, ok := obj.(cache.DeletedFinalStateUnknown); ok && unknown.Obj != nil { obj = unknown.Obj } vmClone, ok := obj.(*clone.VirtualMachineClone) if !ok { log.Log.Errorf(unknownTypeErrFmt, clonebase.ResourceVMCloneSingular) return } objName, err := cache.DeletionHandlingMetaNamespaceKeyFunc(vmClone) if err != nil { log.Log.Errorf("vm clone controller failed to get key from object: %v, %v", err, vmClone) return } log.Log.V(defaultVerbosityLevel).Infof("enqueued %q for sync", objName) ctrl.vmCloneQueue.Add(objName) } func (ctrl *VMCloneController) handleSnapshot(obj interface{}) { if unknown, ok := obj.(cache.DeletedFinalStateUnknown); ok && unknown.Obj != nil { obj = unknown.Obj } snapshot, ok := obj.(*snapshotv1.VirtualMachineSnapshot) if !ok { log.Log.Errorf(unknownTypeErrFmt, "virtualmachinesnapshot") return } if ownedByClone, key := isOwnedByClone(snapshot); ownedByClone { ctrl.vmCloneQueue.AddRateLimited(key) } snapshotKey, err := cache.MetaNamespaceKeyFunc(snapshot) if err != nil { log.Log.Object(snapshot).Reason(err).Error("cannot get snapshot key") return } snapshotSourceKeys, err := ctrl.vmCloneIndexer.IndexKeys("snapshotSource", snapshotKey) if err != nil { log.Log.Object(snapshot).Reason(err).Error("cannot get clone snapshotSourceKeys from snapshotSource indexer") return } snapshotWaitingKeys, err := ctrl.vmCloneIndexer.IndexKeys(string(clone.SnapshotInProgress), snapshotKey) if err != nil { log.Log.Object(snapshot).Reason(err).Error("cannot get clone snapshotWaitingKeys from " + string(clone.SnapshotInProgress) + " indexer") return } for _, key := range append(snapshotSourceKeys, snapshotWaitingKeys...) 
{ ctrl.vmCloneQueue.AddRateLimited(key) } } func (ctrl *VMCloneController) handleRestore(obj interface{}) { if unknown, ok := obj.(cache.DeletedFinalStateUnknown); ok && unknown.Obj != nil { obj = unknown.Obj } restore, ok := obj.(*snapshotv1.VirtualMachineRestore) if !ok { log.Log.Errorf(unknownTypeErrFmt, "virtualmachinerestore") return } if ownedByClone, key := isOwnedByClone(restore); ownedByClone { ctrl.vmCloneQueue.AddRateLimited(key) } restoreKey, err := cache.MetaNamespaceKeyFunc(restore) if err != nil { log.Log.Object(restore).Reason(err).Error("cannot get snapshot key") return } restoreWaitingKeys, err := ctrl.vmCloneIndexer.IndexKeys(string(clone.RestoreInProgress), restoreKey) if err != nil { log.Log.Object(restore).Reason(err).Error("cannot get clone restoreWaitingKeys from " + string(clone.RestoreInProgress) + " indexer") return } for _, key := range restoreWaitingKeys { ctrl.vmCloneQueue.AddRateLimited(key) } } func (ctrl *VMCloneController) handlePVC(obj interface{}) { if unknown, ok := obj.(cache.DeletedFinalStateUnknown); ok && unknown.Obj != nil { obj = unknown.Obj } pvc, ok := obj.(*k8scorev1.PersistentVolumeClaim) if !ok { log.Log.Errorf(unknownTypeErrFmt, "persistentvolumeclaim") return } var ( restoreName string exists bool ) if restoreName, exists = pvc.Annotations[snapshot.RestoreNameAnnotation]; !exists { return } if pvc.Status.Phase != k8scorev1.ClaimBound { return } restoreKey := getKey(restoreName, pvc.Namespace) succeededWaitingKeys, err := ctrl.vmCloneIndexer.IndexKeys(string(clone.Succeeded), restoreKey) if err != nil { log.Log.Object(pvc).Reason(err).Error("cannot get clone succeededWaitingKeys from " + string(clone.Succeeded) + " indexer") return } for _, key := range succeededWaitingKeys { ctrl.vmCloneQueue.AddRateLimited(key) } } func (ctrl *VMCloneController) handleAddedSourceVM(obj interface{}) { vm, ok := obj.(*virtv1.VirtualMachine) if !ok { log.Log.Reason(fmt.Errorf("unexpected obj %#v", obj)).Error("Failed to process notification") return } vmKey, err := cache.MetaNamespaceKeyFunc(vm) if err != nil { log.Log.Object(vm).Reason(err).Error("cannot get vm key") return } keys, err := ctrl.vmCloneIndexer.IndexKeys("vmSource", vmKey) if err != nil { log.Log.Object(vm).Reason(err).Error("cannot get clone from vmSource indexer") return } for _, k := range keys { ctrl.vmCloneQueue.Add(k) } } func (ctrl *VMCloneController) handleUpdateSourceVM(oldObj, newObj interface{}) { oldVM, ok := oldObj.(*virtv1.VirtualMachine) if !ok { log.Log.Reason(fmt.Errorf("unexpected old obj %#v", oldObj)).Error("Failed to process notification") return } newVM, ok := newObj.(*virtv1.VirtualMachine) if !ok { log.Log.Reason(fmt.Errorf("unexpected new obj %#v", newObj)).Error("Failed to process notification") return } // we care only for updates in a vmsource volumeSnapshotStatuses if equality.Semantic.DeepEqual(newVM.Status.VolumeSnapshotStatuses, oldVM.Status.VolumeSnapshotStatuses) { return } vmKey, err := cache.MetaNamespaceKeyFunc(newVM) if err != nil { log.Log.Object(newVM).Reason(err).Error("cannot get vm key") return } keys, err := ctrl.vmCloneIndexer.IndexKeys("vmSource", vmKey) if err != nil { log.Log.Object(newVM).Reason(err).Error("cannot get clone from vmSource indexer") return } for _, k := range keys { ctrl.vmCloneQueue.Add(k) } } func (ctrl *VMCloneController) handleDeletedTargetVM(obj interface{}) { vm, ok := obj.(*virtv1.VirtualMachine) // When a delete is dropped, the relist will notice a vm in the store not // in the list, leading to the insertion of a tombstone 
object which contains // the deleted key/value. Note that this value might be stale. if !ok { tombstone, ok := obj.(cache.DeletedFinalStateUnknown) if !ok { log.Log.Reason(fmt.Errorf("couldn't get object from tombstone %+v", obj)).Error("Failed to process delete notification") return } vm, ok = tombstone.Obj.(*virtv1.VirtualMachine) if !ok { log.Log.Reason(fmt.Errorf("tombstone contained object that is not a vm %#v", obj)).Error("Failed to process delete notification") return } } vmKey, err := cache.MetaNamespaceKeyFunc(vm) if err != nil { log.Log.Object(vm).Reason(err).Error("cannot get vm key") return } keys, err := ctrl.vmCloneIndexer.IndexKeys("vmTarget", vmKey) if err != nil { log.Log.Object(vm).Reason(err).Error("cannot get clone from vmTarget indexer") return } for _, k := range keys { ctrl.vmCloneQueue.Add(k) } } func (ctrl *VMCloneController) Run(threadiness int, stopCh <-chan struct{}) error { defer utilruntime.HandleCrash() defer ctrl.vmCloneQueue.ShutDown() log.Log.Info("Starting clone controller") defer log.Log.Info("Shutting down clone controller") if !cache.WaitForCacheSync( stopCh, ctrl.hasSynced, ) { return fmt.Errorf("failed to wait for caches to sync") } for i := 0; i < threadiness; i++ { go wait.Until(ctrl.runWorker, time.Second, stopCh) } <-stopCh return nil } func (ctrl *VMCloneController) Execute() bool { key, quit := ctrl.vmCloneQueue.Get() if quit { return false } defer ctrl.vmCloneQueue.Done(key) err := ctrl.execute(key) if err != nil { log.Log.Reason(err).Infof("reenqueuing clone %v", key) ctrl.vmCloneQueue.AddRateLimited(key) } else { log.Log.V(defaultVerbosityLevel).Infof("processed clone %v", key) ctrl.vmCloneQueue.Forget(key) } return true } func (ctrl *VMCloneController) runWorker() { for ctrl.Execute() { } }
package clone import ( "fmt" "k8s.io/apimachinery/pkg/types" "kubevirt.io/kubevirt/pkg/pointer" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" snapshotv1 "kubevirt.io/api/snapshot/v1beta1" "k8s.io/apimachinery/pkg/util/rand" clone "kubevirt.io/api/clone/v1beta1" v1 "kubevirt.io/api/core/v1" ) const ( vmKind = "VirtualMachine" kubevirtApiGroup = "kubevirt.io" ) // variable so can be overridden in tests var currentTime = func() *metav1.Time { t := metav1.Now() return &t } func getKey(name, namespace string) string { return fmt.Sprintf("%s/%s", namespace, name) } func generateNameWithRandomSuffix(names ...string) string { const randomStringLength = 5 if len(names) == 0 { return "" } generatedName := names[0] for _, name := range names[1:] { generatedName = fmt.Sprintf("%s-%s", generatedName, name) } // Kubernetes' object names have limit of 252 characters. // For more info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/ if len(generatedName) > 252 { generatedName = "clone-object" } generatedName = fmt.Sprintf("%s-%s", generatedName, rand.String(randomStringLength)) return generatedName } func generateSnapshotName(vmCloneUID types.UID) string { return fmt.Sprintf("tmp-snapshot-%s", string(vmCloneUID)) } func generateRestoreName(vmCloneUID types.UID) string { return fmt.Sprintf("tmp-restore-%s", string(vmCloneUID)) } func generateVMName(oldVMName string) string { return generateNameWithRandomSuffix(oldVMName, "clone") } func isInPhase(vmClone *clone.VirtualMachineClone, phase clone.VirtualMachineClonePhase) bool { return vmClone.Status.Phase == phase } func generateSnapshot(vmClone *clone.VirtualMachineClone, sourceVM *v1.VirtualMachine) *snapshotv1.VirtualMachineSnapshot { return &snapshotv1.VirtualMachineSnapshot{ ObjectMeta: metav1.ObjectMeta{ Name: generateSnapshotName(vmClone.UID), Namespace: sourceVM.Namespace, OwnerReferences: []metav1.OwnerReference{ getCloneOwnerReference(vmClone.Name, vmClone.UID), }, }, Spec: snapshotv1.VirtualMachineSnapshotSpec{ Source: corev1.TypedLocalObjectReference{ Kind: vmKind, Name: sourceVM.Name, APIGroup: pointer.P(kubevirtApiGroup), }, }, } } func generateRestore(targetInfo *corev1.TypedLocalObjectReference, sourceVMName, namespace, cloneName, snapshotName string, cloneUID types.UID, patches []string) *snapshotv1.VirtualMachineRestore { targetInfo = targetInfo.DeepCopy() if targetInfo.Name == "" { targetInfo.Name = generateVMName(sourceVMName) } return &snapshotv1.VirtualMachineRestore{ ObjectMeta: metav1.ObjectMeta{ Name: generateRestoreName(cloneUID), Namespace: namespace, OwnerReferences: []metav1.OwnerReference{ getCloneOwnerReference(cloneName, cloneUID), }, }, Spec: snapshotv1.VirtualMachineRestoreSpec{ Target: *targetInfo, VirtualMachineSnapshotName: snapshotName, Patches: patches, }, } } func getCloneOwnerReference(cloneName string, cloneUID types.UID) metav1.OwnerReference { return metav1.OwnerReference{ APIVersion: clone.VirtualMachineCloneKind.GroupVersion().String(), Kind: clone.VirtualMachineCloneKind.Kind, Name: cloneName, UID: cloneUID, Controller: pointer.P(true), BlockOwnerDeletion: pointer.P(true), } } // If the provided object is owned by a clone object, the first return parameter would be true // and the second one would be the key of the clone. Otherwise, the first return parameter would // be false and the second parameter is to be ignored. 
func isOwnedByClone(obj metav1.Object) (isOwned bool, key string) { cloneKind := clone.VirtualMachineCloneKind.Kind cloneApiVersion := clone.VirtualMachineCloneKind.GroupVersion().String() ownerRefs := obj.GetOwnerReferences() for _, ownerRef := range ownerRefs { if ownerRef.Kind != cloneKind || ownerRef.APIVersion != cloneApiVersion { continue } key = getKey(ownerRef.Name, obj.GetNamespace()) return true, key } return false, "" // TODO: Unit test this? } func updateCondition(conditions []clone.Condition, c clone.Condition, includeReason bool) []clone.Condition { found := false for i := range conditions { if conditions[i].Type == c.Type { if conditions[i].Status != c.Status || (includeReason && conditions[i].Reason != c.Reason) { conditions[i] = c } found = true break } } if !found { conditions = append(conditions, c) } return conditions } func updateCloneConditions(vmClone *clone.VirtualMachineClone, conditions ...clone.Condition) { for _, cond := range conditions { vmClone.Status.Conditions = updateCondition(vmClone.Status.Conditions, cond, true) } } func newReadyCondition(status corev1.ConditionStatus, reason string) clone.Condition { return clone.Condition{ Type: clone.ConditionReady, Status: status, Reason: reason, LastTransitionTime: *currentTime(), } } func newProgressingCondition(status corev1.ConditionStatus, reason string) clone.Condition { return clone.Condition{ Type: clone.ConditionProgressing, Status: status, Reason: reason, LastTransitionTime: *currentTime(), } }
/* * This file is part of the KubeVirt project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * Copyright 2022 Red Hat, Inc. * */ package clone import ( "encoding/json" "fmt" "regexp" "strings" "kubevirt.io/client-go/log" clone "kubevirt.io/api/clone/v1beta1" k6tv1 "kubevirt.io/api/core/v1" "kubevirt.io/kubevirt/pkg/apimachinery/patch" ) func generatePatches(source *k6tv1.VirtualMachine, cloneSpec *clone.VirtualMachineCloneSpec) ([]string, error) { patchSet := patch.New() addMacAddressPatches(patchSet, source.Spec.Template.Spec.Domain.Devices.Interfaces, cloneSpec.NewMacAddresses) addSmbiosSerialPatches(patchSet, source.Spec.Template.Spec.Domain.Firmware, cloneSpec.NewSMBiosSerial) addRemovePatchesFromFilter(patchSet, source.Labels, cloneSpec.LabelFilters, "/metadata/labels") addAnnotationPatches(patchSet, source.Annotations, cloneSpec.AnnotationFilters) addRemovePatchesFromFilter(patchSet, source.Spec.Template.ObjectMeta.Labels, cloneSpec.Template.LabelFilters, "/spec/template/metadata/labels") addRemovePatchesFromFilter(patchSet, source.Spec.Template.ObjectMeta.Annotations, cloneSpec.Template.AnnotationFilters, "/spec/template/metadata/annotations") addFirmwareUUIDPatches(patchSet, source.Spec.Template.Spec.Domain.Firmware) patches, err := generateStringPatchOperations(patchSet) if err != nil { return nil, err } log.Log.V(defaultVerbosityLevel).Object(source).Infof("patches generated for vm %s clone: %v", source.Name, patches) return patches, nil } func generateStringPatchOperations(set *patch.PatchSet) ([]string, error) { var patches []string for _, patchOp := range set.GetPatches() { payloadBytes, err := json.Marshal(patchOp) if err != nil { return nil, err } patches = append(patches, string(payloadBytes)) } return patches, nil } func addMacAddressPatches(patchSet *patch.PatchSet, interfaces []k6tv1.Interface, newMacAddresses map[string]string) { for idx, iface := range interfaces { // If a new mac address is not specified for the current interface an empty mac address would be assigned. // This is OK for clusters that have Kube Mac Pool enabled. For clusters that don't have KMP it is the users' // responsibility to assign new mac address to every network interface. newMac := newMacAddresses[iface.Name] patchSet.AddOption(patch.WithReplace(fmt.Sprintf("/spec/template/spec/domain/devices/interfaces/%d/macAddress", idx), newMac)) } } func addSmbiosSerialPatches(patchSet *patch.PatchSet, firmware *k6tv1.Firmware, newSMBiosSerial *string) { if firmware == nil { return } newSerial := "" if newSMBiosSerial != nil { newSerial = *newSMBiosSerial } patchSet.AddOption(patch.WithReplace("/spec/template/spec/domain/firmware/serial", newSerial)) } func addAnnotationPatches(patchSet *patch.PatchSet, annotations map[string]string, filters []string) { // Some keys are needed for restore functionality. 
// Deleting the item from the annotations map prevents a remove patch // from being generated for it. delete(annotations, "restore.kubevirt.io/lastRestoreUID") addRemovePatchesFromFilter(patchSet, annotations, filters, "/metadata/annotations") } func addRemovePatchesFromFilter(patchSet *patch.PatchSet, m map[string]string, filters []string, baseJSONPath string) { if filters == nil { return } var regularFilters, negationFilters []string for _, filter := range filters { // a bare "*" is not a valid regular expression, so translate it to ".*" (match everything) if filter == "*" { regularFilters = append(regularFilters, ".*") continue } if strings.HasPrefix(filter, "!") { negationFilters = append(negationFilters, filter[1:]) } else { regularFilters = append(regularFilters, filter) } } matchRegex := func(regex, s string) (matched bool) { var err error matched, err = regexp.MatchString(regex, s) if err != nil { log.Log.Errorf("matching regex %s to string %s failed: %v", regex, s, err) } return matched } includedKeys := map[string]struct{}{} // Negation filters have precedence, therefore regular filters are applied first for key := range m { for _, filter := range regularFilters { if matchRegex(filter, key) { includedKeys[key] = struct{}{} } } for _, negationFilter := range negationFilters { if matchRegex(negationFilter, key) { delete(includedKeys, key) } } } // Append removal patches for every key that did not survive the filters for originalKey := range m { if _, isIncluded := includedKeys[originalKey]; !isIncluded { patchSet.AddOption(patch.WithRemove(fmt.Sprintf("%s/%s", baseJSONPath, patch.EscapeJSONPointer(originalKey)))) } } } func addFirmwareUUIDPatches(patchSet *patch.PatchSet, firmware *k6tv1.Firmware) { if firmware == nil { return } patchSet.AddOption(patch.WithReplace("/spec/template/spec/domain/firmware/uuid", "")) }
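// Example (informal illustration, not part of the API): given source labels
// {"app": "my-app", "kubevirt.io/created-by": "1234"} and LabelFilters ["*", "!kubevirt.io/*"],
// the "*" filter is translated to ".*" and includes both keys, the negation filter then drops
// "kubevirt.io/created-by", and addRemovePatchesFromFilter emits a single remove operation for
// "/metadata/labels/kubevirt.io~1created-by" (the "/" inside the key is escaped per JSON Pointer
// rules by patch.EscapeJSONPointer). Keys that survive the filters are left untouched on the target.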
package disruptionbudget import ( "context" "fmt" "time" corev1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" "k8s.io/apimachinery/pkg/api/equality" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" "k8s.io/client-go/util/workqueue" virtv1 "kubevirt.io/api/core/v1" "kubevirt.io/client-go/kubecli" "kubevirt.io/client-go/log" "kubevirt.io/kubevirt/pkg/apimachinery/patch" "kubevirt.io/kubevirt/pkg/controller" "kubevirt.io/kubevirt/pkg/util/migrations" "kubevirt.io/kubevirt/pkg/util/pdbs" virtconfig "kubevirt.io/kubevirt/pkg/virt-config" ) const deleteNotifFail = "Failed to process delete notification" const ( // FailedCreatePodDisruptionBudgetReason is added in an event if creating a PodDisruptionBudget failed. FailedCreatePodDisruptionBudgetReason = "FailedCreate" // SuccessfulCreatePodDisruptionBudgetReason is added in an event if creating a PodDisruptionBudget succeeded. SuccessfulCreatePodDisruptionBudgetReason = "SuccessfulCreate" // FailedDeletePodDisruptionBudgetReason is added in an event if deleting a PodDisruptionBudget failed. FailedDeletePodDisruptionBudgetReason = "FailedDelete" // SuccessfulDeletePodDisruptionBudgetReason is added in an event if deleting a PodDisruptionBudget succeeded. SuccessfulDeletePodDisruptionBudgetReason = "SuccessfulDelete" // FailedUpdatePodDisruptionBudgetReason is added in an event if updating a PodDisruptionBudget failed. FailedUpdatePodDisruptionBudgetReason = "FailedUpdate" // SuccessfulUpdatePodDisruptionBudgetReason is added in an event if updating a PodDisruptionBudget succeeded. SuccessfulUpdatePodDisruptionBudgetReason = "SuccessfulUpdate" ) type DisruptionBudgetController struct { clientset kubecli.KubevirtClient clusterConfig *virtconfig.ClusterConfig Queue workqueue.TypedRateLimitingInterface[string] vmiStore cache.Store pdbIndexer cache.Indexer podIndexer cache.Indexer migrationIndexer cache.Indexer recorder record.EventRecorder podDisruptionBudgetExpectations *controller.UIDTrackingControllerExpectations hasSynced func() bool } func NewDisruptionBudgetController( vmiInformer cache.SharedIndexInformer, pdbInformer cache.SharedIndexInformer, podInformer cache.SharedIndexInformer, migrationInformer cache.SharedIndexInformer, recorder record.EventRecorder, clientset kubecli.KubevirtClient, clusterConfig *virtconfig.ClusterConfig, ) (*DisruptionBudgetController, error) { c := &DisruptionBudgetController{ Queue: workqueue.NewTypedRateLimitingQueueWithConfig[string]( workqueue.DefaultTypedControllerRateLimiter[string](), workqueue.TypedRateLimitingQueueConfig[string]{Name: "virt-controller-disruption-budget"}, ), vmiStore: vmiInformer.GetStore(), pdbIndexer: pdbInformer.GetIndexer(), podIndexer: podInformer.GetIndexer(), migrationIndexer: migrationInformer.GetIndexer(), recorder: recorder, clientset: clientset, clusterConfig: clusterConfig, podDisruptionBudgetExpectations: controller.NewUIDTrackingControllerExpectations(controller.NewControllerExpectations()), } c.hasSynced = func() bool { return vmiInformer.HasSynced() && pdbInformer.HasSynced() && podInformer.HasSynced() && migrationInformer.HasSynced() } _, err := vmiInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: c.addVirtualMachineInstance, DeleteFunc: c.deleteVirtualMachineInstance, UpdateFunc: c.updateVirtualMachineInstance, }) if err != nil { return nil, err } _, err = 
pdbInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: c.addPodDisruptionBudget, DeleteFunc: c.deletePodDisruptionBudget, UpdateFunc: c.updatePodDisruptionBudget, }) if err != nil { return nil, err } _, err = podInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ UpdateFunc: c.updatePod, }) if err != nil { return nil, err } _, err = migrationInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ UpdateFunc: c.updateMigration, }) if err != nil { return nil, err } return c, nil } func (c *DisruptionBudgetController) updateMigration(_, curr interface{}) { vmim := curr.(*virtv1.VirtualMachineInstanceMigration) if vmim.DeletionTimestamp != nil { return } vmi := &virtv1.VirtualMachineInstance{ ObjectMeta: v1.ObjectMeta{ Namespace: vmim.GetNamespace(), Name: vmim.Spec.VMIName, }, } c.enqueueVirtualMachine(vmi) } func (c *DisruptionBudgetController) updatePod(_, curr interface{}) { pod := curr.(*corev1.Pod) if pod.DeletionTimestamp != nil { return } controllerRef := v1.GetControllerOf(pod) if controllerRef == nil { return } vmi := c.resolveControllerRef(pod.Namespace, controllerRef) if vmi == nil { return } c.enqueueVirtualMachine(vmi) } func (c *DisruptionBudgetController) addVirtualMachineInstance(obj interface{}) { c.enqueueVMI(obj) } func (c *DisruptionBudgetController) deleteVirtualMachineInstance(obj interface{}) { c.enqueueVMI(obj) } func (c *DisruptionBudgetController) updateVirtualMachineInstance(_, curr interface{}) { c.enqueueVMI(curr) } func (c *DisruptionBudgetController) enqueueVMI(obj interface{}) { logger := log.Log vmi, ok := obj.(*virtv1.VirtualMachineInstance) // When a delete is dropped, the relist will notice a vmi in the store not // in the list, leading to the insertion of a tombstone object which contains // the deleted key/value. Note that this value might be stale. if !ok { tombstone, ok := obj.(cache.DeletedFinalStateUnknown) if !ok { log.Log.Reason(fmt.Errorf("couldn't get object from tombstone %+v", obj)).Error(deleteNotifFail) return } vmi, ok = tombstone.Obj.(*virtv1.VirtualMachineInstance) if !ok { log.Log.Reason(fmt.Errorf("tombstone contained object that is not a vmi %#v", obj)).Error(deleteNotifFail) return } } key, err := controller.KeyFunc(vmi) if err != nil { logger.Object(vmi).Reason(err).Error("Failed to extract key from vmi.") return } c.Queue.Add(key) } // When a pdb is created, enqueue the vmi that manages it and update its pdbExpectations. func (c *DisruptionBudgetController) addPodDisruptionBudget(obj interface{}) { pdb := obj.(*policyv1.PodDisruptionBudget) if pdb.DeletionTimestamp != nil { // on a restart of the controller manager, it's possible a new pdb shows up in a state that // is already pending deletion. Prevent the pdb from being a creation observation. c.deletePodDisruptionBudget(pdb) return } controllerRef := v1.GetControllerOf(pdb) vmi := c.resolveControllerRef(pdb.Namespace, controllerRef) if vmi == nil { return } vmiKey, err := controller.KeyFunc(vmi) if err != nil { return } log.Log.V(4).Object(pdb).Infof("PodDisruptionBudget created") c.podDisruptionBudgetExpectations.CreationObserved(vmiKey) c.enqueueVirtualMachine(vmi) } // When a pdb is updated, figure out what vmi/s manage it and wake them // up. If the labels of the pdb have changed we need to awaken both the old // and new vmi. old and cur must be *v1.PodDisruptionBudget types.
func (c *DisruptionBudgetController) updatePodDisruptionBudget(old, cur interface{}) { curPodDisruptionBudget := cur.(*policyv1.PodDisruptionBudget) oldPodDisruptionBudget := old.(*policyv1.PodDisruptionBudget) if curPodDisruptionBudget.ResourceVersion == oldPodDisruptionBudget.ResourceVersion { // Periodic resync will send update events for all known pdbs. // Two different versions of the same pdb will always have different RVs. return } if curPodDisruptionBudget.DeletionTimestamp != nil { labelChanged := !equality.Semantic.DeepEqual(curPodDisruptionBudget.Labels, oldPodDisruptionBudget.Labels) // having a pdb marked for deletion is enough to count as a deletion expectation c.deletePodDisruptionBudget(curPodDisruptionBudget) if labelChanged { // we don't need to check the oldPodDisruptionBudget.DeletionTimestamp because DeletionTimestamp cannot be unset. c.deletePodDisruptionBudget(oldPodDisruptionBudget) } return } curControllerRef := v1.GetControllerOf(curPodDisruptionBudget) oldControllerRef := v1.GetControllerOf(oldPodDisruptionBudget) controllerRefChanged := !equality.Semantic.DeepEqual(curControllerRef, oldControllerRef) if controllerRefChanged { // The ControllerRef was changed. Sync the old controller, if any. if vmi := c.resolveControllerRef(oldPodDisruptionBudget.Namespace, oldControllerRef); vmi != nil { c.enqueueVirtualMachine(vmi) } } vmi := c.resolveControllerRef(curPodDisruptionBudget.Namespace, curControllerRef) if vmi == nil { return } log.Log.V(4).Object(curPodDisruptionBudget).Infof("PodDisruptionBudget updated") c.enqueueVirtualMachine(vmi) return } // When a pdb is deleted, enqueue the vmi that manages the pdb and update its pdbExpectations. // obj could be an *v1.PodDisruptionBudget, or a DeletionFinalStateUnknown marker item. func (c *DisruptionBudgetController) deletePodDisruptionBudget(obj interface{}) { pdb, ok := obj.(*policyv1.PodDisruptionBudget) // When a delete is dropped, the relist will notice a pdb in the store not // in the list, leading to the insertion of a tombstone object which contains // the deleted key/value. Note that this value might be stale. If the pdb // changed labels the new vmi will not be woken up till the periodic resync. if !ok { tombstone, ok := obj.(cache.DeletedFinalStateUnknown) if !ok { log.Log.Reason(fmt.Errorf("couldn't get object from tombstone %+v", obj)).Error(deleteNotifFail) return } pdb, ok = tombstone.Obj.(*policyv1.PodDisruptionBudget) if !ok { log.Log.Reason(fmt.Errorf("tombstone contained object that is not a pdb %#v", obj)).Error(deleteNotifFail) return } } controllerRef := v1.GetControllerOf(pdb) vmi := c.resolveControllerRef(pdb.Namespace, controllerRef) if vmi == nil { return } vmiKey, err := controller.KeyFunc(vmi) if err != nil { return } key, err := controller.KeyFunc(pdb) if err != nil { return } c.podDisruptionBudgetExpectations.DeletionObserved(vmiKey, key) c.enqueueVirtualMachine(vmi) } func (c *DisruptionBudgetController) enqueueVirtualMachine(obj interface{}) { logger := log.Log vmi := obj.(*virtv1.VirtualMachineInstance) key, err := controller.KeyFunc(vmi) if err != nil { logger.Object(vmi).Reason(err).Error("Failed to extract key from virtualmachineinstance.") return } c.Queue.Add(key) } // resolveControllerRef returns the controller referenced by a ControllerRef, // or nil if the ControllerRef could not be resolved to a matching controller // of the correct Kind. 
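// Note that the returned VirtualMachineInstance is only a reference stub populated with the owner's
// Name, Namespace and UID; it is not looked up in the VMI store and carries no spec or status.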
func (c *DisruptionBudgetController) resolveControllerRef(namespace string, controllerRef *v1.OwnerReference) *virtv1.VirtualMachineInstance { // We can't look up by UID, so look up by Name and then verify UID. // Don't even try to look up by Name if it is nil or the wrong Kind. if controllerRef == nil || controllerRef.Kind != virtv1.VirtualMachineInstanceGroupVersionKind.Kind { return nil } return &virtv1.VirtualMachineInstance{ ObjectMeta: v1.ObjectMeta{ Name: controllerRef.Name, Namespace: namespace, UID: controllerRef.UID, }, } } // Run starts the DisruptionBudgetController. func (c *DisruptionBudgetController) Run(threadiness int, stopCh <-chan struct{}) { defer controller.HandlePanic() defer c.Queue.ShutDown() log.Log.Info("Starting disruption budget controller.") // Wait for cache sync before we start the disruption budget controller cache.WaitForCacheSync(stopCh, c.hasSynced) // Start the actual work for i := 0; i < threadiness; i++ { go wait.Until(c.runWorker, time.Second, stopCh) } <-stopCh log.Log.Info("Stopping disruption budget controller.") } func (c *DisruptionBudgetController) runWorker() { for c.Execute() { } } func (c *DisruptionBudgetController) Execute() bool { key, quit := c.Queue.Get() if quit { return false } defer c.Queue.Done(key) err := c.execute(key) if err != nil { log.Log.Reason(err).Infof("reenqueuing VirtualMachineInstance %v", key) c.Queue.AddRateLimited(key) } else { log.Log.V(4).Infof("processed VirtualMachineInstance %v", key) c.Queue.Forget(key) } return true } func (c *DisruptionBudgetController) execute(key string) error { if !c.podDisruptionBudgetExpectations.SatisfiedExpectations(key) { return nil } // Fetch the latest VMI state from cache obj, vmiExists, err := c.vmiStore.GetByKey(key) if err != nil { return err } var vmi *virtv1.VirtualMachineInstance // Once all finalizers are removed the vmi gets deleted and we can clean all expectations if vmiExists { vmi = obj.(*virtv1.VirtualMachineInstance) } else { namespace, name, err := cache.SplitMetaNamespaceKey(key) if err != nil { log.DefaultLogger().Reason(err).Error("Could not extract namespace and name from the controller key.") return err } vmi = virtv1.NewVMIReferenceFromNameWithNS(namespace, name) } // Only consider pdbs which belong to this vmi pdbs, err := pdbs.PDBsForVMI(vmi, c.pdbIndexer) if err != nil { log.DefaultLogger().Reason(err).Error("Failed to fetch pod disruption budgets for namespace from cache.") // If the situation does not change there is no benefit in retrying return nil } if len(pdbs) == 0 { return c.sync(key, vmiExists, vmi, nil) } for i := range pdbs { if syncErr := c.sync(key, vmiExists, vmi, pdbs[i]); syncErr != nil { err = syncErr } } return err } func (c *DisruptionBudgetController) isMigrationComplete(vmi *virtv1.VirtualMachineInstance, migrationName string) (bool, error) { objs, err := c.migrationIndexer.ByIndex(cache.NamespaceIndex, vmi.Namespace) if err != nil { return false, err } var migration *virtv1.VirtualMachineInstanceMigration for _, obj := range objs { vmim := obj.(*virtv1.VirtualMachineInstanceMigration) if vmim.GetName() == migrationName { migration = vmim break } } if migration == nil { // if no migration is found we consider it as completed return true, nil } else if !migration.IsFinal() { return false, nil } runningPods := controller.VMIActivePodsCount(vmi, c.podIndexer) return runningPods == 1, nil } func (c *DisruptionBudgetController) isVMIMCompletedForPDB(pdb *policyv1.PodDisruptionBudget, vmi *virtv1.VirtualMachineInstance) (bool, error) { migrationName :=
pdb.ObjectMeta.Labels[virtv1.MigrationNameLabel] if migrationName == "" { return false, nil } return c.isMigrationComplete(vmi, migrationName) } func (c *DisruptionBudgetController) deletePDB(key string, pdb *policyv1.PodDisruptionBudget, vmi *virtv1.VirtualMachineInstance) error { if pdb != nil && pdb.DeletionTimestamp == nil { pdbKey, err := cache.MetaNamespaceKeyFunc(pdb) if err != nil { return err } c.podDisruptionBudgetExpectations.ExpectDeletions(key, []string{pdbKey}) err = c.clientset.PolicyV1().PodDisruptionBudgets(pdb.Namespace).Delete(context.Background(), pdb.Name, v1.DeleteOptions{}) if err != nil { c.podDisruptionBudgetExpectations.DeletionObserved(key, pdbKey) c.recorder.Eventf(vmi, corev1.EventTypeWarning, FailedDeletePodDisruptionBudgetReason, "Error deleting the PodDisruptionBudget %s: %v", pdb.Name, err) return err } c.recorder.Eventf(vmi, corev1.EventTypeNormal, SuccessfulDeletePodDisruptionBudgetReason, "Deleted PodDisruptionBudget %s", pdb.Name) } return nil } func (c *DisruptionBudgetController) shrinkPDB(vmi *virtv1.VirtualMachineInstance, pdb *policyv1.PodDisruptionBudget) error { if pdb != nil && pdb.DeletionTimestamp == nil && pdb.Spec.MinAvailable != nil && pdb.Spec.MinAvailable.IntValue() != 1 { patches := patch.New( patch.WithReplace("/spec/minAvailable", 1), patch.WithRemove(fmt.Sprintf("/metadata/labels/%s", patch.EscapeJSONPointer(virtv1.MigrationNameLabel))), ) patchOps, err := patches.GeneratePayload() if err != nil { return err } _, err = c.clientset.PolicyV1().PodDisruptionBudgets(pdb.Namespace).Patch(context.Background(), pdb.Name, types.JSONPatchType, patchOps, v1.PatchOptions{}) if err != nil { c.recorder.Eventf(vmi, corev1.EventTypeWarning, FailedUpdatePodDisruptionBudgetReason, "Error updating the PodDisruptionBudget %s: %v", pdb.Name, err) return err } c.recorder.Eventf(vmi, corev1.EventTypeNormal, SuccessfulUpdatePodDisruptionBudgetReason, "shrank PodDisruptionBudget %s", pdb.Name) } return nil } func (c *DisruptionBudgetController) createPDB(key string, vmi *virtv1.VirtualMachineInstance) error { minAvailable := intstr.FromInt(1) c.podDisruptionBudgetExpectations.ExpectCreations(key, 1) createdPDB, err := c.clientset.PolicyV1().PodDisruptionBudgets(vmi.Namespace).Create(context.Background(), &policyv1.PodDisruptionBudget{ ObjectMeta: v1.ObjectMeta{ OwnerReferences: []v1.OwnerReference{ *v1.NewControllerRef(vmi, virtv1.VirtualMachineInstanceGroupVersionKind), }, GenerateName: "kubevirt-disruption-budget-", }, Spec: policyv1.PodDisruptionBudgetSpec{ MinAvailable: &minAvailable, Selector: &v1.LabelSelector{ MatchLabels: map[string]string{ virtv1.CreatedByLabel: string(vmi.UID), }, }, }, }, v1.CreateOptions{}) if err != nil { c.podDisruptionBudgetExpectations.CreationObserved(key) c.recorder.Eventf(vmi, corev1.EventTypeWarning, FailedCreatePodDisruptionBudgetReason, "Error creating a PodDisruptionBudget: %v", err) return err } c.recorder.Eventf(vmi, corev1.EventTypeNormal, SuccessfulCreatePodDisruptionBudgetReason, "Created PodDisruptionBudget %s", createdPDB.Name) return nil } func isPDBFromOldVMI(vmi *virtv1.VirtualMachineInstance, pdb *policyv1.PodDisruptionBudget) bool { // The pdb might be from an old vmi with a different uid, delete and later create the correct one // The VMI always has a minimum grace period, so normally this should not happen, therefore no optimizations if pdb == nil { return false } ownerRef := v1.GetControllerOf(pdb) return ownerRef != nil && ownerRef.UID != vmi.UID } func (c *DisruptionBudgetController) sync(key string, 
vmiExists bool, vmi *virtv1.VirtualMachineInstance, pdb *policyv1.PodDisruptionBudget) error { needsEvictionProtection := c.vmiNeedsEvictionPDB(vmiExists, vmi) if pdb == nil { if needsEvictionProtection { // If no PDB exists and eviction protection is needed, create a new PDB log.Log.Object(vmi).Infof("creating pdb for VMI %s/%s", vmi.Namespace, vmi.Name) return c.createPDB(key, vmi) } return nil } if shouldDelete, reason := shouldDeletePDB(vmiExists, vmi, pdb, needsEvictionProtection); shouldDelete { log.Log.Object(vmi).Infof("deleting pdb %s/%s due to %s", pdb.Namespace, pdb.Name, reason) return c.deletePDB(key, pdb, vmi) } vmimCompleted, err := c.isVMIMCompletedForPDB(pdb, vmi) if err != nil { return err } if vmimCompleted { log.Log.Object(vmi).Infof("shrinking pdb %s/%s due to migration completion", pdb.Namespace, pdb.Name) return c.shrinkPDB(vmi, pdb) } return nil } func shouldDeletePDB(vmiExists bool, vmi *virtv1.VirtualMachineInstance, pdb *policyv1.PodDisruptionBudget, needsEvictionProtection bool) (shouldDelete bool, deletionReason string) { if pdb == nil { return false, "" } switch { case !vmiExists || vmi.DeletionTimestamp != nil: return true, "VMI deletion" case !needsEvictionProtection: return true, "VMI not using evictionStrategy: LiveMigration|External" case vmi.IsFinal(): return true, "VMI has moved to a Final state and is no longer active" case isPDBFromOldVMI(vmi, pdb): return true, "VMI not existing anymore" case pdbs.IsPDBFromOldMigrationController(pdb): return true, "PDB generated by an old migration controller" default: return false, "" } } func (c *DisruptionBudgetController) vmiNeedsEvictionPDB(vmiExists bool, vmi *virtv1.VirtualMachineInstance) bool { if !vmiExists || vmi.DeletionTimestamp != nil { return false } evictionStrategy := migrations.VMIEvictionStrategy(c.clusterConfig, vmi) if evictionStrategy == nil { return false } switch *evictionStrategy { case virtv1.EvictionStrategyLiveMigrate, virtv1.EvictionStrategyExternal: return true case virtv1.EvictionStrategyLiveMigrateIfPossible: return vmi.IsMigratable() default: return false } }
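// Wiring sketch (informal, for illustration only; informer, recorder and client construction is
// elided and the variable names are placeholders, not part of this package):
//
//	c, err := NewDisruptionBudgetController(vmiInformer, pdbInformer, podInformer,
//		migrationInformer, recorder, clientset, clusterConfig)
//	if err != nil {
//		panic(err)
//	}
//	go c.Run(3, stopCh)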
/* * This file is part of the KubeVirt project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * Copyright 2017, 2018 Red Hat, Inc. * */ package migration import ( "context" "errors" "fmt" "sort" "strconv" "strings" "sync" "time" backendstorage "kubevirt.io/kubevirt/pkg/storage/backend-storage" "github.com/opencontainers/selinux/go-selinux" "kubevirt.io/api/migrations/v1alpha1" k8sv1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" "k8s.io/apimachinery/pkg/api/equality" k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" "k8s.io/client-go/util/workqueue" "kubevirt.io/kubevirt/pkg/apimachinery/patch" "kubevirt.io/kubevirt/pkg/util" "kubevirt.io/kubevirt/pkg/util/pdbs" virtconfig "kubevirt.io/kubevirt/pkg/virt-config" "kubevirt.io/kubevirt/pkg/util/migrations" virtv1 "kubevirt.io/api/core/v1" "kubevirt.io/client-go/kubecli" "kubevirt.io/client-go/log" "kubevirt.io/kubevirt/pkg/controller" storagetypes "kubevirt.io/kubevirt/pkg/storage/types" "kubevirt.io/kubevirt/pkg/virt-controller/services" "kubevirt.io/kubevirt/pkg/virt-controller/watch/descheduler" ) const ( failedToProcessDeleteNotificationErrMsg = "Failed to process delete notification" successfulUpdatePodDisruptionBudgetReason = "SuccessfulUpdate" failedUpdatePodDisruptionBudgetReason = "FailedUpdate" failedGetAttractionPodsFmt = "failed to get attachment pods: %v" ) // This is the timeout used when a target pod is stuck in // a pending unschedulable state. const defaultUnschedulablePendingTimeoutSeconds = int64(60 * 5) // This is how many finalized migration objects can be left in // the system before we begin garbage collecting the oldest // migration objects const defaultFinalizedMigrationGarbageCollectionBuffer = 5 // This is a catch-all timeout used when a target pod is stuck in // the pending phase for any reason. The theory behind this timeout // being longer than the unschedulable timeout is that we don't necessarily // know all the reasons a pod will be stuck in pending for an extended // period of time, so we want to make this timeout long enough that it doesn't // cause the migration to fail when it could have reasonably succeeded.
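// With the default values used here this works out to 5 minutes for the unschedulable case above
// and 15 minutes for the catch-all case defined just below.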
const defaultCatchAllPendingTimeoutSeconds = int64(60 * 15) var migrationBackoffError = errors.New(controller.MigrationBackoffReason) type Controller struct { templateService services.TemplateService clientset kubecli.KubevirtClient Queue workqueue.TypedRateLimitingInterface[string] vmiStore cache.Store podIndexer cache.Indexer migrationIndexer cache.Indexer nodeStore cache.Store pvcStore cache.Store storageClassStore cache.Store storageProfileStore cache.Store pdbIndexer cache.Indexer migrationPolicyStore cache.Store resourceQuotaIndexer cache.Indexer recorder record.EventRecorder podExpectations *controller.UIDTrackingControllerExpectations pvcExpectations *controller.UIDTrackingControllerExpectations migrationStartLock *sync.Mutex clusterConfig *virtconfig.ClusterConfig hasSynced func() bool // the set of cancelled migrations before being handed off to virt-handler. // the map keys are migration keys handOffLock sync.Mutex handOffMap map[string]struct{} unschedulablePendingTimeoutSeconds int64 catchAllPendingTimeoutSeconds int64 } func NewController(templateService services.TemplateService, vmiInformer cache.SharedIndexInformer, podInformer cache.SharedIndexInformer, migrationInformer cache.SharedIndexInformer, nodeInformer cache.SharedIndexInformer, pvcInformer cache.SharedIndexInformer, storageClassInformer cache.SharedIndexInformer, storageProfileInformer cache.SharedIndexInformer, pdbInformer cache.SharedIndexInformer, migrationPolicyInformer cache.SharedIndexInformer, resourceQuotaInformer cache.SharedIndexInformer, recorder record.EventRecorder, clientset kubecli.KubevirtClient, clusterConfig *virtconfig.ClusterConfig, ) (*Controller, error) { c := &Controller{ templateService: templateService, Queue: workqueue.NewTypedRateLimitingQueueWithConfig[string]( workqueue.DefaultTypedControllerRateLimiter[string](), workqueue.TypedRateLimitingQueueConfig[string]{Name: "virt-controller-migration"}, ), vmiStore: vmiInformer.GetStore(), podIndexer: podInformer.GetIndexer(), migrationIndexer: migrationInformer.GetIndexer(), nodeStore: nodeInformer.GetStore(), pvcStore: pvcInformer.GetStore(), storageClassStore: storageClassInformer.GetStore(), storageProfileStore: storageProfileInformer.GetStore(), pdbIndexer: pdbInformer.GetIndexer(), resourceQuotaIndexer: resourceQuotaInformer.GetIndexer(), migrationPolicyStore: migrationPolicyInformer.GetStore(), recorder: recorder, clientset: clientset, podExpectations: controller.NewUIDTrackingControllerExpectations(controller.NewControllerExpectations()), pvcExpectations: controller.NewUIDTrackingControllerExpectations(controller.NewControllerExpectations()), migrationStartLock: &sync.Mutex{}, clusterConfig: clusterConfig, handOffMap: make(map[string]struct{}), unschedulablePendingTimeoutSeconds: defaultUnschedulablePendingTimeoutSeconds, catchAllPendingTimeoutSeconds: defaultCatchAllPendingTimeoutSeconds, } c.hasSynced = func() bool { return vmiInformer.HasSynced() && podInformer.HasSynced() && migrationInformer.HasSynced() && pdbInformer.HasSynced() && resourceQuotaInformer.HasSynced() } _, err := vmiInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: c.addVMI, DeleteFunc: c.deleteVMI, UpdateFunc: c.updateVMI, }) if err != nil { return nil, err } _, err = podInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: c.addPod, DeleteFunc: c.deletePod, UpdateFunc: c.updatePod, }) if err != nil { return nil, err } _, err = migrationInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: c.addMigration, DeleteFunc: 
c.deleteMigration, UpdateFunc: c.updateMigration, }) if err != nil { return nil, err } _, err = pdbInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ UpdateFunc: c.updatePDB, }) if err != nil { return nil, err } _, err = pvcInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: c.addPVC, }) if err != nil { return nil, err } _, err = resourceQuotaInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ UpdateFunc: c.updateResourceQuota, DeleteFunc: c.deleteResourceQuota, }) if err != nil { return nil, err } return c, nil } func (c *Controller) Run(threadiness int, stopCh <-chan struct{}) { defer controller.HandlePanic() defer c.Queue.ShutDown() log.Log.Info("Starting migration controller.") // Wait for cache sync before we start the migration controller cache.WaitForCacheSync(stopCh, c.hasSynced) // Start the actual work for i := 0; i < threadiness; i++ { go wait.Until(c.runWorker, time.Second, stopCh) } <-stopCh log.Log.Info("Stopping migration controller.") } func (c *Controller) runWorker() { for c.Execute() { } } func (c *Controller) Execute() bool { key, quit := c.Queue.Get() if quit { return false } defer c.Queue.Done(key) err := c.execute(key) if err != nil { log.Log.Reason(err).Infof("reenqueuing Migration %v", key) c.Queue.AddRateLimited(key) } else { log.Log.V(4).Infof("processed Migration %v", key) c.Queue.Forget(key) } return true } func ensureSelectorLabelPresent(migration *virtv1.VirtualMachineInstanceMigration) { if migration.Labels == nil { migration.Labels = map[string]string{virtv1.MigrationSelectorLabel: migration.Spec.VMIName} } else if _, exist := migration.Labels[virtv1.MigrationSelectorLabel]; !exist { migration.Labels[virtv1.MigrationSelectorLabel] = migration.Spec.VMIName } } func (c *Controller) patchVMI(origVMI, newVMI *virtv1.VirtualMachineInstance) error { patchSet := patch.New() if !equality.Semantic.DeepEqual(origVMI.Status.MigrationState, newVMI.Status.MigrationState) { if origVMI.Status.MigrationState == nil { patchSet.AddOption(patch.WithAdd("/status/migrationState", newVMI.Status.MigrationState)) } else { patchSet.AddOption( patch.WithTest("/status/migrationState", origVMI.Status.MigrationState), patch.WithReplace("/status/migrationState", newVMI.Status.MigrationState), ) } } if !equality.Semantic.DeepEqual(origVMI.Labels, newVMI.Labels) { patchSet.AddOption( patch.WithTest("/metadata/labels", origVMI.Labels), patch.WithReplace("/metadata/labels", newVMI.Labels), ) } if !patchSet.IsEmpty() { patchBytes, err := patchSet.GeneratePayload() if err != nil { return err } if _, err = c.clientset.VirtualMachineInstance(origVMI.Namespace).Patch(context.Background(), origVMI.Name, types.JSONPatchType, patchBytes, v1.PatchOptions{}); err != nil { return err } } return nil } func (c *Controller) execute(key string) error { var vmi *virtv1.VirtualMachineInstance var targetPods []*k8sv1.Pod // Fetch the latest state from cache obj, exists, err := c.migrationIndexer.GetByKey(key) if err != nil { return err } if !exists { c.podExpectations.DeleteExpectations(key) c.removeHandOffKey(key) return nil } migration := obj.(*virtv1.VirtualMachineInstanceMigration) logger := log.Log.Object(migration) // this must be the first step in execution. Writing the object // when api version changes ensures our api stored version is updated.
if !controller.ObservedLatestApiVersionAnnotation(migration) { migration := migration.DeepCopy() controller.SetLatestApiVersionAnnotation(migration) // Ensure the migration contains our selector label ensureSelectorLabelPresent(migration) _, err = c.clientset.VirtualMachineInstanceMigration(migration.Namespace).Update(context.Background(), migration, metav1.UpdateOptions{}) return err } vmiObj, vmiExists, err := c.vmiStore.GetByKey(fmt.Sprintf("%s/%s", migration.Namespace, migration.Spec.VMIName)) if err != nil { return err } if !vmiExists { if migration.DeletionTimestamp == nil { logger.V(3).Infof("Deleting migration for deleted vmi %s/%s", migration.Namespace, migration.Spec.VMIName) return c.clientset.VirtualMachineInstanceMigration(migration.Namespace).Delete(context.Background(), migration.Name, v1.DeleteOptions{}) } // nothing to process for a migration that has no VMI return nil } vmi = vmiObj.(*virtv1.VirtualMachineInstance) targetPods, err = c.listMatchingTargetPods(migration, vmi) if err != nil { return err } needsSync := c.podExpectations.SatisfiedExpectations(key) && c.pvcExpectations.SatisfiedExpectations(key) logger.V(4).Infof("processing migration: needsSync %t, hasVMI %t, targetPod len %d", needsSync, vmiExists, len(targetPods)) var syncErr error if needsSync { syncErr = c.sync(key, migration, vmi, targetPods) } err = c.updateStatus(migration, vmi, targetPods, syncErr) if err != nil { return err } if syncErr != nil { return syncErr } if migration.IsFinal() { err = c.garbageCollectFinalizedMigrations(vmi) if err != nil { return err } } return nil } func (c *Controller) canMigrateVMI(migration *virtv1.VirtualMachineInstanceMigration, vmi *virtv1.VirtualMachineInstance) (bool, error) { if vmi.Status.MigrationState == nil { return true, nil } else if vmi.Status.MigrationState.MigrationUID == migration.UID { return true, nil } else if vmi.Status.MigrationState.MigrationUID == "" { return true, nil } curMigrationUID := vmi.Status.MigrationState.MigrationUID // check to see if the curMigrationUID still exists or is finalized objs, err := c.migrationIndexer.ByIndex(cache.NamespaceIndex, migration.Namespace) if err != nil { return false, err } for _, obj := range objs { curMigration := obj.(*virtv1.VirtualMachineInstanceMigration) if curMigration.UID != curMigrationUID { continue } if curMigration.IsFinal() { // If the other job already completed, it's okay to take over the migration. 
return true, nil } return false, nil } return true, nil } func (c *Controller) failMigration(migration *virtv1.VirtualMachineInstanceMigration) error { err := backendstorage.MigrationAbort(c.clientset, migration) if err != nil { return err } migration.Status.Phase = virtv1.MigrationFailed return nil } func (c *Controller) interruptMigration(migration *virtv1.VirtualMachineInstanceMigration, vmi *virtv1.VirtualMachineInstance) error { if vmi == nil || !backendstorage.IsBackendStorageNeededForVMI(&vmi.Spec) { return c.failMigration(migration) } return backendstorage.RecoverFromBrokenMigration(c.clientset, migration, c.pvcStore, vmi, c.templateService.GetLauncherImage()) } func (c *Controller) updateStatus(migration *virtv1.VirtualMachineInstanceMigration, vmi *virtv1.VirtualMachineInstance, pods []*k8sv1.Pod, syncError error) error { var pod *k8sv1.Pod = nil var attachmentPod *k8sv1.Pod = nil conditionManager := controller.NewVirtualMachineInstanceMigrationConditionManager() migrationCopy := migration.DeepCopy() podExists, attachmentPodExists := len(pods) > 0, false if podExists { pod = pods[0] if attachmentPods, err := controller.AttachmentPods(pod, c.podIndexer); err != nil { return fmt.Errorf(failedGetAttractionPodsFmt, err) } else { attachmentPodExists = len(attachmentPods) > 0 if attachmentPodExists { attachmentPod = attachmentPods[0] } } } // Status checking of active Migration job. // // - Fail if any obvious failure is found // - Interrupt if something unexpectedly disappeared // - Begin progressing migration state based on VMI's MigrationState status. if migration.IsFinal() { if vmi.Status.MigrationState != nil && migration.UID == vmi.Status.MigrationState.MigrationUID { // Store the finalized migration state data from the VMI status in the migration object migrationCopy.Status.MigrationState = vmi.Status.MigrationState } // Remove the finalizer and conditions if the migration has already completed controller.RemoveFinalizer(migrationCopy, virtv1.VirtualMachineInstanceMigrationFinalizer) } else if vmi == nil { err := c.failMigration(migrationCopy) if err != nil { return err } c.recorder.Eventf(migration, k8sv1.EventTypeWarning, controller.FailedMigrationReason, "Migration failed because vmi does not exist.") log.Log.Object(migration).Error("vmi does not exist") } else if vmi.IsFinal() { err := c.interruptMigration(migrationCopy, vmi) if err != nil { return err } c.recorder.Eventf(migration, k8sv1.EventTypeWarning, controller.FailedMigrationReason, "Migration failed vmi shutdown during migration.") log.Log.Object(migration).Error("Unable to migrate vmi because vmi is shutdown.") } else if migration.DeletionTimestamp != nil && !c.isMigrationHandedOff(migration, vmi) { c.recorder.Eventf(migration, k8sv1.EventTypeWarning, controller.FailedMigrationReason, "Migration failed due to being canceled") if !conditionManager.HasCondition(migration, virtv1.VirtualMachineInstanceMigrationAbortRequested) { condition := virtv1.VirtualMachineInstanceMigrationCondition{ Type: virtv1.VirtualMachineInstanceMigrationAbortRequested, Status: k8sv1.ConditionTrue, LastProbeTime: v1.Now(), } migrationCopy.Status.Conditions = append(migrationCopy.Status.Conditions, condition) } err := c.failMigration(migrationCopy) if err != nil { return err } } else if podExists && controller.PodIsDown(pod) { err := c.interruptMigration(migrationCopy, vmi) if err != nil { return err } c.recorder.Eventf(migration, k8sv1.EventTypeWarning, controller.FailedMigrationReason, "Migration failed because target pod shutdown during 
migration") log.Log.Object(migration).Errorf("target pod %s/%s shutdown during migration", pod.Namespace, pod.Name) } else if migration.TargetIsCreated() && !podExists { err := c.interruptMigration(migrationCopy, vmi) if err != nil { return err } c.recorder.Eventf(migration, k8sv1.EventTypeWarning, controller.FailedMigrationReason, "Migration target pod was removed during active migration.") log.Log.Object(migration).Error("target pod disappeared during migration") } else if migration.TargetIsHandedOff() && vmi.Status.MigrationState == nil { err := c.failMigration(migrationCopy) if err != nil { return err } c.recorder.Eventf(migration, k8sv1.EventTypeWarning, controller.FailedMigrationReason, "VMI's migration state was cleared during the active migration.") log.Log.Object(migration).Error("vmi migration state cleared during migration") } else if migration.TargetIsHandedOff() && vmi.Status.MigrationState != nil && vmi.Status.MigrationState.MigrationUID != migration.UID { err := c.failMigration(migrationCopy) if err != nil { return err } c.recorder.Eventf(migration, k8sv1.EventTypeWarning, controller.FailedMigrationReason, "VMI's migration state was taken over by another migration job during active migration.") log.Log.Object(migration).Error("vmi's migration state was taken over by another migration object") } else if vmi.Status.MigrationState != nil && vmi.Status.MigrationState.MigrationUID == migration.UID && vmi.Status.MigrationState.Failed { err := c.failMigration(migrationCopy) if err != nil { return err } c.recorder.Eventf(migration, k8sv1.EventTypeWarning, controller.FailedMigrationReason, "source node reported migration failed") log.Log.Object(migration).Errorf("VMI %s/%s reported migration failed", vmi.Namespace, vmi.Name) } else if migration.DeletionTimestamp != nil && !migration.IsFinal() && !conditionManager.HasCondition(migration, virtv1.VirtualMachineInstanceMigrationAbortRequested) { condition := virtv1.VirtualMachineInstanceMigrationCondition{ Type: virtv1.VirtualMachineInstanceMigrationAbortRequested, Status: k8sv1.ConditionTrue, LastProbeTime: v1.Now(), } migrationCopy.Status.Conditions = append(migrationCopy.Status.Conditions, condition) } else if attachmentPodExists && controller.PodIsDown(attachmentPod) { err := c.failMigration(migrationCopy) if err != nil { return err } c.recorder.Eventf(migration, k8sv1.EventTypeWarning, controller.FailedMigrationReason, "Migration failed because target attachment pod shutdown during migration") log.Log.Object(migration).Errorf("target attachment pod %s/%s shutdown during migration", attachmentPod.Namespace, attachmentPod.Name) } else { err := c.processMigrationPhase(migration, migrationCopy, pod, attachmentPod, vmi, syncError) if err != nil { return err } } if migrationCopy.Status.Phase == virtv1.MigrationFailed { if err := descheduler.MarkSourcePodEvictionCompleted(c.clientset, migrationCopy, c.podIndexer); err != nil { return err } } controller.SetVMIMigrationPhaseTransitionTimestamp(migration, migrationCopy) controller.SetSourcePod(migrationCopy, vmi, c.podIndexer) if !equality.Semantic.DeepEqual(migration.Status, migrationCopy.Status) { var err error migration, err = c.clientset.VirtualMachineInstanceMigration(migrationCopy.Namespace).UpdateStatus(context.Background(), migrationCopy, v1.UpdateOptions{}) if err != nil { return err } } if !equality.Semantic.DeepEqual(migration.Finalizers, migrationCopy.Finalizers) { _, err := c.clientset.VirtualMachineInstanceMigration(migrationCopy.Namespace).Update(context.Background(), 
migrationCopy, metav1.UpdateOptions{}) if err != nil { return err } } return nil } func (c *Controller) processMigrationPhase( migration, migrationCopy *virtv1.VirtualMachineInstanceMigration, pod, attachmentPod *k8sv1.Pod, vmi *virtv1.VirtualMachineInstance, syncError error, ) error { conditionManager := controller.NewVirtualMachineInstanceMigrationConditionManager() vmiConditionManager := controller.NewVirtualMachineInstanceConditionManager() switch migration.Status.Phase { case virtv1.MigrationPhaseUnset: canMigrate, err := c.canMigrateVMI(migration, vmi) if err != nil { return err } if canMigrate { migrationCopy.Status.Phase = virtv1.MigrationPending } else { // cannot migrate because there is an active migration already // in progress for this VMI. err := c.failMigration(migrationCopy) if err != nil { return err } c.recorder.Eventf(migration, k8sv1.EventTypeWarning, controller.FailedMigrationReason, "VMI is not eligible for migration because another migration job is in progress.") log.Log.Object(migration).Error("Migration object not eligible for migration because another job is in progress") } case virtv1.MigrationPending: if pod != nil { if controller.VMIHasHotplugVolumes(vmi) { if attachmentPod != nil { migrationCopy.Status.Phase = virtv1.MigrationScheduling } } else { migrationCopy.Status.Phase = virtv1.MigrationScheduling } } else if syncError != nil && strings.Contains(syncError.Error(), "exceeded quota") && !conditionManager.HasCondition(migration, virtv1.VirtualMachineInstanceMigrationRejectedByResourceQuota) { condition := virtv1.VirtualMachineInstanceMigrationCondition{ Type: virtv1.VirtualMachineInstanceMigrationRejectedByResourceQuota, Status: k8sv1.ConditionTrue, LastProbeTime: v1.Now(), } migrationCopy.Status.Conditions = append(migrationCopy.Status.Conditions, condition) } case virtv1.MigrationScheduling: if conditionManager.HasCondition(migrationCopy, virtv1.VirtualMachineInstanceMigrationRejectedByResourceQuota) { conditionManager.RemoveCondition(migrationCopy, virtv1.VirtualMachineInstanceMigrationRejectedByResourceQuota) } if controller.IsPodReady(pod) { if controller.VMIHasHotplugVolumes(vmi) { if attachmentPod != nil && controller.IsPodReady(attachmentPod) { log.Log.Object(migration).Infof("Attachment pod %s for vmi %s/%s is ready", attachmentPod.Name, vmi.Namespace, vmi.Name) migrationCopy.Status.Phase = virtv1.MigrationScheduled } } else { migrationCopy.Status.Phase = virtv1.MigrationScheduled } } case virtv1.MigrationScheduled: if vmi.Status.MigrationState != nil && vmi.Status.MigrationState.MigrationUID == migration.UID && vmi.Status.MigrationState.TargetNode != "" { migrationCopy.Status.Phase = virtv1.MigrationPreparingTarget } case virtv1.MigrationPreparingTarget: if vmi.Status.MigrationState.TargetNode != "" && vmi.Status.MigrationState.TargetNodeAddress != "" { migrationCopy.Status.Phase = virtv1.MigrationTargetReady } case virtv1.MigrationTargetReady: if vmi.Status.MigrationState.StartTimestamp != nil { migrationCopy.Status.Phase = virtv1.MigrationRunning } case virtv1.MigrationRunning: _, exists := pod.Annotations[virtv1.MigrationTargetReadyTimestamp] if !exists && vmi.Status.MigrationState.TargetNodeDomainReadyTimestamp != nil { if backendstorage.IsBackendStorageNeededForVMI(&vmi.Spec) { err := backendstorage.MigrationHandoff(c.clientset, c.pvcStore, migration) if err != nil { return err } } patchBytes, err := patch.New( patch.WithAdd(fmt.Sprintf("/metadata/annotations/%s", patch.EscapeJSONPointer(virtv1.MigrationTargetReadyTimestamp)),
vmi.Status.MigrationState.TargetNodeDomainReadyTimestamp.String()), ).GeneratePayload() if err != nil { return err } if _, err = c.clientset.CoreV1().Pods(pod.Namespace).Patch(context.Background(), pod.Name, types.JSONPatchType, patchBytes, v1.PatchOptions{}); err != nil { return err } } if vmi.Status.MigrationState.Completed && !vmiConditionManager.HasCondition(vmi, virtv1.VirtualMachineInstanceVCPUChange) && !vmiConditionManager.HasConditionWithStatus(vmi, virtv1.VirtualMachineInstanceMemoryChange, k8sv1.ConditionTrue) { migrationCopy.Status.Phase = virtv1.MigrationSucceeded c.recorder.Eventf(migration, k8sv1.EventTypeNormal, controller.SuccessfulMigrationReason, "Source node reported migration succeeded") log.Log.Object(migration).Infof("VMI reported migration succeeded.") } } return nil } func setTargetPodSELinuxLevel(pod *k8sv1.Pod, vmiSeContext string) error { // The target pod may share resources with the source pod (RWX disks for example) // Therefore, it needs to share the same SELinux categories to inherit the same permissions // Note: there is a small probability that the target pod will share the same categories as another pod on its node. // It is a slight security concern, but not as bad as removing categories on all shared objects for the duration of the migration. if vmiSeContext == "none" { // The SelinuxContext is explicitly set to "none" when SELinux is not present return nil } if vmiSeContext == "" { return fmt.Errorf("SELinux context not set on VMI status") } else { seContext, err := selinux.NewContext(vmiSeContext) if err != nil { return err } level, exists := seContext["level"] if exists && level != "" { // The SELinux context looks like "system_u:object_r:container_file_t:s0:c1,c2", we care about "s0:c1,c2" if pod.Spec.SecurityContext == nil { pod.Spec.SecurityContext = &k8sv1.PodSecurityContext{} } pod.Spec.SecurityContext.SELinuxOptions = &k8sv1.SELinuxOptions{ Level: level, } } } return nil } func (c *Controller) createTargetPod(migration *virtv1.VirtualMachineInstanceMigration, vmi *virtv1.VirtualMachineInstance, sourcePod *k8sv1.Pod) error { if !c.pvcExpectations.SatisfiedExpectations(controller.MigrationKey(migration)) { // Give time to the PVC informer to update itself return nil } templatePod, err := c.templateService.RenderMigrationManifest(vmi, migration, sourcePod) if err != nil { return fmt.Errorf("failed to render launch manifest: %v", err) } antiAffinityTerm := k8sv1.PodAffinityTerm{ LabelSelector: &v1.LabelSelector{ MatchLabels: map[string]string{ virtv1.CreatedByLabel: string(vmi.UID), }, }, TopologyKey: k8sv1.LabelHostname, } antiAffinityRule := &k8sv1.PodAntiAffinity{ RequiredDuringSchedulingIgnoredDuringExecution: []k8sv1.PodAffinityTerm{antiAffinityTerm}, } if templatePod.Spec.Affinity == nil { templatePod.Spec.Affinity = &k8sv1.Affinity{ PodAntiAffinity: antiAffinityRule, } } else if templatePod.Spec.Affinity.PodAntiAffinity == nil { templatePod.Spec.Affinity.PodAntiAffinity = antiAffinityRule } else { templatePod.Spec.Affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution = append(templatePod.Spec.Affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution, antiAffinityTerm) } templatePod.ObjectMeta.Labels[virtv1.MigrationJobLabel] = string(migration.UID) templatePod.ObjectMeta.Annotations[virtv1.MigrationJobNameAnnotation] = migration.Name // If cpu model is "host model" allow migration only to nodes that support this cpu model if cpu := vmi.Spec.Domain.CPU; cpu != nil && cpu.Model == virtv1.CPUModeHostModel {
node, err := c.getNodeForVMI(vmi) if err != nil { return err } err = prepareNodeSelectorForHostCpuModel(node, templatePod, sourcePod) if err != nil { return err } } matchLevelOnTarget := c.clusterConfig.GetMigrationConfiguration().MatchSELinuxLevelOnMigration if matchLevelOnTarget == nil || *matchLevelOnTarget { err = setTargetPodSELinuxLevel(templatePod, vmi.Status.SelinuxContext) if err != nil { return err } } // This is used by the functional test to simulate failures computeImageOverride, ok := migration.Annotations[virtv1.FuncTestMigrationTargetImageOverrideAnnotation] if ok && computeImageOverride != "" { for i, container := range templatePod.Spec.Containers { if container.Name == "compute" { container.Image = computeImageOverride templatePod.Spec.Containers[i] = container break } } } key := controller.MigrationKey(migration) c.podExpectations.ExpectCreations(key, 1) pod, err := c.clientset.CoreV1().Pods(vmi.GetNamespace()).Create(context.Background(), templatePod, v1.CreateOptions{}) if err != nil { if k8serrors.IsForbidden(err) && strings.Contains(err.Error(), "violates PodSecurity") { err = fmt.Errorf("failed to create target pod for vmi %s/%s, it needs a privileged namespace to run: %w", vmi.GetNamespace(), vmi.GetName(), err) c.recorder.Eventf(vmi, k8sv1.EventTypeWarning, controller.FailedCreatePodReason, services.FailedToRenderLaunchManifestErrFormat, err) } else { c.recorder.Eventf(vmi, k8sv1.EventTypeWarning, controller.FailedCreatePodReason, "Error creating pod: %v", err) err = fmt.Errorf("failed to create vmi migration target pod: %v", err) } c.podExpectations.CreationObserved(key) return err } log.Log.Object(vmi).Infof("Created migration target pod %s/%s with uuid %s for migration %s with uuid %s", pod.Namespace, pod.Name, string(pod.UID), migration.Name, string(migration.UID)) c.recorder.Eventf(migration, k8sv1.EventTypeNormal, controller.SuccessfulCreatePodReason, "Created migration target pod %s", pod.Name) return nil } func (c *Controller) expandPDB(pdb *policyv1.PodDisruptionBudget, vmi *virtv1.VirtualMachineInstance, vmim *virtv1.VirtualMachineInstanceMigration) error { minAvailable := 2 if pdb.Spec.MinAvailable != nil && pdb.Spec.MinAvailable.IntValue() == minAvailable && pdb.Labels[virtv1.MigrationNameLabel] == vmim.Name { log.Log.V(4).Object(vmi).Infof("PDB has already been expanded") return nil } patchBytes := []byte(fmt.Sprintf(`{"spec":{"minAvailable": %d},"metadata":{"labels":{"%s": "%s"}}}`, minAvailable, virtv1.MigrationNameLabel, vmim.Name)) _, err := c.clientset.PolicyV1().PodDisruptionBudgets(pdb.Namespace).Patch(context.Background(), pdb.Name, types.StrategicMergePatchType, patchBytes, v1.PatchOptions{}) if err != nil { c.recorder.Eventf(vmi, k8sv1.EventTypeWarning, failedUpdatePodDisruptionBudgetReason, "Error expanding the PodDisruptionBudget %s: %v", pdb.Name, err) return err } log.Log.Object(vmi).Infof("expanding pdb for VMI %s/%s to protect migration %s", vmi.Namespace, vmi.Name, vmim.Name) c.recorder.Eventf(vmi, k8sv1.EventTypeNormal, successfulUpdatePodDisruptionBudgetReason, "Expanded PodDisruptionBudget %s", pdb.Name) return nil } // handleMigrationBackoff introduces a backoff (when needed) only for migrations // created by the evacuation controller or the workload updater.
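// The backoff starts at 20 seconds after a single failed attempt and doubles for every additional
// consecutive failure (counting stops at the most recent succeeded migration). The delay is measured
// from the failure timestamp of the most recent previous attempt (see getFailedTS below); while the
// VMI is still inside that window the migration key is re-enqueued and migrationBackoffError is returned.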
func (c *Controller) handleMigrationBackoff(key string, vmi *virtv1.VirtualMachineInstance, migration *virtv1.VirtualMachineInstanceMigration) error { if _, exists := migration.Annotations[virtv1.FuncTestForceIgnoreMigrationBackoffAnnotation]; exists { return nil } _, existsEvacMig := migration.Annotations[virtv1.EvacuationMigrationAnnotation] _, existsWorkUpdMig := migration.Annotations[virtv1.WorkloadUpdateMigrationAnnotation] if !existsEvacMig && !existsWorkUpdMig { return nil } migrations, err := c.listBackoffEligibleMigrations(vmi.Namespace, vmi.Name) if err != nil { return err } if len(migrations) < 2 { return nil } // Newest first sort.Sort(sort.Reverse(vmimCollection(migrations))) if migrations[0].UID != migration.UID { return nil } backoff := time.Second * 0 for _, m := range migrations[1:] { if m.Status.Phase == virtv1.MigrationSucceeded { break } if m.DeletionTimestamp != nil { continue } if m.Status.Phase == virtv1.MigrationFailed { if backoff == 0 { backoff = time.Second * 20 } else { backoff = backoff * 2 } } } if backoff == 0 { return nil } getFailedTS := func(migration *virtv1.VirtualMachineInstanceMigration) metav1.Time { for _, ts := range migration.Status.PhaseTransitionTimestamps { if ts.Phase == virtv1.MigrationFailed { return ts.PhaseTransitionTimestamp } } return metav1.Time{} } outOfBackoffTS := getFailedTS(migrations[1]).Add(backoff) backoff = outOfBackoffTS.Sub(time.Now()) if backoff > 0 { log.Log.Object(vmi).Errorf("vmi in migration backoff, re-enqueueing after %v", backoff) c.Queue.AddAfter(key, backoff) return migrationBackoffError } return nil } func (c *Controller) handleMarkMigrationFailedOnVMI(migration *virtv1.VirtualMachineInstanceMigration, vmi *virtv1.VirtualMachineInstance) error { // Mark Migration Done on VMI if virt handler never started it. // Once virt-handler starts the migration, it's up to handler // to finalize it. vmiCopy := vmi.DeepCopy() now := v1.NewTime(time.Now()) vmiCopy.Status.MigrationState.StartTimestamp = &now vmiCopy.Status.MigrationState.EndTimestamp = &now vmiCopy.Status.MigrationState.Failed = true vmiCopy.Status.MigrationState.Completed = true failureReason := "Target pod is down" if vmiCopy.Status.MigrationState.FailureReason == "" { // Only set the failure reason if empty, as virt-handler may already have provided a better one vmiCopy.Status.MigrationState.FailureReason = failureReason } err := c.patchVMI(vmi, vmiCopy) if err != nil { log.Log.Reason(err).Object(vmi).Errorf("Failed to patch VMI status to indicate migration %s/%s failed.", migration.Namespace, migration.Name) return err } log.Log.Object(vmi).Infof("Marked Migration %s/%s failed on vmi due to target pod disappearing before migration kicked off.", migration.Namespace, migration.Name) c.recorder.Event(vmi, k8sv1.EventTypeWarning, controller.FailedMigrationReason, fmt.Sprintf("VirtualMachineInstance migration uid %s failed. reason: %s", string(migration.UID), failureReason)) return nil }
func (c *Controller) handlePreHandoffMigrationCancel(migration *virtv1.VirtualMachineInstanceMigration, vmi *virtv1.VirtualMachineInstance, pod *k8sv1.Pod) error { if pod == nil { return nil } c.podExpectations.ExpectDeletions(controller.MigrationKey(migration), []string{controller.PodKey(pod)}) err := c.clientset.CoreV1().Pods(pod.Namespace).Delete(context.Background(), pod.Name, v1.DeleteOptions{}) if err != nil { c.podExpectations.DeletionObserved(controller.MigrationKey(migration), controller.PodKey(pod)) c.recorder.Eventf(migration, k8sv1.EventTypeWarning, controller.FailedDeletePodReason, "Error deleting canceled migration target pod: %v", err) return fmt.Errorf("cannot delete pending target pod %s/%s for migration although migration is aborted", pod.Namespace, pod.Name) } reason := fmt.Sprintf("migration canceled and pod %s/%s is deleted", pod.Namespace, pod.Name) log.Log.Object(vmi).Infof("Deleted pending migration target pod with uuid %s for migration %s with uuid %s with reason [%s]", string(pod.UID), migration.Name, string(migration.UID), reason) c.recorder.Event(migration, k8sv1.EventTypeNormal, controller.SuccessfulDeletePodReason, reason) return nil } func (c *Controller) handleTargetPodHandoff(migration *virtv1.VirtualMachineInstanceMigration, vmi *virtv1.VirtualMachineInstance, pod *k8sv1.Pod) error { if vmi.Status.MigrationState != nil && vmi.Status.MigrationState.MigrationUID == migration.UID { // already handed off return nil } vmiCopy := vmi.DeepCopy() vmiCopy.Status.MigrationState = &virtv1.VirtualMachineInstanceMigrationState{ MigrationUID: migration.UID, TargetNode: pod.Spec.NodeName, SourceNode: vmi.Status.NodeName, TargetPod: pod.Name, } if migration.Status.MigrationState != nil { vmiCopy.Status.MigrationState.SourcePod = migration.Status.MigrationState.SourcePod vmiCopy.Status.MigrationState.SourcePersistentStatePVCName = migration.Status.MigrationState.SourcePersistentStatePVCName vmiCopy.Status.MigrationState.TargetPersistentStatePVCName = migration.Status.MigrationState.TargetPersistentStatePVCName } // By setting this label, virt-handler on the target node will receive // the vmi and prepare the local environment for the migration vmiCopy.ObjectMeta.Labels[virtv1.MigrationTargetNodeNameLabel] = pod.Spec.NodeName if controller.VMIHasHotplugVolumes(vmiCopy) { attachmentPods, err := controller.AttachmentPods(pod, c.podIndexer) if err != nil { return fmt.Errorf(failedGetAttractionPodsFmt, err) } if len(attachmentPods) > 0 { log.Log.Object(migration).Infof("Target attachment pod for vmi %s/%s: %s", vmiCopy.Namespace, vmiCopy.Name, string(attachmentPods[0].UID)) vmiCopy.Status.MigrationState.TargetAttachmentPodUID = attachmentPods[0].UID } else { return fmt.Errorf("target attachment pod not found") } } clusterMigrationConfigs := c.clusterConfig.GetMigrationConfiguration().DeepCopy() err := c.matchMigrationPolicy(vmiCopy, clusterMigrationConfigs) if err != nil { return fmt.Errorf("failed to match migration policy: %v", err) } if !c.isMigrationPolicyMatched(vmiCopy) { vmiCopy.Status.MigrationState.MigrationConfiguration = clusterMigrationConfigs } if controller.VMIHasHotplugCPU(vmi) && vmi.IsCPUDedicated() { cpuLimitsCount, err := getTargetPodLimitsCount(pod) if err != nil { return err }
vmiCopy.ObjectMeta.Labels[virtv1.VirtualMachinePodCPULimitsLabel] = strconv.Itoa(int(cpuLimitsCount)) } if controller.VMIHasHotplugMemory(vmi) { memoryReq, err := getTargetPodMemoryRequests(pod) if err != nil { return err } vmiCopy.ObjectMeta.Labels[virtv1.VirtualMachinePodMemoryRequestsLabel] = memoryReq } if backendStoragePVC := backendstorage.PVCForMigrationTarget(c.pvcStore, migration); backendStoragePVC != nil { bs := backendstorage.NewBackendStorage(c.clientset, c.clusterConfig, c.storageClassStore, c.storageProfileStore, c.pvcStore) bs.UpdateVolumeStatus(vmiCopy, backendStoragePVC) } err = c.patchVMI(vmi, vmiCopy) if err != nil { c.recorder.Eventf(migration, k8sv1.EventTypeWarning, controller.FailedHandOverPodReason, fmt.Sprintf("Failed to set MigrationState in VMI status: %v", err)) return err } c.addHandOffKey(controller.MigrationKey(migration)) log.Log.Object(vmi).Infof("Handed off migration %s/%s to target virt-handler.", migration.Namespace, migration.Name) c.recorder.Eventf(migration, k8sv1.EventTypeNormal, controller.SuccessfulHandOverPodReason, "Migration target pod is ready for preparation by virt-handler.") return nil } func (c *Controller) markMigrationAbortInVmiStatus(migration *virtv1.VirtualMachineInstanceMigration, vmi *virtv1.VirtualMachineInstance) error { if vmi.Status.MigrationState == nil { return fmt.Errorf("migration state is nil when trying to mark migration abort in vmi status") } vmiCopy := vmi.DeepCopy() vmiCopy.Status.MigrationState.AbortRequested = true if !equality.Semantic.DeepEqual(vmi.Status, vmiCopy.Status) { newStatus := vmiCopy.Status oldStatus := vmi.Status patchBytes, err := patch.New( patch.WithTest("/status", oldStatus), patch.WithReplace("/status", newStatus), ).GeneratePayload() if err != nil { return err } _, err = c.clientset.VirtualMachineInstance(vmi.Namespace).Patch(context.Background(), vmi.Name, types.JSONPatchType, patchBytes, v1.PatchOptions{}) if err != nil { msg := fmt.Sprintf("failed to set MigrationState in VMI status: 
:%v", err) c.recorder.Eventf(migration, k8sv1.EventTypeWarning, controller.FailedAbortMigrationReason, msg) return fmt.Errorf(msg) } log.Log.Object(vmi).Infof("Signaled migration %s/%s to be aborted.", migration.Namespace, migration.Name) c.recorder.Eventf(migration, k8sv1.EventTypeNormal, controller.SuccessfulAbortMigrationReason, "Migration is ready to be canceled by virt-handler.") } return nil } func isMigrationProtected(pdb *policyv1.PodDisruptionBudget) bool { return pdb.Status.DesiredHealthy == 2 && pdb.Generation == pdb.Status.ObservedGeneration } func filterOutOldPDBs(pdbList []*policyv1.PodDisruptionBudget) []*policyv1.PodDisruptionBudget { var filteredPdbs []*policyv1.PodDisruptionBudget for i := range pdbList { if !pdbs.IsPDBFromOldMigrationController(pdbList[i]) { filteredPdbs = append(filteredPdbs, pdbList[i]) } } return filteredPdbs } func (c *Controller) handleTargetPodCreation(key string, migration *virtv1.VirtualMachineInstanceMigration, vmi *virtv1.VirtualMachineInstance, sourcePod *k8sv1.Pod) error { c.migrationStartLock.Lock() defer c.migrationStartLock.Unlock() // Don't start new migrations if we wait for cache updates on migration target pods if c.podExpectations.AllPendingCreations() > 0 { c.Queue.AddAfter(key, 1*time.Second) return nil } else if controller.VMIActivePodsCount(vmi, c.podIndexer) > 1 { log.Log.Object(migration).Infof("Waiting to schedule target pod for migration because there are already multiple pods running for vmi %s/%s", vmi.Namespace, vmi.Name) c.Queue.AddAfter(key, 1*time.Second) return nil } // Don't start new migrations if we wait for migration object updates because of new target pods runningMigrations, err := c.findRunningMigrations() if err != nil { return fmt.Errorf("failed to determin the number of running migrations: %v", err) } // XXX: Make this configurable, think about limit per node, bandwidth per migration, and so on. if len(runningMigrations) >= int(*c.clusterConfig.GetMigrationConfiguration().ParallelMigrationsPerCluster) { log.Log.Object(migration).Infof("Waiting to schedule target pod for vmi [%s/%s] migration because total running parallel migration count [%d] is currently at the global cluster limit.", vmi.Namespace, vmi.Name, len(runningMigrations)) // Let's wait until some migrations are done c.Queue.AddAfter(key, time.Second*5) return nil } outboundMigrations, err := c.outboundMigrationsOnNode(vmi.Status.NodeName, runningMigrations) if err != nil { return err } if outboundMigrations >= int(*c.clusterConfig.GetMigrationConfiguration().ParallelOutboundMigrationsPerNode) { // Let's ensure that we only have two outbound migrations per node // XXX: Make this configurable, thinkg about inbound migration limit, bandwidh per migration, and so on. log.Log.Object(migration).Infof("Waiting to schedule target pod for vmi [%s/%s] migration because total running parallel outbound migrations on target node [%d] has hit outbound migrations per node limit.", vmi.Namespace, vmi.Name, outboundMigrations) c.Queue.AddAfter(key, time.Second*5) return nil } // migration was accepted into the system, now see if we // should create the target pod if vmi.IsRunning() { if migrations.VMIMigratableOnEviction(c.clusterConfig, vmi) { pdbs, err := pdbs.PDBsForVMI(vmi, c.pdbIndexer) if err != nil { return err } // removes pdbs from old implementation from list. 
pdbs = filterOutOldPDBs(pdbs) if len(pdbs) < 1 { log.Log.Object(vmi).Errorf("Found no PDB protecting the vmi") return fmt.Errorf("Found no PDB protecting the vmi %s", vmi.Name) } pdb := pdbs[0] if err := c.expandPDB(pdb, vmi, migration); err != nil { return err } // before proceeding we have to check that the k8s pdb controller has processed // the pdb expansion and is actually protecting the VMI migration if !isMigrationProtected(pdb) { log.Log.V(4).Object(migration).Infof("Waiting for the pdb-controller to protect the migration pods, postponing migration start") return nil } } err = c.handleBackendStorage(migration, vmi) if err != nil { return err } return c.createTargetPod(migration, vmi, sourcePod) } return nil } func (c *Controller) handleBackendStorage(migration *virtv1.VirtualMachineInstanceMigration, vmi *virtv1.VirtualMachineInstance) error { if !backendstorage.IsBackendStorageNeededForVMI(&vmi.Spec) { return nil } if migration.Status.MigrationState == nil { migration.Status.MigrationState = &virtv1.VirtualMachineInstanceMigrationState{} } migration.Status.MigrationState.SourcePersistentStatePVCName = backendstorage.CurrentPVCName(vmi) if migration.Status.MigrationState.SourcePersistentStatePVCName == "" { return fmt.Errorf("no backend-storage PVC found in VMI volume status") } pvc := backendstorage.PVCForMigrationTarget(c.pvcStore, migration) if pvc != nil { migration.Status.MigrationState.TargetPersistentStatePVCName = pvc.Name } if migration.Status.MigrationState.TargetPersistentStatePVCName != "" { // backend storage pvc has already been created or has ReadWriteMany access-mode return nil } bs := backendstorage.NewBackendStorage(c.clientset, c.clusterConfig, c.storageClassStore, c.storageProfileStore, c.pvcStore) vmiKey, err := controller.KeyFunc(vmi) if err != nil { return err } c.pvcExpectations.ExpectCreations(vmiKey, 1) backendStoragePVC, err := bs.CreatePVCForMigrationTarget(vmi, migration.Name) if err != nil { c.pvcExpectations.CreationObserved(vmiKey) return err } migration.Status.MigrationState.TargetPersistentStatePVCName = backendStoragePVC.Name if migration.Status.MigrationState.SourcePersistentStatePVCName == migration.Status.MigrationState.TargetPersistentStatePVCName { // The PVC is shared between source and target, satisfy the expectation since the creation will never happen c.pvcExpectations.CreationObserved(vmiKey) } return nil } func (c *Controller) createAttachmentPod(migration *virtv1.VirtualMachineInstanceMigration, vmi *virtv1.VirtualMachineInstance, virtLauncherPod *k8sv1.Pod) error { sourcePod, err := controller.CurrentVMIPod(vmi, c.podIndexer) if err != nil { return fmt.Errorf("failed to get current VMI pod: %v", err) } volumes := controller.GetHotplugVolumes(vmi, sourcePod) volumeNamesPVCMap, err := storagetypes.VirtVolumesToPVCMap(volumes, c.pvcStore, virtLauncherPod.Namespace) if err != nil { return fmt.Errorf("failed to get PVC map: %v", err) } // Reset the hotplug volume statuses to enforce mount vmiCopy := vmi.DeepCopy() vmiCopy.Status.VolumeStatus = []virtv1.VolumeStatus{} attachmentPodTemplate, err := c.templateService.RenderHotplugAttachmentPodTemplate(volumes, virtLauncherPod, vmiCopy, volumeNamesPVCMap) if err != nil { return fmt.Errorf("failed to render attachment pod template: %v", err) } if attachmentPodTemplate.ObjectMeta.Labels == nil { attachmentPodTemplate.ObjectMeta.Labels = make(map[string]string) } if attachmentPodTemplate.ObjectMeta.Annotations == nil { attachmentPodTemplate.ObjectMeta.Annotations = make(map[string]string) } 
attachmentPodTemplate.ObjectMeta.Labels[virtv1.MigrationJobLabel] = string(migration.UID) attachmentPodTemplate.ObjectMeta.Annotations[virtv1.MigrationJobNameAnnotation] = migration.Name key := controller.MigrationKey(migration) c.podExpectations.ExpectCreations(key, 1) attachmentPod, err := c.clientset.CoreV1().Pods(vmi.GetNamespace()).Create(context.Background(), attachmentPodTemplate, v1.CreateOptions{}) if err != nil { c.podExpectations.CreationObserved(key) c.recorder.Eventf(vmi, k8sv1.EventTypeWarning, controller.FailedCreatePodReason, "Error creating attachment pod: %v", err) return fmt.Errorf("failed to create attachment pod: %v", err) } c.recorder.Eventf(migration, k8sv1.EventTypeNormal, controller.SuccessfulCreatePodReason, "Created attachment pod %s", attachmentPod.Name) return nil } func isPodPendingUnschedulable(pod *k8sv1.Pod) bool { if pod.Status.Phase != k8sv1.PodPending || pod.DeletionTimestamp != nil { return false } for _, condition := range pod.Status.Conditions { if condition.Type == k8sv1.PodScheduled && condition.Status == k8sv1.ConditionFalse && condition.Reason == k8sv1.PodReasonUnschedulable { return true } } return false } func timeSinceCreationSeconds(objectMeta *metav1.ObjectMeta) int64 { now := time.Now().UTC().Unix() creationTime := objectMeta.CreationTimestamp.Time.UTC().Unix() seconds := now - creationTime if seconds < 0 { seconds = 0 } return seconds } func (c *Controller) deleteTimedOutTargetPod(migration *virtv1.VirtualMachineInstanceMigration, vmi *virtv1.VirtualMachineInstance, pod *k8sv1.Pod, message string) error { migrationKey, err := controller.KeyFunc(migration) if err != nil { return err } c.podExpectations.ExpectDeletions(migrationKey, []string{controller.PodKey(pod)}) err = c.clientset.CoreV1().Pods(vmi.Namespace).Delete(context.Background(), pod.Name, v1.DeleteOptions{}) if err != nil { c.podExpectations.DeletionObserved(migrationKey, controller.PodKey(pod)) c.recorder.Eventf(migration, k8sv1.EventTypeWarning, controller.FailedDeletePodReason, "Error deleting migration target pod: %v", err) return fmt.Errorf("failed to delete vmi migration target pod that reached pending pod timeout period: %v", err) } log.Log.Object(vmi).Infof("Deleted pending migration target pod with uuid %s for migration %s with uuid %s with reason [%s]", string(pod.UID), migration.Name, string(migration.UID), message) c.recorder.Event(migration, k8sv1.EventTypeNormal, controller.SuccessfulDeletePodReason, message) return nil } func (c *Controller) getUnschedulablePendingTimeoutSeconds(migration *virtv1.VirtualMachineInstanceMigration) int64 { timeout := c.unschedulablePendingTimeoutSeconds customTimeoutStr, ok := migration.Annotations[virtv1.MigrationUnschedulablePodTimeoutSecondsAnnotation] if !ok { return timeout } newTimeout, err := strconv.Atoi(customTimeoutStr) if err != nil { log.Log.Object(migration).Reason(err).Errorf("Unable to parse unschedulable pending timeout value for migration") return timeout } return int64(newTimeout) } func (c *Controller) getCatchAllPendingTimeoutSeconds(migration *virtv1.VirtualMachineInstanceMigration) int64 { timeout := c.catchAllPendingTimeoutSeconds customTimeoutStr, ok := migration.Annotations[virtv1.MigrationPendingPodTimeoutSecondsAnnotation] if !ok { return timeout } newTimeout, err := strconv.Atoi(customTimeoutStr) if err != nil { log.Log.Object(migration).Reason(err).Errorf("Unable to parse catch all pending timeout value for migration") return timeout } return int64(newTimeout) } func (c *Controller) 
handlePendingPodTimeout(migration *virtv1.VirtualMachineInstanceMigration, vmi *virtv1.VirtualMachineInstance, pod *k8sv1.Pod) error { if pod.Status.Phase != k8sv1.PodPending || pod.DeletionTimestamp != nil || pod.CreationTimestamp.IsZero() { // only check if timeout has occurred if pod is pending and not already marked for deletion return nil } migrationKey, err := controller.KeyFunc(migration) if err != nil { return err } unschedulableTimeout := c.getUnschedulablePendingTimeoutSeconds(migration) catchAllTimeout := c.getCatchAllPendingTimeoutSeconds(migration) secondsSpentPending := timeSinceCreationSeconds(&pod.ObjectMeta) if isPodPendingUnschedulable(pod) { c.alertIfHostModelIsUnschedulable(vmi, pod) c.recorder.Eventf( migration, k8sv1.EventTypeWarning, controller.MigrationTargetPodUnschedulable, "Migration target pod for VMI [%s/%s] is currently unschedulable.", vmi.Namespace, vmi.Name) log.Log.Object(migration).Warningf("Migration target pod for VMI [%s/%s] is currently unschedulable.", vmi.Namespace, vmi.Name) if secondsSpentPending >= unschedulableTimeout { return c.deleteTimedOutTargetPod(migration, vmi, pod, fmt.Sprintf("unschedulable pod %s/%s timeout period exceeded", pod.Namespace, pod.Name)) } else { // Make sure we check this again after some time c.Queue.AddAfter(migrationKey, time.Second*time.Duration(unschedulableTimeout-secondsSpentPending)) } } if secondsSpentPending >= catchAllTimeout { return c.deleteTimedOutTargetPod(migration, vmi, pod, fmt.Sprintf("pending pod %s/%s timeout period exceeded", pod.Namespace, pod.Name)) } else { // Make sure we check this again after some time c.Queue.AddAfter(migrationKey, time.Second*time.Duration(catchAllTimeout-secondsSpentPending)) } return nil } func (c *Controller) sync(key string, migration *virtv1.VirtualMachineInstanceMigration, vmi *virtv1.VirtualMachineInstance, pods []*k8sv1.Pod) error { var pod *k8sv1.Pod = nil targetPodExists := len(pods) > 0 if targetPodExists { pod = pods[0] } if vmiDeleted := vmi == nil || vmi.DeletionTimestamp != nil; vmiDeleted { return nil } if migrationFinalizedOnVMI := vmi.Status.MigrationState != nil && vmi.Status.MigrationState.MigrationUID == migration.UID && vmi.Status.MigrationState.EndTimestamp != nil; migrationFinalizedOnVMI { return nil } canMigrate, err := c.canMigrateVMI(migration, vmi) if err != nil { return err } if !canMigrate { return fmt.Errorf("vmi is inelgible for migration because another migration job is running") } switch migration.Status.Phase { case virtv1.MigrationPending: if migration.DeletionTimestamp != nil { return c.handlePreHandoffMigrationCancel(migration, vmi, pod) } if err = c.handleMigrationBackoff(key, vmi, migration); errors.Is(err, migrationBackoffError) { warningMsg := fmt.Sprintf("backoff migrating vmi %s/%s", vmi.Namespace, vmi.Name) c.recorder.Eventf(vmi, k8sv1.EventTypeWarning, err.Error(), warningMsg) return nil } if !targetPodExists { sourcePod, err := controller.CurrentVMIPod(vmi, c.podIndexer) if err != nil { log.Log.Reason(err).Error("Failed to fetch pods for namespace from cache.") return err } if !controller.PodExists(sourcePod) { // for instance sudden deletes can cause this. In this // case we don't have to do anything in the creation flow anymore. // Once the VMI is in a final state or deleted the migration // will be marked as failed too. 
return nil } if _, exists := migration.GetAnnotations()[virtv1.EvacuationMigrationAnnotation]; exists { if err = descheduler.MarkEvictionInProgress(c.clientset, sourcePod); err != nil { return err } } // patch VMI annotations and set RuntimeUser in preparation for target pod creation patches := c.setupVMIRuntimeUser(vmi) if !patches.IsEmpty() { patchBytes, err := patches.GeneratePayload() if err != nil { return err } vmi, err = c.clientset.VirtualMachineInstance(vmi.Namespace).Patch(context.Background(), vmi.Name, types.JSONPatchType, patchBytes, v1.PatchOptions{}) if err != nil { return fmt.Errorf("failed to set VMI RuntimeUser: %v", err) } } return c.handleTargetPodCreation(key, migration, vmi, sourcePod) } else if controller.IsPodReady(pod) { if controller.VMIHasHotplugVolumes(vmi) { attachmentPods, err := controller.AttachmentPods(pod, c.podIndexer) if err != nil { return fmt.Errorf(failedGetAttractionPodsFmt, err) } if len(attachmentPods) == 0 { log.Log.Object(migration).Infof("Creating attachment pod for vmi %s/%s on node %s", vmi.Namespace, vmi.Name, pod.Spec.NodeName) return c.createAttachmentPod(migration, vmi, pod) } } } else { return c.handlePendingPodTimeout(migration, vmi, pod) } case virtv1.MigrationScheduling: if migration.DeletionTimestamp != nil { return c.handlePreHandoffMigrationCancel(migration, vmi, pod) } if targetPodExists { return c.handlePendingPodTimeout(migration, vmi, pod) } case virtv1.MigrationScheduled: if migration.DeletionTimestamp != nil && !c.isMigrationHandedOff(migration, vmi) { return c.handlePreHandoffMigrationCancel(migration, vmi, pod) } // once target pod is running, then alert the VMI of the migration by // setting the target and source nodes. This kicks off the preparation stage. if targetPodExists && controller.IsPodReady(pod) { return c.handleTargetPodHandoff(migration, vmi, pod) } case virtv1.MigrationPreparingTarget, virtv1.MigrationTargetReady, virtv1.MigrationFailed: if (!targetPodExists || controller.PodIsDown(pod)) && vmi.Status.MigrationState != nil && len(vmi.Status.MigrationState.TargetDirectMigrationNodePorts) == 0 && vmi.Status.MigrationState.StartTimestamp == nil && !vmi.Status.MigrationState.Failed && !vmi.Status.MigrationState.Completed { err = c.handleMarkMigrationFailedOnVMI(migration, vmi) if err != nil { return err } } if migration.Status.Phase != virtv1.MigrationFailed { return nil } return descheduler.MarkSourcePodEvictionCompleted(c.clientset, migration, c.podIndexer) case virtv1.MigrationRunning: if migration.DeletionTimestamp != nil && vmi.Status.MigrationState != nil { err = c.markMigrationAbortInVmiStatus(migration, vmi) if err != nil { return err } } } return nil } func (c *Controller) setupVMIRuntimeUser(vmi *virtv1.VirtualMachineInstance) *patch.PatchSet { patchSet := patch.New() if !c.clusterConfig.RootEnabled() { // The cluster is configured for non-root VMs, ensure the VMI is non-root. // If the VMI is root, the migration will be a root -> non-root migration. 
if vmi.Status.RuntimeUser != util.NonRootUID { patchSet.AddOption(patch.WithReplace("/status/runtimeUser", util.NonRootUID)) } // This is required in order to be able to update from v0.43-v0.51 to v0.52+ if vmi.Annotations == nil { patchSet.AddOption(patch.WithAdd("/metadata/annotations", map[string]string{virtv1.DeprecatedNonRootVMIAnnotation: "true"})) } else if _, ok := vmi.Annotations[virtv1.DeprecatedNonRootVMIAnnotation]; !ok { patchSet.AddOption(patch.WithAdd(fmt.Sprintf("/metadata/annotations/%s", patch.EscapeJSONPointer(virtv1.DeprecatedNonRootVMIAnnotation)), "true")) } } else { // The cluster is configured for root VMs, ensure the VMI is root. // If the VMI is non-root, the migration will be a non-root -> root migration. if vmi.Status.RuntimeUser != util.RootUser { patchSet.AddOption(patch.WithReplace("/status/runtimeUser", util.RootUser)) } if _, ok := vmi.Annotations[virtv1.DeprecatedNonRootVMIAnnotation]; ok { patchSet.AddOption(patch.WithRemove(fmt.Sprintf("/metadata/annotations/%s", patch.EscapeJSONPointer(virtv1.DeprecatedNonRootVMIAnnotation)))) } } return patchSet } func (c *Controller) listMatchingTargetPods(migration *virtv1.VirtualMachineInstanceMigration, vmi *virtv1.VirtualMachineInstance) ([]*k8sv1.Pod, error) { selector, err := v1.LabelSelectorAsSelector(&v1.LabelSelector{ MatchLabels: map[string]string{ virtv1.CreatedByLabel: string(vmi.UID), virtv1.AppLabel: "virt-launcher", virtv1.MigrationJobLabel: string(migration.UID), }, }) if err != nil { return nil, err } objs, err := c.podIndexer.ByIndex(cache.NamespaceIndex, migration.Namespace) if err != nil { return nil, err } var pods []*k8sv1.Pod for _, obj := range objs { pod := obj.(*k8sv1.Pod) if selector.Matches(labels.Set(pod.ObjectMeta.Labels)) { pods = append(pods, pod) } } return pods, nil } func (c *Controller) addMigration(obj interface{}) { c.enqueueMigration(obj) } func (c *Controller) deleteMigration(obj interface{}) { c.enqueueMigration(obj) } func (c *Controller) updateMigration(_, curr interface{}) { c.enqueueMigration(curr) } func (c *Controller) enqueueMigration(obj interface{}) { logger := log.Log migration := obj.(*virtv1.VirtualMachineInstanceMigration) key, err := controller.KeyFunc(migration) if err != nil { logger.Object(migration).Reason(err).Error("Failed to extract key from migration.") return } c.Queue.Add(key) } func (c *Controller) getControllerOf(pod *k8sv1.Pod) *v1.OwnerReference { t := true return &v1.OwnerReference{ Kind: virtv1.VirtualMachineInstanceMigrationGroupVersionKind.Kind, Name: pod.Annotations[virtv1.MigrationJobNameAnnotation], UID: types.UID(pod.Labels[virtv1.MigrationJobLabel]), Controller: &t, BlockOwnerDeletion: &t, } } // resolveControllerRef returns the controller referenced by a ControllerRef, // or nil if the ControllerRef could not be resolved to a matching controller // of the correct Kind. func (c *Controller) resolveControllerRef(namespace string, controllerRef *v1.OwnerReference) *virtv1.VirtualMachineInstanceMigration { // We can't look up by UID, so look up by Name and then verify UID. // Don't even try to look up by Name if it's the wrong Kind. 
if controllerRef.Kind != virtv1.VirtualMachineInstanceMigrationGroupVersionKind.Kind { return nil } migration, exists, err := c.migrationIndexer.GetByKey(controller.NamespacedKey(namespace, controllerRef.Name)) if err != nil { return nil } if !exists { return nil } if migration.(*virtv1.VirtualMachineInstanceMigration).UID != controllerRef.UID { // The controller we found with this Name is not the same one that the // ControllerRef points to. return nil } return migration.(*virtv1.VirtualMachineInstanceMigration) } // When a pod is created, enqueue the migration that manages it and update its podExpectations. func (c *Controller) addPod(obj interface{}) { pod := obj.(*k8sv1.Pod) if pod.DeletionTimestamp != nil { // on a restart of the controller manager, it's possible a new pod shows up in a state that // is already pending deletion. Prevent the pod from being a creation observation. c.deletePod(pod) return } controllerRef := c.getControllerOf(pod) migration := c.resolveControllerRef(pod.Namespace, controllerRef) if migration == nil { return } migrationKey, err := controller.KeyFunc(migration) if err != nil { return } log.Log.V(4).Object(pod).Infof("Pod created") c.podExpectations.CreationObserved(migrationKey) c.enqueueMigration(migration) } // When a pod is updated, figure out what migration manages it and wake them // up. If the labels of the pod have changed we need to awaken both the old // and new migration. old and cur must be *v1.Pod types. func (c *Controller) updatePod(old, cur interface{}) { curPod := cur.(*k8sv1.Pod) oldPod := old.(*k8sv1.Pod) if curPod.ResourceVersion == oldPod.ResourceVersion { // Periodic resync will send update events for all known pods. // Two different versions of the same pod will always have different RVs. return } labelChanged := !equality.Semantic.DeepEqual(curPod.Labels, oldPod.Labels) if curPod.DeletionTimestamp != nil { // having a pod marked for deletion is enough to count as a deletion expectation c.deletePod(curPod) if labelChanged { // we don't need to check the oldPod.DeletionTimestamp because DeletionTimestamp cannot be unset. c.deletePod(oldPod) } return } curControllerRef := c.getControllerOf(curPod) oldControllerRef := c.getControllerOf(oldPod) controllerRefChanged := !equality.Semantic.DeepEqual(curControllerRef, oldControllerRef) if controllerRefChanged && oldControllerRef != nil { // The ControllerRef was changed. Sync the old controller, if any. 
if migration := c.resolveControllerRef(oldPod.Namespace, oldControllerRef); migration != nil { c.enqueueMigration(migration) } } migration := c.resolveControllerRef(curPod.Namespace, curControllerRef) if migration == nil { return } log.Log.V(4).Object(curPod).Infof("Pod updated") c.enqueueMigration(migration) return } // When a resourceQuota is updated, figure out if there are pending migration in the namespace // if there are we should push them into the queue to accelerate the target creation process func (c *Controller) updateResourceQuota(_, cur interface{}) { curResourceQuota := cur.(*k8sv1.ResourceQuota) log.Log.V(4).Object(curResourceQuota).Infof("ResourceQuota updated") objs, _ := c.migrationIndexer.ByIndex(cache.NamespaceIndex, curResourceQuota.Namespace) for _, obj := range objs { migration := obj.(*virtv1.VirtualMachineInstanceMigration) if migration.Status.Conditions == nil { continue } for _, cond := range migration.Status.Conditions { if cond.Type == virtv1.VirtualMachineInstanceMigrationRejectedByResourceQuota { c.enqueueMigration(migration) } } } return } // When a resourceQuota is deleted, figure out if there are pending migration in the namespace // if there are we should push them into the queue to accelerate the target creation process func (c *Controller) deleteResourceQuota(obj interface{}) { resourceQuota := obj.(*k8sv1.ResourceQuota) log.Log.V(4).Object(resourceQuota).Infof("ResourceQuota deleted") objs, _ := c.migrationIndexer.ByIndex(cache.NamespaceIndex, resourceQuota.Namespace) for _, obj := range objs { migration := obj.(*virtv1.VirtualMachineInstanceMigration) if migration.Status.Conditions == nil { continue } for _, cond := range migration.Status.Conditions { if cond.Type == virtv1.VirtualMachineInstanceMigrationRejectedByResourceQuota { c.enqueueMigration(migration) } } } return } // When a pod is deleted, enqueue the migration that manages the pod and update its podExpectations. // obj could be an *v1.Pod, or a DeletionFinalStateUnknown marker item. func (c *Controller) deletePod(obj interface{}) { pod, ok := obj.(*k8sv1.Pod) // When a delete is dropped, the relist will notice a pod in the store not // in the list, leading to the insertion of a tombstone object which contains // the deleted key/value. Note that this value might be stale. If the pod // changed labels the new migration will not be woken up till the periodic resync. 
if !ok { tombstone, ok := obj.(cache.DeletedFinalStateUnknown) if !ok { log.Log.Reason(fmt.Errorf("couldn't get object from tombstone %+v", obj)).Error(failedToProcessDeleteNotificationErrMsg) return } pod, ok = tombstone.Obj.(*k8sv1.Pod) if !ok { log.Log.Reason(fmt.Errorf("tombstone contained object that is not a pod %#v", obj)).Error(failedToProcessDeleteNotificationErrMsg) return } } controllerRef := c.getControllerOf(pod) migration := c.resolveControllerRef(pod.Namespace, controllerRef) if migration == nil { return } migrationKey, err := controller.KeyFunc(migration) if err != nil { return } c.podExpectations.DeletionObserved(migrationKey, controller.PodKey(pod)) c.enqueueMigration(migration) } func (c *Controller) updatePDB(old, cur interface{}) { curPDB := cur.(*policyv1.PodDisruptionBudget) oldPDB := old.(*policyv1.PodDisruptionBudget) if curPDB.ResourceVersion == oldPDB.ResourceVersion { return } // Only process PDBs manipulated by this controller migrationName := curPDB.Labels[virtv1.MigrationNameLabel] if migrationName == "" { return } objs, err := c.migrationIndexer.ByIndex(cache.NamespaceIndex, curPDB.Namespace) if err != nil { return } for _, obj := range objs { vmim := obj.(*virtv1.VirtualMachineInstanceMigration) if vmim.Name == migrationName { log.Log.V(4).Object(curPDB).Infof("PDB updated") c.enqueueMigration(vmim) } } } func (c *Controller) addPVC(obj interface{}) { pvc := obj.(*k8sv1.PersistentVolumeClaim) if pvc.DeletionTimestamp != nil { return } if !strings.HasPrefix(pvc.Name, backendstorage.PVCPrefix) { return } migrationName, exists := pvc.Labels[virtv1.MigrationNameLabel] if !exists { return } migrationKey := controller.NamespacedKey(pvc.Namespace, migrationName) c.pvcExpectations.CreationObserved(migrationKey) c.Queue.Add(migrationKey) } type vmimCollection []*virtv1.VirtualMachineInstanceMigration func (c vmimCollection) Len() int { return len(c) } func (c vmimCollection) Less(i, j int) bool { t1 := &c[i].CreationTimestamp t2 := &c[j].CreationTimestamp return t1.Before(t2) } func (c vmimCollection) Swap(i, j int) { c[i], c[j] = c[j], c[i] } func (c *Controller) garbageCollectFinalizedMigrations(vmi *virtv1.VirtualMachineInstance) error { var finalizedMigrations []string migrations, err := c.listMigrationsMatchingVMI(vmi.Namespace, vmi.Name) if err != nil { return err } // Oldest first sort.Sort(vmimCollection(migrations)) for _, migration := range migrations { if migration.IsFinal() && migration.DeletionTimestamp == nil { finalizedMigrations = append(finalizedMigrations, migration.Name) } } // only keep the oldest 5 finalized migration objects garbageCollectionCount := len(finalizedMigrations) - defaultFinalizedMigrationGarbageCollectionBuffer if garbageCollectionCount <= 0 { return nil } for i := 0; i < garbageCollectionCount; i++ { err = c.clientset.VirtualMachineInstanceMigration(vmi.Namespace).Delete(context.Background(), finalizedMigrations[i], v1.DeleteOptions{}) if err != nil && k8serrors.IsNotFound(err) { // This is safe to ignore. It's possible in some // scenarios that the migration we're trying to garbage // collect has already disappeared. Let's log it as debug // and suppress the error in this situation. 
log.Log.Reason(err).Infof("error encountered when garbage collecting migration object %s/%s", vmi.Namespace, finalizedMigrations[i]) } else if err != nil { return err } } return nil } func (c *Controller) filterMigrations(namespace string, filter func(*virtv1.VirtualMachineInstanceMigration) bool) ([]*virtv1.VirtualMachineInstanceMigration, error) { objs, err := c.migrationIndexer.ByIndex(cache.NamespaceIndex, namespace) if err != nil { return nil, err } var migrations []*virtv1.VirtualMachineInstanceMigration for _, obj := range objs { migration := obj.(*virtv1.VirtualMachineInstanceMigration) if filter(migration) { migrations = append(migrations, migration) } } return migrations, nil } // takes a namespace and returns all migrations listening for this vmi func (c *Controller) listMigrationsMatchingVMI(namespace, name string) ([]*virtv1.VirtualMachineInstanceMigration, error) { return c.filterMigrations(namespace, func(migration *virtv1.VirtualMachineInstanceMigration) bool { return migration.Spec.VMIName == name }) } func (c *Controller) listBackoffEligibleMigrations(namespace string, name string) ([]*virtv1.VirtualMachineInstanceMigration, error) { return c.filterMigrations(namespace, func(migration *virtv1.VirtualMachineInstanceMigration) bool { return evacuationMigrationsFilter(migration, name) || workloadUpdaterMigrationsFilter(migration, name) }) } func evacuationMigrationsFilter(migration *virtv1.VirtualMachineInstanceMigration, name string) bool { _, isEvacuation := migration.Annotations[virtv1.EvacuationMigrationAnnotation] return migration.Spec.VMIName == name && isEvacuation } func workloadUpdaterMigrationsFilter(migration *virtv1.VirtualMachineInstanceMigration, name string) bool { _, isWorkloadUpdater := migration.Annotations[virtv1.WorkloadUpdateMigrationAnnotation] return migration.Spec.VMIName == name && isWorkloadUpdater } func (c *Controller) addVMI(obj interface{}) { vmi := obj.(*virtv1.VirtualMachineInstance) if vmi.DeletionTimestamp != nil { c.deleteVMI(vmi) return } migrations, err := c.listMigrationsMatchingVMI(vmi.Namespace, vmi.Name) if err != nil { return } for _, migration := range migrations { c.enqueueMigration(migration) } } func (c *Controller) updateVMI(old, cur interface{}) { curVMI := cur.(*virtv1.VirtualMachineInstance) oldVMI := old.(*virtv1.VirtualMachineInstance) if curVMI.ResourceVersion == oldVMI.ResourceVersion { // Periodic resync will send update events for all known VMIs. // Two different versions of the same vmi will always // have different RVs. return } labelChanged := !equality.Semantic.DeepEqual(curVMI.Labels, oldVMI.Labels) if curVMI.DeletionTimestamp != nil { // having a DataVOlume marked for deletion is enough // to count as a deletion expectation c.deleteVMI(curVMI) if labelChanged { // we don't need to check the oldVMI.DeletionTimestamp // because DeletionTimestamp cannot be unset. c.deleteVMI(oldVMI) } return } migrations, err := c.listMigrationsMatchingVMI(curVMI.Namespace, curVMI.Name) if err != nil { log.Log.Object(curVMI).Errorf("Error encountered during datavolume update: %v", err) return } for _, migration := range migrations { log.Log.V(4).Object(curVMI).Infof("vmi updated for migration %s", migration.Name) c.enqueueMigration(migration) } } func (c *Controller) deleteVMI(obj interface{}) { vmi, ok := obj.(*virtv1.VirtualMachineInstance) // When a delete is dropped, the relist will notice a vmi in the store not // in the list, leading to the insertion of a tombstone object which contains // the deleted key/value. 
Note that this value might be stale. If the vmi // changed labels the new vmi will not be woken up till the periodic resync. if !ok { tombstone, ok := obj.(cache.DeletedFinalStateUnknown) if !ok { log.Log.Reason(fmt.Errorf("couldn't get object from tombstone %+v", obj)).Error(failedToProcessDeleteNotificationErrMsg) return } vmi, ok = tombstone.Obj.(*virtv1.VirtualMachineInstance) if !ok { log.Log.Reason(fmt.Errorf("tombstone contained object that is not a vmi %#v", obj)).Error(failedToProcessDeleteNotificationErrMsg) return } } migrations, err := c.listMigrationsMatchingVMI(vmi.Namespace, vmi.Name) if err != nil { return } for _, migration := range migrations { log.Log.V(4).Object(vmi).Infof("vmi deleted for migration %s", migration.Name) c.enqueueMigration(migration) } } func (c *Controller) outboundMigrationsOnNode(node string, runningMigrations []*virtv1.VirtualMachineInstanceMigration) (int, error) { sum := 0 for _, migration := range runningMigrations { key := controller.NamespacedKey(migration.Namespace, migration.Spec.VMIName) if vmi, exists, _ := c.vmiStore.GetByKey(key); exists { if vmi.(*virtv1.VirtualMachineInstance).Status.NodeName == node { sum = sum + 1 } } } return sum, nil } // findRunningMigrations calcules how many migrations are running or in flight to be triggered to running // Migrations which are in running phase are added alongside with migrations which are still pending but // where we already see a target pod. func (c *Controller) findRunningMigrations() ([]*virtv1.VirtualMachineInstanceMigration, error) { // Don't start new migrations if we wait for migration object updates because of new target pods notFinishedMigrations := migrations.ListUnfinishedMigrations(c.migrationIndexer) var runningMigrations []*virtv1.VirtualMachineInstanceMigration for _, migration := range notFinishedMigrations { if migration.IsRunning() { runningMigrations = append(runningMigrations, migration) continue } key := controller.NamespacedKey(migration.Namespace, migration.Spec.VMIName) vmi, exists, err := c.vmiStore.GetByKey(key) if err != nil { return nil, err } if !exists { continue } pods, err := c.listMatchingTargetPods(migration, vmi.(*virtv1.VirtualMachineInstance)) if err != nil { return nil, err } if len(pods) > 0 { runningMigrations = append(runningMigrations, migration) } } return runningMigrations, nil } func (c *Controller) getNodeForVMI(vmi *virtv1.VirtualMachineInstance) (*k8sv1.Node, error) { obj, exists, err := c.nodeStore.GetByKey(vmi.Status.NodeName) if err != nil { return nil, fmt.Errorf("cannot get nodes to migrate VMI with host-model CPU. 
error: %v", err) } else if !exists { return nil, fmt.Errorf("node \"%s\" associated with vmi \"%s\" does not exist", vmi.Status.NodeName, vmi.Name) } node := obj.(*k8sv1.Node) return node, nil } func (c *Controller) alertIfHostModelIsUnschedulable(vmi *virtv1.VirtualMachineInstance, targetPod *k8sv1.Pod) { fittingNodeFound := false if cpu := vmi.Spec.Domain.CPU; cpu == nil || cpu.Model != virtv1.CPUModeHostModel { return } requiredNodeLabels := map[string]string{} for key, value := range targetPod.Spec.NodeSelector { if strings.HasPrefix(key, virtv1.SupportedHostModelMigrationCPU) || strings.HasPrefix(key, virtv1.CPUFeatureLabel) { requiredNodeLabels[key] = value } } nodes := c.nodeStore.List() for _, nodeInterface := range nodes { node := nodeInterface.(*k8sv1.Node) if node.Name == vmi.Status.NodeName { continue // avoid checking the VMI's source node } if isNodeSuitableForHostModelMigration(node, requiredNodeLabels) { log.Log.Object(vmi).Infof("Node %s is suitable to run vmi %s host model cpu mode (more nodes may fit as well)", node.Name, vmi.Name) fittingNodeFound = true break } } if !fittingNodeFound { warningMsg := fmt.Sprintf("Migration cannot proceed since no node is suitable to run the required CPU model / required features: %v", requiredNodeLabels) c.recorder.Eventf(vmi, k8sv1.EventTypeWarning, controller.NoSuitableNodesForHostModelMigration, warningMsg) log.Log.Object(vmi).Warning(warningMsg) } } func prepareNodeSelectorForHostCpuModel(node *k8sv1.Node, pod *k8sv1.Pod, sourcePod *k8sv1.Pod) error { var hostCpuModel, nodeSelectorKeyForHostModel, hostModelLabelValue string migratedAtLeastOnce := false // if the vmi already migrated before it should include node selector that consider CPUModelLabel for key, value := range sourcePod.Spec.NodeSelector { if strings.Contains(key, virtv1.CPUFeatureLabel) || strings.Contains(key, virtv1.SupportedHostModelMigrationCPU) { pod.Spec.NodeSelector[key] = value migratedAtLeastOnce = true } } if !migratedAtLeastOnce { for key, value := range node.Labels { if strings.HasPrefix(key, virtv1.HostModelCPULabel) { hostCpuModel = strings.TrimPrefix(key, virtv1.HostModelCPULabel) hostModelLabelValue = value } if strings.HasPrefix(key, virtv1.HostModelRequiredFeaturesLabel) { requiredFeature := strings.TrimPrefix(key, virtv1.HostModelRequiredFeaturesLabel) pod.Spec.NodeSelector[virtv1.CPUFeatureLabel+requiredFeature] = value } } if hostCpuModel == "" { return fmt.Errorf("node does not contain labal \"%s\" with information about host cpu model", virtv1.HostModelCPULabel) } nodeSelectorKeyForHostModel = virtv1.SupportedHostModelMigrationCPU + hostCpuModel pod.Spec.NodeSelector[nodeSelectorKeyForHostModel] = hostModelLabelValue log.Log.Object(pod).Infof("cpu model label selector (\"%s\") defined for migration target pod", nodeSelectorKeyForHostModel) } return nil } func isNodeSuitableForHostModelMigration(node *k8sv1.Node, requiredNodeLabels map[string]string) bool { for key, value := range requiredNodeLabels { nodeValue, ok := node.Labels[key] if !ok || nodeValue != value { return false } } return true } func (c *Controller) matchMigrationPolicy(vmi *virtv1.VirtualMachineInstance, clusterMigrationConfiguration *virtv1.MigrationConfiguration) error { vmiNamespace, err := c.clientset.CoreV1().Namespaces().Get(context.Background(), vmi.Namespace, v1.GetOptions{}) if err != nil { return err } // Fetch cluster policies var policies []v1alpha1.MigrationPolicy migrationInterfaceList := c.migrationPolicyStore.List() for _, obj := range migrationInterfaceList { 
policy := obj.(*v1alpha1.MigrationPolicy) policies = append(policies, *policy) } policiesListObj := v1alpha1.MigrationPolicyList{Items: policies} // Override cluster-wide migration configuration if migration policy is matched matchedPolicy := matchPolicy(&policiesListObj, vmi, vmiNamespace) if matchedPolicy == nil { log.Log.Object(vmi).Reason(err).Infof("no migration policy matched for VMI %s", vmi.Name) return nil } isUpdated, err := matchedPolicy.GetMigrationConfByPolicy(clusterMigrationConfiguration) if err != nil { return err } if isUpdated { vmi.Status.MigrationState.MigrationPolicyName = &matchedPolicy.Name vmi.Status.MigrationState.MigrationConfiguration = clusterMigrationConfiguration log.Log.Object(vmi).Infof("migration is updated by migration policy named %s.", matchedPolicy.Name) } return nil } func (c *Controller) isMigrationPolicyMatched(vmi *virtv1.VirtualMachineInstance) bool { if vmi == nil { return false } migrationPolicyName := vmi.Status.MigrationState.MigrationPolicyName return migrationPolicyName != nil && *migrationPolicyName != "" } func (c *Controller) isMigrationHandedOff(migration *virtv1.VirtualMachineInstanceMigration, vmi *virtv1.VirtualMachineInstance) bool { if vmi.Status.MigrationState != nil && vmi.Status.MigrationState.MigrationUID == migration.UID { return true } migrationKey := controller.MigrationKey(migration) c.handOffLock.Lock() defer c.handOffLock.Unlock() _, isHandedOff := c.handOffMap[migrationKey] return isHandedOff } func (c *Controller) addHandOffKey(migrationKey string) { c.handOffLock.Lock() defer c.handOffLock.Unlock() c.handOffMap[migrationKey] = struct{}{} } func (c *Controller) removeHandOffKey(migrationKey string) { c.handOffLock.Lock() defer c.handOffLock.Unlock() delete(c.handOffMap, migrationKey) } func getComputeContainer(pod *k8sv1.Pod) *k8sv1.Container { for _, container := range pod.Spec.Containers { if container.Name == "compute" { return &container } } return nil } func getTargetPodLimitsCount(pod *k8sv1.Pod) (int64, error) { cc := getComputeContainer(pod) if cc == nil { return 0, fmt.Errorf("Could not find VMI compute container") } cpuLimit, ok := cc.Resources.Limits[k8sv1.ResourceCPU] if !ok { return 0, fmt.Errorf("Could not find dedicated CPU limit in VMI compute container") } return cpuLimit.Value(), nil } func getTargetPodMemoryRequests(pod *k8sv1.Pod) (string, error) { cc := getComputeContainer(pod) if cc == nil { return "", fmt.Errorf("Could not find VMI compute container") } memReq, ok := cc.Resources.Requests[k8sv1.ResourceMemory] if !ok { return "", fmt.Errorf("Could not find memory request in VMI compute container") } if hugePagesReq, ok := cc.Resources.Requests[k8sv1.ResourceHugePagesPrefix+"2Mi"]; ok { memReq.Add(hugePagesReq) } if hugePagesReq, ok := cc.Resources.Requests[k8sv1.ResourceHugePagesPrefix+"1Gi"]; ok { memReq.Add(hugePagesReq) } return memReq.String(), nil }
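// The label helpers directly above (getTargetPodLimitsCount, getTargetPodMemoryRequests)
// derive the values that handleTargetPodHandoff writes into the VMI labels: the CPU limit
// as a plain integer and the memory request as the compute container's memory request plus
// any hugepages requests. A minimal, standalone sketch of that resource.Quantity arithmetic,
// using hypothetical request values rather than a real target pod:
package main

import (
	"fmt"

	k8sv1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// Hypothetical compute-container requests: 1Gi of regular memory plus 512Mi of 2Mi hugepages.
	requests := k8sv1.ResourceList{
		k8sv1.ResourceMemory:                  resource.MustParse("1Gi"),
		k8sv1.ResourceHugePagesPrefix + "2Mi": resource.MustParse("512Mi"),
	}

	// Same aggregation as getTargetPodMemoryRequests: start from the memory request
	// and fold in any hugepages requests before rendering the label value.
	memReq := requests[k8sv1.ResourceMemory]
	if hugePages, ok := requests[k8sv1.ResourceHugePagesPrefix+"2Mi"]; ok {
		memReq.Add(hugePages)
	}
	fmt.Println(memReq.String()) // prints "1536Mi"
}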
package migration import ( k8sv1 "k8s.io/api/core/v1" k6tv1 "kubevirt.io/api/core/v1" "kubevirt.io/api/migrations/v1alpha1" ) type migrationPolicyMatchScore struct { matchingVMILabels int matchingNSLabels int } func (score migrationPolicyMatchScore) equals(otherScore migrationPolicyMatchScore) bool { return score.matchingVMILabels == otherScore.matchingVMILabels && score.matchingNSLabels == otherScore.matchingNSLabels } func (score migrationPolicyMatchScore) greaterThan(otherScore migrationPolicyMatchScore) bool { thisTotalScore := score.matchingNSLabels + score.matchingVMILabels otherTotalScore := otherScore.matchingNSLabels + otherScore.matchingVMILabels if thisTotalScore == otherTotalScore { return score.matchingVMILabels > otherScore.matchingVMILabels } return thisTotalScore > otherTotalScore } func (score migrationPolicyMatchScore) lessThan(otherScore migrationPolicyMatchScore) bool { return !score.equals(otherScore) && !score.greaterThan(otherScore) } // matchPolicy returns the policy that is matched to the vmi, or nil if no policy is matched. // // Since every policy can specify VMI and Namespace labels to match to, matching is done by returning the most // detailed policy, meaning the policy that matches the VMI and specifies the most labels that matched either // the VMI or its namespace labels. // // If two policies are matched and have the same level of detail (i.e. same number of matching labels) the matched // policy is chosen by policies' names ordered by lexicographic order. The reason is to create a rather arbitrary yet // deterministic way of matching policies. func matchPolicy(policyList *v1alpha1.MigrationPolicyList, vmi *k6tv1.VirtualMachineInstance, vmiNamespace *k8sv1.Namespace) *v1alpha1.MigrationPolicy { var matchingPolicies []v1alpha1.MigrationPolicy bestScore := migrationPolicyMatchScore{} for _, policy := range policyList.Items { doesMatch, curScore := countMatchingLabels(&policy, vmi.Labels, vmiNamespace.Labels) if !doesMatch || curScore.lessThan(bestScore) { continue } else if curScore.greaterThan(bestScore) { bestScore = curScore matchingPolicies = []v1alpha1.MigrationPolicy{policy} } else { matchingPolicies = append(matchingPolicies, policy) } } if len(matchingPolicies) == 0 { return nil } else if len(matchingPolicies) == 1 { return &matchingPolicies[0] } // If more than one policy is matched with the same number of matching labels it will be chosen by policies names' // lexicographic order firstPolicyNameLexicographicOrder := matchingPolicies[0].Name var firstPolicyNameLexicographicOrderIdx int for idx, matchingPolicy := range matchingPolicies { if matchingPolicy.Name < firstPolicyNameLexicographicOrder { firstPolicyNameLexicographicOrder = matchingPolicy.Name firstPolicyNameLexicographicOrderIdx = idx } } return &matchingPolicies[firstPolicyNameLexicographicOrderIdx] } // countMatchingLabels checks if a policy matches a VMI and counts the number of matching labels. // In the case that doesMatch is false, the returned score needs to be dismissed and not counted on. 
func countMatchingLabels(policy *v1alpha1.MigrationPolicy, vmiLabels, namespaceLabels map[string]string) (doesMatch bool, score migrationPolicyMatchScore) { var matchingVMILabels, matchingNSLabels int doesMatch = true if policy.Spec.Selectors == nil { return false, score } countLabelsHelper := func(policyLabels, labelsToMatch map[string]string) (matchingLabels int) { for policyKey, policyValue := range policyLabels { value, exists := labelsToMatch[policyKey] if exists && value == policyValue { matchingLabels++ } else { doesMatch = false return } } return matchingLabels } areSelectorsAndLabelsNotNil := func(selector v1alpha1.LabelSelector, labels map[string]string) bool { return selector != nil && labels != nil } if areSelectorsAndLabelsNotNil(policy.Spec.Selectors.VirtualMachineInstanceSelector, vmiLabels) { matchingVMILabels = countLabelsHelper(policy.Spec.Selectors.VirtualMachineInstanceSelector, vmiLabels) } if doesMatch && areSelectorsAndLabelsNotNil(policy.Spec.Selectors.NamespaceSelector, namespaceLabels) { matchingNSLabels = countLabelsHelper(policy.Spec.Selectors.NamespaceSelector, namespaceLabels) } if doesMatch { score = migrationPolicyMatchScore{matchingVMILabels: matchingVMILabels, matchingNSLabels: matchingNSLabels} } return doesMatch, score }
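// The scoring above prefers the policy with the most matching labels in total, breaks ties
// by the number of matching VMI labels, and finally falls back to lexicographic policy-name
// order in matchPolicy. A minimal, standalone sketch of that ordering; the score type below
// is re-declared locally for illustration and only mirrors the unexported
// migrationPolicyMatchScore:
package main

import "fmt"

type score struct{ vmiLabels, nsLabels int }

// greaterThan mirrors migrationPolicyMatchScore.greaterThan: total matches first,
// then VMI label matches as the tie-breaker.
func (s score) greaterThan(o score) bool {
	if s.vmiLabels+s.nsLabels == o.vmiLabels+o.nsLabels {
		return s.vmiLabels > o.vmiLabels
	}
	return s.vmiLabels+s.nsLabels > o.vmiLabels+o.nsLabels
}

func main() {
	a := score{vmiLabels: 2, nsLabels: 0} // policy matching two VMI labels
	b := score{vmiLabels: 1, nsLabels: 1} // policy matching one VMI and one namespace label

	// Same total, so the policy with more matching VMI labels is considered more detailed.
	fmt.Println(a.greaterThan(b)) // prints "true"

	// If two policies tie on both counts, matchPolicy picks the lexicographically
	// smallest policy name so the result stays deterministic.
}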
package node import ( "context" "encoding/json" "fmt" "strings" "time" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" "k8s.io/client-go/util/workqueue" virtv1 "kubevirt.io/api/core/v1" "kubevirt.io/client-go/kubecli" "kubevirt.io/client-go/log" "kubevirt.io/kubevirt/pkg/apimachinery/patch" "kubevirt.io/kubevirt/pkg/controller" "kubevirt.io/kubevirt/pkg/util/lookup" ) const ( // NodeUnresponsiveReason is in various places as reason to indicate that // an action was taken because virt-handler became unresponsive. NodeUnresponsiveReason = "NodeUnresponsive" ) // Controller is the main Controller struct. type Controller struct { clientset kubecli.KubevirtClient Queue workqueue.TypedRateLimitingInterface[string] nodeStore cache.Store vmiStore cache.Store recorder record.EventRecorder heartBeatTimeout time.Duration recheckInterval time.Duration hasSynced func() bool } // NewController creates a new instance of the NodeController struct. func NewController(clientset kubecli.KubevirtClient, nodeInformer cache.SharedIndexInformer, vmiInformer cache.SharedIndexInformer, recorder record.EventRecorder) (*Controller, error) { c := &Controller{ clientset: clientset, Queue: workqueue.NewTypedRateLimitingQueueWithConfig[string]( workqueue.DefaultTypedControllerRateLimiter[string](), workqueue.TypedRateLimitingQueueConfig[string]{Name: "virt-controller-node"}, ), nodeStore: nodeInformer.GetStore(), vmiStore: vmiInformer.GetStore(), recorder: recorder, heartBeatTimeout: 5 * time.Minute, recheckInterval: 1 * time.Minute, } c.hasSynced = func() bool { return nodeInformer.HasSynced() && vmiInformer.HasSynced() } _, err := nodeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: c.addNode, DeleteFunc: c.deleteNode, UpdateFunc: c.updateNode, }) if err != nil { return nil, err } _, err = vmiInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: c.addVirtualMachine, DeleteFunc: func(_ interface{}) { /* nothing to do */ }, UpdateFunc: c.updateVirtualMachine, }) if err != nil { return nil, err } return c, nil } func (c *Controller) addNode(obj interface{}) { c.enqueueNode(obj) } func (c *Controller) deleteNode(obj interface{}) { c.enqueueNode(obj) } func (c *Controller) updateNode(_, curr interface{}) { c.enqueueNode(curr) } func (c *Controller) enqueueNode(obj interface{}) { logger := log.Log node := obj.(*v1.Node) key, err := controller.KeyFunc(node) if err != nil { logger.Object(node).Reason(err).Error("Failed to extract key from node.") return } c.Queue.Add(key) } func (c *Controller) addVirtualMachine(obj interface{}) { vmi := obj.(*virtv1.VirtualMachineInstance) if vmi.Status.NodeName != "" { c.Queue.Add(vmi.Status.NodeName) } } func (c *Controller) updateVirtualMachine(_, curr interface{}) { currVMI := curr.(*virtv1.VirtualMachineInstance) if currVMI.Status.NodeName != "" { c.Queue.Add(currVMI.Status.NodeName) } } // Run runs the passed in NodeController. 
func (c *Controller) Run(threadiness int, stopCh <-chan struct{}) { defer controller.HandlePanic() defer c.Queue.ShutDown() log.Log.Info("Starting node controller.") // Wait for cache sync before we start the node controller cache.WaitForCacheSync(stopCh, c.hasSynced) // Start the actual work for i := 0; i < threadiness; i++ { go wait.Until(c.runWorker, time.Second, stopCh) } <-stopCh log.Log.Info("Stopping node controller.") } func (c *Controller) runWorker() { for c.Execute() { } } // Execute runs commands from the controller queue, if there is // an error it requeues the command. Returns false if the queue // is empty. func (c *Controller) Execute() bool { key, quit := c.Queue.Get() if quit { return false } defer c.Queue.Done(key) err := c.execute(key) if err != nil { log.Log.Reason(err).Infof("reenqueuing node %v", key) c.Queue.AddRateLimited(key) } else { log.Log.V(4).Infof("processed node %v", key) c.Queue.Forget(key) } return true } func (c *Controller) execute(key string) error { logger := log.DefaultLogger() obj, nodeExists, err := c.nodeStore.GetByKey(key) if err != nil { return err } var node *v1.Node if nodeExists { node = obj.(*v1.Node) logger = logger.Object(node) } else { namespace, name, err := cache.SplitMetaNamespaceKey(key) if err == nil { params := []string{} if namespace != "" { params = append(params, "namespace", namespace) } params = append(params, "name", name) params = append(params, "kind", "Node") logger = logger.With(params) } } unresponsive, err := isNodeUnresponsive(node, c.heartBeatTimeout) if err != nil { logger.Reason(err).Error("Failed to determine if node is responsive, will not reenqueue") return nil } if unresponsive { if nodeIsSchedulable(node) { if err := c.markNodeAsUnresponsive(node, logger); err != nil { return err } } err = c.checkNodeForOrphanedAndErroredVMIs(key, node, logger) if err != nil { return err } } c.requeueIfExists(key, node) return nil } func nodeIsSchedulable(node *v1.Node) bool { if node == nil { return false } return node.Labels[virtv1.NodeSchedulable] == "true" } func (c *Controller) checkNodeForOrphanedAndErroredVMIs(nodeName string, node *v1.Node, logger *log.FilteredLogger) error { vmis, err := lookup.ActiveVirtualMachinesOnNode(c.clientset, nodeName) if err != nil { logger.Reason(err).Error("Failed fetching vmis for node") return err } if len(vmis) == 0 { c.requeueIfExists(nodeName, node) return nil } err = c.createEventIfNodeHasOrphanedVMIs(node, vmis) if err != nil { logger.Reason(err).Error("checking virt-handler for node") return err } return c.checkVirtLauncherPodsAndUpdateVMIStatus(nodeName, vmis, logger) } func (c *Controller) checkVirtLauncherPodsAndUpdateVMIStatus(nodeName string, vmis []*virtv1.VirtualMachineInstance, logger *log.FilteredLogger) error { pods, err := c.alivePodsOnNode(nodeName) if err != nil { logger.Reason(err).Error("Failed fetch pods for node") return err } vmis = filterStuckVirtualMachinesWithoutPods(vmis, pods) return c.updateVMIWithFailedStatus(vmis, logger) } func (c *Controller) updateVMIWithFailedStatus(vmis []*virtv1.VirtualMachineInstance, logger *log.FilteredLogger) error { errs := []string{} // Do sequential updates, we don't want to create update storms in situations where something might already be wrong for _, vmi := range vmis { err := c.createAndApplyFailedVMINodeUnresponsivePatch(vmi, logger) if err != nil { errs = append(errs, fmt.Sprintf("failed to move vmi %s in namespace %s to final state: %v", vmi.Name, vmi.Namespace, err)) } } if len(errs) > 0 { return fmt.Errorf("%v", 
strings.Join(errs, "; ")) } return nil } func (c *Controller) createAndApplyFailedVMINodeUnresponsivePatch(vmi *virtv1.VirtualMachineInstance, logger *log.FilteredLogger) error { c.recorder.Event(vmi, v1.EventTypeNormal, NodeUnresponsiveReason, fmt.Sprintf("virt-handler on node %s is not responsive, marking VMI as failed", vmi.Status.NodeName)) logger.V(2).Infof("Moving vmi %s in namespace %s on unresponsive node to failed state", vmi.Name, vmi.Namespace) patchBytes, err := patch.New(patch.WithReplace("/status/phase", virtv1.Failed), patch.WithAdd("/status/reason", NodeUnresponsiveReason)).GeneratePayload() if err != nil { return err } _, err = c.clientset.VirtualMachineInstance(vmi.Namespace).Patch(context.Background(), vmi.Name, types.JSONPatchType, patchBytes, metav1.PatchOptions{}) if err != nil { logger.Reason(err).Errorf("Failed to move vmi %s in namespace %s to final state", vmi.Name, vmi.Namespace) return err } return nil } func (c *Controller) requeueIfExists(key string, node *v1.Node) { if node == nil { return } c.Queue.AddAfter(key, c.recheckInterval) } func (c *Controller) markNodeAsUnresponsive(node *v1.Node, logger *log.FilteredLogger) error { c.recorder.Event(node, v1.EventTypeNormal, NodeUnresponsiveReason, "virt-handler is not responsive, marking node as unresponsive") logger.V(4).Infof("Marking node %s as unresponsive", node.Name) data := []byte(fmt.Sprintf(`{"metadata": { "labels": {"%s": "false"}}}`, virtv1.NodeSchedulable)) _, err := c.clientset.CoreV1().Nodes().Patch(context.Background(), node.Name, types.StrategicMergePatchType, data, metav1.PatchOptions{}) if err != nil { logger.Reason(err).Error("Failed to mark node as unschedulable") return fmt.Errorf("failed to mark node %s as unschedulable: %v", node.Name, err) } return nil } func (c *Controller) createEventIfNodeHasOrphanedVMIs(node *v1.Node, vmis []*virtv1.VirtualMachineInstance) error { // node is not running any vmis so we don't need to check anything else if len(vmis) == 0 || node == nil { return nil } // query for a virt-handler pod on the node handlerNodeSelector := fields.ParseSelectorOrDie("spec.nodeName=" + node.GetName()) virtHandlerSelector := fields.ParseSelectorOrDie("kubevirt.io=virt-handler") pods, err := c.clientset.CoreV1().Pods(v1.NamespaceAll).List(context.Background(), metav1.ListOptions{ FieldSelector: handlerNodeSelector.String(), LabelSelector: virtHandlerSelector.String(), }) if err != nil { return err } // node is running the virt-handler if len(pods.Items) != 0 { return nil } running, err := checkDaemonSetStatus(c.clientset, virtHandlerSelector) if err != nil { return err } // the virt-handler DaemonSet is not running as expected so we can't know for sure // if a virt-handler pod will be run on this node if !running { c.requeueIfExists(node.GetName(), node) return nil } c.recorder.Event(node, v1.EventTypeWarning, NodeUnresponsiveReason, "virt-handler is not present, there are orphaned vmis on this node. 
Run virt-handler on this node to migrate or remove them.") return nil } func checkDaemonSetStatus(clientset kubecli.KubevirtClient, selector fields.Selector) (bool, error) { dss, err := clientset.AppsV1().DaemonSets(v1.NamespaceAll).List(context.Background(), metav1.ListOptions{ LabelSelector: selector.String(), }) if err != nil { return false, err } if len(dss.Items) != 1 { return false, fmt.Errorf("should only be running one virt-handler DaemonSet") } ds := dss.Items[0] desired, scheduled, ready := ds.Status.DesiredNumberScheduled, ds.Status.CurrentNumberScheduled, ds.Status.NumberReady if desired != scheduled && desired != ready { return false, nil } return true, nil } func (c *Controller) alivePodsOnNode(nodeName string) ([]*v1.Pod, error) { handlerNodeSelector := fields.ParseSelectorOrDie("spec.nodeName=" + nodeName) list, err := c.clientset.CoreV1().Pods(v1.NamespaceAll).List(context.Background(), metav1.ListOptions{ FieldSelector: handlerNodeSelector.String(), }) if err != nil { return nil, err } pods := []*v1.Pod{} for i := range list.Items { pod := &list.Items[i] if controllerRef := controller.GetControllerOf(pod); !isControlledByVMI(controllerRef) { continue } // Some pods get stuck in a pending Termination during shutdown // due to virt-handler not being available to unmount container disk // mount propagation. A pod with all containers terminated is not // considered alive. allContainersTerminated := false if len(pod.Status.ContainerStatuses) > 0 { allContainersTerminated = true for _, status := range pod.Status.ContainerStatuses { if status.State.Terminated == nil { allContainersTerminated = false break } } } phase := pod.Status.Phase toAppendPod := !allContainersTerminated && phase != v1.PodFailed && phase != v1.PodSucceeded if toAppendPod { pods = append(pods, pod) continue } } return pods, nil } func filterStuckVirtualMachinesWithoutPods(vmis []*virtv1.VirtualMachineInstance, pods []*v1.Pod) []*virtv1.VirtualMachineInstance { podsPerNamespace := map[string]map[string]*v1.Pod{} for _, pod := range pods { podsForVMI, ok := podsPerNamespace[pod.Namespace] if !ok { podsForVMI = map[string]*v1.Pod{} } if controllerRef := controller.GetControllerOf(pod); isControlledByVMI(controllerRef) { podsForVMI[string(controllerRef.UID)] = pod podsPerNamespace[pod.Namespace] = podsForVMI } } filtered := []*virtv1.VirtualMachineInstance{} for _, vmi := range vmis { if podsForVMI, exists := podsPerNamespace[vmi.Namespace]; exists { if _, exists := podsForVMI[string(vmi.UID)]; exists { continue } } filtered = append(filtered, vmi) } return filtered } func isControlledByVMI(controllerRef *metav1.OwnerReference) bool { return controllerRef != nil && controllerRef.Kind == virtv1.VirtualMachineInstanceGroupVersionKind.Kind } func isNodeUnresponsive(node *v1.Node, timeout time.Duration) (bool, error) { if node == nil { return true, nil } if lastHeartBeat, exists := node.Annotations[virtv1.VirtHandlerHeartbeat]; exists { timestamp := metav1.Time{} if err := json.Unmarshal([]byte(`"`+lastHeartBeat+`"`), &timestamp); err != nil { return false, err } if timestamp.Time.Before(metav1.Now().Add(-timeout)) { return true, nil } } return false, nil }
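// exampleHeartbeatStalenessCheck is an illustrative sketch appended for
// documentation purposes only; it is not called by the controller and relies
// solely on this file's existing imports. It shows the contract behind
// isNodeUnresponsive: virt-handler stamps the virtv1.VirtHandlerHeartbeat
// annotation with a serialized metav1.Time, and a node whose last heartbeat is
// older than the configured timeout is treated as unresponsive. The node name
// and timestamps below are hypothetical.
func exampleHeartbeatStalenessCheck() {
	// Serialize a metav1.Time the same way the heartbeat annotation is stored.
	stale, _ := json.Marshal(metav1.NewTime(time.Now().Add(-10 * time.Minute)))
	node := &v1.Node{
		ObjectMeta: metav1.ObjectMeta{
			Name: "node01",
			Annotations: map[string]string{
				virtv1.VirtHandlerHeartbeat: strings.Trim(string(stale), `"`),
			},
		},
	}
	// With the controller's default 5 minute heartBeatTimeout, a heartbeat
	// that is 10 minutes old is reported as unresponsive.
	unresponsive, err := isNodeUnresponsive(node, 5*time.Minute)
	fmt.Printf("unresponsive=%v err=%v\n", unresponsive, err)
}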
package pool import ( "context" "encoding/json" "fmt" "maps" "math" "math/rand" "strconv" "strings" "sync" "time" "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/utils/trace" "kubevirt.io/kubevirt/pkg/pointer" appsv1 "k8s.io/api/apps/v1" k8score "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" "k8s.io/client-go/util/workqueue" virtv1 "kubevirt.io/api/core/v1" poolv1 "kubevirt.io/api/pool/v1alpha1" "kubevirt.io/client-go/kubecli" "kubevirt.io/client-go/log" "kubevirt.io/kubevirt/pkg/apimachinery/patch" "kubevirt.io/kubevirt/pkg/controller" traceUtils "kubevirt.io/kubevirt/pkg/util/trace" "kubevirt.io/kubevirt/pkg/virt-controller/watch/common" ) // Controller is the main Controller struct. type Controller struct { clientset kubecli.KubevirtClient queue workqueue.TypedRateLimitingInterface[string] vmIndexer cache.Indexer vmiStore cache.Store poolIndexer cache.Indexer revisionIndexer cache.Indexer recorder record.EventRecorder expectations *controller.UIDTrackingControllerExpectations burstReplicas uint hasSynced func() bool } const ( FailedUpdateVirtualMachineReason = "FailedUpdate" SuccessfulUpdateVirtualMachineReason = "SuccessfulUpdate" defaultAddDelay = 1 * time.Second ) const ( FailedScaleOutReason = "FailedScaleOut" FailedScaleInReason = "FailedScaleIn" FailedUpdateReason = "FailedUpdate" FailedRevisionPruningReason = "FailedRevisionPruning" SuccessfulPausedPoolReason = "SuccessfulPaused" SuccessfulResumePoolReason = "SuccessfulResume" ) var virtControllerPoolWorkQueueTracer = &traceUtils.Tracer{Threshold: time.Second} // NewController creates a new instance of the PoolController struct. 
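// The indented call below is a hedged wiring example only; the clientset,
// informers, recorder, burst value and stop channel are placeholders supplied
// by the caller, not defined in this file.
//
//	pc, err := NewController(clientset, vmiInformer, vmInformer, poolInformer,
//		revisionInformer, recorder, 250)
//	if err != nil {
//		panic(err)
//	}
//	go pc.Run(2, stopCh)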
func NewController(clientset kubecli.KubevirtClient, vmiInformer cache.SharedIndexInformer, vmInformer cache.SharedIndexInformer, poolInformer cache.SharedIndexInformer, revisionInformer cache.SharedIndexInformer, recorder record.EventRecorder, burstReplicas uint) (*Controller, error) { c := &Controller{ clientset: clientset, queue: workqueue.NewTypedRateLimitingQueueWithConfig[string]( workqueue.DefaultTypedControllerRateLimiter[string](), workqueue.TypedRateLimitingQueueConfig[string]{Name: "virt-controller-pool"}, ), poolIndexer: poolInformer.GetIndexer(), vmiStore: vmiInformer.GetStore(), vmIndexer: vmInformer.GetIndexer(), revisionIndexer: revisionInformer.GetIndexer(), recorder: recorder, expectations: controller.NewUIDTrackingControllerExpectations(controller.NewControllerExpectations()), burstReplicas: burstReplicas, } c.hasSynced = func() bool { return poolInformer.HasSynced() && vmInformer.HasSynced() && vmiInformer.HasSynced() && revisionInformer.HasSynced() } _, err := poolInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: c.addPool, DeleteFunc: c.deletePool, UpdateFunc: c.updatePool, }) if err != nil { return nil, err } _, err = vmInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: c.addVMHandler, DeleteFunc: c.deleteVMHandler, UpdateFunc: c.updateVMHandler, }) if err != nil { return nil, err } _, err = revisionInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: c.addRevisionHandler, UpdateFunc: c.updateRevisionHandler, }) if err != nil { return nil, err } _, err = vmiInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: c.addVMIHandler, UpdateFunc: c.updateVMIHandler, }) if err != nil { return nil, err } return c, nil } func (c *Controller) resolveVMIControllerRef(namespace string, controllerRef *v1.OwnerReference) *virtv1.VirtualMachine { // We can't look up by UID, so look up by Name and then verify UID. // Don't even try to look up by Name if it's the wrong Kind. if controllerRef.Kind != virtv1.VirtualMachineGroupVersionKind.Kind { return nil } vm, exists, err := c.vmIndexer.GetByKey(controller.NamespacedKey(namespace, controllerRef.Name)) if err != nil { return nil } if !exists { return nil } if vm.(*virtv1.VirtualMachine).UID != controllerRef.UID { // The controller we found with this Name is not the same one that the // ControllerRef points to. 
return nil } return vm.(*virtv1.VirtualMachine) } func (c *Controller) addVMIHandler(obj interface{}) { vmi := obj.(*virtv1.VirtualMachineInstance) if vmi.DeletionTimestamp != nil { return } vmiControllerRef := metav1.GetControllerOf(vmi) if vmiControllerRef == nil { return } log.Log.Object(vmi).V(4).Info("Looking for VirtualMachineInstance Ref") vm := c.resolveVMIControllerRef(vmi.Namespace, vmiControllerRef) if vm == nil { // VMI is not controlled by a VM return } vmControllerRef := metav1.GetControllerOf(vm) if vmControllerRef == nil { return } pool := c.resolveControllerRef(vm.Namespace, vmControllerRef) if pool == nil { // VM is not controlled by a pool return } vmRevisionName, vmOk := vm.Spec.Template.ObjectMeta.Labels[virtv1.VirtualMachinePoolRevisionName] vmiRevisionName, vmiOk := vmi.Labels[virtv1.VirtualMachinePoolRevisionName] if vmOk && vmiOk && vmRevisionName == vmiRevisionName { // nothing to do here, VMI is up-to-date with VM's Template return } // enqueue the Pool due to a VMI detected that isn't up to date c.enqueuePool(pool) } func (c *Controller) updateVMIHandler(old, cur interface{}) { c.addVMIHandler(cur) } // When a revision is created, enqueue the pool that manages it and update its expectations. func (c *Controller) addRevisionHandler(obj interface{}) { cr := obj.(*appsv1.ControllerRevision) // If it has a ControllerRef, that's all that matters. if controllerRef := metav1.GetControllerOf(cr); controllerRef != nil { pool := c.resolveControllerRef(cr.Namespace, controllerRef) if pool == nil { return } poolKey, err := controller.KeyFunc(pool) if err != nil { return } c.expectations.CreationObserved(poolKey) c.enqueuePool(pool) return } } func (c *Controller) updateRevisionHandler(old, cur interface{}) { cr := cur.(*appsv1.ControllerRevision) // If it has a ControllerRef, that's all that matters. if controllerRef := metav1.GetControllerOf(cr); controllerRef != nil { pool := c.resolveControllerRef(cr.Namespace, controllerRef) if pool == nil { return } c.enqueuePool(pool) return } } // When a vm is created, enqueue the pool that manages it and update its expectations. func (c *Controller) addVMHandler(obj interface{}) { vm := obj.(*virtv1.VirtualMachine) if vm.DeletionTimestamp != nil { // on a restart of the controller manager, it's possible a new vm shows up in a state that // is already pending deletion. Prevent the vm from being a creation observation. c.deleteVMHandler(vm) return } // If it has a ControllerRef, that's all that matters. if controllerRef := metav1.GetControllerOf(vm); controllerRef != nil { pool := c.resolveControllerRef(vm.Namespace, controllerRef) if pool == nil { return } poolKey, err := controller.KeyFunc(pool) if err != nil { return } log.Log.V(4).Object(vm).Infof("VirtualMachine created") c.expectations.CreationObserved(poolKey) c.enqueuePool(pool) return } } // When a vm is updated, figure out what pool/s manage it and wake them // up. If the labels of the vm have changed we need to awaken both the old // and new pool. old and cur must be *metav1.VirtualMachine types. 
func (c *Controller) updateVMHandler(old, cur interface{}) { curVM := cur.(*virtv1.VirtualMachine) oldVM := old.(*virtv1.VirtualMachine) if curVM.ResourceVersion == oldVM.ResourceVersion { return } labelChanged := !equality.Semantic.DeepEqual(curVM.Labels, oldVM.Labels) if curVM.DeletionTimestamp != nil { c.deleteVMHandler(curVM) if labelChanged { c.deleteVMHandler(oldVM) } return } curControllerRef := metav1.GetControllerOf(curVM) oldControllerRef := metav1.GetControllerOf(oldVM) controllerRefChanged := !equality.Semantic.DeepEqual(curControllerRef, oldControllerRef) if controllerRefChanged && oldControllerRef != nil { // The ControllerRef was changed. Sync the old controller, if any. if pool := c.resolveControllerRef(oldVM.Namespace, oldControllerRef); pool != nil { c.enqueuePool(pool) } } // If it has a ControllerRef, that's all that matters. if curControllerRef != nil { pool := c.resolveControllerRef(curVM.Namespace, curControllerRef) if pool == nil { return } log.Log.V(4).Object(curVM).Infof("VirtualMachine updated") c.enqueuePool(pool) return } } // When a vm is deleted, enqueue the pool that manages the vm and update its expectations. // obj could be an *metav1.VirtualMachine, or a DeletionFinalStateUnknown marker item. func (c *Controller) deleteVMHandler(obj interface{}) { vm, ok := obj.(*virtv1.VirtualMachine) // When a delete is dropped, the relist will notice a vm in the store not // in the list, leading to the insertion of a tombstone object which contains // the deleted key/value. Note that this value might be stale. If the vm // changed labels the new Pool will not be woken up till the periodic resync. if !ok { tombstone, ok := obj.(cache.DeletedFinalStateUnknown) if !ok { log.Log.Reason(fmt.Errorf("couldn't get object from tombstone %+v", obj)).Error("Failed to process delete notification") return } vm, ok = tombstone.Obj.(*virtv1.VirtualMachine) if !ok { log.Log.Reason(fmt.Errorf("tombstone contained object that is not a vm %#v", obj)).Error("Failed to process delete notification") return } } controllerRef := metav1.GetControllerOf(vm) if controllerRef == nil { return } pool := c.resolveControllerRef(vm.Namespace, controllerRef) if pool == nil { return } poolKey, err := controller.KeyFunc(pool) if err != nil { return } c.expectations.DeletionObserved(poolKey, controller.VirtualMachineKey(vm)) c.enqueuePool(pool) } func (c *Controller) addPool(obj interface{}) { c.enqueuePool(obj) } func (c *Controller) deletePool(obj interface{}) { c.enqueuePool(obj) } func (c *Controller) updatePool(_, curr interface{}) { c.enqueuePool(curr) } func (c *Controller) enqueuePool(obj interface{}) { logger := log.Log pool := obj.(*poolv1.VirtualMachinePool) key, err := controller.KeyFunc(pool) if err != nil { logger.Object(pool).Reason(err).Error("Failed to extract key from pool.") return } // Delay prevents pool from being reconciled too often c.queue.AddAfter(key, defaultAddDelay) } // resolveControllerRef returns the controller referenced by a ControllerRef, // or nil if the ControllerRef could not be resolved to a matching controller // of the correct Kind. func (c *Controller) resolveControllerRef(namespace string, controllerRef *metav1.OwnerReference) *poolv1.VirtualMachinePool { // We can't look up by UID, so look up by Name and then verify UID. // Don't even try to look up by Name if it's the wrong Kind. 
if controllerRef.Kind != poolv1.VirtualMachinePoolKind { return nil } pool, exists, err := c.poolIndexer.GetByKey(controller.NamespacedKey(namespace, controllerRef.Name)) if err != nil { return nil } if !exists { return nil } if pool.(*poolv1.VirtualMachinePool).UID != controllerRef.UID { // The controller we found with this Name is not the same one that the // ControllerRef points to. return nil } return pool.(*poolv1.VirtualMachinePool) } // listControllerFromNamespace takes a namespace and returns all Pools from the Pool cache which run in this namespace func (c *Controller) listControllerFromNamespace(namespace string) ([]*poolv1.VirtualMachinePool, error) { objs, err := c.poolIndexer.ByIndex(cache.NamespaceIndex, namespace) if err != nil { return nil, err } pools := []*poolv1.VirtualMachinePool{} for _, obj := range objs { pool := obj.(*poolv1.VirtualMachinePool) pools = append(pools, pool) } return pools, nil } // getMatchingController returns the first Pool which matches the labels of the VirtualMachine from the listener cache. // If there are no matching controllers, a NotFound error is returned. func (c *Controller) getMatchingControllers(vm *virtv1.VirtualMachine) (pools []*poolv1.VirtualMachinePool) { logger := log.Log controllers, err := c.listControllerFromNamespace(vm.ObjectMeta.Namespace) if err != nil { return nil } for _, pool := range controllers { selector, err := metav1.LabelSelectorAsSelector(pool.Spec.Selector) if err != nil { logger.Object(pool).Reason(err).Error("Failed to parse label selector from pool.") continue } if selector.Matches(labels.Set(vm.ObjectMeta.Labels)) { pools = append(pools, pool) } } return pools } // Run runs the passed in PoolController. func (c *Controller) Run(threadiness int, stopCh <-chan struct{}) { defer controller.HandlePanic() defer c.queue.ShutDown() log.Log.Info("Starting pool controller.") // Wait for cache sync before we start the pool controller cache.WaitForCacheSync(stopCh, c.hasSynced) // Start the actual work for i := 0; i < threadiness; i++ { go wait.Until(c.runWorker, time.Second, stopCh) } <-stopCh log.Log.Info("Stopping pool controller.") } func (c *Controller) runWorker() { for c.Execute() { } } func (c *Controller) listVMsFromNamespace(namespace string) ([]*virtv1.VirtualMachine, error) { objs, err := c.vmIndexer.ByIndex(cache.NamespaceIndex, namespace) if err != nil { return nil, err } vms := []*virtv1.VirtualMachine{} for _, obj := range objs { vms = append(vms, obj.(*virtv1.VirtualMachine)) } return vms, nil } func (c *Controller) calcDiff(pool *poolv1.VirtualMachinePool, vms []*virtv1.VirtualMachine) int { wantedReplicas := int32(1) if pool.Spec.Replicas != nil { wantedReplicas = *pool.Spec.Replicas } return len(vms) - int(wantedReplicas) } func filterDeletingVMs(vms []*virtv1.VirtualMachine) []*virtv1.VirtualMachine { filtered := []*virtv1.VirtualMachine{} for _, vm := range vms { if vm.DeletionTimestamp == nil { filtered = append(filtered, vm) } } return filtered } // filterReadyVMs takes a list of VMs and returns all VMs which are in ready state. 
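// It is built on the generic filterVMs helper defined just below; a hedged
// example of reusing that helper with a different predicate (variable names
// are made up):
//
//	running := filterVMs(vms, func(vm *virtv1.VirtualMachine) bool {
//		return vm.Status.Ready
//	})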
func (c *Controller) filterReadyVMs(vms []*virtv1.VirtualMachine) []*virtv1.VirtualMachine { return filterVMs(vms, func(vm *virtv1.VirtualMachine) bool { return controller.NewVirtualMachineConditionManager().HasConditionWithStatus(vm, virtv1.VirtualMachineConditionType(k8score.PodReady), k8score.ConditionTrue) }) } func filterVMs(vms []*virtv1.VirtualMachine, f func(vmi *virtv1.VirtualMachine) bool) []*virtv1.VirtualMachine { filtered := []*virtv1.VirtualMachine{} for _, vm := range vms { if f(vm) { filtered = append(filtered, vm) } } return filtered } func (c *Controller) scaleIn(pool *poolv1.VirtualMachinePool, vms []*virtv1.VirtualMachine, count int) error { poolKey, err := controller.KeyFunc(pool) if err != nil { return err } elgibleVMs := filterDeletingVMs(vms) // make sure we count already deleting VMs here during scale in. count = count - (len(vms) - len(elgibleVMs)) if len(elgibleVMs) == 0 || count == 0 { return nil } else if count > len(elgibleVMs) { count = len(elgibleVMs) } // random delete strategy rand.Shuffle(len(elgibleVMs), func(i, j int) { elgibleVMs[i], elgibleVMs[j] = elgibleVMs[j], elgibleVMs[i] }) log.Log.Object(pool).Infof("Removing %d VMs from pool", count) var wg sync.WaitGroup deleteList := elgibleVMs[0:count] c.expectations.ExpectDeletions(poolKey, controller.VirtualMachineKeys(deleteList)) wg.Add(len(deleteList)) errChan := make(chan error, len(deleteList)) for i := 0; i < len(deleteList); i++ { go func(idx int) { defer wg.Done() vm := deleteList[idx] foreGround := metav1.DeletePropagationForeground err := c.clientset.VirtualMachine(vm.Namespace).Delete(context.Background(), vm.Name, metav1.DeleteOptions{PropagationPolicy: &foreGround}) if err != nil { c.expectations.DeletionObserved(poolKey, controller.VirtualMachineKey(vm)) c.recorder.Eventf(pool, k8score.EventTypeWarning, common.FailedDeleteVirtualMachineReason, "Error deleting virtual machine %s: %v", vm.ObjectMeta.Name, err) errChan <- err return } c.recorder.Eventf(pool, k8score.EventTypeNormal, common.SuccessfulDeleteVirtualMachineReason, "Deleted VM %s/%s with uid %v from pool", vm.Namespace, vm.Name, vm.ObjectMeta.UID) log.Log.Object(pool).Infof("Deleted vm %s/%s from pool", vm.Namespace, vm.Name) }(i) } wg.Wait() select { case err := <-errChan: // Only return the first error which occurred. 
We log the rest return err default: } return nil } func generateVMName(index int, baseName string) string { return fmt.Sprintf("%s-%d", baseName, index) } func calculateNewVMNames(count int, baseName string, namespace string, vmStore cache.Store) []string { var newNames []string // generate `count` new unused VM names curIndex := 0 for n := 0; n < count; n++ { // find next unused index starting where we left off last i := curIndex for { name := generateVMName(i, baseName) vmKey := controller.NamespacedKey(namespace, name) _, exists, _ := vmStore.GetByKey(vmKey) if !exists { newNames = append(newNames, name) curIndex = i + 1 break } i++ } } return newNames } func poolOwnerRef(pool *poolv1.VirtualMachinePool) metav1.OwnerReference { t := pointer.P(true) gvk := schema.GroupVersionKind{Group: poolv1.SchemeGroupVersion.Group, Version: poolv1.SchemeGroupVersion.Version, Kind: poolv1.VirtualMachinePoolKind} return metav1.OwnerReference{ APIVersion: gvk.GroupVersion().String(), Kind: gvk.Kind, Name: pool.ObjectMeta.Name, UID: pool.ObjectMeta.UID, Controller: t, BlockOwnerDeletion: t, } } func indexFromName(name string) (int, error) { slice := strings.Split(name, "-") return strconv.Atoi(slice[len(slice)-1]) } func indexVMSpec(poolSpec *poolv1.VirtualMachinePoolSpec, idx int) *virtv1.VirtualMachineSpec { spec := poolSpec.VirtualMachineTemplate.Spec.DeepCopy() dvNameMap := map[string]string{} for i := range spec.DataVolumeTemplates { indexName := fmt.Sprintf("%s-%d", spec.DataVolumeTemplates[i].Name, idx) dvNameMap[spec.DataVolumeTemplates[i].Name] = indexName spec.DataVolumeTemplates[i].Name = indexName } appendIndexToConfigMapRefs := false appendIndexToSecretRefs := false if poolSpec.NameGeneration != nil { if poolSpec.NameGeneration.AppendIndexToConfigMapRefs != nil { appendIndexToConfigMapRefs = *poolSpec.NameGeneration.AppendIndexToConfigMapRefs } if poolSpec.NameGeneration.AppendIndexToSecretRefs != nil { appendIndexToSecretRefs = *poolSpec.NameGeneration.AppendIndexToSecretRefs } } for i, volume := range spec.Template.Spec.Volumes { if volume.VolumeSource.PersistentVolumeClaim != nil { indexName, ok := dvNameMap[volume.VolumeSource.PersistentVolumeClaim.ClaimName] if ok { spec.Template.Spec.Volumes[i].PersistentVolumeClaim.ClaimName = indexName } } else if volume.VolumeSource.DataVolume != nil { indexName, ok := dvNameMap[volume.VolumeSource.DataVolume.Name] if ok { spec.Template.Spec.Volumes[i].DataVolume.Name = indexName } } else if volume.VolumeSource.ConfigMap != nil && appendIndexToConfigMapRefs { volume.VolumeSource.ConfigMap.Name += "-" + strconv.Itoa(idx) } else if volume.VolumeSource.Secret != nil && appendIndexToSecretRefs { volume.VolumeSource.Secret.SecretName += "-" + strconv.Itoa(idx) } } return spec } func injectPoolRevisionLabelsIntoVM(vm *virtv1.VirtualMachine, revisionName string) *virtv1.VirtualMachine { if vm.Labels == nil { vm.Labels = map[string]string{} } if vm.Spec.Template.ObjectMeta.Labels == nil { vm.Spec.Template.ObjectMeta.Labels = map[string]string{} } vm.Labels[virtv1.VirtualMachinePoolRevisionName] = revisionName vm.Spec.Template.ObjectMeta.Labels[virtv1.VirtualMachinePoolRevisionName] = revisionName return vm } func getRevisionName(pool *poolv1.VirtualMachinePool) string { return fmt.Sprintf("%s-%d", pool.Name, pool.Generation) } func (c *Controller) ensureControllerRevision(pool *poolv1.VirtualMachinePool) (string, error) { poolKey, err := controller.KeyFunc(pool) if err != nil { return "", err } revisionName := getRevisionName(pool) _, alreadyExists, err := 
c.getControllerRevision(pool.Namespace, revisionName) if err != nil { return "", err } else if alreadyExists { // already created return revisionName, nil } bytes, err := json.Marshal(&pool.Spec) if err != nil { return "", err } cr := &appsv1.ControllerRevision{ ObjectMeta: v1.ObjectMeta{ Name: revisionName, Namespace: pool.Namespace, OwnerReferences: []metav1.OwnerReference{poolOwnerRef(pool)}, }, Data: runtime.RawExtension{Raw: bytes}, Revision: pool.ObjectMeta.Generation, } c.expectations.RaiseExpectations(poolKey, 1, 0) _, err = c.clientset.AppsV1().ControllerRevisions(pool.Namespace).Create(context.Background(), cr, v1.CreateOptions{}) if err != nil { c.expectations.CreationObserved(poolKey) return "", err } return cr.Name, nil } func (c *Controller) getControllerRevision(namespace, name string) (*poolv1.VirtualMachinePoolSpec, bool, error) { key := controller.NamespacedKey(namespace, name) storeObj, exists, err := c.revisionIndexer.GetByKey(key) if !exists || err != nil { return nil, false, err } cr, ok := storeObj.(*appsv1.ControllerRevision) if !ok { return nil, false, fmt.Errorf("unexpected resource %+v", storeObj) } spec := &poolv1.VirtualMachinePoolSpec{} err = json.Unmarshal(cr.Data.Raw, spec) if err != nil { return nil, false, err } return spec, true, nil } func (c *Controller) scaleOut(pool *poolv1.VirtualMachinePool, count int) error { var wg sync.WaitGroup newNames := calculateNewVMNames(count, pool.Name, pool.Namespace, c.vmIndexer) revisionName, err := c.ensureControllerRevision(pool) if err != nil { return err } log.Log.Object(pool).Infof("Adding %d VMs to pool", len(newNames)) poolKey, err := controller.KeyFunc(pool) if err != nil { return err } // We have to create VMs c.expectations.RaiseExpectations(poolKey, len(newNames), 0) wg.Add(len(newNames)) errChan := make(chan error, len(newNames)) for _, name := range newNames { go func(name string) { defer wg.Done() index, err := indexFromName(name) if err != nil { errChan <- err return } vm := virtv1.NewVMReferenceFromNameWithNS(pool.Namespace, name) vm.Labels = maps.Clone(pool.Spec.VirtualMachineTemplate.ObjectMeta.Labels) vm.Annotations = maps.Clone(pool.Spec.VirtualMachineTemplate.ObjectMeta.Annotations) vm.Spec = *indexVMSpec(&pool.Spec, index) vm = injectPoolRevisionLabelsIntoVM(vm, revisionName) vm.ObjectMeta.OwnerReferences = []metav1.OwnerReference{poolOwnerRef(pool)} vm, err = c.clientset.VirtualMachine(vm.Namespace).Create(context.Background(), vm, metav1.CreateOptions{}) if err != nil { c.expectations.CreationObserved(poolKey) log.Log.Object(pool).Reason(err).Errorf("Failed to add vm %s/%s to pool", pool.Namespace, name) errChan <- err return } c.recorder.Eventf(pool, k8score.EventTypeNormal, common.SuccessfulCreateVirtualMachineReason, "Created VM %s/%s", vm.Namespace, vm.ObjectMeta.Name) log.Log.Object(pool).Infof("Adding vm %s/%s to pool", pool.Namespace, name) }(name) } wg.Wait() select { case err := <-errChan: // Only return the first error which occurred. 
We log the rest c.recorder.Eventf(pool, k8score.EventTypeWarning, common.FailedCreateVirtualMachineReason, "Error creating VM: %v", err) return err default: } return nil } func (c *Controller) scale(pool *poolv1.VirtualMachinePool, vms []*virtv1.VirtualMachine) (common.SyncError, bool) { diff := c.calcDiff(pool, vms) if diff == 0 { // nothing to do return nil, true } maxDiff := int(math.Min(math.Abs(float64(diff)), float64(c.burstReplicas))) if diff < 0 { err := c.scaleOut(pool, maxDiff) if err != nil { return common.NewSyncError(fmt.Errorf("Error during scale out: %v", err), FailedScaleOutReason), false } } else { err := c.scaleIn(pool, vms, maxDiff) if err != nil { return common.NewSyncError(fmt.Errorf("Error during scale in: %v", err), FailedScaleInReason), false } } return nil, false } func (c *Controller) opportunisticUpdate(pool *poolv1.VirtualMachinePool, vmOutdatedList []*virtv1.VirtualMachine) error { var wg sync.WaitGroup if len(vmOutdatedList) == 0 { return nil } revisionName, err := c.ensureControllerRevision(pool) if err != nil { return err } wg.Add(len(vmOutdatedList)) errChan := make(chan error, len(vmOutdatedList)) for i := 0; i < len(vmOutdatedList); i++ { go func(idx int) { defer wg.Done() vm := vmOutdatedList[idx] index, err := indexFromName(vm.Name) if err != nil { errChan <- err return } vmCopy := vm.DeepCopy() vmCopy.Labels = maps.Clone(pool.Spec.VirtualMachineTemplate.ObjectMeta.Labels) vmCopy.Annotations = maps.Clone(pool.Spec.VirtualMachineTemplate.ObjectMeta.Annotations) vmCopy.Spec = *indexVMSpec(&pool.Spec, index) vmCopy = injectPoolRevisionLabelsIntoVM(vmCopy, revisionName) _, err = c.clientset.VirtualMachine(vmCopy.Namespace).Update(context.Background(), vmCopy, metav1.UpdateOptions{}) if err != nil { c.recorder.Eventf(pool, k8score.EventTypeWarning, FailedUpdateVirtualMachineReason, "Error updating virtual machine %s/%s: %v", vm.Name, vm.Namespace, err) log.Log.Object(pool).Reason(err).Errorf("Error encountered during update of vm %s/%s in pool", vmCopy.Namespace, vmCopy.Name) errChan <- err return } log.Log.Object(pool).Infof("Updated vm %s/%s in pool", vmCopy.Namespace, vmCopy.Name) c.recorder.Eventf(pool, k8score.EventTypeNormal, SuccessfulUpdateVirtualMachineReason, "Updated VM %s/%s", vm.Namespace, vm.Name) }(i) } wg.Wait() select { case err := <-errChan: // Only return the first error which occurred. 
We log the rest return err default: } return nil } func (c *Controller) proactiveUpdate(pool *poolv1.VirtualMachinePool, vmUpdatedList []*virtv1.VirtualMachine) error { var wg sync.WaitGroup wg.Add(len(vmUpdatedList)) errChan := make(chan error, len(vmUpdatedList)) for i := 0; i < len(vmUpdatedList); i++ { go func(idx int) { defer wg.Done() vm := vmUpdatedList[idx] vmiKey := controller.NamespacedKey(vm.Namespace, vm.Name) obj, exists, _ := c.vmiStore.GetByKey(vmiKey) if !exists { // no VMI to update return } vmi := obj.(*virtv1.VirtualMachineInstance) if vmi.DeletionTimestamp != nil { // ignore VMIs which are already deleting return } updateType, err := c.isOutdatedVMI(vm, vmi) if err != nil { errChan <- err return } switch updateType { case proactiveUpdateTypeRestart: err := c.clientset.VirtualMachineInstance(vm.ObjectMeta.Namespace).Delete(context.Background(), vmi.ObjectMeta.Name, v1.DeleteOptions{}) if err != nil { c.recorder.Eventf(pool, k8score.EventTypeWarning, FailedUpdateVirtualMachineReason, "Error proactively updating VM %s/%s by deleting outdated VMI: %v", vm.Namespace, vm.Name, err) errChan <- err return } log.Log.Object(pool).Infof("Proactively updating vm %s/%s in pool via vmi deletion", vm.Namespace, vm.Name) c.recorder.Eventf(pool, k8score.EventTypeNormal, common.SuccessfulDeleteVirtualMachineReason, "Proactive update of VM %s/%s by deleting outdated VMI", vm.Namespace, vm.Name) case proactiveUpdateTypePatchRevisionLabel: patchSet := patch.New() vmiCopy := vmi.DeepCopy() if vmiCopy.Labels == nil { vmiCopy.Labels = make(map[string]string) } revisionName, exists := vm.Labels[virtv1.VirtualMachinePoolRevisionName] if !exists { // nothing to do return } vmiCopy.Labels[virtv1.VirtualMachinePoolRevisionName] = revisionName if vmi.Labels == nil { // the VMI has no labels yet, so add the updated label map from the copy patchSet.AddOption(patch.WithAdd("/metadata/labels", vmiCopy.Labels)) } else { // test against the current labels and replace them with the updated copy patchSet.AddOption( patch.WithTest("/metadata/labels", vmi.Labels), patch.WithReplace("/metadata/labels", vmiCopy.Labels), ) } patchBytes, err := patchSet.GeneratePayload() if err != nil { errChan <- err return } _, err = c.clientset.VirtualMachineInstance(vmi.Namespace).Patch(context.Background(), vmi.Name, types.JSONPatchType, patchBytes, v1.PatchOptions{}) if err != nil { errChan <- fmt.Errorf("patching of vmi labels with new pool revision name: %v", err) return } log.Log.Object(pool).Infof("Proactively updating vm %s/%s in pool via label patch", vm.Namespace, vm.Name) } }(i) } wg.Wait() select { case err := <-errChan: // Only return the first error which occurred. We log the rest return err default: } return nil } type proactiveUpdateType string const ( // VMI spec has changed within vmi pool and requires restart proactiveUpdateTypeRestart proactiveUpdateType = "restart" // VMI spec is identical in current vmi pool, just needs revision label updated proactiveUpdateTypePatchRevisionLabel proactiveUpdateType = "label-patch" // VMI does not need an update proactiveUpdateTypeNone proactiveUpdateType = "no-update" ) func (c *Controller) isOutdatedVMI(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) (proactiveUpdateType, error) { // This function compares the pool revision (pool spec at a specific point in time) synced // to the VM vs the one used to create the VMI. By comparing the pool spec revisions between // the VM and VMI we can determine if the VM has mutated in a way that should result // in the VMI being updated. 
If the VMITemplate in these two pool revisions are not identical, // the VMI needs to be updated via forced restart when proactive updates are in use. // // Rules for determining if a VMI is out of date or not // // 1. If the VM revision name doesn't exist, it's going to get set by the reconcile loop. // The (opportunist update) logic handles ensuring the VM revision name will get set again // on a future reconcile loop. // 2. If the VMI revision name doesn't exist, the VMI has to be proactively restarted // because we have no history of what revision was used to originate the VMI. The // VM is an offline config we're comparing to, but the VMI is the active config. // 3. Compare the VMI template in the pool revision associated with the VM to the one // associated with the VMI. If they are identical in name or DeepEquals, then no // proactive restart is required. // 4. If the expected VMI template specs from the revisions are not identical in name, but // are identical in DeepEquals, patch the VMI with the new revision name used on the vm. vmRevisionName, exists := vm.Labels[virtv1.VirtualMachinePoolRevisionName] if !exists { // If we can't detect the VM revision then consider the outdated // status as not being required. The VM revision will get set again // by this controller on a future reconcile loop return proactiveUpdateTypeNone, nil } vmiRevisionName, exists := vmi.Labels[virtv1.VirtualMachinePoolRevisionName] if !exists { // If the VMI doesn't have the revision label, then it is outdated log.Log.Infof("Marking vmi %s/%s for update due to missing revision label", vm.Namespace, vm.Name) return proactiveUpdateTypeRestart, nil } if vmRevisionName == vmiRevisionName { // no update required because revisions match return proactiveUpdateTypeNone, nil } // Get the pool revision used to create the VM poolSpecRevisionForVM, exists, err := c.getControllerRevision(vm.Namespace, vmRevisionName) if err != nil { return proactiveUpdateTypeNone, err } else if !exists { // if the revision associated with the pool can't be found, then // no update is required at this time. The revision will eventually // get created in a future reconcile loop and we'll be able to process the VMI. return proactiveUpdateTypeNone, nil } expectedVMITemplate := poolSpecRevisionForVM.VirtualMachineTemplate.Spec.Template // Get the pool revision used to create the VMI poolSpecRevisionForVMI, exists, err := c.getControllerRevision(vm.Namespace, vmiRevisionName) if err != nil { return proactiveUpdateTypeRestart, err } else if !exists { // if the VMI does not have an associated revision, then we have to force // an update log.Log.Infof("Marking vmi %s/%s for update due to missing revision", vm.Namespace, vm.Name) return proactiveUpdateTypeRestart, nil } currentVMITemplate := poolSpecRevisionForVMI.VirtualMachineTemplate.Spec.Template // If the VMI templates differ between the revision used to create // the VM and the revision used to create the VMI, then the VMI // must be updated. if !equality.Semantic.DeepEqual(currentVMITemplate, expectedVMITemplate) { log.Log.Infof("Marking vmi %s/%s for update due out of sync spec", vm.Namespace, vm.Name) return proactiveUpdateTypeRestart, nil } // If we get here, the vmi templates are identical, but the revision // names are different, so patch the VMI with a new revision name. 
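// Hypothetical illustration (revision names invented): the VM carries revision
// "mypool-7" while the VMI still carries "mypool-6", but both revisions hold
// DeepEqual VMI templates; only the revision label is patched and no restart
// is triggered.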
return proactiveUpdateTypePatchRevisionLabel, nil } func (c *Controller) isOutdatedVM(pool *poolv1.VirtualMachinePool, vm *virtv1.VirtualMachine) (bool, error) { if vm.Labels == nil { log.Log.Object(pool).Infof("Marking vm %s/%s for update due to missing labels ", vm.Namespace, vm.Name) return true, nil } revisionName, exists := vm.Labels[virtv1.VirtualMachinePoolRevisionName] if !exists { log.Log.Object(pool).Infof("Marking vm %s/%s for update due to missing revision labels ", vm.Namespace, vm.Name) return true, nil } oldPoolSpec, exists, err := c.getControllerRevision(pool.Namespace, revisionName) if err != nil { return true, err } else if !exists { log.Log.Object(pool).Infof("Marking vm %s/%s for update due to missing revision", vm.Namespace, vm.Name) return true, nil } if !equality.Semantic.DeepEqual(oldPoolSpec.VirtualMachineTemplate, pool.Spec.VirtualMachineTemplate) { log.Log.Object(pool).Infof("Marking vm %s/%s for update due out of date spec", vm.Namespace, vm.Name) return true, nil } return false, nil } func (c *Controller) pruneUnusedRevisions(pool *poolv1.VirtualMachinePool, vms []*virtv1.VirtualMachine) common.SyncError { keys, err := c.revisionIndexer.IndexKeys("vmpool", string(pool.UID)) if err != nil { return common.NewSyncError(fmt.Errorf("Error while pruning vmpool revisions: %v", err), FailedRevisionPruningReason) } deletionMap := make(map[string]interface{}) for _, key := range keys { strs := strings.Split(key, "/") if len(strs) != 2 { continue } deletionMap[strs[1]] = nil } for _, vm := range vms { // Check to see what revision is used by the VM, and remove // that from the revision prune list revisionName, exists := vm.Labels[virtv1.VirtualMachinePoolRevisionName] if exists { // remove from deletionMap since we found a VM that references this revision delete(deletionMap, revisionName) } // Check to see what revision is used by the VMI, and remove // that from the revision prune list vmiKey := controller.NamespacedKey(vm.Namespace, vm.Name) obj, exists, _ := c.vmiStore.GetByKey(vmiKey) if exists { vmi := obj.(*virtv1.VirtualMachineInstance) revisionName, exists = vmi.Labels[virtv1.VirtualMachinePoolRevisionName] if exists { // remove from deletionMap since we found a VMI that references this revision delete(deletionMap, revisionName) } } } for revisionName := range deletionMap { err := c.clientset.AppsV1().ControllerRevisions(pool.Namespace).Delete(context.Background(), revisionName, v1.DeleteOptions{}) if err != nil { return common.NewSyncError(fmt.Errorf("Error while pruning vmpool revisions: %v", err), FailedRevisionPruningReason) } } return nil } func (c *Controller) update(pool *poolv1.VirtualMachinePool, vms []*virtv1.VirtualMachine) (common.SyncError, bool) { // List of VMs that need to be updated vmOutdatedList := []*virtv1.VirtualMachine{} // List of VMs that are up-to-date that need to be checked to see if VMI is up-to-date vmUpdatedList := []*virtv1.VirtualMachine{} for _, vm := range vms { outdated, err := c.isOutdatedVM(pool, vm) if err != nil { return common.NewSyncError(fmt.Errorf("Error while detected outdated VMs: %v", err), FailedUpdateReason), false } if outdated { vmOutdatedList = append(vmOutdatedList, vm) } else { vmUpdatedList = append(vmUpdatedList, vm) } } err := c.opportunisticUpdate(pool, vmOutdatedList) if err != nil { return common.NewSyncError(fmt.Errorf("Error during VM update: %v", err), FailedUpdateReason), false } err = c.proactiveUpdate(pool, vmUpdatedList) if err != nil { return common.NewSyncError(fmt.Errorf("Error during VMI 
update: %v", err), FailedUpdateReason), false } vmUpdateStable := false if len(vmOutdatedList) == 0 { vmUpdateStable = true } return nil, vmUpdateStable } // Execute runs commands from the controller queue, if there is // an error it requeues the command. Returns false if the queue // is empty. func (c *Controller) Execute() bool { key, quit := c.queue.Get() if quit { return false } defer c.queue.Done(key) virtControllerPoolWorkQueueTracer.StartTrace(key, "virt-controller VMPool workqueue", trace.Field{Key: "Workqueue Key", Value: key}) defer virtControllerPoolWorkQueueTracer.StopTrace(key) err := c.execute(key) if err != nil { log.Log.Reason(err).Infof("reenqueuing pool %v", key) c.queue.AddRateLimited(key) } else { log.Log.V(4).Infof("processed pool %v", key) c.queue.Forget(key) } return true } func (c *Controller) updateStatus(origPool *poolv1.VirtualMachinePool, vms []*virtv1.VirtualMachine, syncErr common.SyncError) error { key, err := controller.KeyFunc(origPool) if err != nil { return err } defer virtControllerPoolWorkQueueTracer.StepTrace(key, "updateStatus", trace.Field{Key: "VMPool Name", Value: origPool.Name}) pool := origPool.DeepCopy() labelSelector, err := metav1.LabelSelectorAsSelector(pool.Spec.Selector) if err != nil { return err } pool.Status.LabelSelector = labelSelector.String() cm := controller.NewVirtualMachinePoolConditionManager() if syncErr != nil && !cm.HasCondition(pool, poolv1.VirtualMachinePoolReplicaFailure) { cm.UpdateCondition(pool, &poolv1.VirtualMachinePoolCondition{ Type: poolv1.VirtualMachinePoolReplicaFailure, Reason: syncErr.Reason(), Message: syncErr.Error(), LastTransitionTime: metav1.Now(), Status: k8score.ConditionTrue, }) c.recorder.Eventf(pool, k8score.EventTypeWarning, syncErr.Reason(), syncErr.Error()) } else if syncErr == nil && cm.HasCondition(pool, poolv1.VirtualMachinePoolReplicaFailure) { cm.RemoveCondition(pool, poolv1.VirtualMachinePoolReplicaFailure) } if pool.Spec.Paused && !cm.HasCondition(pool, poolv1.VirtualMachinePoolReplicaPaused) { cm.UpdateCondition(pool, &poolv1.VirtualMachinePoolCondition{ Type: poolv1.VirtualMachinePoolReplicaPaused, Reason: SuccessfulPausedPoolReason, Message: "Pool controller is paused", LastTransitionTime: metav1.Now(), Status: k8score.ConditionTrue, }) c.recorder.Eventf(pool, k8score.EventTypeNormal, SuccessfulPausedPoolReason, "Pool is paused") } else if !pool.Spec.Paused && cm.HasCondition(pool, poolv1.VirtualMachinePoolReplicaPaused) { cm.RemoveCondition(pool, poolv1.VirtualMachinePoolReplicaPaused) c.recorder.Eventf(pool, k8score.EventTypeNormal, SuccessfulResumePoolReason, "Pool is unpaused") } pool.Status.Replicas = int32(len(vms)) pool.Status.ReadyReplicas = int32(len(c.filterReadyVMs(vms))) if !equality.Semantic.DeepEqual(pool.Status, origPool.Status) || pool.Status.Replicas != pool.Status.ReadyReplicas { _, err := c.clientset.VirtualMachinePool(pool.Namespace).UpdateStatus(context.Background(), pool, metav1.UpdateOptions{}) if err != nil { return err } } return nil } func (c *Controller) execute(key string) error { logger := log.DefaultLogger() var syncErr common.SyncError obj, poolExists, err := c.poolIndexer.GetByKey(key) if err != nil { return err } var pool *poolv1.VirtualMachinePool if poolExists { pool = obj.(*poolv1.VirtualMachinePool) logger = logger.Object(pool) } else { c.expectations.DeleteExpectations(key) return nil } selector, err := metav1.LabelSelectorAsSelector(pool.Spec.Selector) if err != nil { logger.Reason(err).Error("Invalid selector on pool, will not re-enqueue.") return 
nil } if !selector.Matches(labels.Set(pool.Spec.VirtualMachineTemplate.ObjectMeta.Labels)) { logger.Reason(err).Error("Selector does not match template labels, will not re-enqueue.") return nil } vms, err := c.listVMsFromNamespace(pool.ObjectMeta.Namespace) if err != nil { logger.Reason(err).Error("Failed to fetch vms for namespace from cache.") return err } // If any adoptions are attempted, we should first recheck for deletion with // an uncached quorum read sometime after listing VirtualMachines (see kubernetes/kubernetes#42639). canAdoptFunc := controller.RecheckDeletionTimestamp(func() (metav1.Object, error) { fresh, err := c.clientset.VirtualMachinePool(pool.ObjectMeta.Namespace).Get(context.Background(), pool.ObjectMeta.Name, metav1.GetOptions{}) if err != nil { return nil, err } if fresh.ObjectMeta.UID != pool.ObjectMeta.UID { return nil, fmt.Errorf("original Pool %v/%v is gone: got uid %v, wanted %v", pool.Namespace, pool.Name, fresh.UID, pool.UID) } return fresh, nil }) cm := controller.NewVirtualMachineControllerRefManager(controller.RealVirtualMachineControl{Clientset: c.clientset}, pool, selector, virtv1.VirtualMachineInstanceReplicaSetGroupVersionKind, canAdoptFunc) vms, err = cm.ReleaseDetachedVirtualMachines(vms) if err != nil { return err } needsSync := c.expectations.SatisfiedExpectations(key) if needsSync && !pool.Spec.Paused && pool.DeletionTimestamp == nil { scaleIsStable := false updateIsStable := false syncErr, scaleIsStable = c.scale(pool, vms) if syncErr != nil { logger.Reason(err).Error("Scaling the pool failed.") } needsSync = c.expectations.SatisfiedExpectations(key) if needsSync && scaleIsStable && syncErr == nil { // Handle updates after scale operations are satisfied. syncErr, updateIsStable = c.update(pool, vms) } needsSync = c.expectations.SatisfiedExpectations(key) if needsSync && syncErr == nil && scaleIsStable && updateIsStable { // handle pruning revisions after scale and update operations are satisfied syncErr = c.pruneUnusedRevisions(pool, vms) } virtControllerPoolWorkQueueTracer.StepTrace(key, "sync", trace.Field{Key: "VMPool Name", Value: pool.Name}) } else if pool.DeletionTimestamp != nil { syncErr = c.pruneUnusedRevisions(pool, vms) } err = c.updateStatus(pool, vms, syncErr) if err != nil { return err } return syncErr }
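// examplePoolNaming is an illustrative sketch appended for documentation only;
// it is not called by the controller and uses only this file's existing
// imports. It demonstrates the naming contract used during scale out: every
// pooled VM is named "<poolName>-<index>" by generateVMName, and indexFromName
// recovers the index that indexVMSpec uses to suffix per-VM DataVolume (and
// optionally ConfigMap/Secret) references. The pool name "web-pool" is
// hypothetical.
func examplePoolNaming() {
	name := generateVMName(3, "web-pool") // "web-pool-3"
	idx, err := indexFromName(name)
	if err != nil {
		// Should not happen for names produced by generateVMName.
		log.Log.Reason(err).Error("failed to parse index from generated name")
		return
	}
	fmt.Printf("%s maps back to index %d\n", name, idx)
}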
/* * This file is part of the KubeVirt project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * Copyright 2017 Red Hat, Inc. * */ package vm import ( "context" "encoding/json" "errors" "fmt" "maps" "math" "math/rand" "strconv" "strings" "time" "kubevirt.io/kubevirt/pkg/instancetype/revision" "kubevirt.io/kubevirt/pkg/liveupdate/memory" netadmitter "kubevirt.io/kubevirt/pkg/network/admitter" "kubevirt.io/kubevirt/pkg/network/vmispec" "kubevirt.io/kubevirt/pkg/virt-controller/watch/common" watchutil "kubevirt.io/kubevirt/pkg/virt-controller/watch/util" "github.com/google/uuid" appsv1 "k8s.io/api/apps/v1" authv1 "k8s.io/api/authorization/v1" k8score "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" apiErrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" k8sfield "k8s.io/apimachinery/pkg/util/validation/field" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" "k8s.io/client-go/util/workqueue" "k8s.io/utils/trace" virtv1 "kubevirt.io/api/core/v1" "kubevirt.io/client-go/kubecli" "kubevirt.io/client-go/log" cdiv1 "kubevirt.io/containerized-data-importer-api/pkg/apis/core/v1beta1" "kubevirt.io/kubevirt/pkg/apimachinery/patch" "kubevirt.io/kubevirt/pkg/controller" "kubevirt.io/kubevirt/pkg/storage/memorydump" storagetypes "kubevirt.io/kubevirt/pkg/storage/types" "kubevirt.io/kubevirt/pkg/util" "kubevirt.io/kubevirt/pkg/util/hardware" "kubevirt.io/kubevirt/pkg/util/migrations" traceUtils "kubevirt.io/kubevirt/pkg/util/trace" virtconfig "kubevirt.io/kubevirt/pkg/virt-config" volumemig "kubevirt.io/kubevirt/pkg/virt-controller/watch/volume-migration" ) const ( fetchingRunStrategyErrFmt = "Error fetching RunStrategy: %v" fetchingVMKeyErrFmt = "Error fetching vmKey: %v" startingVMIFailureFmt = "Failure while starting VMI: %v" ) type CloneAuthFunc func(dv *cdiv1.DataVolume, requestNamespace, requestName string, proxy cdiv1.AuthorizationHelperProxy, saNamespace, saName string) (bool, string, error) // Repeating info / error messages const ( stoppingVmMsg = "Stopping VM" startingVmMsg = "Starting VM" failedExtractVmkeyFromVmErrMsg = "Failed to extract vmKey from VirtualMachine." failedCreateCRforVmErrMsg = "Failed to create controller revision for VirtualMachine." 
failedProcessDeleteNotificationErrMsg = "Failed to process delete notification" failureDeletingVmiErrFormat = "Failure attempting to delete VMI: %v" failedCleanupRestartRequired = "Failed to delete RestartRequired condition or last-seen controller revisions" failedManualRecoveryRequiredCondSetErrMsg = "cannot start the VM since it has the manual recovery required condtion set" // UnauthorizedDataVolumeCreateReason is added in an event when the DataVolume // ServiceAccount doesn't have permission to create a DataVolume UnauthorizedDataVolumeCreateReason = "UnauthorizedDataVolumeCreate" // FailedDataVolumeCreateReason is added in an event when posting a dynamically // generated dataVolume to the cluster fails. FailedDataVolumeCreateReason = "FailedDataVolumeCreate" // SuccessfulDataVolumeCreateReason is added in an event when a dynamically generated // dataVolume is successfully created SuccessfulDataVolumeCreateReason = "SuccessfulDataVolumeCreate" // SourcePVCNotAvailabe is added in an event when the source PVC of a valid // clone Datavolume doesn't exist SourcePVCNotAvailabe = "SourcePVCNotAvailabe" ) const ( hotplugVolumeErrorReason = "HotPlugVolumeError" hotplugCPUErrorReason = "HotPlugCPUError" failedUpdateErrorReason = "FailedUpdateError" failedCreateReason = "FailedCreate" vmiFailedDeleteReason = "FailedDelete" affinityChangeErrorReason = "AffinityChangeError" hotplugMemoryErrorReason = "HotPlugMemoryError" volumesUpdateErrorReason = "VolumesUpdateError" tolerationsChangeErrorReason = "TolerationsChangeError" ) const defaultMaxCrashLoopBackoffDelaySeconds = 300 func NewController(vmiInformer cache.SharedIndexInformer, vmInformer cache.SharedIndexInformer, dataVolumeInformer cache.SharedIndexInformer, dataSourceInformer cache.SharedIndexInformer, namespaceStore cache.Store, pvcInformer cache.SharedIndexInformer, crInformer cache.SharedIndexInformer, podInformer cache.SharedIndexInformer, recorder record.EventRecorder, clientset kubecli.KubevirtClient, clusterConfig *virtconfig.ClusterConfig, netSynchronizer synchronizer, instancetypeController instancetypeHandler, ) (*Controller, error) { c := &Controller{ Queue: workqueue.NewTypedRateLimitingQueueWithConfig[string]( workqueue.DefaultTypedControllerRateLimiter[string](), workqueue.TypedRateLimitingQueueConfig[string]{Name: "virt-controller-vm"}, ), vmiIndexer: vmiInformer.GetIndexer(), vmIndexer: vmInformer.GetIndexer(), dataVolumeStore: dataVolumeInformer.GetStore(), dataSourceStore: dataSourceInformer.GetStore(), namespaceStore: namespaceStore, pvcStore: pvcInformer.GetStore(), crIndexer: crInformer.GetIndexer(), instancetypeController: instancetypeController, recorder: recorder, clientset: clientset, expectations: controller.NewUIDTrackingControllerExpectations(controller.NewControllerExpectations()), dataVolumeExpectations: controller.NewUIDTrackingControllerExpectations(controller.NewControllerExpectations()), cloneAuthFunc: func(dv *cdiv1.DataVolume, requestNamespace, requestName string, proxy cdiv1.AuthorizationHelperProxy, saNamespace, saName string) (bool, string, error) { response, err := dv.AuthorizeSA(requestNamespace, requestName, proxy, saNamespace, saName) return response.Allowed, response.Reason, err }, clusterConfig: clusterConfig, netSynchronizer: netSynchronizer, } c.hasSynced = func() bool { return vmiInformer.HasSynced() && vmInformer.HasSynced() && dataVolumeInformer.HasSynced() && dataSourceInformer.HasSynced() && pvcInformer.HasSynced() && crInformer.HasSynced() && podInformer.HasSynced() } _, err := 
vmInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: c.addVirtualMachine, DeleteFunc: c.deleteVirtualMachine, UpdateFunc: c.updateVirtualMachine, }) if err != nil { return nil, err } _, err = vmiInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: c.addVirtualMachineInstance, DeleteFunc: c.deleteVirtualMachineInstance, UpdateFunc: c.updateVirtualMachineInstance, }) if err != nil { return nil, err } _, err = dataVolumeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: c.addDataVolume, DeleteFunc: c.deleteDataVolume, UpdateFunc: c.updateDataVolume, }) if err != nil { return nil, err } return c, nil } type authProxy struct { client kubecli.KubevirtClient dataSourceStore cache.Store namespaceStore cache.Store } func (p *authProxy) CreateSar(sar *authv1.SubjectAccessReview) (*authv1.SubjectAccessReview, error) { return p.client.AuthorizationV1().SubjectAccessReviews().Create(context.Background(), sar, metav1.CreateOptions{}) } func (p *authProxy) GetNamespace(name string) (*k8score.Namespace, error) { obj, exists, err := p.namespaceStore.GetByKey(name) if err != nil { return nil, err } else if !exists { return nil, fmt.Errorf("namespace %s does not exist", name) } ns := obj.(*k8score.Namespace).DeepCopy() return ns, nil } func (p *authProxy) GetDataSource(namespace, name string) (*cdiv1.DataSource, error) { key := fmt.Sprintf("%s/%s", namespace, name) obj, exists, err := p.dataSourceStore.GetByKey(key) if err != nil { return nil, err } else if !exists { return nil, fmt.Errorf("dataSource %s does not exist", key) } ds := obj.(*cdiv1.DataSource).DeepCopy() return ds, nil } type synchronizer interface { Sync(*virtv1.VirtualMachine, *virtv1.VirtualMachineInstance) (*virtv1.VirtualMachine, error) } type instancetypeHandler interface { synchronizer ApplyToVM(*virtv1.VirtualMachine) error ApplyToVMI(*virtv1.VirtualMachine, *virtv1.VirtualMachineInstance) error ApplyDevicePreferences(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) error } type Controller struct { clientset kubecli.KubevirtClient Queue workqueue.TypedRateLimitingInterface[string] vmiIndexer cache.Indexer vmIndexer cache.Indexer dataVolumeStore cache.Store dataSourceStore cache.Store namespaceStore cache.Store pvcStore cache.Store crIndexer cache.Indexer instancetypeController instancetypeHandler recorder record.EventRecorder expectations *controller.UIDTrackingControllerExpectations dataVolumeExpectations *controller.UIDTrackingControllerExpectations cloneAuthFunc CloneAuthFunc clusterConfig *virtconfig.ClusterConfig hasSynced func() bool netSynchronizer synchronizer } func (c *Controller) Run(threadiness int, stopCh <-chan struct{}) { defer controller.HandlePanic() defer c.Queue.ShutDown() log.Log.Info("Starting VirtualMachine controller.") // Wait for cache sync before we start the controller cache.WaitForCacheSync(stopCh, c.hasSynced) // Start the actual work for i := 0; i < threadiness; i++ { go wait.Until(c.runWorker, time.Second, stopCh) } <-stopCh log.Log.Info("Stopping VirtualMachine controller.") } func (c *Controller) runWorker() { for c.Execute() { } } func (c *Controller) satisfiedExpectations(key string) bool { return c.expectations.SatisfiedExpectations(key) && c.dataVolumeExpectations.SatisfiedExpectations(key) } var virtControllerVMWorkQueueTracer = &traceUtils.Tracer{Threshold: time.Second} func (c *Controller) Execute() bool { key, quit := c.Queue.Get() if quit { return false } virtControllerVMWorkQueueTracer.StartTrace(key, "virt-controller VM workqueue", 
trace.Field{Key: "Workqueue Key", Value: key}) defer virtControllerVMWorkQueueTracer.StopTrace(key) defer c.Queue.Done(key) if err := c.execute(key); err != nil { log.Log.Reason(err).Infof("re-enqueuing VirtualMachine %v", key) c.Queue.AddRateLimited(key) } else { log.Log.V(4).Infof("processed VirtualMachine %v", key) c.Queue.Forget(key) } return true } func (c *Controller) execute(key string) error { obj, exists, err := c.vmIndexer.GetByKey(key) if err != nil { return nil } if !exists { // nothing we need to do. It should always be possible to re-create this type of controller c.expectations.DeleteExpectations(key) return nil } originalVM := obj.(*virtv1.VirtualMachine) vm := originalVM.DeepCopy() logger := log.Log.Object(vm) logger.V(4).Info("Started processing vm") // this must be first step in execution. Writing the object // when api version changes ensures our api stored version is updated. if !controller.ObservedLatestApiVersionAnnotation(vm) { controller.SetLatestApiVersionAnnotation(vm) _, err = c.clientset.VirtualMachine(vm.Namespace).Update(context.Background(), vm, metav1.UpdateOptions{}) if err != nil { logger.Reason(err).Error("Updating api version annotations failed") } return err } vmKey, err := controller.KeyFunc(vm) if err != nil { return err } // If any adoptions are attempted, we should first recheck for deletion with // an uncached quorum read sometime after listing VirtualMachines (see kubernetes/kubernetes#42639). canAdoptFunc := controller.RecheckDeletionTimestamp(func() (metav1.Object, error) { fresh, err := c.clientset.VirtualMachine(vm.ObjectMeta.Namespace).Get(context.Background(), vm.ObjectMeta.Name, metav1.GetOptions{}) if err != nil { return nil, err } if fresh.ObjectMeta.UID != vm.ObjectMeta.UID { return nil, fmt.Errorf("original VirtualMachine %v/%v is gone: got uid %v, wanted %v", vm.Namespace, vm.Name, fresh.UID, vm.UID) } return fresh, nil }) cm := controller.NewVirtualMachineControllerRefManager( controller.RealVirtualMachineControl{ Clientset: c.clientset, }, vm, nil, virtv1.VirtualMachineGroupVersionKind, canAdoptFunc) var vmi *virtv1.VirtualMachineInstance vmiObj, exist, err := c.vmiIndexer.GetByKey(vmKey) if err != nil { logger.Reason(err).Error("Failed to fetch vmi for namespace from cache.") return err } if !exist { logger.V(4).Infof("VirtualMachineInstance not found in cache %s", key) vmi = nil } else { vmi = vmiObj.(*virtv1.VirtualMachineInstance) vmi, err = cm.ClaimVirtualMachineInstanceByName(vmi) if err != nil { return err } } dataVolumes, err := storagetypes.ListDataVolumesFromTemplates(vm.Namespace, vm.Spec.DataVolumeTemplates, c.dataVolumeStore) if err != nil { logger.Reason(err).Error("Failed to fetch dataVolumes for namespace from cache.") return err } if len(dataVolumes) != 0 { dataVolumes, err = cm.ClaimMatchedDataVolumes(dataVolumes) if err != nil { return err } } var syncErr common.SyncError vm, vmi, syncErr, err = c.sync(vm, vmi, key) if err != nil { return err } if syncErr != nil { logger.Reason(syncErr).Error("Reconciling the VirtualMachine failed.") } err = c.updateStatus(vm, originalVM, vmi, syncErr, logger) if err != nil { logger.Reason(err).Error("Updating the VirtualMachine status failed.") return err } return syncErr } func (c *Controller) handleCloneDataVolume(vm *virtv1.VirtualMachine, dv *cdiv1.DataVolume) error { if dv.Spec.SourceRef != nil { return fmt.Errorf("DataVolume sourceRef not supported") } if dv.Spec.Source == nil { return nil } // For consistency with other k8s objects, we allow creating clone DataVolumes even 
when the source PVC doesn't exist. // This means that a VirtualMachine can be successfully created with volumes that may remain unpopulated until the source PVC is created. // For this reason, we check if the source PVC exists and, if not, we trigger an event to let users know of this behavior. if dv.Spec.Source.PVC != nil { // TODO: a lot of CDI knowledge, maybe an API to check if source exists? pvc, err := storagetypes.GetPersistentVolumeClaimFromCache(dv.Spec.Source.PVC.Namespace, dv.Spec.Source.PVC.Name, c.pvcStore) if err != nil { return err } if pvc == nil { c.recorder.Eventf(vm, k8score.EventTypeWarning, SourcePVCNotAvailabe, "Source PVC %s not available: Target PVC %s will remain unpopulated until source is created", dv.Spec.Source.PVC.Name, dv.Name) } } if err := c.authorizeDataVolume(vm, dv); err != nil { c.recorder.Eventf(vm, k8score.EventTypeWarning, UnauthorizedDataVolumeCreateReason, "Not authorized to create DataVolume %s: %v", dv.Name, err) return fmt.Errorf("not authorized to create DataVolume: %v", err) } return nil } func (c *Controller) authorizeDataVolume(vm *virtv1.VirtualMachine, dataVolume *cdiv1.DataVolume) error { serviceAccountName := "default" for _, vol := range vm.Spec.Template.Spec.Volumes { if vol.ServiceAccount != nil { serviceAccountName = vol.ServiceAccount.ServiceAccountName } } proxy := &authProxy{client: c.clientset, dataSourceStore: c.dataSourceStore, namespaceStore: c.namespaceStore} allowed, reason, err := c.cloneAuthFunc(dataVolume, vm.Namespace, dataVolume.Name, proxy, vm.Namespace, serviceAccountName) if err != nil && err != cdiv1.ErrNoTokenOkay { return err } if !allowed { return fmt.Errorf(reason) } return nil } func (c *Controller) handleDataVolumes(vm *virtv1.VirtualMachine) (bool, error) { ready := true vmKey, err := controller.KeyFunc(vm) if err != nil { return ready, err } for _, template := range vm.Spec.DataVolumeTemplates { curDataVolume, err := storagetypes.GetDataVolumeFromCache(vm.Namespace, template.Name, c.dataVolumeStore) if err != nil { return false, err } if curDataVolume == nil { // Don't create DV if PVC already exists pvc, err := storagetypes.GetPersistentVolumeClaimFromCache(vm.Namespace, template.Name, c.pvcStore) if err != nil { return false, err } // ready = false because encountered DataVolume that is not created yet ready = false newDataVolume, err := watchutil.CreateDataVolumeManifest(c.clientset, template, vm) if err != nil { return ready, fmt.Errorf("unable to create DataVolume manifest: %v", err) } // We validate requirements that are exclusive to clone DataVolumes if err = c.handleCloneDataVolume(vm, newDataVolume); err != nil { return ready, err } c.dataVolumeExpectations.ExpectCreations(vmKey, 1) curDataVolume, err = c.clientset.CdiClient().CdiV1beta1().DataVolumes(vm.Namespace).Create(context.Background(), newDataVolume, metav1.CreateOptions{}) if err != nil { c.dataVolumeExpectations.CreationObserved(vmKey) if pvc != nil && strings.Contains(err.Error(), "already exists") { // If the PVC already exists, we can ignore the error and continue // probably old version of CDI log.Log.Object(vm).Reason(err).Warning("Appear to be running a version of CDI that does not support claim adoption annotation") continue } c.recorder.Eventf(vm, k8score.EventTypeWarning, FailedDataVolumeCreateReason, "Error creating DataVolume %s: %v", newDataVolume.Name, err) return ready, fmt.Errorf("failed to create DataVolume: %v", err) } c.recorder.Eventf(vm, k8score.EventTypeNormal, SuccessfulDataVolumeCreateReason, "Created DataVolume %s", 
curDataVolume.Name) } else { switch curDataVolume.Status.Phase { case cdiv1.Succeeded, cdiv1.WaitForFirstConsumer, cdiv1.PendingPopulation: continue case cdiv1.Failed: c.recorder.Eventf(vm, k8score.EventTypeWarning, controller.FailedDataVolumeImportReason, "DataVolume %s failed to import disk image", curDataVolume.Name) case cdiv1.Pending: if err := storagetypes.HasDataVolumeExceededQuotaError(curDataVolume); err != nil { c.recorder.Eventf(vm, k8score.EventTypeWarning, controller.FailedDataVolumeImportReason, "DataVolume %s exceeds quota limits", curDataVolume.Name) return false, err } } // ready = false because encountered DataVolume that is not populated yet ready = false } } return ready, nil } func (c *Controller) VMICPUsPatch(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) error { patchSet := patch.New( patch.WithTest("/spec/domain/cpu/sockets", vmi.Spec.Domain.CPU.Sockets), patch.WithReplace("/spec/domain/cpu/sockets", vm.Spec.Template.Spec.Domain.CPU.Sockets), ) vcpusDelta := hardware.GetNumberOfVCPUs(vm.Spec.Template.Spec.Domain.CPU) - hardware.GetNumberOfVCPUs(vmi.Spec.Domain.CPU) resourcesDelta := resource.NewMilliQuantity(vcpusDelta*int64(1000/c.clusterConfig.GetCPUAllocationRatio()), resource.DecimalSI) logMsg := fmt.Sprintf("hotplugging cpu to %v sockets", vm.Spec.Template.Spec.Domain.CPU.Sockets) if !vm.Spec.Template.Spec.Domain.Resources.Requests.Cpu().IsZero() { newCpuReq := vmi.Spec.Domain.Resources.Requests.Cpu().DeepCopy() newCpuReq.Add(*resourcesDelta) patchSet.AddOption( patch.WithTest("/spec/domain/resources/requests/cpu", vmi.Spec.Domain.Resources.Requests.Cpu().String()), patch.WithReplace("/spec/domain/resources/requests/cpu", newCpuReq.String()), ) logMsg = fmt.Sprintf("%s, setting requests to %s", logMsg, newCpuReq.String()) } if !vm.Spec.Template.Spec.Domain.Resources.Limits.Cpu().IsZero() { newCpuLimit := vmi.Spec.Domain.Resources.Limits.Cpu().DeepCopy() newCpuLimit.Add(*resourcesDelta) patchSet.AddOption( patch.WithTest("/spec/domain/resources/limits/cpu", vmi.Spec.Domain.Resources.Limits.Cpu().String()), patch.WithReplace("/spec/domain/resources/limits/cpu", newCpuLimit.String()), ) logMsg = fmt.Sprintf("%s, setting limits to %s", logMsg, newCpuLimit.String()) } patchBytes, err := patchSet.GeneratePayload() if err != nil { return err } _, err = c.clientset.VirtualMachineInstance(vmi.Namespace).Patch(context.Background(), vmi.Name, types.JSONPatchType, patchBytes, v1.PatchOptions{}) if err == nil { log.Log.Object(vmi).Infof(logMsg) } return err } func (c *Controller) handleCPUChangeRequest(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) error { if vmi == nil || vmi.DeletionTimestamp != nil { return nil } vmCopyWithInstancetype := vm.DeepCopy() if err := c.instancetypeController.ApplyToVM(vmCopyWithInstancetype); err != nil { return err } if vmCopyWithInstancetype.Spec.Template.Spec.Domain.CPU == nil || vmi.Spec.Domain.CPU == nil { return nil } if vmCopyWithInstancetype.Spec.Template.Spec.Domain.CPU.Sockets == vmi.Spec.Domain.CPU.Sockets { return nil } vmiConditions := controller.NewVirtualMachineInstanceConditionManager() if vmiConditions.HasConditionWithStatus(vmi, virtv1.VirtualMachineInstanceVCPUChange, k8score.ConditionTrue) { return fmt.Errorf("another CPU hotplug is in progress") } if migrations.IsMigrating(vmi) { return fmt.Errorf("CPU hotplug is not allowed while VMI is migrating") } // If the following is true, MaxSockets was calculated, not manually specified (or the validation webhook would have rejected the change). 
// Since we're here, we can also assume MaxSockets was not changed in the VM spec since last boot. // Therefore, bumping Sockets to a value higher than MaxSockets is fine, it just requires a reboot. if vmCopyWithInstancetype.Spec.Template.Spec.Domain.CPU.Sockets > vmi.Spec.Domain.CPU.MaxSockets { setRestartRequired(vm, "CPU sockets updated in template spec to a value higher than what's available") return nil } if vmCopyWithInstancetype.Spec.Template.Spec.Domain.CPU.Sockets < vmi.Spec.Domain.CPU.Sockets { setRestartRequired(vm, "Reduction of CPU socket count requires a restart") return nil } networkInterfaceMultiQueue := vmCopyWithInstancetype.Spec.Template.Spec.Domain.Devices.NetworkInterfaceMultiQueue if networkInterfaceMultiQueue != nil && *networkInterfaceMultiQueue { setRestartRequired(vm, "Changes to CPU sockets require a restart when NetworkInterfaceMultiQueue is enabled") return nil } if err := c.VMICPUsPatch(vmCopyWithInstancetype, vmi); err != nil { log.Log.Object(vmi).Errorf("unable to patch vmi to add cpu topology status: %v", err) return err } return nil } func (c *Controller) VMNodeSelectorPatch(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) error { patchset := patch.New() if vm.Spec.Template.Spec.NodeSelector != nil { vmNodeSelector := maps.Clone(vm.Spec.Template.Spec.NodeSelector) if vmNodeSelector == nil { vmNodeSelector = make(map[string]string) } if vmi.Spec.NodeSelector == nil { patchset.AddOption(patch.WithAdd("/spec/nodeSelector", vmNodeSelector)) } else { patchset.AddOption( patch.WithTest("/spec/nodeSelector", vmi.Spec.NodeSelector), patch.WithReplace("/spec/nodeSelector", vmNodeSelector)) } } else { patchset.AddOption(patch.WithRemove("/spec/nodeSelector")) } generatedPatch, err := patchset.GeneratePayload() if err != nil { return err } _, err = c.clientset.VirtualMachineInstance(vmi.Namespace).Patch(context.Background(), vmi.Name, types.JSONPatchType, generatedPatch, metav1.PatchOptions{}) return err } func (c *Controller) VMIAffinityPatch(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) error { patchset := patch.New() if vm.Spec.Template.Spec.Affinity != nil { if vmi.Spec.Affinity == nil { patchset.AddOption(patch.WithAdd("/spec/affinity", vm.Spec.Template.Spec.Affinity)) } else { patchset.AddOption( patch.WithTest("/spec/affinity", vmi.Spec.Affinity), patch.WithReplace("/spec/affinity", vm.Spec.Template.Spec.Affinity)) } } else { patchset.AddOption(patch.WithRemove("/spec/affinity")) } generatedPatch, err := patchset.GeneratePayload() if err != nil { return err } _, err = c.clientset.VirtualMachineInstance(vmi.Namespace).Patch(context.Background(), vmi.Name, types.JSONPatchType, generatedPatch, metav1.PatchOptions{}) return err } func (c *Controller) vmiTolerationsPatch(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) error { patchset := patch.New() if vm.Spec.Template.Spec.Tolerations != nil { if vmi.Spec.Tolerations == nil { patchset.AddOption(patch.WithAdd("/spec/tolerations", vm.Spec.Template.Spec.Tolerations)) } else { patchset.AddOption( patch.WithTest("/spec/tolerations", vmi.Spec.Tolerations), patch.WithReplace("/spec/tolerations", vm.Spec.Template.Spec.Tolerations)) } } else { patchset.AddOption(patch.WithRemove("/spec/tolerations")) } generatedPatch, err := patchset.GeneratePayload() if err != nil { return err } _, err = c.clientset.VirtualMachineInstance(vmi.Namespace).Patch(context.Background(), vmi.Name, types.JSONPatchType, generatedPatch, metav1.PatchOptions{}) return err } func (c *Controller) 
handleTolerationsChangeRequest(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) error { if vmi == nil || vmi.DeletionTimestamp != nil { return nil } vmCopyWithInstancetype := vm.DeepCopy() if err := c.instancetypeController.ApplyToVM(vmCopyWithInstancetype); err != nil { return err } if equality.Semantic.DeepEqual(vmCopyWithInstancetype.Spec.Template.Spec.Tolerations, vmi.Spec.Tolerations) { return nil } if migrations.IsMigrating(vmi) { return fmt.Errorf("tolerations should not be changed during VMI migration") } if err := c.vmiTolerationsPatch(vmCopyWithInstancetype, vmi); err != nil { log.Log.Object(vmi).Errorf("unable to patch vmi to update tolerations: %v", err) return err } return nil } func (c *Controller) handleAffinityChangeRequest(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) error { if vmi == nil || vmi.DeletionTimestamp != nil { return nil } vmCopyWithInstancetype := vm.DeepCopy() if err := c.instancetypeController.ApplyToVM(vmCopyWithInstancetype); err != nil { return err } hasNodeSelectorChanged := !equality.Semantic.DeepEqual(vmCopyWithInstancetype.Spec.Template.Spec.NodeSelector, vmi.Spec.NodeSelector) hasNodeAffinityChanged := !equality.Semantic.DeepEqual(vmCopyWithInstancetype.Spec.Template.Spec.Affinity, vmi.Spec.Affinity) if migrations.IsMigrating(vmi) && (hasNodeSelectorChanged || hasNodeAffinityChanged) { return fmt.Errorf("Node affinity should not be changed during VMI migration") } if hasNodeAffinityChanged { if err := c.VMIAffinityPatch(vmCopyWithInstancetype, vmi); err != nil { log.Log.Object(vmi).Errorf("unable to patch vmi to update node affinity: %v", err) return err } } if hasNodeSelectorChanged { if err := c.VMNodeSelectorPatch(vmCopyWithInstancetype, vmi); err != nil { log.Log.Object(vmi).Errorf("unable to patch vmi to update node selector: %v", err) return err } } return nil } func (c *Controller) handleVolumeRequests(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) error { if len(vm.Status.VolumeRequests) == 0 { return nil } vmiVolumeMap := make(map[string]virtv1.Volume) if vmi != nil { for _, volume := range vmi.Spec.Volumes { vmiVolumeMap[volume.Name] = volume } } for i, request := range vm.Status.VolumeRequests { vm.Spec.Template.Spec = *controller.ApplyVolumeRequestOnVMISpec(&vm.Spec.Template.Spec, &vm.Status.VolumeRequests[i]) if vmi == nil || vmi.DeletionTimestamp != nil { continue } if request.AddVolumeOptions != nil { if _, exists := vmiVolumeMap[request.AddVolumeOptions.Name]; exists { continue } if err := c.clientset.VirtualMachineInstance(vmi.Namespace).AddVolume(context.Background(), vmi.Name, request.AddVolumeOptions); err != nil { return err } } else if request.RemoveVolumeOptions != nil { if _, exists := vmiVolumeMap[request.RemoveVolumeOptions.Name]; !exists { continue } if err := c.clientset.VirtualMachineInstance(vmi.Namespace).RemoveVolume(context.Background(), vmi.Name, request.RemoveVolumeOptions); err != nil { return err } } } return nil } func (c *Controller) handleVolumeUpdateRequest(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) error { if vmi == nil { return nil } // The pull policy for container disks are only set on the VMI spec and not on the VM spec. // In order to correctly compare the volumes set, we need to set the pull policy on the VM spec as well. 
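	// As an illustration (hypothetical values): if the VM template declares a containerDisk
	// volume with no imagePullPolicy while the running VMI reports IfNotPresent (defaulted when
	// the VMI was created), copying the VMI's policy onto the working copy below keeps the later
	// DeepEqual comparison from reporting a spurious volume update.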
vmCopy := vm.DeepCopy() volsVMI := storagetypes.GetVolumesByName(&vmi.Spec) for i, volume := range vmCopy.Spec.Template.Spec.Volumes { vmiVol, ok := volsVMI[volume.Name] if !ok { continue } if vmiVol.ContainerDisk != nil { vmCopy.Spec.Template.Spec.Volumes[i].ContainerDisk.ImagePullPolicy = vmiVol.ContainerDisk.ImagePullPolicy } } hotplugOp := false volsVM := storagetypes.GetVolumesByName(&vmCopy.Spec.Template.Spec) for _, volume := range vmi.Spec.Volumes { hotpluggableVol := (volume.VolumeSource.PersistentVolumeClaim != nil && volume.VolumeSource.PersistentVolumeClaim.Hotpluggable) || (volume.VolumeSource.DataVolume != nil && volume.VolumeSource.DataVolume.Hotpluggable) _, ok := volsVM[volume.Name] if !ok && hotpluggableVol { hotplugOp = true } } if hotplugOp { return nil } if equality.Semantic.DeepEqual(vmi.Spec.Volumes, vmCopy.Spec.Template.Spec.Volumes) { return nil } vmConditions := controller.NewVirtualMachineConditionManager() // Abort the volume migration if any of the previous migrated volumes // has changed if volMigAbort, err := volumemig.VolumeMigrationCancel(c.clientset, vmi, vm); volMigAbort { if err == nil { log.Log.Object(vm).Infof("Cancel volume migration") } return err } switch { case vm.Spec.UpdateVolumesStrategy == nil || *vm.Spec.UpdateVolumesStrategy == virtv1.UpdateVolumesStrategyReplacement: if !vmConditions.HasCondition(vm, virtv1.VirtualMachineRestartRequired) { log.Log.Object(vm).Infof("Set restart required condition because of a volumes update") setRestartRequired(vm, "the volumes replacement is effective only after restart") } case *vm.Spec.UpdateVolumesStrategy == virtv1.UpdateVolumesStrategyMigration: // Validate if the update volumes can be migrated if err := volumemig.ValidateVolumes(vmi, vm, c.dataVolumeStore, c.pvcStore); err != nil { log.Log.Object(vm).Errorf("cannot migrate the VM. 
Volumes are invalid: %v", err) setRestartRequired(vm, err.Error()) return nil } migVols, err := volumemig.GenerateMigratedVolumes(c.pvcStore, vmi, vm) if err != nil { log.Log.Object(vm).Errorf("failed to generate the migrating volumes for vm: %v", err) return err } if err := volumemig.ValidateVolumesUpdateMigration(vmi, vm, migVols); err != nil { log.Log.Object(vm).Errorf("cannot migrate the VMI: %v", err) setRestartRequired(vm, err.Error()) return nil } if err := volumemig.PatchVMIStatusWithMigratedVolumes(c.clientset, migVols, vmi); err != nil { log.Log.Object(vm).Errorf("failed to update migrating volumes for vmi:%v", err) return err } log.Log.Object(vm).Infof("Updated migrating volumes in the status") if _, err := volumemig.PatchVMIVolumes(c.clientset, vmi, vm); err != nil { log.Log.Object(vm).Errorf("failed to update volumes for vmi:%v", err) return err } log.Log.Object(vm).Infof("Updated volumes for vmi") if vm.Status.VolumeUpdateState == nil { vm.Status.VolumeUpdateState = &virtv1.VolumeUpdateState{} } if len(migVols) > 0 { vm.Status.VolumeUpdateState.VolumeMigrationState = &virtv1.VolumeMigrationState{ MigratedVolumes: migVols, } } default: return fmt.Errorf("updateVolumes strategy not recognized: %s", *vm.Spec.UpdateVolumesStrategy) } return nil } func (c *Controller) addStartRequest(vm *virtv1.VirtualMachine) error { desiredStateChangeRequests := append(vm.Status.StateChangeRequests, virtv1.VirtualMachineStateChangeRequest{Action: virtv1.StartRequest}) patchSet := patch.New() patchSet.AddOption(patch.WithAdd("/status/stateChangeRequests", desiredStateChangeRequests)) patchBytes, err := patchSet.GeneratePayload() if err != nil { return err } patchedVM, err := c.clientset.VirtualMachine(vm.Namespace).PatchStatus(context.Background(), vm.Name, types.JSONPatchType, patchBytes, metav1.PatchOptions{}) if err != nil { return err } vm.Status = patchedVM.Status return nil } func (c *Controller) syncRunStrategy(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance, runStrategy virtv1.VirtualMachineRunStrategy) (*virtv1.VirtualMachine, common.SyncError) { vmKey, err := controller.KeyFunc(vm) if err != nil { log.Log.Object(vm).Errorf(fetchingVMKeyErrFmt, err) return vm, common.NewSyncError(err, failedCreateReason) } log.Log.Object(vm).V(4).Infof("VirtualMachine RunStrategy: %s", runStrategy) switch runStrategy { case virtv1.RunStrategyAlways: // For this RunStrategy, a VMI should always be running. If a StateChangeRequest // asks to stop a VMI, a new one must be immediately re-started. if vmi != nil { var forceRestart bool if forceRestart = hasStopRequestForVMI(vm, vmi); forceRestart { log.Log.Object(vm).Infof("processing forced restart request for VMI with phase %s and VM runStrategy: %s", vmi.Status.Phase, runStrategy) } if forceRestart || vmi.IsFinal() { log.Log.Object(vm).Infof("%s with VMI in phase %s and VM runStrategy: %s", stoppingVmMsg, vmi.Status.Phase, runStrategy) // The VirtualMachineInstance can fail or be finished. The job of this controller // is keep the VirtualMachineInstance running, therefore it restarts it. 
// restarting VirtualMachineInstance by stopping it and letting it start in next step log.Log.Object(vm).V(4).Info(stoppingVmMsg) vm, err = c.stopVMI(vm, vmi) if err != nil { log.Log.Object(vm).Errorf(failureDeletingVmiErrFormat, err) return vm, common.NewSyncError(fmt.Errorf(failureDeletingVmiErrFormat, err), vmiFailedDeleteReason) } // return to let the controller pick up the expected deletion } // VirtualMachineInstance is OK no need to do anything return vm, nil } timeLeft := startFailureBackoffTimeLeft(vm) if timeLeft > 0 { log.Log.Object(vm).Infof("Delaying start of VM %s with 'runStrategy: %s' due to start failure backoff. Waiting %d more seconds before starting.", startingVmMsg, runStrategy, timeLeft) c.Queue.AddAfter(vmKey, time.Duration(timeLeft)*time.Second) return vm, nil } log.Log.Object(vm).Infof("%s due to runStrategy: %s", startingVmMsg, runStrategy) vm, err = c.startVMI(vm) if err != nil { return vm, common.NewSyncError(fmt.Errorf(startingVMIFailureFmt, err), failedCreateReason) } return vm, nil case virtv1.RunStrategyRerunOnFailure: // For this RunStrategy, a VMI should only be restarted if it failed. // If a VMI enters the Succeeded phase, it should not be restarted. if vmi != nil { forceStop := hasStopRequestForVMI(vm, vmi) if forceStop { log.Log.Object(vm).Infof("processing stop request for VMI with phase %s and VM runStrategy: %s", vmi.Status.Phase, runStrategy) } vmiFailed := vmi.Status.Phase == virtv1.Failed vmiSucceeded := vmi.Status.Phase == virtv1.Succeeded if vmi.DeletionTimestamp == nil && (forceStop || vmiFailed || vmiSucceeded) { // For RerunOnFailure, this controller should only restart the VirtualMachineInstance if it failed. log.Log.Object(vm).Infof("%s with VMI in phase %s and VM runStrategy: %s", stoppingVmMsg, vmi.Status.Phase, runStrategy) vm, err = c.stopVMI(vm, vmi) if err != nil { log.Log.Object(vm).Errorf(failureDeletingVmiErrFormat, err) return vm, common.NewSyncError(fmt.Errorf(failureDeletingVmiErrFormat, err), vmiFailedDeleteReason) } if vmiFailed { if err := c.addStartRequest(vm); err != nil { return vm, common.NewSyncError(fmt.Errorf("failed to patch VM with start action: %v", err), vmiFailedDeleteReason) } } } // return to let the controller pick up the expected deletion return vm, nil } // when coming here from a different RunStrategy we have to start the VM if !hasStartRequest(vm) && vm.Status.RunStrategy == runStrategy { return vm, nil } timeLeft := startFailureBackoffTimeLeft(vm) if timeLeft > 0 { log.Log.Object(vm).Infof("Delaying start of VM %s with 'runStrategy: %s' due to start failure backoff. 
Waiting %d more seconds before starting.", startingVmMsg, runStrategy, timeLeft) c.Queue.AddAfter(vmKey, time.Duration(timeLeft)*time.Second) return vm, nil } log.Log.Object(vm).Infof("%s due to runStrategy: %s", startingVmMsg, runStrategy) vm, err = c.startVMI(vm) if err != nil { return vm, common.NewSyncError(fmt.Errorf(startingVMIFailureFmt, err), failedCreateReason) } return vm, nil case virtv1.RunStrategyManual: // For this RunStrategy, VMI's will be started/stopped/restarted using api endpoints only if vmi != nil { log.Log.Object(vm).V(4).Info("VMI exists") if forceStop := hasStopRequestForVMI(vm, vmi); forceStop { log.Log.Object(vm).Infof("%s with VMI in phase %s due to stop request and VM runStrategy: %s", vmi.Status.Phase, stoppingVmMsg, runStrategy) vm, err = c.stopVMI(vm, vmi) if err != nil { log.Log.Object(vm).Errorf(failureDeletingVmiErrFormat, err) return vm, common.NewSyncError(fmt.Errorf(failureDeletingVmiErrFormat, err), vmiFailedDeleteReason) } // return to let the controller pick up the expected deletion return vm, nil } } else { if hasStartRequest(vm) { log.Log.Object(vm).Infof("%s due to start request and runStrategy: %s", startingVmMsg, runStrategy) vm, err = c.startVMI(vm) if err != nil { return vm, common.NewSyncError(fmt.Errorf(startingVMIFailureFmt, err), failedCreateReason) } } } return vm, nil case virtv1.RunStrategyHalted: // For this runStrategy, no VMI should be running under any circumstances. // Set RunStrategyAlways/running = true if VM has StartRequest(start paused case). if vmi == nil { if hasStartRequest(vm) { vmCopy := vm.DeepCopy() runStrategy := virtv1.RunStrategyAlways running := true if vmCopy.Spec.RunStrategy != nil { vmCopy.Spec.RunStrategy = &runStrategy } else { vmCopy.Spec.Running = &running } _, err := c.clientset.VirtualMachine(vmCopy.Namespace).Update(context.Background(), vmCopy, metav1.UpdateOptions{}) return vm, common.NewSyncError(fmt.Errorf(startingVMIFailureFmt, err), failedCreateReason) } return vm, nil } log.Log.Object(vm).Infof("%s with VMI in phase %s due to runStrategy: %s", stoppingVmMsg, vmi.Status.Phase, runStrategy) vm, err = c.stopVMI(vm, vmi) if err != nil { return vm, common.NewSyncError(fmt.Errorf(failureDeletingVmiErrFormat, err), vmiFailedDeleteReason) } return vm, nil case virtv1.RunStrategyOnce: if vmi == nil { log.Log.Object(vm).Infof("%s due to start request and runStrategy: %s", startingVmMsg, runStrategy) vm, err = c.startVMI(vm) if err != nil { return vm, common.NewSyncError(fmt.Errorf(startingVMIFailureFmt, err), failedCreateReason) } } return vm, nil default: return vm, common.NewSyncError(fmt.Errorf("unknown runstrategy: %s", runStrategy), failedCreateReason) } } // isVMIStartExpected determines whether a VMI is expected to be started for this VM. func (c *Controller) isVMIStartExpected(vm *virtv1.VirtualMachine) bool { vmKey, err := controller.KeyFunc(vm) if err != nil { log.Log.Object(vm).Errorf(fetchingVMKeyErrFmt, err) return false } expectations, exists, _ := c.expectations.GetExpectations(vmKey) if !exists || expectations == nil { return false } adds, _ := expectations.GetExpectations() return adds > 0 } // isVMIStopExpected determines whether a VMI is expected to be stopped for this VM. 
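// Like isVMIStartExpected above, it consults the UID-tracking expectations: a positive
// pending-deletion count means stopVMI has issued a delete that the informer cache has not
// reported back yet.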
func (c *Controller) isVMIStopExpected(vm *virtv1.VirtualMachine) bool { vmKey, err := controller.KeyFunc(vm) if err != nil { log.Log.Object(vm).Errorf(fetchingVMKeyErrFmt, err) return false } expectations, exists, _ := c.expectations.GetExpectations(vmKey) if !exists || expectations == nil { return false } _, dels := expectations.GetExpectations() return dels > 0 } // isSetToStart determines whether a VM is configured to be started (running). func isSetToStart(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) bool { runStrategy, err := vm.RunStrategy() if err != nil { log.Log.Object(vm).Errorf(fetchingRunStrategyErrFmt, err) return false } switch runStrategy { case virtv1.RunStrategyAlways: return true case virtv1.RunStrategyHalted: return false case virtv1.RunStrategyManual: if vmi != nil { return !hasStopRequestForVMI(vm, vmi) } return hasStartRequest(vm) case virtv1.RunStrategyRerunOnFailure: if vmi != nil { return vmi.Status.Phase != virtv1.Succeeded } return true case virtv1.RunStrategyOnce: if vmi == nil { return true } return false default: // Shouldn't ever be here, but... return false } } func (c *Controller) cleanupRestartRequired(vm *virtv1.VirtualMachine) (*virtv1.VirtualMachine, error) { vmConditionManager := controller.NewVirtualMachineConditionManager() if vmConditionManager.HasCondition(vm, virtv1.VirtualMachineRestartRequired) { vmConditionManager.RemoveCondition(vm, virtv1.VirtualMachineRestartRequired) } return vm, c.deleteVMRevisions(vm) } func (c *Controller) startVMI(vm *virtv1.VirtualMachine) (*virtv1.VirtualMachine, error) { ready, err := c.handleDataVolumes(vm) if err != nil { return vm, err } if !ready { log.Log.Object(vm).V(4).Info("Waiting for DataVolumes to be created, delaying start") return vm, nil } if controller.NewVirtualMachineConditionManager().HasConditionWithStatus(vm, virtv1.VirtualMachineManualRecoveryRequired, k8score.ConditionTrue) { log.Log.Object(vm).Reason(err).Error(failedManualRecoveryRequiredCondSetErrMsg) return vm, nil } // TODO add check for existence vmKey, err := controller.KeyFunc(vm) if err != nil { log.Log.Object(vm).Reason(err).Error(failedExtractVmkeyFromVmErrMsg) return vm, nil } vm, err = c.cleanupRestartRequired(vm) if err != nil { log.Log.Object(vm).Reason(err).Error(failedCleanupRestartRequired) return vm, err } // start it vmi := c.setupVMIFromVM(vm) vmRevisionName, err := c.createVMRevision(vm) if err != nil { log.Log.Object(vm).Reason(err).Error(failedCreateCRforVmErrMsg) return vm, err } vmi.Status.VirtualMachineRevisionName = vmRevisionName setGenerationAnnotationOnVmi(vm.Generation, vmi) // add a finalizer to ensure the VM controller has a chance to see // the VMI before it is deleted vmi.Finalizers = append(vmi.Finalizers, virtv1.VirtualMachineControllerFinalizer) // We need to apply device preferences before any new network or input devices are added. Doing so allows // any autoAttach preferences we might have to be applied, either enabling or disabling the attachment of these devices. 
if err := c.instancetypeController.ApplyDevicePreferences(vm, vmi); err != nil { log.Log.Object(vm).Infof("Failed to apply device preferences again to VirtualMachineInstance: %s/%s", vmi.Namespace, vmi.Name) c.recorder.Eventf(vm, k8score.EventTypeWarning, common.FailedCreateVirtualMachineReason, "Error applying device preferences again: %v", err) return vm, err } util.SetDefaultVolumeDisk(&vmi.Spec) autoAttachInputDevice(vmi) err = vmispec.SetDefaultNetworkInterface(c.clusterConfig, &vmi.Spec) if err != nil { return vm, err } if err = c.instancetypeController.ApplyToVMI(vm, vmi); err != nil { log.Log.Object(vm).Infof("Failed to apply instancetype to VirtualMachineInstance: %s/%s", vmi.Namespace, vmi.Name) c.recorder.Eventf(vm, k8score.EventTypeWarning, common.FailedCreateVirtualMachineReason, "Error creating virtual machine instance: Failed to apply instancetype: %v", err) return vm, err } netValidator := netadmitter.NewValidator(k8sfield.NewPath("spec"), &vmi.Spec, c.clusterConfig) var validateErrors []error for _, cause := range netValidator.ValidateCreation() { validateErrors = append(validateErrors, errors.New(cause.String())) } if validateErr := errors.Join(validateErrors...); validateErrors != nil { return vm, fmt.Errorf("failed create validation: %v", validateErr) } c.expectations.ExpectCreations(vmKey, 1) vmi, err = c.clientset.VirtualMachineInstance(vm.ObjectMeta.Namespace).Create(context.Background(), vmi, metav1.CreateOptions{}) if err != nil { log.Log.Object(vm).Infof("Failed to create VirtualMachineInstance: %s", controller.NamespacedKey(vmi.Namespace, vmi.Name)) c.expectations.CreationObserved(vmKey) c.recorder.Eventf(vm, k8score.EventTypeWarning, common.FailedCreateVirtualMachineReason, "Error creating virtual machine instance: %v", err) return vm, err } log.Log.Object(vm).Infof("Started VM by creating the new virtual machine instance %s", vmi.Name) c.recorder.Eventf(vm, k8score.EventTypeNormal, common.SuccessfulCreateVirtualMachineReason, "Started the virtual machine by creating the new virtual machine instance %v", vmi.ObjectMeta.Name) return vm, nil } func setGenerationAnnotation(generation int64, annotations map[string]string) { annotations[virtv1.VirtualMachineGenerationAnnotation] = strconv.FormatInt(generation, 10) } func setGenerationAnnotationOnVmi(generation int64, vmi *virtv1.VirtualMachineInstance) { annotations := vmi.GetAnnotations() if annotations == nil { annotations = make(map[string]string) } setGenerationAnnotation(generation, annotations) vmi.SetAnnotations(annotations) } func (c *Controller) patchVmGenerationAnnotationOnVmi(generation int64, vmi *virtv1.VirtualMachineInstance) (*virtv1.VirtualMachineInstance, error) { oldAnnotations := vmi.Annotations newAnnotations := map[string]string{} maps.Copy(newAnnotations, oldAnnotations) setGenerationAnnotation(generation, newAnnotations) patchBytes, err := patch.New( patch.WithTest("/metadata/annotations", oldAnnotations), patch.WithReplace("/metadata/annotations", newAnnotations)).GeneratePayload() if err != nil { return vmi, err } patchedVMI, err := c.clientset.VirtualMachineInstance(vmi.Namespace).Patch(context.Background(), vmi.Name, types.JSONPatchType, patchBytes, metav1.PatchOptions{}) if err != nil { return vmi, err } return patchedVMI, nil } // getGenerationAnnotation will return the generation annotation on the // vmi as an string pointer. The string pointer will be nil if the annotation is // not found. 
func getGenerationAnnotation(vmi *virtv1.VirtualMachineInstance) (i *string, err error) {
	if vmi == nil {
		return nil, errors.New("received nil pointer for vmi")
	}

	currentGenerationAnnotation, found := vmi.Annotations[virtv1.VirtualMachineGenerationAnnotation]
	if found {
		return &currentGenerationAnnotation, nil
	}

	return nil, nil
}

// getGenerationAnnotationAsInt will return the generation annotation on the
// vmi as an int64 pointer. The int64 pointer will be nil if the annotation is
// not found.
func getGenerationAnnotationAsInt(vmi *virtv1.VirtualMachineInstance, logger *log.FilteredLogger) (i *int64, err error) {
	if vmi == nil {
		return nil, errors.New("received nil pointer for vmi")
	}

	currentGenerationAnnotation, found := vmi.Annotations[virtv1.VirtualMachineGenerationAnnotation]
	if found {
		i, err := strconv.ParseInt(currentGenerationAnnotation, 10, 64)
		if err != nil {
			// If there is an error during parsing, it will be treated as if the
			// annotation does not exist since the annotation is not formatted
			// correctly. Further iterations / logic in the controller will handle
			// re-annotating this by the controller revision. Still log the error for
			// debugging, since there should never be a ParseInt error during normal
			// use.
			logger.Reason(err).Errorf("Failed to parse virtv1.VirtualMachineGenerationAnnotation as an int from vmi %v annotations", vmi.Name)
			return nil, nil
		}
		return &i, nil
	}

	return nil, nil
}

// Follows the template used in createVMRevision for the Data.Raw value
type VirtualMachineRevisionData struct {
	Spec virtv1.VirtualMachineSpec `json:"spec"`
}

// conditionallyBumpGenerationAnnotationOnVmi will check whether the
// generation annotation needs to be bumped on the VMI, and then bump that
// annotation if needed. The checks are:
// 1. If the generation has not changed, do not bump.
// 2. Only bump if the templates are the same.
//
// Note that if only the Run Strategy of the VM has changed, the generation
// annotation will still be bumped, since this does not affect the VMI.
func (c *Controller) conditionallyBumpGenerationAnnotationOnVmi(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) (*virtv1.VirtualMachineInstance, error) {
	if vmi == nil || vm == nil {
		return vmi, nil
	}

	// If this is an old vmi created before a controller update, then the
	// annotation may not exist. In that case, continue on as if the generation
	// annotation needs to be bumped.
	currentGeneration, err := getGenerationAnnotation(vmi)
	if err != nil {
		return vmi, err
	}
	if currentGeneration != nil && *currentGeneration == strconv.FormatInt(vm.Generation, 10) {
		return vmi, nil
	}

	currentRevision, err := c.getControllerRevision(vmi.Namespace, vmi.Status.VirtualMachineRevisionName)
	if currentRevision == nil || err != nil {
		return vmi, err
	}

	revisionSpec := &VirtualMachineRevisionData{}
	if err = json.Unmarshal(currentRevision.Data.Raw, revisionSpec); err != nil {
		return vmi, err
	}

	// If the templates are the same, we can safely bump the annotation.
	if equality.Semantic.DeepEqual(revisionSpec.Spec.Template, vm.Spec.Template) {
		patchedVMI, err := c.patchVmGenerationAnnotationOnVmi(vm.Generation, vmi)
		if err != nil {
			return vmi, err
		}
		vmi = patchedVMI
	}

	return vmi, nil
}

// Returns in seconds how long to wait before trying to start the VM again.
func calculateStartBackoffTime(failCount int, maxDelay int) int {
	// The algorithm is designed to work well with a dynamic maxDelay
	// if we decide to expose this as a tuning in the future.
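	// Worked example with the default maxDelay of 300 seconds: interval = max(300/30, 10) = 10,
	// so a third consecutive failure (failCount = 3) gives a base delay of 10 * 3^2 = 90 seconds,
	// plus up to 45 seconds of random jitter, with the result capped at maxDelay.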
minInterval := 10 delaySeconds := 0 if failCount <= 0 { failCount = 1 } multiplier := int(math.Pow(float64(failCount), float64(2))) interval := maxDelay / 30 if interval < minInterval { interval = minInterval } delaySeconds = interval * multiplier randomRange := (delaySeconds / 2) + 1 // add randomized seconds to offset multiple failing VMs from one another delaySeconds += rand.Intn(randomRange) if delaySeconds > maxDelay { delaySeconds = maxDelay } return delaySeconds } // Reports if vmi has ever hit a running state func wasVMIInRunningPhase(vmi *virtv1.VirtualMachineInstance) bool { if vmi == nil { return false } for _, ts := range vmi.Status.PhaseTransitionTimestamps { if ts.Phase == virtv1.Running { return true } } return false } // Reports if vmi failed before ever hitting a running state func vmiFailedEarly(vmi *virtv1.VirtualMachineInstance) bool { if vmi == nil || !vmi.IsFinal() { return false } if wasVMIInRunningPhase(vmi) { return false } return true } // clear start failure tracking if... // 1. VMI exists and ever hit running phase // 2. run strategy is not set to automatically restart failed VMIs func shouldClearStartFailure(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) bool { if wasVMIInRunningPhase(vmi) { return true } runStrategy, err := vm.RunStrategy() if err != nil { log.Log.Object(vm).Errorf(fetchingRunStrategyErrFmt, err) return false } if runStrategy != virtv1.RunStrategyAlways && runStrategy != virtv1.RunStrategyRerunOnFailure && runStrategy != virtv1.RunStrategyOnce { return true } return false } func startFailureBackoffTimeLeft(vm *virtv1.VirtualMachine) int64 { if vm.Status.StartFailure == nil { return 0 } now := time.Now().UTC().Unix() retryAfter := vm.Status.StartFailure.RetryAfterTimestamp.Time.UTC().Unix() diff := retryAfter - now if diff > 0 { return diff } return 0 } func syncStartFailureStatus(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) { if shouldClearStartFailure(vm, vmi) { // if a vmi associated with the vm hits a running phase, then reset the start failure counter vm.Status.StartFailure = nil } else if vmi != nil && vmiFailedEarly(vmi) { // if the VMI failed without ever hitting running successfully, // record this as a start failure so we can back off retrying if vm.Status.StartFailure != nil && vm.Status.StartFailure.LastFailedVMIUID == vmi.UID { // already counted this failure return } count := 1 if vm.Status.StartFailure != nil { count = vm.Status.StartFailure.ConsecutiveFailCount + 1 } now := metav1.NewTime(time.Now()) delaySeconds := calculateStartBackoffTime(count, defaultMaxCrashLoopBackoffDelaySeconds) retryAfter := metav1.NewTime(now.Time.Add(time.Duration(int64(delaySeconds)) * time.Second)) vm.Status.StartFailure = &virtv1.VirtualMachineStartFailure{ LastFailedVMIUID: vmi.UID, RetryAfterTimestamp: &retryAfter, ConsecutiveFailCount: count, } } } func syncVolumeMigration(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) { if vm.Status.VolumeUpdateState == nil || vm.Status.VolumeUpdateState.VolumeMigrationState == nil { return } vmCond := controller.NewVirtualMachineConditionManager() vmiCond := controller.NewVirtualMachineInstanceConditionManager() // Check if the volumes have been recovered and point to the original ones srcMigVols := make(map[string]string) for _, v := range vm.Status.VolumeUpdateState.VolumeMigrationState.MigratedVolumes { if v.SourcePVCInfo != nil { srcMigVols[v.VolumeName] = v.SourcePVCInfo.ClaimName } } recoveredOldVMVolumes := true for _, v := range 
vm.Spec.Template.Spec.Volumes { name := storagetypes.PVCNameFromVirtVolume(&v) origName, ok := srcMigVols[v.Name] if !ok { continue } if origName != name { recoveredOldVMVolumes = false } } if recoveredOldVMVolumes || (vm.Spec.UpdateVolumesStrategy == nil || *vm.Spec.UpdateVolumesStrategy != virtv1.UpdateVolumesStrategyMigration) { vm.Status.VolumeUpdateState.VolumeMigrationState = nil // Clean-up the volume change label when the volume set has been restored vmCond.RemoveCondition(vm, virtv1.VirtualMachineConditionType(virtv1.VirtualMachineInstanceVolumesChange)) vmCond.RemoveCondition(vm, virtv1.VirtualMachineManualRecoveryRequired) return } if vmi == nil || vmi.IsFinal() { if vmCond.HasConditionWithStatus(vm, virtv1.VirtualMachineConditionType(virtv1.VirtualMachineInstanceVolumesChange), k8score.ConditionTrue) { // Something went wrong with the VMI while the volume migration was in progress vmCond.UpdateCondition(vm, &virtv1.VirtualMachineCondition{ Type: virtv1.VirtualMachineManualRecoveryRequired, Status: k8score.ConditionTrue, Reason: "VMI was removed or was final during the volume migration", }) } return } // The volume migration has been cancelled if cond := vmiCond.GetCondition(vmi, virtv1.VirtualMachineInstanceVolumesChange); cond != nil && cond.Status == k8score.ConditionFalse && cond.Reason == virtv1.VirtualMachineInstanceReasonVolumesChangeCancellation { vm.Status.VolumeUpdateState.VolumeMigrationState = nil } } // here is stop func (c *Controller) stopVMI(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) (*virtv1.VirtualMachine, error) { if vmi == nil || vmi.DeletionTimestamp != nil { // nothing to do return vm, nil } vmKey, err := controller.KeyFunc(vm) if err != nil { log.Log.Object(vm).Reason(err).Error(failedExtractVmkeyFromVmErrMsg) return vm, nil } // stop it c.expectations.ExpectDeletions(vmKey, []string{controller.VirtualMachineInstanceKey(vmi)}) err = c.clientset.VirtualMachineInstance(vm.ObjectMeta.Namespace).Delete(context.Background(), vmi.ObjectMeta.Name, metav1.DeleteOptions{}) // Don't log an error if it is already deleted if err != nil { // We can't observe a delete if it was not accepted by the server c.expectations.DeletionObserved(vmKey, controller.VirtualMachineInstanceKey(vmi)) c.recorder.Eventf(vm, k8score.EventTypeWarning, common.FailedDeleteVirtualMachineReason, "Error deleting virtual machine instance %s: %v", vmi.ObjectMeta.Name, err) return vm, err } vm, err = c.cleanupRestartRequired(vm) if err != nil { log.Log.Object(vm).Reason(err).Error(failedCleanupRestartRequired) return vm, nil } c.recorder.Eventf(vm, k8score.EventTypeNormal, common.SuccessfulDeleteVirtualMachineReason, "Stopped the virtual machine by deleting the virtual machine instance %v", vmi.ObjectMeta.UID) log.Log.Object(vm).Infof("Dispatching delete event for vmi %s with phase %s", controller.NamespacedKey(vmi.Namespace, vmi.Name), vmi.Status.Phase) return vm, nil } func popStateChangeRequest(vm *virtv1.VirtualMachine) { vm.Status.StateChangeRequests = vm.Status.StateChangeRequests[1:] } func vmRevisionName(vmUID types.UID) string { return fmt.Sprintf("revision-start-vm-%s", vmUID) } func getVMRevisionName(vmUID types.UID, generation int64) string { return fmt.Sprintf("%s-%d", vmRevisionName(vmUID), generation) } func patchVMRevision(vm *virtv1.VirtualMachine) ([]byte, error) { vmCopy := vm.DeepCopy() if revision.HasControllerRevisionRef(vmCopy.Status.InstancetypeRef) { vmCopy.Spec.Instancetype.RevisionName = vmCopy.Status.InstancetypeRef.ControllerRevisionRef.Name } if 
revision.HasControllerRevisionRef(vm.Status.PreferenceRef) { vmCopy.Spec.Preference.RevisionName = vm.Status.PreferenceRef.ControllerRevisionRef.Name } vmBytes, err := json.Marshal(vmCopy) if err != nil { return nil, err } var raw map[string]interface{} err = json.Unmarshal(vmBytes, &raw) if err != nil { return nil, err } objCopy := make(map[string]interface{}) spec := raw["spec"].(map[string]interface{}) objCopy["spec"] = spec patch, err := json.Marshal(objCopy) return patch, err } func (c *Controller) deleteOlderVMRevision(vm *virtv1.VirtualMachine) (bool, error) { keys, err := c.crIndexer.IndexKeys("vm", string(vm.UID)) if err != nil { return false, err } createNotNeeded := false for _, key := range keys { if !strings.Contains(key, vmRevisionName(vm.UID)) { continue } storeObj, exists, err := c.crIndexer.GetByKey(key) if !exists || err != nil { return false, err } cr, ok := storeObj.(*appsv1.ControllerRevision) if !ok { return false, fmt.Errorf("unexpected resource %+v", storeObj) } if cr.Revision == vm.ObjectMeta.Generation { createNotNeeded = true continue } err = c.clientset.AppsV1().ControllerRevisions(vm.Namespace).Delete(context.Background(), cr.Name, metav1.DeleteOptions{}) if err != nil { return false, err } } return createNotNeeded, nil } func (c *Controller) deleteVMRevisions(vm *virtv1.VirtualMachine) error { keys, err := c.crIndexer.IndexKeys("vm", string(vm.UID)) if err != nil { return err } for _, key := range keys { if !strings.Contains(key, vmRevisionName(vm.UID)) { continue } storeObj, exists, err := c.crIndexer.GetByKey(key) if !exists || err != nil { return err } cr, ok := storeObj.(*appsv1.ControllerRevision) if !ok { return fmt.Errorf("unexpected resource %+v", storeObj) } err = c.clientset.AppsV1().ControllerRevisions(vm.Namespace).Delete(context.Background(), cr.Name, metav1.DeleteOptions{}) if err != nil { return err } } return nil } // getControllerRevision attempts to get the controller revision by name and // namespace. It will return (nil, nil) if the controller revision is not found. 
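// Callers rely on the (nil, nil) contract; conditionallyBumpGenerationAnnotationOnVmi, for
// instance, treats a missing revision as "nothing to compare against" rather than an error.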
func (c *Controller) getControllerRevision(namespace string, name string) (*appsv1.ControllerRevision, error) { cr, err := c.clientset.AppsV1().ControllerRevisions(namespace).Get(context.Background(), name, metav1.GetOptions{}) if err != nil { if apiErrors.IsNotFound(err) { return nil, nil } return nil, err } return cr, nil } func (c *Controller) getVMSpecForKey(key string) (*virtv1.VirtualMachineSpec, error) { obj, exists, err := c.crIndexer.GetByKey(key) if err != nil { return nil, err } if !exists { return nil, fmt.Errorf("could not find key %s", key) } cr, ok := obj.(*appsv1.ControllerRevision) if !ok { return nil, fmt.Errorf("unexpected resource %+v", obj) } revisionData := VirtualMachineRevisionData{} err = json.Unmarshal(cr.Data.Raw, &revisionData) if err != nil { return nil, err } return &revisionData.Spec, nil } func genFromKey(key string) (int64, error) { items := strings.Split(key, "-") genString := items[len(items)-1] return strconv.ParseInt(genString, 10, 64) } func (c *Controller) getLastVMRevisionSpec(vm *virtv1.VirtualMachine) (*virtv1.VirtualMachineSpec, error) { keys, err := c.crIndexer.IndexKeys("vm", string(vm.UID)) if err != nil { return nil, err } if len(keys) == 0 { return nil, nil } var highestGen int64 = 0 var key string for _, k := range keys { if !strings.Contains(k, vmRevisionName(vm.UID)) { continue } gen, err := genFromKey(k) if err != nil { return nil, fmt.Errorf("invalid key: %s", k) } if gen > highestGen { if key != "" { log.Log.Object(vm).Warningf("expected no more than 1 revision, found at least 2") } highestGen = gen key = k } } if key == "" { return nil, nil } return c.getVMSpecForKey(key) } func (c *Controller) createVMRevision(vm *virtv1.VirtualMachine) (string, error) { vmRevisionName := getVMRevisionName(vm.UID, vm.Generation) createNotNeeded, err := c.deleteOlderVMRevision(vm) if err != nil || createNotNeeded { return vmRevisionName, err } patch, err := patchVMRevision(vm) if err != nil { return "", err } cr := &appsv1.ControllerRevision{ ObjectMeta: metav1.ObjectMeta{ Name: vmRevisionName, Namespace: vm.Namespace, OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(vm, virtv1.VirtualMachineGroupVersionKind)}, }, Data: runtime.RawExtension{Raw: patch}, Revision: vm.ObjectMeta.Generation, } _, err = c.clientset.AppsV1().ControllerRevisions(vm.Namespace).Create(context.Background(), cr, metav1.CreateOptions{}) if err != nil { return "", err } return cr.Name, nil } // setupVMIfromVM creates a VirtualMachineInstance object from one VirtualMachine object. func (c *Controller) setupVMIFromVM(vm *virtv1.VirtualMachine) *virtv1.VirtualMachineInstance { vmi := virtv1.NewVMIReferenceFromNameWithNS(vm.ObjectMeta.Namespace, "") vmi.ObjectMeta = *vm.Spec.Template.ObjectMeta.DeepCopy() vmi.ObjectMeta.Name = vm.ObjectMeta.Name vmi.ObjectMeta.GenerateName = "" vmi.ObjectMeta.Namespace = vm.ObjectMeta.Namespace vmi.Spec = *vm.Spec.Template.Spec.DeepCopy() if hasStartPausedRequest(vm) { strategy := virtv1.StartStrategyPaused vmi.Spec.StartStrategy = &strategy } // prevent from retriggering memory dump after shutdown if memory dump is complete if memorydump.HasCompleted(vm) { vmi.Spec = *memorydump.RemoveMemoryDumpVolumeFromVMISpec(&vmi.Spec, vm.Status.MemoryDumpRequest.ClaimName) } setupStableFirmwareUUID(vm, vmi) // TODO check if vmi labels exist, and when make sure that they match. 
	// For now just override them
	vmi.ObjectMeta.Labels = vm.Spec.Template.ObjectMeta.Labels
	vmi.ObjectMeta.OwnerReferences = []metav1.OwnerReference{
		*metav1.NewControllerRef(vm, virtv1.VirtualMachineGroupVersionKind),
	}
	return vmi
}

func hasStartPausedRequest(vm *virtv1.VirtualMachine) bool {
	if len(vm.Status.StateChangeRequests) == 0 {
		return false
	}

	stateChange := vm.Status.StateChangeRequests[0]
	pausedValue, hasPaused := stateChange.Data[virtv1.StartRequestDataPausedKey]
	return stateChange.Action == virtv1.StartRequest &&
		hasPaused &&
		pausedValue == virtv1.StartRequestDataPausedTrue
}

func hasStartRequest(vm *virtv1.VirtualMachine) bool {
	if len(vm.Status.StateChangeRequests) == 0 {
		return false
	}

	stateChange := vm.Status.StateChangeRequests[0]
	return stateChange.Action == virtv1.StartRequest
}

func hasStopRequestForVMI(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) bool {
	if len(vm.Status.StateChangeRequests) == 0 {
		return false
	}

	stateChange := vm.Status.StateChangeRequests[0]
	return stateChange.Action == virtv1.StopRequest &&
		stateChange.UID != nil &&
		*stateChange.UID == vmi.UID
}

// no special meaning, randomly generated on my box.
// TODO: do we want to use another constant? see examples in RFC4122
const magicUUID = "6a1a24a1-4061-4607-8bf4-a3963d0c5895"

var firmwareUUIDns = uuid.MustParse(magicUUID)

// setupStableFirmwareUUID makes sure the VirtualMachineInstance being started has a 'stable' UUID.
// The UUID is 'stable' if it doesn't change across reboots.
func setupStableFirmwareUUID(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) {
	logger := log.Log.Object(vm)

	if vmi.Spec.Domain.Firmware == nil {
		vmi.Spec.Domain.Firmware = &virtv1.Firmware{}
	}

	existingUUID := vmi.Spec.Domain.Firmware.UUID
	if existingUUID != "" {
		logger.V(4).Infof("Using existing UUID '%s'", existingUUID)
		return
	}

	vmi.Spec.Domain.Firmware.UUID = types.UID(uuid.NewSHA1(firmwareUUIDns, []byte(vmi.ObjectMeta.Name)).String())
}

// listControllerFromNamespace takes a namespace and returns all VirtualMachines
// from the VirtualMachine cache which run in this namespace
func (c *Controller) listControllerFromNamespace(namespace string) ([]*virtv1.VirtualMachine, error) {
	objs, err := c.vmIndexer.ByIndex(cache.NamespaceIndex, namespace)
	if err != nil {
		return nil, err
	}
	var vms []*virtv1.VirtualMachine
	for _, obj := range objs {
		vm := obj.(*virtv1.VirtualMachine)
		vms = append(vms, vm)
	}
	return vms, nil
}

// getMatchingControllers returns the list of VirtualMachines which match
// the labels of the VirtualMachineInstance from the listener cache. If there are no matching
// controllers, nothing is returned
func (c *Controller) getMatchingControllers(vmi *virtv1.VirtualMachineInstance) (vms []*virtv1.VirtualMachine) {
	controllers, err := c.listControllerFromNamespace(vmi.ObjectMeta.Namespace)
	if err != nil {
		return nil
	}

	// TODO check owner reference, if we have an existing controller which owns this one
	for _, vm := range controllers {
		if vmi.Name == vm.Name {
			vms = append(vms, vm)
		}
	}
	return vms
}

// When a vmi is created, enqueue the VirtualMachine that manages it and update its expectations.
func (c *Controller) addVirtualMachineInstance(obj interface{}) {
	vmi := obj.(*virtv1.VirtualMachineInstance)

	log.Log.Object(vmi).V(4).Info("VirtualMachineInstance added.")

	if vmi.DeletionTimestamp != nil {
		// on a restart of the controller manager, it's possible a new vmi shows up in a state that
		// is already pending deletion. Prevent the vmi from being a creation observation.
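		// Routing it through the delete handler records a deletion observation for the owning VM
		// (when one resolves) and enqueues it, instead of counting the event as a creation.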
c.deleteVirtualMachineInstance(vmi) return } // If it has a ControllerRef, that's all that matters. if controllerRef := metav1.GetControllerOf(vmi); controllerRef != nil { log.Log.Object(vmi).V(4).Info("Looking for VirtualMachineInstance Ref") vm := c.resolveControllerRef(vmi.Namespace, controllerRef) if vm == nil { // not managed by us log.Log.Object(vmi).V(4).Infof("Cant find the matching VM for VirtualMachineInstance: %s", vmi.Name) return } vmKey, err := controller.KeyFunc(vm) if err != nil { log.Log.Object(vmi).Errorf("Cannot parse key of VM: %s for VirtualMachineInstance: %s", vm.Name, vmi.Name) return } log.Log.Object(vmi).V(4).Infof("VirtualMachineInstance created because %s was added.", vmi.Name) c.expectations.CreationObserved(vmKey) c.enqueueVm(vm) return } // Otherwise, it's an orphan. Get a list of all matching VirtualMachines and sync // them to see if anyone wants to adopt it. // DO NOT observe creation because no controller should be waiting for an // orphan. vms := c.getMatchingControllers(vmi) if len(vms) == 0 { return } log.Log.V(4).Object(vmi).Infof("Orphan VirtualMachineInstance created") for _, vm := range vms { c.enqueueVm(vm) } } // When a vmi is updated, figure out what VirtualMachine manage it and wake them // up. If the labels of the vmi have changed we need to awaken both the old // and new VirtualMachine. old and cur must be *v1.VirtualMachineInstance types. func (c *Controller) updateVirtualMachineInstance(old, cur interface{}) { curVMI := cur.(*virtv1.VirtualMachineInstance) oldVMI := old.(*virtv1.VirtualMachineInstance) if curVMI.ResourceVersion == oldVMI.ResourceVersion { // Periodic resync will send update events for all known vmis. // Two different versions of the same vmi will always have different RVs. return } labelChanged := !equality.Semantic.DeepEqual(curVMI.Labels, oldVMI.Labels) if curVMI.DeletionTimestamp != nil { // when a vmi is deleted gracefully it's deletion timestamp is first modified to reflect a grace period, // and after such time has passed, the virt-handler actually deletes it from the store. We receive an update // for modification of the deletion timestamp and expect an VirtualMachine to create newVMI asap, not wait // until the virt-handler actually deletes the vmi. This is different from the Phase of a vmi changing, because // an rs never initiates a phase change, and so is never asleep waiting for the same. c.deleteVirtualMachineInstance(curVMI) if labelChanged { // we don't need to check the oldVMI.DeletionTimestamp because DeletionTimestamp cannot be unset. c.deleteVirtualMachineInstance(oldVMI) } return } curControllerRef := metav1.GetControllerOf(curVMI) oldControllerRef := metav1.GetControllerOf(oldVMI) controllerRefChanged := !equality.Semantic.DeepEqual(curControllerRef, oldControllerRef) if controllerRefChanged && oldControllerRef != nil { // The ControllerRef was changed. Sync the old controller, if any. if vm := c.resolveControllerRef(oldVMI.Namespace, oldControllerRef); vm != nil { c.enqueueVm(vm) } } // If it has a ControllerRef, that's all that matters. 
if curControllerRef != nil { vm := c.resolveControllerRef(curVMI.Namespace, curControllerRef) if vm == nil { return } log.Log.V(4).Object(curVMI).Infof("VirtualMachineInstance updated") c.enqueueVm(vm) // TODO: MinReadySeconds in the VirtualMachineInstance will generate an Available condition to be added in // Update once we support the available concept on the VM return } isOrphan := !labelChanged && !controllerRefChanged if isOrphan { return } // If anything changed, sync matching controllers to see if anyone wants to adopt it now. vms := c.getMatchingControllers(curVMI) if len(vms) == 0 { return } log.Log.V(4).Object(curVMI).Infof("Orphan VirtualMachineInstance updated") for _, vm := range vms { c.enqueueVm(vm) } } // When a vmi is deleted, enqueue the VirtualMachine that manages the vmi and update its expectations. // obj could be an *v1.VirtualMachineInstance, or a DeletionFinalStateUnknown marker item. func (c *Controller) deleteVirtualMachineInstance(obj interface{}) { vmi, ok := obj.(*virtv1.VirtualMachineInstance) // When a delete is dropped, the relist will notice a vmi in the store not // in the list, leading to the insertion of a tombstone object which contains // the deleted key/value. Note that this value might be stale. If the vmi // changed labels the new VirtualMachine will not be woken up till the periodic resync. if !ok { tombstone, ok := obj.(cache.DeletedFinalStateUnknown) if !ok { log.Log.Reason(fmt.Errorf("couldn't get object from tombstone %+v", obj)).Error(failedProcessDeleteNotificationErrMsg) return } vmi, ok = tombstone.Obj.(*virtv1.VirtualMachineInstance) if !ok { log.Log.Reason(fmt.Errorf("tombstone contained object that is not a vmi %#v", obj)).Error(failedProcessDeleteNotificationErrMsg) return } } controllerRef := metav1.GetControllerOf(vmi) if controllerRef == nil { // No controller should care about orphans being deleted. return } vm := c.resolveControllerRef(vmi.Namespace, controllerRef) if vm == nil { return } vmKey, err := controller.KeyFunc(vm) if err != nil { return } c.expectations.DeletionObserved(vmKey, controller.VirtualMachineInstanceKey(vmi)) c.enqueueVm(vm) } func (c *Controller) addDataVolume(obj interface{}) { dataVolume := obj.(*cdiv1.DataVolume) if dataVolume.DeletionTimestamp != nil { c.deleteDataVolume(dataVolume) return } controllerRef := metav1.GetControllerOf(dataVolume) if controllerRef != nil { log.Log.Object(dataVolume).Info("Looking for DataVolume Ref") vm := c.resolveControllerRef(dataVolume.Namespace, controllerRef) if vm != nil { vmKey, err := controller.KeyFunc(vm) if err != nil { log.Log.Object(dataVolume).Errorf("Cannot parse key of VM: %s for DataVolume: %s", vm.Name, dataVolume.Name) } else { log.Log.Object(dataVolume).Infof("DataVolume created because %s was added.", dataVolume.Name) c.dataVolumeExpectations.CreationObserved(vmKey) } } else { log.Log.Object(dataVolume).Errorf("Can't find the matching VM for DataVolume: %s", dataVolume.Name) } } c.queueVMsForDataVolume(dataVolume) } func (c *Controller) updateDataVolume(old, cur interface{}) { curDataVolume := cur.(*cdiv1.DataVolume) oldDataVolume := old.(*cdiv1.DataVolume) if curDataVolume.ResourceVersion == oldDataVolume.ResourceVersion { // Periodic resync will send update events for all known DataVolumes. // Two different versions of the same dataVolume will always // have different RVs.
return } labelChanged := !equality.Semantic.DeepEqual(curDataVolume.Labels, oldDataVolume.Labels) if curDataVolume.DeletionTimestamp != nil { // having a DataVolume marked for deletion is enough // to count as a deletion expectation c.deleteDataVolume(curDataVolume) if labelChanged { // we don't need to check the oldDataVolume.DeletionTimestamp // because DeletionTimestamp cannot be unset. c.deleteDataVolume(oldDataVolume) } return } curControllerRef := metav1.GetControllerOf(curDataVolume) oldControllerRef := metav1.GetControllerOf(oldDataVolume) controllerRefChanged := !equality.Semantic.DeepEqual(curControllerRef, oldControllerRef) if controllerRefChanged && oldControllerRef != nil { // The ControllerRef was changed. Sync the old controller, if any. if vm := c.resolveControllerRef(oldDataVolume.Namespace, oldControllerRef); vm != nil { c.enqueueVm(vm) } } c.queueVMsForDataVolume(curDataVolume) } func (c *Controller) deleteDataVolume(obj interface{}) { dataVolume, ok := obj.(*cdiv1.DataVolume) // When a delete is dropped, the relist will notice a dataVolume in the store not // in the list, leading to the insertion of a tombstone object which contains // the deleted key/value. Note that this value might be stale. If the dataVolume // changed labels the new vmi will not be woken up till the periodic resync. if !ok { tombstone, ok := obj.(cache.DeletedFinalStateUnknown) if !ok { log.Log.Reason(fmt.Errorf("couldn't get object from tombstone %+v", obj)).Error(failedProcessDeleteNotificationErrMsg) return } dataVolume, ok = tombstone.Obj.(*cdiv1.DataVolume) if !ok { log.Log.Reason(fmt.Errorf("tombstone contained object that is not a dataVolume %#v", obj)).Error(failedProcessDeleteNotificationErrMsg) return } } if controllerRef := metav1.GetControllerOf(dataVolume); controllerRef != nil { if vm := c.resolveControllerRef(dataVolume.Namespace, controllerRef); vm != nil { if vmKey, err := controller.KeyFunc(vm); err == nil { c.dataVolumeExpectations.DeletionObserved(vmKey, controller.DataVolumeKey(dataVolume)) } } } c.queueVMsForDataVolume(dataVolume) } func (c *Controller) queueVMsForDataVolume(dataVolume *cdiv1.DataVolume) { var vmOwner string if controllerRef := metav1.GetControllerOf(dataVolume); controllerRef != nil { if vm := c.resolveControllerRef(dataVolume.Namespace, controllerRef); vm != nil { vmOwner = vm.Name log.Log.V(4).Object(dataVolume).Infof("DataVolume updated for vm %s", vm.Name) c.enqueueVm(vm) } } // handle DataVolumes not owned by the VM but referenced in the spec // TODO come back when DV/PVC name may differ k, err := controller.KeyFunc(dataVolume) if err != nil { log.Log.Object(dataVolume).Errorf("Cannot parse key of DataVolume: %s", dataVolume.Name) return } for _, indexName := range []string{"dv", "pvc"} { objs, err := c.vmIndexer.ByIndex(indexName, k) if err != nil { log.Log.Object(dataVolume).Errorf("Cannot get index %s of DataVolume: %s", indexName, dataVolume.Name) return } for _, obj := range objs { vm := obj.(*virtv1.VirtualMachine) if vm.Name != vmOwner { log.Log.V(4).Object(dataVolume).Infof("DataVolume updated for vm %s", vm.Name) c.enqueueVm(vm) } } } } func (c *Controller) addVirtualMachine(obj interface{}) { c.enqueueVm(obj) } func (c *Controller) deleteVirtualMachine(obj interface{}) { c.enqueueVm(obj) } func (c *Controller) updateVirtualMachine(_, curr interface{}) { c.enqueueVm(curr) } func (c *Controller) enqueueVm(obj interface{}) { logger := log.Log vm := obj.(*virtv1.VirtualMachine) key, err := controller.KeyFunc(vm) if err != nil { 
logger.Object(vm).Reason(err).Error(failedExtractVmkeyFromVmErrMsg) return } c.Queue.Add(key) } func (c *Controller) getPatchFinalizerOps(oldFinalizers, newFinalizers []string) ([]byte, error) { return patch.New( patch.WithTest("/metadata/finalizers", oldFinalizers), patch.WithReplace("/metadata/finalizers", newFinalizers)). GeneratePayload() } func (c *Controller) removeVMIFinalizer(vmi *virtv1.VirtualMachineInstance) error { if !controller.HasFinalizer(vmi, virtv1.VirtualMachineControllerFinalizer) { return nil } log.Log.V(3).Object(vmi).Infof("VMI is in a final state. Removing VM controller finalizer") newFinalizers := []string{} for _, fin := range vmi.Finalizers { if fin != virtv1.VirtualMachineControllerFinalizer { newFinalizers = append(newFinalizers, fin) } } patch, err := c.getPatchFinalizerOps(vmi.Finalizers, newFinalizers) if err != nil { return err } _, err = c.clientset.VirtualMachineInstance(vmi.Namespace).Patch(context.Background(), vmi.Name, types.JSONPatchType, patch, metav1.PatchOptions{}) return err } func (c *Controller) removeVMFinalizer(vm *virtv1.VirtualMachine) (*virtv1.VirtualMachine, error) { if !controller.HasFinalizer(vm, virtv1.VirtualMachineControllerFinalizer) { return vm, nil } log.Log.V(3).Object(vm).Infof("Removing VM controller finalizer: %s", virtv1.VirtualMachineControllerFinalizer) newFinalizers := []string{} for _, fin := range vm.Finalizers { if fin != virtv1.VirtualMachineControllerFinalizer { newFinalizers = append(newFinalizers, fin) } } patch, err := c.getPatchFinalizerOps(vm.Finalizers, newFinalizers) if err != nil { return vm, err } vm, err = c.clientset.VirtualMachine(vm.Namespace).Patch(context.Background(), vm.Name, types.JSONPatchType, patch, metav1.PatchOptions{}) return vm, err } func (c *Controller) addVMFinalizer(vm *virtv1.VirtualMachine) (*virtv1.VirtualMachine, error) { if controller.HasFinalizer(vm, virtv1.VirtualMachineControllerFinalizer) { return vm, nil } log.Log.V(3).Object(vm).Infof("Adding VM controller finalizer: %s", virtv1.VirtualMachineControllerFinalizer) newFinalizers := make([]string, len(vm.Finalizers)) copy(newFinalizers, vm.Finalizers) newFinalizers = append(newFinalizers, virtv1.VirtualMachineControllerFinalizer) patch, err := c.getPatchFinalizerOps(vm.Finalizers, newFinalizers) if err != nil { return vm, err } return c.clientset.VirtualMachine(vm.Namespace).Patch(context.Background(), vm.Name, types.JSONPatchType, patch, metav1.PatchOptions{}) } // parseGeneration will parse for the last value after a '-'. It is assumed the // revision name is created with getVMRevisionName. If the name is not formatted // correctly and the generation cannot be found, then nil will be returned. func parseGeneration(revisionName string, logger *log.FilteredLogger) *int64 { idx := strings.LastIndexAny(revisionName, "-") if idx == -1 { logger.Errorf("Failed to parse generation as an int from revision %v", revisionName) return nil } generationStr := revisionName[idx+1:] generation, err := strconv.ParseInt(generationStr, 10, 64) if err != nil { logger.Reason(err).Errorf("Failed to parse generation as an int from revision %v", revisionName) return nil } return &generation } // patchVmGenerationFromControllerRevision will first fetch the generation from // the corresponding controller revision, and then patch the vmi with the // generation annotation. If the controller revision does not exist, // (nil, nil) will be returned. 
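// The generation itself is recovered from the numeric suffix of the ControllerRevision name via parseGeneration above.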
func (c *Controller) patchVmGenerationFromControllerRevision(vmi *virtv1.VirtualMachineInstance, logger *log.FilteredLogger) (*virtv1.VirtualMachineInstance, *int64, error) { cr, err := c.getControllerRevision(vmi.Namespace, vmi.Status.VirtualMachineRevisionName) if err != nil || cr == nil { return vmi, nil, err } generation := parseGeneration(cr.Name, logger) if generation == nil { return vmi, nil, nil } vmi, err = c.patchVmGenerationAnnotationOnVmi(*generation, vmi) if err != nil { return vmi, generation, err } return vmi, generation, err } // syncGenerationInfo will update the vm.Status with the ObservedGeneration // from the vmi and the DesiredGeneration from the vm current generation. func (c *Controller) syncGenerationInfo(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance, logger *log.FilteredLogger) (*virtv1.VirtualMachineInstance, error) { if vm == nil || vmi == nil { return vmi, errors.New("passed nil pointer") } generation, err := getGenerationAnnotationAsInt(vmi, logger) if err != nil { return vmi, err } // If the generation annotation does not exist, the VMI could have been // created before the controller was updated. In this case, check the // ControllerRevision for the latest observed generation and back-fill // the info onto the vmi annotation. if generation == nil { var patchedVMI *virtv1.VirtualMachineInstance patchedVMI, generation, err = c.patchVmGenerationFromControllerRevision(vmi, logger) if generation == nil || err != nil { return vmi, err } vmi = patchedVMI } vm.Status.ObservedGeneration = *generation vm.Status.DesiredGeneration = vm.Generation return vmi, nil } func (c *Controller) updateStatus(vm, vmOrig *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance, syncErr common.SyncError, logger *log.FilteredLogger) error { key := controller.VirtualMachineKey(vmOrig) defer virtControllerVMWorkQueueTracer.StepTrace(key, "updateStatus", trace.Field{Key: "VM Name", Value: vmOrig.Name}) created := vmi != nil vm.Status.Created = created ready := false if created { ready = controller.NewVirtualMachineInstanceConditionManager().HasConditionWithStatus(vmi, virtv1.VirtualMachineInstanceReady, k8score.ConditionTrue) var err error vmi, err = c.syncGenerationInfo(vm, vmi, logger) if err != nil { return err } } vm.Status.Ready = ready runStrategy, _ := vmOrig.RunStrategy() // sync for the first time only when the VMI gets created // so that we can tell if the VM got started at least once if vm.Status.RunStrategy != "" || vm.Status.Created { vm.Status.RunStrategy = runStrategy } c.trimDoneVolumeRequests(vm) memorydump.UpdateRequest(vm, vmi) if c.isTrimFirstChangeRequestNeeded(vm, vmi) { popStateChangeRequest(vm) } syncStartFailureStatus(vm, vmi) // On a successful migration, the volume change condition is removed and we need to detect the removal before the synchronization of the VMI // conditions to the VM syncVolumeMigration(vm, vmi) syncConditions(vm, vmi, syncErr) c.setPrintableStatus(vm, vmi) // only update if necessary if !equality.Semantic.DeepEqual(vm.Status, vmOrig.Status) { if _, err := c.clientset.VirtualMachine(vm.Namespace).UpdateStatus(context.Background(), vm, v1.UpdateOptions{}); err != nil { return err } } if vmi != nil && vmi.IsFinal() && len(vmi.Finalizers) > 0 { // Remove our finalizer from a finalized VMI now that we've been able // to record any status info from the VMI onto the VM object.
err := c.removeVMIFinalizer(vmi) if err != nil { return err } } return nil } func (c *Controller) setPrintableStatus(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) { // For each status, there's a separate function that evaluates // whether the status is "true" for the given VM. // // Note that these statuses aren't mutually exclusive, // and several of them can be "true" at the same time // (e.g., Running && Migrating, or Paused && Terminating). // // The actual precedence of these statuses is determined by the order // of evaluation - first match wins. statuses := []struct { statusType virtv1.VirtualMachinePrintableStatus statusFunc func(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) bool }{ {virtv1.VirtualMachineStatusTerminating, c.isVirtualMachineStatusTerminating}, {virtv1.VirtualMachineStatusStopping, c.isVirtualMachineStatusStopping}, {virtv1.VirtualMachineStatusMigrating, c.isVirtualMachineStatusMigrating}, {virtv1.VirtualMachineStatusPaused, c.isVirtualMachineStatusPaused}, {virtv1.VirtualMachineStatusRunning, c.isVirtualMachineStatusRunning}, {virtv1.VirtualMachineStatusPvcNotFound, c.isVirtualMachineStatusPvcNotFound}, {virtv1.VirtualMachineStatusDataVolumeError, c.isVirtualMachineStatusDataVolumeError}, {virtv1.VirtualMachineStatusUnschedulable, c.isVirtualMachineStatusUnschedulable}, {virtv1.VirtualMachineStatusProvisioning, c.isVirtualMachineStatusProvisioning}, {virtv1.VirtualMachineStatusWaitingForVolumeBinding, c.isVirtualMachineStatusWaitingForVolumeBinding}, {virtv1.VirtualMachineStatusErrImagePull, c.isVirtualMachineStatusErrImagePull}, {virtv1.VirtualMachineStatusImagePullBackOff, c.isVirtualMachineStatusImagePullBackOff}, {virtv1.VirtualMachineStatusStarting, c.isVirtualMachineStatusStarting}, {virtv1.VirtualMachineStatusCrashLoopBackOff, c.isVirtualMachineStatusCrashLoopBackOff}, {virtv1.VirtualMachineStatusStopped, c.isVirtualMachineStatusStopped}, } for _, status := range statuses { if status.statusFunc(vm, vmi) { vm.Status.PrintableStatus = status.statusType return } } vm.Status.PrintableStatus = virtv1.VirtualMachineStatusUnknown } // isVirtualMachineStatusCrashLoopBackOff determines whether the VM status field should be set to "CrashLoopBackOff". func (c *Controller) isVirtualMachineStatusCrashLoopBackOff(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) bool { if vmi != nil && !vmi.IsFinal() { return false } else if c.isVMIStartExpected(vm) { return false } runStrategy, err := vm.RunStrategy() if err != nil { log.Log.Object(vm).Errorf(fetchingRunStrategyErrFmt, err) return false } if vm.Status.StartFailure != nil && vm.Status.StartFailure.ConsecutiveFailCount > 0 && (runStrategy == virtv1.RunStrategyAlways || runStrategy == virtv1.RunStrategyRerunOnFailure || runStrategy == virtv1.RunStrategyOnce) { return true } return false } // isVirtualMachineStatusStopped determines whether the VM status field should be set to "Stopped". func (c *Controller) isVirtualMachineStatusStopped(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) bool { if vmi != nil { return vmi.IsFinal() } return !c.isVMIStartExpected(vm) } // isVirtualMachineStatusProvisioning determines whether the VM status field should be set to "Provisioning".
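// The VM is treated as provisioning while any DataVolume referenced by its template volumes is still being provisioned (see storagetypes.HasDataVolumeProvisioning below).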
func (c *Controller) isVirtualMachineStatusProvisioning(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) bool { return storagetypes.HasDataVolumeProvisioning(vm.Namespace, vm.Spec.Template.Spec.Volumes, c.dataVolumeStore) } // isVirtualMachineStatusWaitingForVolumeBinding determines whether the VM status field should be set to "WaitingForVolumeBinding". func (c *Controller) isVirtualMachineStatusWaitingForVolumeBinding(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) bool { if !isSetToStart(vm, vmi) { return false } return storagetypes.HasUnboundPVC(vm.Namespace, vm.Spec.Template.Spec.Volumes, c.pvcStore) } // isVirtualMachineStatusStarting determines whether the VM status field should be set to "Starting". func (c *Controller) isVirtualMachineStatusStarting(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) bool { if vmi == nil { return c.isVMIStartExpected(vm) } return vmi.IsUnprocessed() || vmi.IsScheduling() || vmi.IsScheduled() } // isVirtualMachineStatusRunning determines whether the VM status field should be set to "Running". func (c *Controller) isVirtualMachineStatusRunning(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) bool { if vmi == nil { return false } hasPausedCondition := controller.NewVirtualMachineInstanceConditionManager().HasConditionWithStatus(vmi, virtv1.VirtualMachineInstancePaused, k8score.ConditionTrue) return vmi.IsRunning() && !hasPausedCondition } // isVirtualMachineStatusPaused determines whether the VM status field should be set to "Paused". func (c *Controller) isVirtualMachineStatusPaused(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) bool { if vmi == nil { return false } hasPausedCondition := controller.NewVirtualMachineInstanceConditionManager().HasConditionWithStatus(vmi, virtv1.VirtualMachineInstancePaused, k8score.ConditionTrue) return vmi.IsRunning() && hasPausedCondition } // isVirtualMachineStatusStopping determines whether the VM status field should be set to "Stopping". func (c *Controller) isVirtualMachineStatusStopping(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) bool { return vmi != nil && !vmi.IsFinal() && (vmi.IsMarkedForDeletion() || c.isVMIStopExpected(vm)) } // isVirtualMachineStatusTerminating determines whether the VM status field should be set to "Terminating". func (c *Controller) isVirtualMachineStatusTerminating(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) bool { return vm.ObjectMeta.DeletionTimestamp != nil } // isVirtualMachineStatusMigrating determines whether the VM status field should be set to "Migrating". func (c *Controller) isVirtualMachineStatusMigrating(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) bool { return vmi != nil && migrations.IsMigrating(vmi) } // isVirtualMachineStatusUnschedulable determines whether the VM status field should be set to "FailedUnschedulable".
func (c *Controller) isVirtualMachineStatusUnschedulable(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) bool { return controller.NewVirtualMachineInstanceConditionManager().HasConditionWithStatusAndReason(vmi, virtv1.VirtualMachineInstanceConditionType(k8score.PodScheduled), k8score.ConditionFalse, k8score.PodReasonUnschedulable) } // isVirtualMachineStatusErrImagePull determines whether the VM status field should be set to "ErrImagePull" func (c *Controller) isVirtualMachineStatusErrImagePull(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) bool { syncCond := controller.NewVirtualMachineInstanceConditionManager().GetCondition(vmi, virtv1.VirtualMachineInstanceSynchronized) return syncCond != nil && syncCond.Status == k8score.ConditionFalse && syncCond.Reason == controller.ErrImagePullReason } // isVirtualMachineStatusImagePullBackOff determines whether the VM status field should be set to "ImagePullBackOff" func (c *Controller) isVirtualMachineStatusImagePullBackOff(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) bool { syncCond := controller.NewVirtualMachineInstanceConditionManager().GetCondition(vmi, virtv1.VirtualMachineInstanceSynchronized) return syncCond != nil && syncCond.Status == k8score.ConditionFalse && syncCond.Reason == controller.ImagePullBackOffReason } // isVirtualMachineStatusPvcNotFound determines whether the VM status field should be set to "FailedPvcNotFound". func (c *Controller) isVirtualMachineStatusPvcNotFound(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) bool { return controller.NewVirtualMachineInstanceConditionManager().HasConditionWithStatusAndReason(vmi, virtv1.VirtualMachineInstanceSynchronized, k8score.ConditionFalse, controller.FailedPvcNotFoundReason) } // isVirtualMachineStatusDataVolumeError determines whether the VM status field should be set to "DataVolumeError" func (c *Controller) isVirtualMachineStatusDataVolumeError(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) bool { err := storagetypes.HasDataVolumeErrors(vm.Namespace, vm.Spec.Template.Spec.Volumes, c.dataVolumeStore) if err != nil { log.Log.Object(vm).Errorf("%v", err) return true } return false } func syncReadyConditionFromVMI(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) { conditionManager := controller.NewVirtualMachineConditionManager() vmiReadyCond := controller.NewVirtualMachineInstanceConditionManager(). 
GetCondition(vmi, virtv1.VirtualMachineInstanceReady) now := metav1.Now() if vmi == nil { conditionManager.UpdateCondition(vm, &virtv1.VirtualMachineCondition{ Type: virtv1.VirtualMachineReady, Status: k8score.ConditionFalse, Reason: "VMINotExists", Message: "VMI does not exist", LastProbeTime: now, LastTransitionTime: now, }) } else if vmiReadyCond == nil { conditionManager.UpdateCondition(vm, &virtv1.VirtualMachineCondition{ Type: virtv1.VirtualMachineReady, Status: k8score.ConditionFalse, Reason: "VMIConditionMissing", Message: "VMI is missing the Ready condition", LastProbeTime: now, LastTransitionTime: now, }) } else { conditionManager.UpdateCondition(vm, &virtv1.VirtualMachineCondition{ Type: virtv1.VirtualMachineReady, Status: vmiReadyCond.Status, Reason: vmiReadyCond.Reason, Message: vmiReadyCond.Message, LastProbeTime: vmiReadyCond.LastProbeTime, LastTransitionTime: vmiReadyCond.LastTransitionTime, }) } } func syncConditions(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance, syncErr common.SyncError) { cm := controller.NewVirtualMachineConditionManager() // ready condition is handled differently as it persists regardless if vmi exists or not syncReadyConditionFromVMI(vm, vmi) processFailureCondition(vm, syncErr) // nothing to do if vmi hasn't been created yet. if vmi == nil { return } // sync VMI conditions, ignore list represents conditions that are not synced generically syncIgnoreMap := map[string]interface{}{ string(virtv1.VirtualMachineReady): nil, string(virtv1.VirtualMachineFailure): nil, string(virtv1.VirtualMachineRestartRequired): nil, } vmiCondMap := make(map[string]interface{}) // generically add/update all vmi conditions for _, cond := range vmi.Status.Conditions { _, ignore := syncIgnoreMap[string(cond.Type)] if ignore { continue } vmiCondMap[string(cond.Type)] = nil cm.UpdateCondition(vm, &virtv1.VirtualMachineCondition{ Type: virtv1.VirtualMachineConditionType(cond.Type), Status: cond.Status, Reason: cond.Reason, Message: cond.Message, LastProbeTime: cond.LastProbeTime, LastTransitionTime: cond.LastTransitionTime, }) } // remove vm conditions that don't exist on vmi (excluding the ignore list) for _, cond := range vm.Status.Conditions { _, ignore := syncIgnoreMap[string(cond.Type)] if ignore { continue } _, exists := vmiCondMap[string(cond.Type)] if !exists { cm.RemoveCondition(vm, cond.Type) } } } func processFailureCondition(vm *virtv1.VirtualMachine, syncErr common.SyncError) { vmConditionManager := controller.NewVirtualMachineConditionManager() if syncErr == nil { if vmConditionManager.HasCondition(vm, virtv1.VirtualMachineFailure) { log.Log.Object(vm).V(4).Info("Removing failure") vmConditionManager.RemoveCondition(vm, virtv1.VirtualMachineFailure) } // nothing to do return } vmConditionManager.UpdateCondition(vm, &virtv1.VirtualMachineCondition{ Type: virtv1.VirtualMachineFailure, Reason: syncErr.Reason(), Message: syncErr.Error(), LastTransitionTime: metav1.Now(), Status: k8score.ConditionTrue, }) } func (c *Controller) isTrimFirstChangeRequestNeeded(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) (clearChangeRequest bool) { if len(vm.Status.StateChangeRequests) == 0 { return false } // Only consider one stateChangeRequest at a time. The second and subsequent change // requests have not been acted upon by this controller yet! 
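// A stop request is cleared once the targeted VMI is gone or its UID no longer matches; a start request is cleared once a live, non-final VMI exists.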
stateChange := vm.Status.StateChangeRequests[0] switch stateChange.Action { case virtv1.StopRequest: if vmi == nil { // If there's no VMI, then the VMI was stopped, and the stopRequest can be cleared log.Log.Object(vm).V(4).Infof("No VMI. Clearing stop request") return true } else { if stateChange.UID == nil { // It never makes sense to have a request to stop a VMI that doesn't // have a UUID associated with it. This shouldn't be possible -- but if // it occurs, clear the stopRequest because it can't be acted upon log.Log.Object(vm).Errorf("Stop Request has no UID.") return true } else if *stateChange.UID != vmi.UID { // If there is a VMI, but the UID doesn't match, then it // must have been previously stopped, so the stopRequest can be cleared log.Log.Object(vm).V(4).Infof("VMI's UID doesn't match. clearing stop request") return true } } case virtv1.StartRequest: // Update VMI as the runStrategy might have started/stopped the VM. // Example: if the runStrategy is `RerunOnFailure` and the VMI just failed // `syncRunStrategy()` will delete the VMI object and enqueue a StartRequest. // If we do not update `vmi` by asking the API Server this function could // erroneously trim the just added StartRequest because it would see a running // vmi with no DeletionTimestamp if vmi != nil && vmi.DeletionTimestamp == nil && !vmi.IsFinal() { log.Log.Object(vm).V(4).Infof("VMI exists. clearing start request") return true } } return false } func (c *Controller) trimDoneVolumeRequests(vm *virtv1.VirtualMachine) { if len(vm.Status.VolumeRequests) == 0 { return } volumeMap := make(map[string]virtv1.Volume) diskMap := make(map[string]virtv1.Disk) for _, volume := range vm.Spec.Template.Spec.Volumes { volumeMap[volume.Name] = volume } for _, disk := range vm.Spec.Template.Spec.Domain.Devices.Disks { diskMap[disk.Name] = disk } tmpVolRequests := vm.Status.VolumeRequests[:0] for _, request := range vm.Status.VolumeRequests { var added bool var volName string removeRequest := false if request.AddVolumeOptions != nil { volName = request.AddVolumeOptions.Name added = true } else if request.RemoveVolumeOptions != nil { volName = request.RemoveVolumeOptions.Name added = false } _, volExists := volumeMap[volName] _, diskExists := diskMap[volName] if added && volExists && diskExists { removeRequest = true } else if !added && !volExists && !diskExists { removeRequest = true } if !removeRequest { tmpVolRequests = append(tmpVolRequests, request) } } vm.Status.VolumeRequests = tmpVolRequests } func validLiveUpdateVolumes(oldVMSpec *virtv1.VirtualMachineSpec, vm *virtv1.VirtualMachine) bool { oldVols := storagetypes.GetVolumesByName(&oldVMSpec.Template.Spec) // Evaluate if any volume has changed or has been added for _, v := range vm.Spec.Template.Spec.Volumes { oldVol, okOld := oldVols[v.Name] switch { // Changes for hotplugged volumes are valid case storagetypes.IsHotplugVolume(&v): delete(oldVols, v.Name) // The volume has been freshly added case !okOld: return false // if the update strategy is migration the PVC/DV could have // changed case (v.VolumeSource.PersistentVolumeClaim != nil || v.VolumeSource.DataVolume != nil) && vm.Spec.UpdateVolumesStrategy != nil && *vm.Spec.UpdateVolumesStrategy == virtv1.UpdateVolumesStrategyMigration: delete(oldVols, v.Name) // The volume has changed case !equality.Semantic.DeepEqual(*oldVol, v): return false default: delete(oldVols, v.Name) } } // Evaluate if any volumes were removed and they were hotplugged volumes for _, v := range oldVols { if !storagetypes.IsHotplugVolume(v) {
return false } } return true } func validLiveUpdateDisks(oldVMSpec *virtv1.VirtualMachineSpec, vm *virtv1.VirtualMachine) bool { oldDisks := storagetypes.GetDisksByName(&oldVMSpec.Template.Spec) oldVols := storagetypes.GetVolumesByName(&oldVMSpec.Template.Spec) vols := storagetypes.GetVolumesByName(&vm.Spec.Template.Spec) // Evaluate if any disk has changed or has been added for _, newDisk := range vm.Spec.Template.Spec.Domain.Devices.Disks { v := vols[newDisk.Name] oldDisk, okOld := oldDisks[newDisk.Name] switch { // Changes for disks associated to a hotpluggable volume are valid case storagetypes.IsHotplugVolume(v): delete(oldDisks, v.Name) // The disk has been freshly added case !okOld: return false // The disk has changed case !equality.Semantic.DeepEqual(*oldDisk, newDisk): return false default: delete(oldDisks, v.Name) } } // Evaluate if any disks were removed and they were hotplugged volumes for _, d := range oldDisks { v := oldVols[d.Name] if !storagetypes.IsHotplugVolume(v) { return false } } return true } func setRestartRequired(vm *virtv1.VirtualMachine, message string) { vmConditions := controller.NewVirtualMachineConditionManager() vmConditions.UpdateCondition(vm, &virtv1.VirtualMachineCondition{ Type: virtv1.VirtualMachineRestartRequired, LastTransitionTime: metav1.Now(), Status: k8score.ConditionTrue, Message: message, }) } // addRestartRequiredIfNeeded adds the restartRequired condition to the VM if any non-live-updatable field was changed func (c *Controller) addRestartRequiredIfNeeded(lastSeenVMSpec *virtv1.VirtualMachineSpec, vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) bool { if lastSeenVMSpec == nil { return false } // Expand any instance types and preferences associated with lastSeenVMSpec or the current VM before working out if things are live-updatable currentVM := vm.DeepCopy() if err := c.instancetypeController.ApplyToVM(currentVM); err != nil { return false } lastSeenVM := &virtv1.VirtualMachine{ // We need the namespace to be populated here for the lookup and application of instance types to work below ObjectMeta: currentVM.DeepCopy().ObjectMeta, Spec: *lastSeenVMSpec, } if err := c.instancetypeController.ApplyToVM(lastSeenVM); err != nil { return false } // Ignore all the live-updatable fields by copying them over. 
(If the feature gate is disabled, nothing is live-updatable) // Note: this list needs to stay up-to-date with everything that can be live-updated // Note2: destroying lastSeenVMSpec here is fine, we don't need it later if c.clusterConfig.IsVMRolloutStrategyLiveUpdate() { if validLiveUpdateVolumes(lastSeenVMSpec, currentVM) { lastSeenVMSpec.Template.Spec.Volumes = currentVM.Spec.Template.Spec.Volumes } if validLiveUpdateDisks(lastSeenVMSpec, currentVM) { lastSeenVMSpec.Template.Spec.Domain.Devices.Disks = currentVM.Spec.Template.Spec.Domain.Devices.Disks } if lastSeenVMSpec.Template.Spec.Domain.CPU != nil && currentVM.Spec.Template.Spec.Domain.CPU != nil { lastSeenVMSpec.Template.Spec.Domain.CPU.Sockets = currentVM.Spec.Template.Spec.Domain.CPU.Sockets } if currentVM.Spec.Template.Spec.Domain.Memory != nil && currentVM.Spec.Template.Spec.Domain.Memory.Guest != nil { if lastSeenVM.Spec.Template.Spec.Domain.Memory == nil { lastSeenVM.Spec.Template.Spec.Domain.Memory = &virtv1.Memory{} } lastSeenVM.Spec.Template.Spec.Domain.Memory.Guest = currentVM.Spec.Template.Spec.Domain.Memory.Guest } lastSeenVM.Spec.Template.Spec.NodeSelector = currentVM.Spec.Template.Spec.NodeSelector lastSeenVM.Spec.Template.Spec.Affinity = currentVM.Spec.Template.Spec.Affinity lastSeenVM.Spec.Template.Spec.Tolerations = currentVM.Spec.Template.Spec.Tolerations } else { // In the case live-updates aren't enabled, the volume set of the VM can still be changed by volume hotplugging. // For imperative volume hotplug, first the VM status with the request AND the VMI spec are updated, then in the // next iteration, the VM spec is updated as well. Here, we're in this iteration where the currentVM has for the first // time the updated hotplugged volumes. Hence, we can compare the current VM volumes and disks with the ones belonging // to the VMI. // In case of a declarative update, the flow is the opposite, first we update the VM spec and then the VMI. Therefore, if // the change was declarative, then the VMI would still not have the update.
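// Only mirror the volume/disk changes into lastSeenVMSpec when the VMI already reflects them, i.e. the change was an imperative hotplug; a purely declarative change keeps the diff and leads to RestartRequired below.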
if equality.Semantic.DeepEqual(currentVM.Spec.Template.Spec.Volumes, vmi.Spec.Volumes) && equality.Semantic.DeepEqual(currentVM.Spec.Template.Spec.Domain.Devices.Disks, vmi.Spec.Domain.Devices.Disks) { lastSeenVMSpec.Template.Spec.Volumes = currentVM.Spec.Template.Spec.Volumes lastSeenVMSpec.Template.Spec.Domain.Devices.Disks = currentVM.Spec.Template.Spec.Domain.Devices.Disks } } if !equality.Semantic.DeepEqual(lastSeenVM.Spec.Template.Spec, currentVM.Spec.Template.Spec) { setRestartRequired(vm, "a non-live-updatable field was changed in the template spec") return true } return false } func (c *Controller) sync(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance, key string) (*virtv1.VirtualMachine, *virtv1.VirtualMachineInstance, common.SyncError, error) { defer virtControllerVMWorkQueueTracer.StepTrace(key, "sync", trace.Field{Key: "VM Name", Value: vm.Name}) var ( syncErr common.SyncError err error startVMSpec *virtv1.VirtualMachineSpec ) if !c.satisfiedExpectations(key) { return vm, vmi, nil, nil } if vmi != nil { startVMSpec, err = c.getLastVMRevisionSpec(vm) if err != nil { return vm, vmi, nil, err } } if vm.DeletionTimestamp != nil { if vmi == nil || controller.HasFinalizer(vm, metav1.FinalizerOrphanDependents) { vm, err = c.removeVMFinalizer(vm) if err != nil { return vm, vmi, nil, err } } else { vm, err = c.stopVMI(vm, vmi) if err != nil { log.Log.Object(vm).Errorf(failureDeletingVmiErrFormat, err) return vm, vmi, common.NewSyncError(fmt.Errorf(failureDeletingVmiErrFormat, err), vmiFailedDeleteReason), nil } } return vm, vmi, nil, nil } else { vm, err = c.addVMFinalizer(vm) if err != nil { return vm, vmi, nil, err } } vmi, err = c.conditionallyBumpGenerationAnnotationOnVmi(vm, vmi) if err != nil { return nil, vmi, nil, err } // Scale up or down, if all expected creates and deletes were reported by the listener runStrategy, err := vm.RunStrategy() if err != nil { return vm, vmi, common.NewSyncError(fmt.Errorf(fetchingRunStrategyErrFmt, err), failedCreateReason), err } // FIXME(lyarwood): Move alongside netSynchronizer syncedVM, err := c.instancetypeController.Sync(vm, vmi) if err != nil { return vm, vmi, handleSynchronizerErr(err), nil } if !equality.Semantic.DeepEqual(vm.Spec, syncedVM.Spec) { return syncedVM, vmi, nil, nil } vm.ObjectMeta = syncedVM.ObjectMeta vm.Spec = syncedVM.Spec // eventually, would like the condition to be `== "true"`, but for now we need to support legacy behavior by default if vm.Annotations[virtv1.ImmediateDataVolumeCreation] != "false" { dataVolumesReady, err := c.handleDataVolumes(vm) if err != nil { return vm, vmi, common.NewSyncError(fmt.Errorf("Error encountered while creating DataVolumes: %v", err), failedCreateReason), nil } // not sure why we allow proceeding when halted but preserving legacy behavior if !dataVolumesReady && runStrategy != virtv1.RunStrategyHalted { log.Log.Object(vm).V(3).Info("Waiting on DataVolumes to be ready.") return vm, vmi, nil, nil } } vm, syncErr = c.syncRunStrategy(vm, vmi, runStrategy) if syncErr != nil { return vm, vmi, syncErr, nil } restartRequired := c.addRestartRequiredIfNeeded(startVMSpec, vm, vmi) // Must check satisfiedExpectations again here because a VMI can be created or // deleted in the startStop function which impacts how we process // hotplugged volumes and interfaces if !c.satisfiedExpectations(key) { return vm, vmi, nil, nil } vmCopy := vm.DeepCopy() if c.netSynchronizer != nil { syncedVM, err := c.netSynchronizer.Sync(vmCopy, vmi) if err != nil { return vm, vmi, handleSynchronizerErr(err),
nil } vmCopy.ObjectMeta = syncedVM.ObjectMeta vmCopy.Spec = syncedVM.Spec } if err := c.handleVolumeRequests(vmCopy, vmi); err != nil { return vm, vmi, common.NewSyncError(fmt.Errorf("Error encountered while handling volume hotplug requests: %v", err), hotplugVolumeErrorReason), nil } if err := memorydump.HandleRequest(c.clientset, vmCopy, vmi, c.pvcStore); err != nil { return vm, vmi, common.NewSyncError(fmt.Errorf("Error encountered while handling memory dump request: %v", err), memorydump.ErrorReason), nil } conditionManager := controller.NewVirtualMachineConditionManager() if c.clusterConfig.IsVMRolloutStrategyLiveUpdate() && !restartRequired && !conditionManager.HasCondition(vm, virtv1.VirtualMachineRestartRequired) { if err := c.handleCPUChangeRequest(vmCopy, vmi); err != nil { return vm, vmi, common.NewSyncError(fmt.Errorf("Error encountered while handling CPU change request: %v", err), hotplugCPUErrorReason), nil } if err := c.handleAffinityChangeRequest(vmCopy, vmi); err != nil { return vm, vmi, common.NewSyncError(fmt.Errorf("Error encountered while handling node affinity change request: %v", err), affinityChangeErrorReason), nil } if err := c.handleTolerationsChangeRequest(vmCopy, vmi); err != nil { return vm, vmi, common.NewSyncError(fmt.Errorf("Error encountered while handling tolerations change request: %v", err), tolerationsChangeErrorReason), nil } if err := c.handleMemoryHotplugRequest(vmCopy, vmi); err != nil { return vm, vmi, common.NewSyncError(fmt.Errorf("error encountered while handling memory hotplug requests: %v", err), hotplugMemoryErrorReason), nil } if err := c.handleVolumeUpdateRequest(vmCopy, vmi); err != nil { return vm, vmi, common.NewSyncError(fmt.Errorf("error encountered while handling volumes update requests: %v", err), volumesUpdateErrorReason), nil } } if !equality.Semantic.DeepEqual(vm.Spec, vmCopy.Spec) || !equality.Semantic.DeepEqual(vm.ObjectMeta, vmCopy.ObjectMeta) { updatedVm, err := c.clientset.VirtualMachine(vmCopy.Namespace).Update(context.Background(), vmCopy, metav1.UpdateOptions{}) if err != nil { return vm, vmi, common.NewSyncError(fmt.Errorf("Error encountered when trying to update vm according to add volume and/or memory dump requests: %v", err), failedUpdateErrorReason), nil } vm = updatedVm } else { vm = vmCopy } return vm, vmi, nil, nil } func handleSynchronizerErr(err error) common.SyncError { if err == nil { return nil } var errWithReason common.SyncError if errors.As(err, &errWithReason) { return errWithReason } return common.NewSyncError(fmt.Errorf("unsupported error: %v", err), "UnsupportedSyncError") } // resolveControllerRef returns the controller referenced by a ControllerRef, // or nil if the ControllerRef could not be resolved to a matching controller // of the correct Kind. func (c *Controller) resolveControllerRef(namespace string, controllerRef *metav1.OwnerReference) *virtv1.VirtualMachine { // We can't look up by UID, so look up by Name and then verify UID. // Don't even try to look up by Name if it's the wrong Kind. if controllerRef.Kind != virtv1.VirtualMachineGroupVersionKind.Kind { return nil } vm, exists, err := c.vmIndexer.GetByKey(controller.NamespacedKey(namespace, controllerRef.Name)) if err != nil { return nil } if !exists { return nil } if vm.(*virtv1.VirtualMachine).UID != controllerRef.UID { // The controller we found with this Name is not the same one that the // ControllerRef points to. 
return nil } return vm.(*virtv1.VirtualMachine) } func autoAttachInputDevice(vmi *virtv1.VirtualMachineInstance) { autoAttachInput := vmi.Spec.Domain.Devices.AutoattachInputDevice // Default to False if nil and return, otherwise return if input devices are already present if autoAttachInput == nil || !*autoAttachInput || len(vmi.Spec.Domain.Devices.Inputs) > 0 { return } // Only add the device with an alias here. Preferences for the bus and type might // be applied later and if not the VMI mutation webhook will apply defaults for both. vmi.Spec.Domain.Devices.Inputs = append( vmi.Spec.Domain.Devices.Inputs, virtv1.Input{ Name: "default-0", }, ) } func (c *Controller) handleMemoryHotplugRequest(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) error { if vmi == nil || vmi.DeletionTimestamp != nil { return nil } vmCopyWithInstancetype := vm.DeepCopy() if err := c.instancetypeController.ApplyToVM(vmCopyWithInstancetype); err != nil { return err } if vmCopyWithInstancetype.Spec.Template.Spec.Domain.Memory == nil || vmCopyWithInstancetype.Spec.Template.Spec.Domain.Memory.Guest == nil || vmi.Spec.Domain.Memory == nil || vmi.Spec.Domain.Memory.Guest == nil || vmi.Status.Memory == nil || vmi.Status.Memory.GuestCurrent == nil { return nil } conditionManager := controller.NewVirtualMachineInstanceConditionManager() if conditionManager.HasConditionWithStatus(vmi, virtv1.VirtualMachineInstanceMemoryChange, k8score.ConditionFalse) { setRestartRequired(vm, "memory updated in template spec. Memory-hotplug failed and is not available for this VM configuration") return nil } if vmCopyWithInstancetype.Spec.Template.Spec.Domain.Memory.Guest.Equal(*vmi.Spec.Domain.Memory.Guest) { return nil } if !vmi.IsMigratable() { setRestartRequired(vm, "memory updated in template spec. Memory-hotplug is only available for migratable VMs") return nil } if vmi.Spec.Domain.Memory.MaxGuest == nil { setRestartRequired(vm, "memory updated in template spec. Memory-hotplug is not available for this VM configuration") return nil } if conditionManager.HasConditionWithStatus(vmi, virtv1.VirtualMachineInstanceMemoryChange, k8score.ConditionTrue) { return fmt.Errorf("another memory hotplug is in progress") } if migrations.IsMigrating(vmi) { return fmt.Errorf("memory hotplug is not allowed while VMI is migrating") } if err := memory.ValidateLiveUpdateMemory(&vmCopyWithInstancetype.Spec.Template.Spec, vmi.Spec.Domain.Memory.MaxGuest); err != nil { setRestartRequired(vm, fmt.Sprintf("memory hotplug not supported, %s", err.Error())) return nil } if vmCopyWithInstancetype.Spec.Template.Spec.Domain.Memory.Guest != nil && vmi.Status.Memory.GuestAtBoot != nil && vmCopyWithInstancetype.Spec.Template.Spec.Domain.Memory.Guest.Cmp(*vmi.Status.Memory.GuestAtBoot) == -1 { setRestartRequired(vm, "memory updated in template spec to a value lower than what the VM started with") return nil } // If the following is true, MaxGuest was calculated, not manually specified (or the validation webhook would have rejected the change). // Since we're here, we can also assume MaxGuest was not changed in the VM spec since last boot. // Therefore, bumping Guest to a value higher than MaxGuest is fine, it just requires a reboot. 
if vmCopyWithInstancetype.Spec.Template.Spec.Domain.Memory.Guest != nil && vmi.Spec.Domain.Memory.MaxGuest != nil && vmCopyWithInstancetype.Spec.Template.Spec.Domain.Memory.Guest.Cmp(*vmi.Spec.Domain.Memory.MaxGuest) == 1 { setRestartRequired(vm, "memory updated in template spec to a value higher than what's available") return nil } memoryDelta := resource.NewQuantity(vmCopyWithInstancetype.Spec.Template.Spec.Domain.Memory.Guest.Value()-vmi.Status.Memory.GuestCurrent.Value(), resource.BinarySI) newMemoryReq := vmi.Spec.Domain.Resources.Requests.Memory().DeepCopy() newMemoryReq.Add(*memoryDelta) // checking if the new memory req are at least equal to the memory being requested in the handleMemoryHotplugRequest // this is necessary as weirdness can arise after hot-unplugs as not all memory is guaranteed to be released when doing hot-unplug. if newMemoryReq.Cmp(*vmCopyWithInstancetype.Spec.Template.Spec.Domain.Memory.Guest) == -1 { newMemoryReq = *vmCopyWithInstancetype.Spec.Template.Spec.Domain.Memory.Guest // adjusting memoryDelta too for the new limits computation (if required) memoryDelta = resource.NewQuantity(vmCopyWithInstancetype.Spec.Template.Spec.Domain.Memory.Guest.Value()-newMemoryReq.Value(), resource.BinarySI) } patchSet := patch.New( patch.WithTest("/spec/domain/memory/guest", vmi.Spec.Domain.Memory.Guest.String()), patch.WithReplace("/spec/domain/memory/guest", vmCopyWithInstancetype.Spec.Template.Spec.Domain.Memory.Guest.String()), patch.WithTest("/spec/domain/resources/requests/memory", vmi.Spec.Domain.Resources.Requests.Memory().String()), patch.WithReplace("/spec/domain/resources/requests/memory", newMemoryReq.String()), ) logMsg := fmt.Sprintf("hotplugging memory to %s, setting requests to %s", vmCopyWithInstancetype.Spec.Template.Spec.Domain.Memory.Guest.String(), newMemoryReq.String()) if !vmCopyWithInstancetype.Spec.Template.Spec.Domain.Resources.Limits.Memory().IsZero() { newMemoryLimit := vmi.Spec.Domain.Resources.Limits.Memory().DeepCopy() newMemoryLimit.Add(*memoryDelta) patchSet.AddOption( patch.WithTest("/spec/domain/resources/limits/memory", vmi.Spec.Domain.Resources.Limits.Memory().String()), patch.WithReplace("/spec/domain/resources/limits/memory", newMemoryLimit.String()), ) logMsg = fmt.Sprintf("%s, setting limits to %s", logMsg, newMemoryLimit.String()) } patchBytes, err := patchSet.GeneratePayload() if err != nil { return err } _, err = c.clientset.VirtualMachineInstance(vmi.Namespace).Patch(context.Background(), vmi.Name, types.JSONPatchType, patchBytes, v1.PatchOptions{}) if err != nil { return err } log.Log.Object(vmi).Infof(logMsg) return nil }
/* * This file is part of the KubeVirt project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * Copyright 2017, 2018 Red Hat, Inc. * */ package vmi import ( "context" "errors" "fmt" "maps" "sort" "strings" "time" k8sv1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" k8serrors "k8s.io/apimachinery/pkg/api/errors" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" k8sfield "k8s.io/apimachinery/pkg/util/validation/field" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" "k8s.io/client-go/util/workqueue" "k8s.io/utils/trace" virtv1 "kubevirt.io/api/core/v1" "kubevirt.io/client-go/kubecli" "kubevirt.io/client-go/log" cdiv1 "kubevirt.io/containerized-data-importer-api/pkg/apis/core/v1beta1" "kubevirt.io/kubevirt/pkg/apimachinery/patch" "kubevirt.io/kubevirt/pkg/controller" backendstorage "kubevirt.io/kubevirt/pkg/storage/backend-storage" storagetypes "kubevirt.io/kubevirt/pkg/storage/types" "kubevirt.io/kubevirt/pkg/util" "kubevirt.io/kubevirt/pkg/util/hardware" "kubevirt.io/kubevirt/pkg/util/migrations" traceUtils "kubevirt.io/kubevirt/pkg/util/trace" virtconfig "kubevirt.io/kubevirt/pkg/virt-config" "kubevirt.io/kubevirt/pkg/virt-controller/services" "kubevirt.io/kubevirt/pkg/virt-controller/watch/common" "kubevirt.io/kubevirt/pkg/virt-controller/watch/descheduler" "kubevirt.io/kubevirt/pkg/virt-controller/watch/topology" "kubevirt.io/kubevirt/pkg/virt-controller/watch/vsock" ) const ( deleteNotifFailed = "Failed to process delete notification" tombstoneGetObjectErrFmt = "couldn't get object from tombstone %+v" ) func NewController(templateService services.TemplateService, vmiInformer cache.SharedIndexInformer, vmInformer cache.SharedIndexInformer, podInformer cache.SharedIndexInformer, pvcInformer cache.SharedIndexInformer, migrationInformer cache.SharedIndexInformer, storageClassInformer cache.SharedIndexInformer, recorder record.EventRecorder, clientset kubecli.KubevirtClient, dataVolumeInformer cache.SharedIndexInformer, storageProfileInformer cache.SharedIndexInformer, cdiInformer cache.SharedIndexInformer, cdiConfigInformer cache.SharedIndexInformer, clusterConfig *virtconfig.ClusterConfig, topologyHinter topology.Hinter, netAnnotationsGenerator annotationsGenerator, netStatusUpdater statusUpdater, netSpecValidator specValidator, ) (*Controller, error) { c := &Controller{ templateService: templateService, Queue: workqueue.NewTypedRateLimitingQueueWithConfig[string]( workqueue.DefaultTypedControllerRateLimiter[string](), workqueue.TypedRateLimitingQueueConfig[string]{Name: "virt-controller-vmi"}, ), vmiIndexer: vmiInformer.GetIndexer(), vmStore: vmInformer.GetStore(), podIndexer: podInformer.GetIndexer(), pvcIndexer: pvcInformer.GetIndexer(), migrationIndexer: migrationInformer.GetIndexer(), recorder: recorder, clientset: clientset, podExpectations: controller.NewUIDTrackingControllerExpectations(controller.NewControllerExpectations()), vmiExpectations: 
controller.NewUIDTrackingControllerExpectations(controller.NewControllerExpectations()), pvcExpectations: controller.NewUIDTrackingControllerExpectations(controller.NewControllerExpectations()), dataVolumeIndexer: dataVolumeInformer.GetIndexer(), cdiStore: cdiInformer.GetStore(), cdiConfigStore: cdiConfigInformer.GetStore(), clusterConfig: clusterConfig, topologyHinter: topologyHinter, cidsMap: vsock.NewCIDsMap(), backendStorage: backendstorage.NewBackendStorage(clientset, clusterConfig, storageClassInformer.GetStore(), storageProfileInformer.GetStore(), pvcInformer.GetIndexer()), netAnnotationsGenerator: netAnnotationsGenerator, updateNetworkStatus: netStatusUpdater, validateNetworkSpec: netSpecValidator, } c.hasSynced = func() bool { return vmInformer.HasSynced() && vmiInformer.HasSynced() && podInformer.HasSynced() && dataVolumeInformer.HasSynced() && cdiConfigInformer.HasSynced() && cdiInformer.HasSynced() && pvcInformer.HasSynced() && storageClassInformer.HasSynced() && storageProfileInformer.HasSynced() } _, err := vmiInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: c.addVirtualMachineInstance, DeleteFunc: c.deleteVirtualMachineInstance, UpdateFunc: c.updateVirtualMachineInstance, }) if err != nil { return nil, err } _, err = podInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: c.addPod, DeleteFunc: c.deletePod, UpdateFunc: c.updatePod, }) if err != nil { return nil, err } _, err = dataVolumeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: c.addDataVolume, DeleteFunc: c.deleteDataVolume, UpdateFunc: c.updateDataVolume, }) if err != nil { return nil, err } _, err = pvcInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: c.addPVC, UpdateFunc: c.updatePVC, }) if err != nil { return nil, err } return c, nil } type informalSyncError struct { err error reason string } func (i informalSyncError) Error() string { return i.err.Error() } func (i informalSyncError) Reason() string { return i.reason } func (i informalSyncError) RequiresRequeue() bool { return false } type annotationsGenerator interface { GenerateFromActivePod(vmi *virtv1.VirtualMachineInstance, pod *k8sv1.Pod) map[string]string } type statusUpdater func(vmi *virtv1.VirtualMachineInstance, pod *k8sv1.Pod) error type specValidator func(*k8sfield.Path, *virtv1.VirtualMachineInstanceSpec, *virtconfig.ClusterConfig) []v1.StatusCause type Controller struct { templateService services.TemplateService clientset kubecli.KubevirtClient Queue workqueue.TypedRateLimitingInterface[string] vmiIndexer cache.Indexer vmStore cache.Store podIndexer cache.Indexer pvcIndexer cache.Indexer migrationIndexer cache.Indexer topologyHinter topology.Hinter recorder record.EventRecorder podExpectations *controller.UIDTrackingControllerExpectations vmiExpectations *controller.UIDTrackingControllerExpectations pvcExpectations *controller.UIDTrackingControllerExpectations dataVolumeIndexer cache.Indexer cdiStore cache.Store cdiConfigStore cache.Store clusterConfig *virtconfig.ClusterConfig cidsMap vsock.Allocator backendStorage *backendstorage.BackendStorage hasSynced func() bool netAnnotationsGenerator annotationsGenerator updateNetworkStatus statusUpdater validateNetworkSpec specValidator } func (c *Controller) Run(threadiness int, stopCh <-chan struct{}) { defer controller.HandlePanic() defer c.Queue.ShutDown() log.Log.Info("Starting vmi controller.") // Wait for cache sync before we start the pod controller cache.WaitForCacheSync(stopCh, c.hasSynced) // Sync the CIDs from exist VMIs var 
vmis []*virtv1.VirtualMachineInstance for _, obj := range c.vmiIndexer.List() { vmi := obj.(*virtv1.VirtualMachineInstance) vmis = append(vmis, vmi) } c.cidsMap.Sync(vmis) // Start the actual work for i := 0; i < threadiness; i++ { go wait.Until(c.runWorker, time.Second, stopCh) } <-stopCh log.Log.Info("Stopping vmi controller.") } func (c *Controller) runWorker() { for c.Execute() { } } var virtControllerVMIWorkQueueTracer = &traceUtils.Tracer{Threshold: time.Second} func (c *Controller) Execute() bool { key, quit := c.Queue.Get() if quit { return false } virtControllerVMIWorkQueueTracer.StartTrace(key, "virt-controller VMI workqueue", trace.Field{Key: "Workqueue Key", Value: key}) defer virtControllerVMIWorkQueueTracer.StopTrace(key) defer c.Queue.Done(key) err := c.execute(key) if err != nil { log.Log.Reason(err).Infof("reenqueuing VirtualMachineInstance %v", key) c.Queue.AddRateLimited(key) } else { log.Log.V(4).Infof("processed VirtualMachineInstance %v", key) c.Queue.Forget(key) } return true } func (c *Controller) execute(key string) error { // Fetch the latest VMI state from cache obj, exists, err := c.vmiIndexer.GetByKey(key) if err != nil { return err } // Once all finalizers are removed the vmi gets deleted and we can clean all expectations if !exists { c.podExpectations.DeleteExpectations(key) c.vmiExpectations.DeleteExpectations(key) c.cidsMap.Remove(key) return nil } vmi := obj.(*virtv1.VirtualMachineInstance) logger := log.Log.Object(vmi) // this must be the first step in execution. Writing the object // when api version changes ensures our api stored version is updated. if !controller.ObservedLatestApiVersionAnnotation(vmi) { vmi := vmi.DeepCopy() controller.SetLatestApiVersionAnnotation(vmi) key := controller.VirtualMachineInstanceKey(vmi) c.vmiExpectations.SetExpectations(key, 1, 0) _, err = c.clientset.VirtualMachineInstance(vmi.ObjectMeta.Namespace).Update(context.Background(), vmi, v1.UpdateOptions{}) if err != nil { c.vmiExpectations.LowerExpectations(key, 1, 0) return err } return nil } // If needsSync is true (expectations fulfilled) we can make safe assumptions about whether virt-handler or virt-controller owns the pod needsSync := c.podExpectations.SatisfiedExpectations(key) && c.vmiExpectations.SatisfiedExpectations(key) && c.pvcExpectations.SatisfiedExpectations(key) if !needsSync { return nil } // Only consider pods which belong to this vmi // excluding unfinalized migration targets from this list. pod, err := controller.CurrentVMIPod(vmi, c.podIndexer) if err != nil { logger.Reason(err).Error("Failed to fetch pods for namespace from cache.") return err } // Get all dataVolumes associated with this vmi dataVolumes, err := storagetypes.ListDataVolumesFromVolumes(vmi.Namespace, vmi.Spec.Volumes, c.dataVolumeIndexer, c.pvcIndexer) if err != nil { logger.Reason(err).Error("Failed to fetch dataVolumes for namespace from cache.") return err } syncErr, pod := c.sync(vmi, pod, dataVolumes) err = c.updateStatus(vmi, pod, dataVolumes, syncErr) if err != nil { return err } if syncErr != nil && syncErr.RequiresRequeue() { return syncErr } return nil } // These "dynamic" labels are Pod labels which may diverge from the VMI over time and which we want to keep in sync.
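// Currently the synced labels are virtv1.NodeNameLabel and virtv1.OutdatedLauncherImageLabel; the pod is updated with a JSON patch (add, or test-and-replace when labels already exist).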
func (c *Controller) syncDynamicLabelsToPod(vmi *virtv1.VirtualMachineInstance, pod *k8sv1.Pod) error { patchSet := patch.New() dynamicLabels := []string{ virtv1.NodeNameLabel, virtv1.OutdatedLauncherImageLabel, } podMeta := pod.ObjectMeta.DeepCopy() if podMeta.Labels == nil { podMeta.Labels = map[string]string{} } changed := false for _, key := range dynamicLabels { vmiVal, vmiLabelExists := vmi.Labels[key] podVal, podLabelExists := podMeta.Labels[key] if vmiLabelExists == podLabelExists && vmiVal == podVal { continue } changed = true if !vmiLabelExists { delete(podMeta.Labels, key) } else { podMeta.Labels[key] = vmiVal } } if !changed { return nil } if pod.ObjectMeta.Labels == nil { patchSet.AddOption(patch.WithAdd("/metadata/labels", podMeta.Labels)) } else { patchSet.AddOption( patch.WithTest("/metadata/labels", pod.ObjectMeta.Labels), patch.WithReplace("/metadata/labels", podMeta.Labels), ) } if patchSet.IsEmpty() { return nil } patchBytes, err := patchSet.GeneratePayload() if err != nil { return err } if _, err := c.clientset.CoreV1().Pods(pod.Namespace).Patch(context.Background(), pod.Name, types.JSONPatchType, patchBytes, v1.PatchOptions{}); err != nil { log.Log.Object(pod).Errorf("failed to sync dynamic pod labels during sync: %v", err) return err } return nil } func (c *Controller) syncPodAnnotations(pod *k8sv1.Pod, newAnnotations map[string]string) (*k8sv1.Pod, error) { patchSet := patch.New() for key, newValue := range newAnnotations { if podAnnotationValue, keyExist := pod.Annotations[key]; !keyExist || podAnnotationValue != newValue { patchSet.AddOption( patch.WithAdd(fmt.Sprintf("/metadata/annotations/%s", patch.EscapeJSONPointer(key)), newValue), ) } } if patchSet.IsEmpty() { return pod, nil } patchBytes, err := patchSet.GeneratePayload() if err != nil { return pod, fmt.Errorf("failed to generate patch payload: %w", err) } patchedPod, err := c.clientset.CoreV1().Pods(pod.Namespace).Patch(context.Background(), pod.Name, types.JSONPatchType, patchBytes, v1.PatchOptions{}) if err != nil { log.Log.Object(pod).Errorf("failed to sync pod annotations during sync: %v", err) return nil, err } return patchedPod, nil } func (c *Controller) setLauncherContainerInfo(vmi *virtv1.VirtualMachineInstance, curPodImage string) *virtv1.VirtualMachineInstance { if curPodImage != "" && curPodImage != c.templateService.GetLauncherImage() { if vmi.Labels == nil { vmi.Labels = map[string]string{} } vmi.Labels[virtv1.OutdatedLauncherImageLabel] = "" } else { if vmi.Labels != nil { delete(vmi.Labels, virtv1.OutdatedLauncherImageLabel) } } vmi.Status.LauncherContainerImageVersion = curPodImage return vmi } func (c *Controller) hasOwnerVM(vmi *virtv1.VirtualMachineInstance) bool { controllerRef := v1.GetControllerOf(vmi) if controllerRef == nil || controllerRef.Kind != virtv1.VirtualMachineGroupVersionKind.Kind { return false } obj, exists, _ := c.vmStore.GetByKey(controller.NamespacedKey(vmi.Namespace, controllerRef.Name)) if !exists { return false } ownerVM := obj.(*virtv1.VirtualMachine) return controllerRef.UID == ownerVM.UID } func (c *Controller) updateStatus(vmi *virtv1.VirtualMachineInstance, pod *k8sv1.Pod, dataVolumes []*cdiv1.DataVolume, syncErr common.SyncError) error { key := controller.VirtualMachineInstanceKey(vmi) defer virtControllerVMIWorkQueueTracer.StepTrace(key, "updateStatus", trace.Field{Key: "VMI Name", Value: vmi.Name}) hasFailedDataVolume := storagetypes.HasFailedDataVolumes(dataVolumes) hasWffcDataVolume := false // there is no reason to check for waitForFirstConsumer if
there are failed DV's if !hasFailedDataVolume { hasWffcDataVolume = storagetypes.HasWFFCDataVolumes(dataVolumes) } conditionManager := controller.NewVirtualMachineInstanceConditionManager() podConditionManager := controller.NewPodConditionManager() vmiCopy := vmi.DeepCopy() vmiPodExists := controller.PodExists(pod) && !isTempPod(pod) tempPodExists := controller.PodExists(pod) && isTempPod(pod) vmiCopy, err := c.setActivePods(vmiCopy) if err != nil { return fmt.Errorf("Error detecting vmi pods: %v", err) } c.syncReadyConditionFromPod(vmiCopy, pod) if vmiPodExists { var foundImage string for _, container := range pod.Spec.Containers { if container.Name == "compute" { foundImage = container.Image break } } vmiCopy = c.setLauncherContainerInfo(vmiCopy, foundImage) if err := c.syncPausedConditionToPod(vmiCopy, pod); err != nil { return fmt.Errorf("error syncing paused condition to pod: %v", err) } if err := c.syncDynamicLabelsToPod(vmiCopy, pod); err != nil { return fmt.Errorf("error syncing labels to pod: %v", err) } } c.aggregateDataVolumesConditions(vmiCopy, dataVolumes) if pvc := backendstorage.PVCForVMI(c.pvcIndexer, vmi); pvc != nil { c.backendStorage.UpdateVolumeStatus(vmiCopy, pvc) } switch { case vmi.IsUnprocessed(): if vmiPodExists { vmiCopy.Status.Phase = virtv1.Scheduling } else if vmi.DeletionTimestamp != nil || hasFailedDataVolume { vmiCopy.Status.Phase = virtv1.Failed } else { vmiCopy.Status.Phase = virtv1.Pending if vmi.Status.TopologyHints == nil { if topologyHints, tscRequirement, err := c.topologyHinter.TopologyHintsForVMI(vmi); err != nil && tscRequirement == topology.RequiredForBoot { c.recorder.Eventf(vmi, k8sv1.EventTypeWarning, controller.FailedGatherhingClusterTopologyHints, err.Error()) return common.NewSyncError(err, controller.FailedGatherhingClusterTopologyHints) } else if topologyHints != nil { vmiCopy.Status.TopologyHints = topologyHints } } if hasWffcDataVolume { condition := virtv1.VirtualMachineInstanceCondition{ Type: virtv1.VirtualMachineInstanceProvisioning, Status: k8sv1.ConditionTrue, } if !conditionManager.HasCondition(vmiCopy, condition.Type) { vmiCopy.Status.Conditions = append(vmiCopy.Status.Conditions, condition) } if tempPodExists { // Add PodScheduled False condition to the VM if podConditionManager.HasConditionWithStatus(pod, k8sv1.PodScheduled, k8sv1.ConditionFalse) { conditionManager.AddPodCondition(vmiCopy, podConditionManager.GetCondition(pod, k8sv1.PodScheduled)) } else if conditionManager.HasCondition(vmiCopy, virtv1.VirtualMachineInstanceConditionType(k8sv1.PodScheduled)) { // Remove PodScheduling condition from the VM conditionManager.RemoveCondition(vmiCopy, virtv1.VirtualMachineInstanceConditionType(k8sv1.PodScheduled)) } if controller.IsPodFailedOrGoingDown(pod) { vmiCopy.Status.Phase = virtv1.Failed } } } if syncErr != nil && (syncErr.Reason() == controller.FailedPvcNotFoundReason) { condition := virtv1.VirtualMachineInstanceCondition{ Type: virtv1.VirtualMachineInstanceConditionType(k8sv1.PodScheduled), Reason: k8sv1.PodReasonUnschedulable, Message: syncErr.Error(), Status: k8sv1.ConditionFalse, } cm := controller.NewVirtualMachineInstanceConditionManager() if cm.HasCondition(vmiCopy, condition.Type) { cm.RemoveCondition(vmiCopy, condition.Type) } vmiCopy.Status.Conditions = append(vmiCopy.Status.Conditions, condition) } } case vmi.IsScheduling(): // Remove InstanceProvisioning condition from the VM if conditionManager.HasCondition(vmiCopy, virtv1.VirtualMachineInstanceProvisioning) { conditionManager.RemoveCondition(vmiCopy, 
virtv1.VirtualMachineInstanceProvisioning) } if vmiPodExists { // ensure that the QOS class on the VMI matches to Pods QOS class if pod.Status.QOSClass == "" { vmiCopy.Status.QOSClass = nil } else { vmiCopy.Status.QOSClass = &pod.Status.QOSClass } // Add PodScheduled False condition to the VM if podConditionManager.HasConditionWithStatus(pod, k8sv1.PodScheduled, k8sv1.ConditionFalse) { conditionManager.AddPodCondition(vmiCopy, podConditionManager.GetCondition(pod, k8sv1.PodScheduled)) } else if conditionManager.HasCondition(vmiCopy, virtv1.VirtualMachineInstanceConditionType(k8sv1.PodScheduled)) { // Remove PodScheduling condition from the VM conditionManager.RemoveCondition(vmiCopy, virtv1.VirtualMachineInstanceConditionType(k8sv1.PodScheduled)) } if imageErr := checkForContainerImageError(pod); imageErr != nil { // only overwrite syncErr if imageErr != nil syncErr = imageErr } if controller.IsPodReady(pod) && vmi.DeletionTimestamp == nil { // fail vmi creation if CPU pinning has been requested but the Pod QOS is not Guaranteed podQosClass := pod.Status.QOSClass if podQosClass != k8sv1.PodQOSGuaranteed && vmi.IsCPUDedicated() { c.recorder.Eventf(vmi, k8sv1.EventTypeWarning, controller.FailedGuaranteePodResourcesReason, "failed to guarantee pod resources") syncErr = common.NewSyncError(fmt.Errorf("failed to guarantee pod resources"), controller.FailedGuaranteePodResourcesReason) break } // Initialize the volume status field with information // about the PVCs that the VMI is consuming. This prevents // virt-handler from needing to make API calls to GET the pvc // during reconcile if err := c.updateVolumeStatus(vmiCopy, pod); err != nil { return err } if err := c.updateNetworkStatus(vmiCopy, pod); err != nil { log.Log.Errorf("failed to update the interface status: %v", err) } // vmi is still owned by the controller but pod is already ready, // so let's hand over the vmi too vmiCopy.Status.Phase = virtv1.Scheduled if vmiCopy.Labels == nil { vmiCopy.Labels = map[string]string{} } vmiCopy.ObjectMeta.Labels[virtv1.NodeNameLabel] = pod.Spec.NodeName vmiCopy.Status.NodeName = pod.Spec.NodeName // Set the VMI migration transport now before the VMI can be migrated // This status field is needed to support the migration of legacy virt-launchers // to newer ones. In an absence of this field on the vmi, the target launcher // will set up a TCP proxy, as expected by a legacy virt-launcher. if shouldSetMigrationTransport(pod) { vmiCopy.Status.MigrationTransport = virtv1.MigrationTransportUnix } // Allocate the CID if VSOCK is enabled. 
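// The allocated CID is tracked in c.cidsMap, which is rebuilt from the informer cache when the
// controller starts and released in execute() once the VMI is gone, so a VMI keeps a stable
// context ID for as long as it exists.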
if util.IsAutoAttachVSOCK(vmiCopy) { if err := c.cidsMap.Allocate(vmiCopy); err != nil { return err } } } else if controller.IsPodDownOrGoingDown(pod) { vmiCopy.Status.Phase = virtv1.Failed } } else { // someone other than the controller deleted the pod unexpectedly vmiCopy.Status.Phase = virtv1.Failed } case vmi.IsFinal(): allDeleted, err := c.allPodsDeleted(vmi) if err != nil { return err } if allDeleted { log.Log.V(3).Object(vmi).Infof("All pods have been deleted, removing finalizer") controller.RemoveFinalizer(vmiCopy, virtv1.VirtualMachineInstanceFinalizer) if vmiCopy.Labels != nil { delete(vmiCopy.Labels, virtv1.OutdatedLauncherImageLabel) } vmiCopy.Status.LauncherContainerImageVersion = "" } if !c.hasOwnerVM(vmi) && len(vmiCopy.Finalizers) > 0 { // if there's no owner VM around still, then remove the VM controller's finalizer if it exists controller.RemoveFinalizer(vmiCopy, virtv1.VirtualMachineControllerFinalizer) } case vmi.IsRunning(): if !vmiPodExists { vmiCopy.Status.Phase = virtv1.Failed break } if err := c.updateVolumeStatus(vmiCopy, pod); err != nil { return err } if err := c.updateNetworkStatus(vmiCopy, pod); err != nil { log.Log.Errorf("failed to update the interface status: %v", err) } if c.requireCPUHotplug(vmiCopy) { c.syncHotplugCondition(vmiCopy, virtv1.VirtualMachineInstanceVCPUChange) } if c.requireMemoryHotplug(vmiCopy) { c.syncMemoryHotplug(vmiCopy) } if c.requireVolumesUpdate(vmiCopy) { c.syncVolumesUpdate(vmiCopy) } case vmi.IsScheduled(): if !vmiPodExists { vmiCopy.Status.Phase = virtv1.Failed } default: return fmt.Errorf("unknown vmi phase %v", vmi.Status.Phase) } // VMI is owned by virt-handler, so patch instead of update if vmi.IsRunning() || vmi.IsScheduled() { patchSet := prepareVMIPatch(vmi, vmiCopy) if patchSet.IsEmpty() { return nil } patchBytes, err := patchSet.GeneratePayload() if err != nil { return fmt.Errorf("error preparing VMI patch: %v", err) } _, err = c.clientset.VirtualMachineInstance(vmi.Namespace).Patch(context.Background(), vmi.Name, types.JSONPatchType, patchBytes, v1.PatchOptions{}) // We could not retry if the "test" fails but we have no sane way to detect that right now: https://github.com/kubernetes/kubernetes/issues/68202 for details // So just retry like with any other errors if err != nil { return fmt.Errorf("patching of vmi conditions and activePods failed: %v", err) } return nil } reason := "" if syncErr != nil { reason = syncErr.Reason() } conditionManager.CheckFailure(vmiCopy, syncErr, reason) controller.SetVMIPhaseTransitionTimestamp(vmi, vmiCopy) // If we detect a change on the vmi we update the vmi vmiChanged := !equality.Semantic.DeepEqual(vmi.Status, vmiCopy.Status) || !equality.Semantic.DeepEqual(vmi.Finalizers, vmiCopy.Finalizers) || !equality.Semantic.DeepEqual(vmi.Annotations, vmiCopy.Annotations) || !equality.Semantic.DeepEqual(vmi.Labels, vmiCopy.Labels) if vmiChanged { key := controller.VirtualMachineInstanceKey(vmi) c.vmiExpectations.SetExpectations(key, 1, 0) _, err := c.clientset.VirtualMachineInstance(vmi.Namespace).Update(context.Background(), vmiCopy, v1.UpdateOptions{}) if err != nil { c.vmiExpectations.LowerExpectations(key, 1, 0) return err } } return nil } func preparePodPatch(oldPod, newPod *k8sv1.Pod) *patch.PatchSet { podConditions := controller.NewPodConditionManager() if podConditions.ConditionsEqual(oldPod, newPod) { return patch.New() } return patch.New( patch.WithTest("/status/conditions", oldPod.Status.Conditions), patch.WithReplace("/status/conditions", newPod.Status.Conditions), ) } func 
prepareVMIPatch(oldVMI, newVMI *virtv1.VirtualMachineInstance) *patch.PatchSet { patchSet := patch.New() if !equality.Semantic.DeepEqual(newVMI.Status.VolumeStatus, oldVMI.Status.VolumeStatus) { // VolumeStatus changed which means either removed or added volumes. if oldVMI.Status.VolumeStatus == nil { patchSet.AddOption(patch.WithAdd("/status/volumeStatus", newVMI.Status.VolumeStatus)) } else { patchSet.AddOption( patch.WithTest("/status/volumeStatus", oldVMI.Status.VolumeStatus), patch.WithReplace("/status/volumeStatus", newVMI.Status.VolumeStatus), ) } log.Log.V(3).Object(oldVMI).Infof("Patching Volume Status") } // We don't own the object anymore, so patch instead of update vmiConditions := controller.NewVirtualMachineInstanceConditionManager() if !vmiConditions.ConditionsEqual(oldVMI, newVMI) { patchSet.AddOption( patch.WithTest("/status/conditions", oldVMI.Status.Conditions), patch.WithReplace("/status/conditions", newVMI.Status.Conditions), ) log.Log.V(3).Object(oldVMI).Infof("Patching VMI conditions") } if !equality.Semantic.DeepEqual(newVMI.Status.ActivePods, oldVMI.Status.ActivePods) { patchSet.AddOption( patch.WithTest("/status/activePods", oldVMI.Status.ActivePods), patch.WithReplace("/status/activePods", newVMI.Status.ActivePods), ) log.Log.V(3).Object(oldVMI).Infof("Patching VMI activePods") } if newVMI.Status.Phase != oldVMI.Status.Phase { patchSet.AddOption( patch.WithTest("/status/phase", oldVMI.Status.Phase), patch.WithReplace("/status/phase", newVMI.Status.Phase), ) log.Log.V(3).Object(oldVMI).Infof("Patching VMI phase") } if newVMI.Status.LauncherContainerImageVersion != oldVMI.Status.LauncherContainerImageVersion { if oldVMI.Status.LauncherContainerImageVersion == "" { patchSet.AddOption(patch.WithAdd("/status/launcherContainerImageVersion", newVMI.Status.LauncherContainerImageVersion)) } else { patchSet.AddOption( patch.WithTest("/status/launcherContainerImageVersion", oldVMI.Status.LauncherContainerImageVersion), patch.WithReplace("/status/launcherContainerImageVersion", newVMI.Status.LauncherContainerImageVersion), ) } } if !equality.Semantic.DeepEqual(oldVMI.Labels, newVMI.Labels) { if oldVMI.Labels == nil { patchSet.AddOption(patch.WithAdd("/metadata/labels", newVMI.Labels)) } else { patchSet.AddOption( patch.WithTest("/metadata/labels", oldVMI.Labels), patch.WithReplace("/metadata/labels", newVMI.Labels), ) } } if !equality.Semantic.DeepEqual(newVMI.Status.Interfaces, oldVMI.Status.Interfaces) { patchSet.AddOption( patch.WithTest("/status/interfaces", oldVMI.Status.Interfaces), patch.WithAdd("/status/interfaces", newVMI.Status.Interfaces), ) log.Log.V(3).Object(oldVMI).Infof("Patching Interface Status") } return patchSet } func (c *Controller) syncReadyConditionFromPod(vmi *virtv1.VirtualMachineInstance, pod *k8sv1.Pod) { vmiConditions := controller.NewVirtualMachineInstanceConditionManager() podConditions := controller.NewPodConditionManager() now := v1.Now() if pod == nil || isTempPod(pod) { vmiConditions.UpdateCondition(vmi, &virtv1.VirtualMachineInstanceCondition{ Type: virtv1.VirtualMachineInstanceReady, Status: k8sv1.ConditionFalse, Reason: virtv1.PodNotExistsReason, Message: "virt-launcher pod has not yet been scheduled", LastProbeTime: now, LastTransitionTime: now, }) } else if controller.IsPodDownOrGoingDown(pod) { vmiConditions.UpdateCondition(vmi, &virtv1.VirtualMachineInstanceCondition{ Type: virtv1.VirtualMachineInstanceReady, Status: k8sv1.ConditionFalse, Reason: virtv1.PodTerminatingReason, Message: "virt-launcher pod is terminating", LastProbeTime: 
now, LastTransitionTime: now, }) } else if !vmi.IsRunning() { vmiConditions.UpdateCondition(vmi, &virtv1.VirtualMachineInstanceCondition{ Type: virtv1.VirtualMachineInstanceReady, Status: k8sv1.ConditionFalse, Reason: virtv1.GuestNotRunningReason, Message: "Guest VM is not reported as running", LastProbeTime: now, LastTransitionTime: now, }) } else if podReadyCond := podConditions.GetCondition(pod, k8sv1.PodReady); podReadyCond != nil { vmiConditions.UpdateCondition(vmi, &virtv1.VirtualMachineInstanceCondition{ Type: virtv1.VirtualMachineInstanceReady, Status: podReadyCond.Status, Reason: podReadyCond.Reason, Message: podReadyCond.Message, LastProbeTime: podReadyCond.LastProbeTime, LastTransitionTime: podReadyCond.LastTransitionTime, }) } else { vmiConditions.UpdateCondition(vmi, &virtv1.VirtualMachineInstanceCondition{ Type: virtv1.VirtualMachineInstanceReady, Status: k8sv1.ConditionFalse, Reason: virtv1.PodConditionMissingReason, Message: "virt-launcher pod is missing the Ready condition", LastProbeTime: now, LastTransitionTime: now, }) } } func (c *Controller) syncPausedConditionToPod(vmi *virtv1.VirtualMachineInstance, pod *k8sv1.Pod) error { vmiConditions := controller.NewVirtualMachineInstanceConditionManager() podConditions := controller.NewPodConditionManager() podCopy := pod.DeepCopy() now := v1.Now() if vmiConditions.HasConditionWithStatus(vmi, virtv1.VirtualMachineInstancePaused, k8sv1.ConditionTrue) { if podConditions.HasConditionWithStatus(pod, virtv1.VirtualMachineUnpaused, k8sv1.ConditionTrue) { podConditions.UpdateCondition(podCopy, &k8sv1.PodCondition{ Type: virtv1.VirtualMachineUnpaused, Status: k8sv1.ConditionFalse, Reason: "Paused", Message: "the virtual machine is paused", LastProbeTime: now, LastTransitionTime: now, }) } } else { if !podConditions.HasConditionWithStatus(pod, virtv1.VirtualMachineUnpaused, k8sv1.ConditionTrue) { podConditions.UpdateCondition(podCopy, &k8sv1.PodCondition{ Type: virtv1.VirtualMachineUnpaused, Status: k8sv1.ConditionTrue, Reason: "NotPaused", Message: "the virtual machine is not paused", LastProbeTime: now, LastTransitionTime: now, }) } } // Patch pod patchSet := preparePodPatch(pod, podCopy) if patchSet.IsEmpty() { return nil } patchBytes, err := patchSet.GeneratePayload() if err != nil { return fmt.Errorf("error preparing pod patch: %v", err) } log.Log.V(3).Object(pod).Infof("Patching pod conditions") _, err = c.clientset.CoreV1().Pods(pod.Namespace).Patch(context.TODO(), pod.Name, types.JSONPatchType, patchBytes, v1.PatchOptions{}, "status") // We could not retry if the "test" fails but we have no sane way to detect that right now: // https://github.com/kubernetes/kubernetes/issues/68202 for details // So just retry like with any other errors if err != nil { log.Log.Object(pod).Errorf("Patching of pod conditions failed: %v", err) return fmt.Errorf("patching of pod conditions failed: %v", err) } return nil } // checkForContainerImageError checks if an error has occured while handling the image of any of the pod's containers // (including init containers), and returns a syncErr with the details of the error, or nil otherwise. func checkForContainerImageError(pod *k8sv1.Pod) common.SyncError { containerStatuses := append(append([]k8sv1.ContainerStatus{}, pod.Status.InitContainerStatuses...), pod.Status.ContainerStatuses...) 
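// Init container statuses come first in the combined slice, so an image-pull failure in an
// init container is reported before one in the regular containers; both ErrImagePull and
// ImagePullBackOff waiting reasons are surfaced as the same kind of sync error.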
for _, containerStatus := range containerStatuses { if containerStatus.State.Waiting == nil { continue } reason := containerStatus.State.Waiting.Reason if reason == controller.ErrImagePullReason || reason == controller.ImagePullBackOffReason { return common.NewSyncError(fmt.Errorf(containerStatus.State.Waiting.Message), reason) } } return nil } func (c *Controller) sync(vmi *virtv1.VirtualMachineInstance, pod *k8sv1.Pod, dataVolumes []*cdiv1.DataVolume) (common.SyncError, *k8sv1.Pod) { key := controller.VirtualMachineInstanceKey(vmi) defer virtControllerVMIWorkQueueTracer.StepTrace(key, "sync", trace.Field{Key: "VMI Name", Value: vmi.Name}) if vmi.DeletionTimestamp != nil { err := c.deleteAllMatchingPods(vmi) if err != nil { return common.NewSyncError(fmt.Errorf("failed to delete pod: %v", err), controller.FailedDeletePodReason), pod } return nil, pod } if vmi.IsFinal() { err := c.deleteAllAttachmentPods(vmi) if err != nil { return common.NewSyncError(fmt.Errorf("failed to delete attachment pods: %v", err), controller.FailedHotplugSyncReason), pod } return nil, pod } if err := c.deleteOrphanedAttachmentPods(vmi); err != nil { log.Log.Reason(err).Errorf("failed to delete orphaned attachment pods %s: %v", controller.VirtualMachineInstanceKey(vmi), err) // do not return; just log the error } dataVolumesReady, isWaitForFirstConsumer, syncErr := c.handleSyncDataVolumes(vmi, dataVolumes) if syncErr != nil { return syncErr, pod } if !controller.PodExists(pod) { // If we came ever that far to detect that we already created a pod, we don't create it again if !vmi.IsUnprocessed() { return nil, pod } // let's check if we already have topology hints or if we are still waiting for them if vmi.Status.TopologyHints == nil && c.topologyHinter.IsTscFrequencyRequired(vmi) { log.Log.V(3).Object(vmi).Infof("Delaying pod creation until topology hints are set") return nil, pod } // ensure that all dataVolumes associated with the VMI are ready before creating the pod if !dataVolumesReady { log.Log.V(3).Object(vmi).Infof("Delaying pod creation while DataVolume populates or while we wait for PVCs to appear.") return nil, pod } // ensure the VMI doesn't have an unfinished migration before creating the pod activeMigration, err := migrations.ActiveMigrationExistsForVMI(c.migrationIndexer, vmi) if err != nil { return common.NewSyncError(err, controller.FailedCreatePodReason), pod } if activeMigration { log.Log.V(3).Object(vmi).Infof("Delaying pod creation because an active migration exists for the VMI.") // We still need to return an error to ensure the VMI gets re-enqueued return common.NewSyncError(fmt.Errorf("active migration exists"), controller.FailedCreatePodReason), pod } backendStoragePVCName, syncErr := c.handleBackendStorage(vmi) if syncErr != nil { return syncErr, pod } // If a backend-storage PVC was just created but not yet seen by the informer, give it time if !c.pvcExpectations.SatisfiedExpectations(key) { return nil, pod } backendStorageReady, err := c.backendStorage.IsPVCReady(vmi, backendStoragePVCName) if err != nil { return common.NewSyncError(err, controller.FailedBackendStorageProbeReason), pod } if !backendStorageReady { log.Log.V(2).Object(vmi).Infof("Delaying pod creation while backend storage populates.") return common.NewSyncError(fmt.Errorf("PVC pending"), controller.BackendStorageNotReadyReason), pod } var templatePod *k8sv1.Pod if isWaitForFirstConsumer { log.Log.V(3).Object(vmi).Infof("Scheduling temporary pod for WaitForFirstConsumer DV") templatePod, err = 
c.templateService.RenderLaunchManifestNoVm(vmi) } else { templatePod, err = c.templateService.RenderLaunchManifest(vmi) } if _, ok := err.(storagetypes.PvcNotFoundError); ok { c.recorder.Eventf(vmi, k8sv1.EventTypeWarning, controller.FailedPvcNotFoundReason, services.FailedToRenderLaunchManifestErrFormat, err) return &informalSyncError{fmt.Errorf(services.FailedToRenderLaunchManifestErrFormat, err), controller.FailedPvcNotFoundReason}, pod } else if err != nil { return common.NewSyncError(fmt.Errorf(services.FailedToRenderLaunchManifestErrFormat, err), controller.FailedCreatePodReason), pod } var validateErrors []error for _, cause := range c.validateNetworkSpec(k8sfield.NewPath("spec"), &vmi.Spec, c.clusterConfig) { validateErrors = append(validateErrors, errors.New(cause.String())) } if validateErr := errors.Join(validateErrors...); validateErrors != nil { return common.NewSyncError(fmt.Errorf("failed create validation: %v", validateErr), "FailedCreateValidation"), pod } vmiKey := controller.VirtualMachineInstanceKey(vmi) c.podExpectations.ExpectCreations(vmiKey, 1) pod, err := c.clientset.CoreV1().Pods(vmi.GetNamespace()).Create(context.Background(), templatePod, v1.CreateOptions{}) if k8serrors.IsForbidden(err) && strings.Contains(err.Error(), "violates PodSecurity") { psaErr := fmt.Errorf("failed to create pod for vmi %s/%s, it needs a privileged namespace to run: %w", vmi.GetNamespace(), vmi.GetName(), err) c.recorder.Eventf(vmi, k8sv1.EventTypeWarning, controller.FailedCreatePodReason, services.FailedToRenderLaunchManifestErrFormat, psaErr) c.podExpectations.CreationObserved(vmiKey) return common.NewSyncError(psaErr, controller.FailedCreatePodReason), nil } if err != nil { c.recorder.Eventf(vmi, k8sv1.EventTypeWarning, controller.FailedCreatePodReason, "Error creating pod: %v", err) c.podExpectations.CreationObserved(vmiKey) return common.NewSyncError(fmt.Errorf("failed to create virtual machine pod: %v", err), controller.FailedCreatePodReason), nil } c.recorder.Eventf(vmi, k8sv1.EventTypeNormal, controller.SuccessfulCreatePodReason, "Created virtual machine pod %s", pod.Name) return nil, pod } if !isWaitForFirstConsumer { err := c.cleanupWaitForFirstConsumerTemporaryPods(vmi, pod) if err != nil { return common.NewSyncError(fmt.Errorf("failed to clean up temporary pods: %v", err), controller.FailedHotplugSyncReason), pod } } if !isTempPod(pod) && controller.IsPodReady(pod) { newAnnotations := map[string]string{descheduler.EvictOnlyAnnotation: ""} maps.Copy(newAnnotations, c.netAnnotationsGenerator.GenerateFromActivePod(vmi, pod)) patchedPod, err := c.syncPodAnnotations(pod, newAnnotations) if err != nil { return common.NewSyncError(err, controller.FailedPodPatchReason), pod } pod = patchedPod hotplugVolumes := controller.GetHotplugVolumes(vmi, pod) hotplugAttachmentPods, err := controller.AttachmentPods(pod, c.podIndexer) if err != nil { return common.NewSyncError(fmt.Errorf("failed to get attachment pods: %v", err), controller.FailedHotplugSyncReason), pod } if pod.DeletionTimestamp == nil && c.needsHandleHotplug(hotplugVolumes, hotplugAttachmentPods) { var hotplugSyncErr common.SyncError = nil hotplugSyncErr = c.handleHotplugVolumes(hotplugVolumes, hotplugAttachmentPods, vmi, pod, dataVolumes) if hotplugSyncErr != nil { if hotplugSyncErr.Reason() == controller.MissingAttachmentPodReason { // We are missing an essential hotplug pod. Delete all pods associated with the VMI. 
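// Deleting the virt-launcher pod as well (not just the attachment pods) typically drives the
// VMI to the Failed phase on the next status update, so an owning VirtualMachine can recreate
// it from scratch; a failure of this cleanup is only logged and does not abort the sync.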
if err := c.deleteAllMatchingPods(vmi); err != nil { log.Log.Warningf("failed to delete VMI %s pods: %v", vmi.GetUID(), err) } } else { return hotplugSyncErr, pod } } } } return nil, pod } func (c *Controller) handleBackendStorage(vmi *virtv1.VirtualMachineInstance) (string, common.SyncError) { key, err := controller.KeyFunc(vmi) if err != nil { return "", common.NewSyncError(err, controller.FailedBackendStorageCreateReason) } if !backendstorage.IsBackendStorageNeededForVMI(&vmi.Spec) { return "", nil } pvc := backendstorage.PVCForVMI(c.pvcIndexer, vmi) if pvc == nil { c.pvcExpectations.ExpectCreations(key, 1) if pvc, err = c.backendStorage.CreatePVCForVMI(vmi); err != nil { c.pvcExpectations.CreationObserved(key) return "", common.NewSyncError(err, controller.FailedBackendStorageCreateReason) } } return pvc.Name, nil } func (c *Controller) handleSyncDataVolumes(vmi *virtv1.VirtualMachineInstance, dataVolumes []*cdiv1.DataVolume) (bool, bool, common.SyncError) { ready := true wffc := false for _, volume := range vmi.Spec.Volumes { // Check both DVs and PVCs if volume.VolumeSource.DataVolume != nil || volume.VolumeSource.PersistentVolumeClaim != nil { volumeReady, volumeWffc, err := storagetypes.VolumeReadyToAttachToNode(vmi.Namespace, volume, dataVolumes, c.dataVolumeIndexer, c.pvcIndexer) if err != nil { if _, ok := err.(storagetypes.PvcNotFoundError); ok { // due to the eventually consistent nature of controllers, CDI or users may need some time to actually create the PVC. // We wait for them to appear. c.recorder.Eventf(vmi, k8sv1.EventTypeNormal, controller.FailedPvcNotFoundReason, "PVC %s/%s does not exist, waiting for it to appear", vmi.Namespace, storagetypes.PVCNameFromVirtVolume(&volume)) return false, false, &informalSyncError{err: fmt.Errorf("PVC %s/%s does not exist, waiting for it to appear", vmi.Namespace, storagetypes.PVCNameFromVirtVolume(&volume)), reason: controller.FailedPvcNotFoundReason} } else { c.recorder.Eventf(vmi, k8sv1.EventTypeWarning, controller.FailedPvcNotFoundReason, "Error determining if volume is ready: %v", err) return false, false, common.NewSyncError(fmt.Errorf("Error determining if volume is ready %v", err), controller.FailedDataVolumeImportReason) } } wffc = wffc || volumeWffc // Ready only becomes false if WFFC is also false. ready = ready && (volumeReady || volumeWffc) } } return ready, wffc, nil } func (c *Controller) addPVC(obj interface{}) { pvc := obj.(*k8sv1.PersistentVolumeClaim) if pvc.DeletionTimestamp != nil { return } persistentStateFor, exists := pvc.Labels[backendstorage.PVCPrefix] if exists { vmiKey := controller.NamespacedKey(pvc.Namespace, persistentStateFor) c.pvcExpectations.CreationObserved(vmiKey) c.Queue.Add(vmiKey) return // The PVC is a backend-storage PVC, won't be listed by `c.listVMIsMatchingDV()` } vmis, err := c.listVMIsMatchingDV(pvc.Namespace, pvc.Name) if err != nil { return } for _, vmi := range vmis { log.Log.V(4).Object(pvc).Infof("PVC created for vmi %s", vmi.Name) c.enqueueVirtualMachine(vmi) } } func (c *Controller) updatePVC(old, cur interface{}) { curPVC := cur.(*k8sv1.PersistentVolumeClaim) oldPVC := old.(*k8sv1.PersistentVolumeClaim) if curPVC.ResourceVersion == oldPVC.ResourceVersion { // Periodic resync will send update events for all known PVCs. // Two different versions of the same PVC will always // have different RVs.
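// A resync therefore carries no new information for this controller, so the event is dropped
// without enqueueing any VMI.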
return } if curPVC.DeletionTimestamp != nil { return } if equality.Semantic.DeepEqual(curPVC.Status.Capacity, oldPVC.Status.Capacity) { // We only do something when the capacity changes return } vmis, err := c.listVMIsMatchingDV(curPVC.Namespace, curPVC.Name) if err != nil { log.Log.Object(curPVC).Errorf("Error encountered getting VMIs for DataVolume: %v", err) return } for _, vmi := range vmis { log.Log.V(4).Object(curPVC).Infof("PVC updated for vmi %s", vmi.Name) c.enqueueVirtualMachine(vmi) } } func (c *Controller) addDataVolume(obj interface{}) { dataVolume := obj.(*cdiv1.DataVolume) if dataVolume.DeletionTimestamp != nil { c.deleteDataVolume(dataVolume) return } vmis, err := c.listVMIsMatchingDV(dataVolume.Namespace, dataVolume.Name) if err != nil { return } for _, vmi := range vmis { log.Log.V(4).Object(dataVolume).Infof("DataVolume created for vmi %s", vmi.Name) c.enqueueVirtualMachine(vmi) } } func (c *Controller) updateDataVolume(old, cur interface{}) { curDataVolume := cur.(*cdiv1.DataVolume) oldDataVolume := old.(*cdiv1.DataVolume) if curDataVolume.ResourceVersion == oldDataVolume.ResourceVersion { // Periodic resync will send update events for all known DataVolumes. // Two different versions of the same dataVolume will always // have different RVs. return } if curDataVolume.DeletionTimestamp != nil { labelChanged := !equality.Semantic.DeepEqual(curDataVolume.Labels, oldDataVolume.Labels) // having a DataVolume marked for deletion is enough // to count as a deletion expectation c.deleteDataVolume(curDataVolume) if labelChanged { // we don't need to check the oldDataVolume.DeletionTimestamp // because DeletionTimestamp cannot be unset. c.deleteDataVolume(oldDataVolume) } return } vmis, err := c.listVMIsMatchingDV(curDataVolume.Namespace, curDataVolume.Name) if err != nil { log.Log.Object(curDataVolume).Errorf("Error encountered during datavolume update: %v", err) return } for _, vmi := range vmis { log.Log.V(4).Object(curDataVolume).Infof("DataVolume updated for vmi %s", vmi.Name) c.enqueueVirtualMachine(vmi) } } func (c *Controller) deleteDataVolume(obj interface{}) { dataVolume, ok := obj.(*cdiv1.DataVolume) // When a delete is dropped, the relist will notice a dataVolume in the store not // in the list, leading to the insertion of a tombstone object which contains // the deleted key/value. Note that this value might be stale. If the dataVolume // changed labels the new vmi will not be woken up till the periodic resync. if !ok { tombstone, ok := obj.(cache.DeletedFinalStateUnknown) if !ok { log.Log.Reason(fmt.Errorf(tombstoneGetObjectErrFmt, obj)).Error(deleteNotifFailed) return } dataVolume, ok = tombstone.Obj.(*cdiv1.DataVolume) if !ok { log.Log.Reason(fmt.Errorf("tombstone contained object that is not a dataVolume %#v", obj)).Error(deleteNotifFailed) return } } vmis, err := c.listVMIsMatchingDV(dataVolume.Namespace, dataVolume.Name) if err != nil { return } for _, vmi := range vmis { log.Log.V(4).Object(dataVolume).Infof("DataVolume deleted for vmi %s", vmi.Name) c.enqueueVirtualMachine(vmi) } } // When a pod is created, enqueue the vmi that manages it and update its podExpectations. func (c *Controller) addPod(obj interface{}) { pod := obj.(*k8sv1.Pod) if pod.DeletionTimestamp != nil { // on a restart of the controller manager, it's possible a new pod shows up in a state that // is already pending deletion. Prevent the pod from being a creation observation.
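// Treating it as a deletion keeps the podExpectations bookkeeping consistent with what the
// controller will actually observe next for this pod.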
c.deletePod(pod) return } controllerRef := controller.GetControllerOf(pod) vmi := c.resolveControllerRef(pod.Namespace, controllerRef) if vmi == nil { return } vmiKey, err := controller.KeyFunc(vmi) if err != nil { return } log.Log.V(4).Object(pod).Infof("Pod created") c.podExpectations.CreationObserved(vmiKey) c.enqueueVirtualMachine(vmi) } // When a pod is updated, figure out what vmi/s manage it and wake them // up. If the labels of the pod have changed we need to awaken both the old // and new vmi. old and cur must be *v1.Pod types. func (c *Controller) updatePod(old, cur interface{}) { curPod := cur.(*k8sv1.Pod) oldPod := old.(*k8sv1.Pod) if curPod.ResourceVersion == oldPod.ResourceVersion { // Periodic resync will send update events for all known pods. // Two different versions of the same pod will always have different RVs. return } if curPod.DeletionTimestamp != nil { labelChanged := !equality.Semantic.DeepEqual(curPod.Labels, oldPod.Labels) // having a pod marked for deletion is enough to count as a deletion expectation c.deletePod(curPod) if labelChanged { // we don't need to check the oldPod.DeletionTimestamp because DeletionTimestamp cannot be unset. c.deletePod(oldPod) } return } curControllerRef := controller.GetControllerOf(curPod) oldControllerRef := controller.GetControllerOf(oldPod) controllerRefChanged := !equality.Semantic.DeepEqual(curControllerRef, oldControllerRef) if controllerRefChanged { // The ControllerRef was changed. Sync the old controller, if any. if vmi := c.resolveControllerRef(oldPod.Namespace, oldControllerRef); vmi != nil { c.enqueueVirtualMachine(vmi) } } vmi := c.resolveControllerRef(curPod.Namespace, curControllerRef) if vmi == nil { return } log.Log.V(4).Object(curPod).Infof("Pod updated") c.enqueueVirtualMachine(vmi) } // When a pod is deleted, enqueue the vmi that manages the pod and update its podExpectations. // obj could be an *v1.Pod, or a DeletionFinalStateUnknown marker item. func (c *Controller) deletePod(obj interface{}) { pod, ok := obj.(*k8sv1.Pod) // When a delete is dropped, the relist will notice a pod in the store not // in the list, leading to the insertion of a tombstone object which contains // the deleted key/value. Note that this value might be stale. If the pod // changed labels the new vmi will not be woken up till the periodic resync. if !ok { tombstone, ok := obj.(cache.DeletedFinalStateUnknown) if !ok { log.Log.Reason(fmt.Errorf(tombstoneGetObjectErrFmt, obj)).Error(deleteNotifFailed) return } pod, ok = tombstone.Obj.(*k8sv1.Pod) if !ok { log.Log.Reason(fmt.Errorf("tombstone contained object that is not a pod %#v", obj)).Error(deleteNotifFailed) return } } controllerRef := controller.GetControllerOf(pod) vmi := c.resolveControllerRef(pod.Namespace, controllerRef) if vmi == nil { return } vmiKey, err := controller.KeyFunc(vmi) if err != nil { return } c.podExpectations.DeletionObserved(vmiKey, controller.PodKey(pod)) c.enqueueVirtualMachine(vmi) } func (c *Controller) addVirtualMachineInstance(obj interface{}) { c.lowerVMIExpectation(obj) c.enqueueVirtualMachine(obj) } func (c *Controller) deleteVirtualMachineInstance(obj interface{}) { vmi, ok := obj.(*virtv1.VirtualMachineInstance) // When a delete is dropped, the relist will notice a vmi in the store not // in the list, leading to the insertion of a tombstone object which contains // the deleted key/value. Note that this value might be stale. 
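// Either way, the object (or its tombstone) still yields a valid key, which is all that is
// needed to lower the expectations and requeue the VMI.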
if !ok { tombstone, ok := obj.(cache.DeletedFinalStateUnknown) if !ok { log.Log.Reason(fmt.Errorf(tombstoneGetObjectErrFmt, obj)).Error(deleteNotifFailed) return } vmi, ok = tombstone.Obj.(*virtv1.VirtualMachineInstance) if !ok { log.Log.Reason(fmt.Errorf("tombstone contained object that is not a vmi %#v", obj)).Error(deleteNotifFailed) return } } c.lowerVMIExpectation(vmi) c.enqueueVirtualMachine(vmi) } func (c *Controller) updateVirtualMachineInstance(_, curr interface{}) { c.lowerVMIExpectation(curr) c.enqueueVirtualMachine(curr) } func (c *Controller) lowerVMIExpectation(curr interface{}) { key, err := controller.KeyFunc(curr) if err != nil { return } c.vmiExpectations.LowerExpectations(key, 1, 0) } func (c *Controller) enqueueVirtualMachine(obj interface{}) { logger := log.Log vmi := obj.(*virtv1.VirtualMachineInstance) key, err := controller.KeyFunc(vmi) if err != nil { logger.Object(vmi).Reason(err).Error("Failed to extract key from virtualmachine.") return } c.Queue.Add(key) } // resolveControllerRef returns the controller referenced by a ControllerRef, // or nil if the ControllerRef could not be resolved to a matching controller // of the correct Kind. func (c *Controller) resolveControllerRef(namespace string, controllerRef *v1.OwnerReference) *virtv1.VirtualMachineInstance { if controllerRef != nil && controllerRef.Kind == "Pod" { // This could be an attachment pod, look up the pod, and check if it is owned by a VMI. obj, exists, err := c.podIndexer.GetByKey(controller.NamespacedKey(namespace, controllerRef.Name)) if err != nil { return nil } if !exists { return nil } pod, _ := obj.(*k8sv1.Pod) controllerRef = controller.GetControllerOf(pod) } // We can't look up by UID, so look up by Name and then verify UID. // Don't even try to look up by Name if it is nil or the wrong Kind. if controllerRef == nil || controllerRef.Kind != virtv1.VirtualMachineInstanceGroupVersionKind.Kind { return nil } vmi, exists, err := c.vmiIndexer.GetByKey(controller.NamespacedKey(namespace, controllerRef.Name)) if err != nil { return nil } if !exists { return nil } if vmi.(*virtv1.VirtualMachineInstance).UID != controllerRef.UID { // The controller we found with this Name is not the same one that the // ControllerRef points to. 
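// A UID mismatch typically means the referenced VMI was deleted and a new one with the same
// name was created; events for the old instance must not be attributed to the new one.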
return nil } return vmi.(*virtv1.VirtualMachineInstance) } func (c *Controller) listVMIsMatchingDV(namespace string, dvName string) ([]*virtv1.VirtualMachineInstance, error) { // TODO - refactor if/when dv/pvc do not have the same name vmis := []*virtv1.VirtualMachineInstance{} for _, indexName := range []string{"dv", "pvc"} { objs, err := c.vmiIndexer.ByIndex(indexName, namespace+"/"+dvName) if err != nil { return nil, err } for _, obj := range objs { vmi := obj.(*virtv1.VirtualMachineInstance) vmis = append(vmis, vmi.DeepCopy()) } } return vmis, nil } func (c *Controller) allPodsDeleted(vmi *virtv1.VirtualMachineInstance) (bool, error) { pods, err := c.listPodsFromNamespace(vmi.Namespace) if err != nil { return false, err } for _, pod := range pods { if controller.IsControlledBy(pod, vmi) { return false, nil } } return true, nil } func (c *Controller) deleteAllMatchingPods(vmi *virtv1.VirtualMachineInstance) error { pods, err := c.listPodsFromNamespace(vmi.Namespace) if err != nil { return err } vmiKey := controller.VirtualMachineInstanceKey(vmi) for _, pod := range pods { if pod.DeletionTimestamp != nil { continue } if !controller.IsControlledBy(pod, vmi) { continue } c.podExpectations.ExpectDeletions(vmiKey, []string{controller.PodKey(pod)}) err := c.clientset.CoreV1().Pods(vmi.Namespace).Delete(context.Background(), pod.Name, v1.DeleteOptions{}) if err != nil { c.podExpectations.DeletionObserved(vmiKey, controller.PodKey(pod)) c.recorder.Eventf(vmi, k8sv1.EventTypeWarning, controller.FailedDeletePodReason, "Failed to delete virtual machine pod %s", pod.Name) return err } c.recorder.Eventf(vmi, k8sv1.EventTypeNormal, controller.SuccessfulDeletePodReason, "Deleted virtual machine pod %s", pod.Name) } return nil } // listPodsFromNamespace takes a namespace and returns all Pods from the pod cache which run in this namespace func (c *Controller) listPodsFromNamespace(namespace string) ([]*k8sv1.Pod, error) { objs, err := c.podIndexer.ByIndex(cache.NamespaceIndex, namespace) if err != nil { return nil, err } pods := []*k8sv1.Pod{} for _, obj := range objs { pod := obj.(*k8sv1.Pod) pods = append(pods, pod) } return pods, nil } func (c *Controller) setActivePods(vmi *virtv1.VirtualMachineInstance) (*virtv1.VirtualMachineInstance, error) { pods, err := c.listPodsFromNamespace(vmi.Namespace) if err != nil { return nil, err } activePods := make(map[types.UID]string) count := 0 for _, pod := range pods { if !controller.IsControlledBy(pod, vmi) { continue } count++ activePods[pod.UID] = pod.Spec.NodeName } if count == 0 && vmi.Status.ActivePods == nil { return vmi, nil } vmi.Status.ActivePods = activePods return vmi, nil } func isTempPod(pod *k8sv1.Pod) bool { _, ok := pod.Annotations[virtv1.EphemeralProvisioningObject] return ok } func shouldSetMigrationTransport(pod *k8sv1.Pod) bool { _, ok := pod.Annotations[virtv1.MigrationTransportUnixAnnotation] return ok } func (c *Controller) cleanupWaitForFirstConsumerTemporaryPods(vmi *virtv1.VirtualMachineInstance, virtLauncherPod *k8sv1.Pod) error { triggerPods, err := c.waitForFirstConsumerTemporaryPods(vmi, virtLauncherPod) if err != nil { return err } return c.deleteRunningOrFinishedWFFCPods(vmi, triggerPods...) 
} func (c *Controller) deleteRunningOrFinishedWFFCPods(vmi *virtv1.VirtualMachineInstance, pods ...*k8sv1.Pod) error { for _, pod := range pods { err := c.deleteRunningFinishedOrFailedPod(vmi, pod) if err != nil && !k8serrors.IsNotFound(err) { c.recorder.Eventf(vmi, k8sv1.EventTypeWarning, controller.FailedDeletePodReason, "Failed to delete WaitForFirstConsumer temporary pod %s", pod.Name) return err } c.recorder.Eventf(vmi, k8sv1.EventTypeNormal, controller.SuccessfulDeletePodReason, "Deleted WaitForFirstConsumer temporary pod %s", pod.Name) } return nil } func (c *Controller) deleteRunningFinishedOrFailedPod(vmi *virtv1.VirtualMachineInstance, pod *k8sv1.Pod) error { zero := int64(0) if pod.Status.Phase == k8sv1.PodRunning || pod.Status.Phase == k8sv1.PodSucceeded || pod.Status.Phase == k8sv1.PodFailed { vmiKey := controller.VirtualMachineInstanceKey(vmi) c.podExpectations.ExpectDeletions(vmiKey, []string{controller.PodKey(pod)}) err := c.clientset.CoreV1().Pods(pod.GetNamespace()).Delete(context.Background(), pod.Name, v1.DeleteOptions{ GracePeriodSeconds: &zero, }) if err != nil { c.podExpectations.DeletionObserved(vmiKey, controller.PodKey(pod)) return err } } return nil } func (c *Controller) waitForFirstConsumerTemporaryPods(vmi *virtv1.VirtualMachineInstance, virtLauncherPod *k8sv1.Pod) ([]*k8sv1.Pod, error) { var temporaryPods []*k8sv1.Pod // Get all pods from the namespace pods, err := c.listPodsFromNamespace(vmi.Namespace) if err != nil { return temporaryPods, err } for _, pod := range pods { // Cleanup candidates are temporary pods that are either controlled by the VMI or the virt launcher pod if !isTempPod(pod) { continue } if controller.IsControlledBy(pod, vmi) { temporaryPods = append(temporaryPods, pod) } if ownerRef := controller.GetControllerOf(pod); ownerRef != nil && ownerRef.UID == virtLauncherPod.UID { temporaryPods = append(temporaryPods, pod) } } return temporaryPods, nil } func (c *Controller) needsHandleHotplug(hotplugVolumes []*virtv1.Volume, hotplugAttachmentPods []*k8sv1.Pod) bool { if len(hotplugAttachmentPods) > 1 { return true } // Determine if the ready volumes have changed compared to the current pod if len(hotplugAttachmentPods) == 1 && c.podVolumesMatchesReadyVolumes(hotplugAttachmentPods[0], hotplugVolumes) { return false } return len(hotplugVolumes) > 0 || len(hotplugAttachmentPods) > 0 } func (c *Controller) getActiveAndOldAttachmentPods(readyHotplugVolumes []*virtv1.Volume, hotplugAttachmentPods []*k8sv1.Pod) (*k8sv1.Pod, []*k8sv1.Pod) { var currentPod *k8sv1.Pod oldPods := make([]*k8sv1.Pod, 0) for _, attachmentPod := range hotplugAttachmentPods { if !c.podVolumesMatchesReadyVolumes(attachmentPod, readyHotplugVolumes) { oldPods = append(oldPods, attachmentPod) } else { currentPod = attachmentPod } } sort.Slice(oldPods, func(i, j int) bool { return oldPods[i].CreationTimestamp.Time.After(oldPods[j].CreationTimestamp.Time) }) return currentPod, oldPods } // cleanupAttachmentPods deletes the old attachment pods. Every oldPod is deleted when any of the following is true: // 1. There is a currentPod and it is running (not nil and phase == Running). // 2. There are no ready volumes (numReadyVolumes == 0). // 3. None of the oldPods is still running without a deletion timestamp. // Otherwise the newest oldPod that is still running and not marked for deletion is kept, // since that one is the pod closest to the desired state.
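// As a hypothetical example: with three stale attachment pods and a replacement pod that is
// still Pending, the newest stale pod that is still running is kept so the hotplugged volumes
// stay attached, while the older ones are deleted immediately.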
func (c *Controller) cleanupAttachmentPods(currentPod *k8sv1.Pod, oldPods []*k8sv1.Pod, vmi *virtv1.VirtualMachineInstance, numReadyVolumes int) common.SyncError { foundRunning := false for _, attachmentPod := range oldPods { if !foundRunning && attachmentPod.Status.Phase == k8sv1.PodRunning && attachmentPod.DeletionTimestamp == nil && numReadyVolumes > 0 && (currentPod == nil || currentPod.Status.Phase != k8sv1.PodRunning) { foundRunning = true continue } if err := c.deleteAttachmentPodForVolume(vmi, attachmentPod); err != nil { return common.NewSyncError(fmt.Errorf("Error deleting attachment pod %v", err), controller.FailedDeletePodReason) } } return nil } func hasPendingPods(pods []*k8sv1.Pod) bool { for _, pod := range pods { if pod.Status.Phase == k8sv1.PodRunning || pod.Status.Phase == k8sv1.PodSucceeded || pod.Status.Phase == k8sv1.PodFailed { continue } return true } return false } func (c *Controller) requeueAfter(oldPods []*k8sv1.Pod, threshold time.Duration) (bool, time.Duration) { if len(oldPods) > 0 && oldPods[0].CreationTimestamp.Time.After(time.Now().Add(-1*threshold)) { return true, threshold - time.Since(oldPods[0].CreationTimestamp.Time) } return false, 0 } func (c *Controller) handleHotplugVolumes(hotplugVolumes []*virtv1.Volume, hotplugAttachmentPods []*k8sv1.Pod, vmi *virtv1.VirtualMachineInstance, virtLauncherPod *k8sv1.Pod, dataVolumes []*cdiv1.DataVolume) common.SyncError { logger := log.Log.Object(vmi) readyHotplugVolumes := make([]*virtv1.Volume, 0) // Find all ready volumes for _, volume := range hotplugVolumes { var err error ready, wffc, err := storagetypes.VolumeReadyToAttachToNode(vmi.Namespace, *volume, dataVolumes, c.dataVolumeIndexer, c.pvcIndexer) if err != nil { return common.NewSyncError(fmt.Errorf("Error determining volume status %v", err), controller.PVCNotReadyReason) } if wffc { // Volume is in WaitForFirstConsumer and has not been populated by CDI yet; create a dummy pod to trigger population logger.V(1).Infof("Volume %s/%s is in WaitForFirstConsumer, triggering population", vmi.Namespace, volume.Name) syncError := c.triggerHotplugPopulation(volume, vmi, virtLauncherPod) if syncError != nil { return syncError } continue } if !ready { // Volume not ready, skip until it is. logger.V(3).Infof("Skipping hotplugged volume: %s, not ready", volume.Name) continue } readyHotplugVolumes = append(readyHotplugVolumes, volume) } currentPod, oldPods := c.getActiveAndOldAttachmentPods(readyHotplugVolumes, hotplugAttachmentPods) if currentPod == nil && !hasPendingPods(oldPods) && len(readyHotplugVolumes) > 0 { if rateLimited, waitTime := c.requeueAfter(oldPods, time.Duration(len(readyHotplugVolumes)/-10)); rateLimited { key, err := controller.KeyFunc(vmi) if err != nil { logger.Object(vmi).Reason(err).Error("failed to extract key from virtualmachine.") return common.NewSyncError(fmt.Errorf("failed to extract key from virtualmachine. 
%v", err), controller.FailedHotplugSyncReason) } c.Queue.AddAfter(key, waitTime) } else { if newPod, err := c.createAttachmentPod(vmi, virtLauncherPod, readyHotplugVolumes); err != nil { return err } else { currentPod = newPod } } } if err := c.cleanupAttachmentPods(currentPod, oldPods, vmi, len(readyHotplugVolumes)); err != nil { return err } return nil } func (c *Controller) podVolumesMatchesReadyVolumes(attachmentPod *k8sv1.Pod, volumes []*virtv1.Volume) bool { // -2 for empty dir and token if len(attachmentPod.Spec.Volumes)-2 != len(volumes) { return false } podVolumeMap := make(map[string]k8sv1.Volume) for _, volume := range attachmentPod.Spec.Volumes { if volume.PersistentVolumeClaim != nil { podVolumeMap[volume.Name] = volume } } for _, volume := range volumes { delete(podVolumeMap, volume.Name) } return len(podVolumeMap) == 0 } func (c *Controller) createAttachmentPod(vmi *virtv1.VirtualMachineInstance, virtLauncherPod *k8sv1.Pod, volumes []*virtv1.Volume) (*k8sv1.Pod, common.SyncError) { attachmentPodTemplate, _ := c.createAttachmentPodTemplate(vmi, virtLauncherPod, volumes) if attachmentPodTemplate == nil { return nil, nil } vmiKey := controller.VirtualMachineInstanceKey(vmi) c.podExpectations.ExpectCreations(vmiKey, 1) pod, err := c.clientset.CoreV1().Pods(vmi.GetNamespace()).Create(context.Background(), attachmentPodTemplate, v1.CreateOptions{}) if err != nil { c.podExpectations.CreationObserved(vmiKey) c.recorder.Eventf(vmi, k8sv1.EventTypeWarning, controller.FailedCreatePodReason, "Error creating attachment pod: %v", err) return nil, common.NewSyncError(fmt.Errorf("Error creating attachment pod %v", err), controller.FailedCreatePodReason) } c.recorder.Eventf(vmi, k8sv1.EventTypeNormal, controller.SuccessfulCreatePodReason, "Created attachment pod %s", pod.Name) return pod, nil } func (c *Controller) triggerHotplugPopulation(volume *virtv1.Volume, vmi *virtv1.VirtualMachineInstance, virtLauncherPod *k8sv1.Pod) common.SyncError { populateHotplugPodTemplate, err := c.createAttachmentPopulateTriggerPodTemplate(volume, virtLauncherPod, vmi) if err != nil { return common.NewSyncError(fmt.Errorf("Error creating trigger pod template %v", err), controller.FailedCreatePodReason) } if populateHotplugPodTemplate != nil { // nil means the PVC is not populated yet. 
vmiKey := controller.VirtualMachineInstanceKey(vmi) c.podExpectations.ExpectCreations(vmiKey, 1) _, err = c.clientset.CoreV1().Pods(vmi.GetNamespace()).Create(context.Background(), populateHotplugPodTemplate, v1.CreateOptions{}) if err != nil { c.podExpectations.CreationObserved(vmiKey) c.recorder.Eventf(vmi, k8sv1.EventTypeWarning, controller.FailedCreatePodReason, "Error creating hotplug population trigger pod for volume %s: %v", volume.Name, err) return common.NewSyncError(fmt.Errorf("Error creating hotplug population trigger pod %v", err), controller.FailedCreatePodReason) } c.recorder.Eventf(vmi, k8sv1.EventTypeNormal, controller.SuccessfulCreatePodReason, "Created hotplug trigger pod for volume %s", volume.Name) } return nil } func (c *Controller) volumeStatusContainsVolumeAndPod(volumeStatus []virtv1.VolumeStatus, volume *virtv1.Volume) bool { for _, status := range volumeStatus { if status.Name == volume.Name && status.HotplugVolume != nil && status.HotplugVolume.AttachPodName != "" { return true } } return false } func (c *Controller) getNewHotplugVolumes(hotplugAttachmentPods []*k8sv1.Pod, hotplugVolumes []*virtv1.Volume) []*virtv1.Volume { var newVolumes []*virtv1.Volume hotplugVolumeMap := make(map[string]*virtv1.Volume) for _, volume := range hotplugVolumes { hotplugVolumeMap[volume.Name] = volume } // Remove all the volumes that we have a pod for. for _, pod := range hotplugAttachmentPods { for _, volume := range pod.Spec.Volumes { delete(hotplugVolumeMap, volume.Name) } } // Any remaining volumes are new. for _, v := range hotplugVolumeMap { newVolumes = append(newVolumes, v) } return newVolumes } func (c *Controller) getDeletedHotplugVolumes(hotplugPods []*k8sv1.Pod, hotplugVolumes []*virtv1.Volume) []k8sv1.Volume { var deletedVolumes []k8sv1.Volume hotplugVolumeMap := make(map[string]*virtv1.Volume) for _, volume := range hotplugVolumes { hotplugVolumeMap[volume.Name] = volume } for _, pod := range hotplugPods { for _, volume := range pod.Spec.Volumes { if _, ok := hotplugVolumeMap[volume.Name]; !ok && volume.PersistentVolumeClaim != nil { deletedVolumes = append(deletedVolumes, volume) } } } return deletedVolumes } func (c *Controller) deleteAttachmentPodForVolume(vmi *virtv1.VirtualMachineInstance, attachmentPod *k8sv1.Pod) error { vmiKey := controller.VirtualMachineInstanceKey(vmi) zero := int64(0) if attachmentPod.DeletionTimestamp != nil { return nil } c.podExpectations.ExpectDeletions(vmiKey, []string{controller.PodKey(attachmentPod)}) err := c.clientset.CoreV1().Pods(attachmentPod.GetNamespace()).Delete(context.Background(), attachmentPod.Name, v1.DeleteOptions{ GracePeriodSeconds: &zero, }) if err != nil { c.podExpectations.DeletionObserved(vmiKey, controller.PodKey(attachmentPod)) c.recorder.Eventf(vmi, k8sv1.EventTypeWarning, controller.FailedDeletePodReason, "Failed to delete attachment pod %s", attachmentPod.Name) return err } c.recorder.Eventf(vmi, k8sv1.EventTypeNormal, controller.SuccessfulDeletePodReason, "Deleted attachment pod %s", attachmentPod.Name) return nil } func (c *Controller) createAttachmentPodTemplate(vmi *virtv1.VirtualMachineInstance, virtlauncherPod *k8sv1.Pod, volumes []*virtv1.Volume) (*k8sv1.Pod, error) { logger := log.Log.Object(vmi) var pod *k8sv1.Pod var err error volumeNamesPVCMap, err := storagetypes.VirtVolumesToPVCMap(volumes, c.pvcIndexer, virtlauncherPod.Namespace) if err != nil { return nil, fmt.Errorf("failed to get PVC map: %v", err) } for volumeName, pvc := range volumeNamesPVCMap { //Verify the PVC is ready to be used. 
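// Claims whose population has not finished are dropped from the map below, so the attachment
// pod is only rendered with volumes that can actually be attached right now.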
populated, err := cdiv1.IsSucceededOrPendingPopulation(pvc, func(name, namespace string) (*cdiv1.DataVolume, error) { dv, exists, _ := c.dataVolumeIndexer.GetByKey(fmt.Sprintf("%s/%s", namespace, name)) if !exists { return nil, fmt.Errorf("unable to find datavolume %s/%s", namespace, name) } return dv.(*cdiv1.DataVolume), nil }) if err != nil { return nil, err } if !populated { logger.Infof("Unable to hotplug, claim %s found, but not ready", pvc.Name) delete(volumeNamesPVCMap, volumeName) } } if len(volumeNamesPVCMap) > 0 { pod, err = c.templateService.RenderHotplugAttachmentPodTemplate(volumes, virtlauncherPod, vmi, volumeNamesPVCMap) } return pod, err } func (c *Controller) createAttachmentPopulateTriggerPodTemplate(volume *virtv1.Volume, virtlauncherPod *k8sv1.Pod, vmi *virtv1.VirtualMachineInstance) (*k8sv1.Pod, error) { claimName := storagetypes.PVCNameFromVirtVolume(volume) if claimName == "" { return nil, errors.New("Unable to hotplug, claim not PVC or Datavolume") } pvc, exists, isBlock, err := storagetypes.IsPVCBlockFromStore(c.pvcIndexer, virtlauncherPod.Namespace, claimName) if err != nil { return nil, err } if !exists { return nil, fmt.Errorf("Unable to trigger hotplug population, claim %s not found", claimName) } pod, err := c.templateService.RenderHotplugAttachmentTriggerPodTemplate(volume, virtlauncherPod, vmi, pvc.Name, isBlock, true) return pod, err } func (c *Controller) deleteAllAttachmentPods(vmi *virtv1.VirtualMachineInstance) error { virtlauncherPod, err := controller.CurrentVMIPod(vmi, c.podIndexer) if err != nil { return err } if virtlauncherPod != nil { attachmentPods, err := controller.AttachmentPods(virtlauncherPod, c.podIndexer) if err != nil { return err } for _, attachmentPod := range attachmentPods { err := c.deleteAttachmentPodForVolume(vmi, attachmentPod) if err != nil && !k8serrors.IsNotFound(err) { return err } } } return nil } func (c *Controller) deleteOrphanedAttachmentPods(vmi *virtv1.VirtualMachineInstance) error { pods, err := c.listPodsFromNamespace(vmi.Namespace) if err != nil { return fmt.Errorf("failed to list pods from namespace %s: %v", vmi.Namespace, err) } for _, pod := range pods { if !controller.IsControlledBy(pod, vmi) { continue } if !controller.PodIsDown(pod) { continue } attachmentPods, err := controller.AttachmentPods(pod, c.podIndexer) if err != nil { log.Log.Reason(err).Errorf("failed to get attachment pods %s: %v", controller.PodKey(pod), err) // do not return; continue the cleanup... continue } for _, attachmentPod := range attachmentPods { if err := c.deleteAttachmentPodForVolume(vmi, attachmentPod); err != nil { log.Log.Reason(err).Errorf("failed to delete attachment pod %s: %v", controller.PodKey(attachmentPod), err) // do not return; continue the cleanup... 
} } } return nil } func (c *Controller) updateVolumeStatus(vmi *virtv1.VirtualMachineInstance, virtlauncherPod *k8sv1.Pod) error { oldStatus := vmi.Status.DeepCopy().VolumeStatus oldStatusMap := make(map[string]virtv1.VolumeStatus) for _, status := range oldStatus { oldStatusMap[status.Name] = status } hotplugVolumes := controller.GetHotplugVolumes(vmi, virtlauncherPod) hotplugVolumesMap := make(map[string]*virtv1.Volume) for _, volume := range hotplugVolumes { hotplugVolumesMap[volume.Name] = volume } attachmentPods, err := controller.AttachmentPods(virtlauncherPod, c.podIndexer) if err != nil { return err } attachmentPod, _ := c.getActiveAndOldAttachmentPods(hotplugVolumes, attachmentPods) newStatus := make([]virtv1.VolumeStatus, 0) backendStoragePVC := backendstorage.PVCForVMI(c.pvcIndexer, vmi) if backendStoragePVC != nil { if backendStorage, ok := oldStatusMap[backendStoragePVC.Name]; ok { newStatus = append(newStatus, backendStorage) } } for i, volume := range vmi.Spec.Volumes { status := virtv1.VolumeStatus{} if _, ok := oldStatusMap[volume.Name]; ok { // Already have the status, modify if needed status = oldStatusMap[volume.Name] } else { status.Name = volume.Name } // Remove from map so I can detect existing volumes that have been removed from spec. delete(oldStatusMap, volume.Name) if _, ok := hotplugVolumesMap[volume.Name]; ok { // Hotplugged volume if status.HotplugVolume == nil { status.HotplugVolume = &virtv1.HotplugVolumeStatus{} } if volume.MemoryDump != nil && status.MemoryDumpVolume == nil { status.MemoryDumpVolume = &virtv1.DomainMemoryDumpInfo{ ClaimName: volume.Name, } } if attachmentPod == nil { if !c.volumeReady(status.Phase) { status.HotplugVolume.AttachPodUID = "" // Volume is not hotplugged in VM and Pod is gone, or hasn't been created yet, check for the PVC associated with the volume to set phase and message phase, reason, message := c.getVolumePhaseMessageReason(&vmi.Spec.Volumes[i], vmi.Namespace) status.Phase = phase status.Message = message status.Reason = reason } } else { status.HotplugVolume.AttachPodName = attachmentPod.Name if len(attachmentPod.Status.ContainerStatuses) == 1 && attachmentPod.Status.ContainerStatuses[0].Ready { status.HotplugVolume.AttachPodUID = attachmentPod.UID } else { // Remove UID of old pod if a new one is available, but not yet ready status.HotplugVolume.AttachPodUID = "" } if c.canMoveToAttachedPhase(status.Phase) { status.Phase = virtv1.HotplugVolumeAttachedToNode status.Message = fmt.Sprintf("Created hotplug attachment pod %s, for volume %s", attachmentPod.Name, volume.Name) status.Reason = controller.SuccessfulCreatePodReason c.recorder.Eventf(vmi, k8sv1.EventTypeNormal, status.Reason, status.Message) } } } if volume.VolumeSource.PersistentVolumeClaim != nil || volume.VolumeSource.DataVolume != nil || volume.VolumeSource.MemoryDump != nil { pvcName := storagetypes.PVCNameFromVirtVolume(&volume) pvcInterface, pvcExists, _ := c.pvcIndexer.GetByKey(fmt.Sprintf("%s/%s", vmi.Namespace, pvcName)) if pvcExists { pvc := pvcInterface.(*k8sv1.PersistentVolumeClaim) status.PersistentVolumeClaimInfo = &virtv1.PersistentVolumeClaimInfo{ ClaimName: pvc.Name, AccessModes: pvc.Spec.AccessModes, VolumeMode: pvc.Spec.VolumeMode, Capacity: pvc.Status.Capacity, Requests: pvc.Spec.Resources.Requests, Preallocated: storagetypes.IsPreallocated(pvc.ObjectMeta.Annotations), } filesystemOverhead, err := c.getFilesystemOverhead(pvc) if err != nil { log.Log.Reason(err).Errorf("Failed to get filesystem overhead for PVC %s/%s", vmi.Namespace, pvcName) 
return err } status.PersistentVolumeClaimInfo.FilesystemOverhead = &filesystemOverhead } } newStatus = append(newStatus, status) } // We have updated the status of current volumes, but if a volume was removed, we want to keep that status, until there is no // associated pod, then remove it. Any statuses left in the map are statuses without a matching volume in the spec. for k, v := range oldStatusMap { attachmentPod := c.findAttachmentPodByVolumeName(k, attachmentPods) if attachmentPod != nil { v.HotplugVolume.AttachPodName = attachmentPod.Name v.HotplugVolume.AttachPodUID = attachmentPod.UID v.Phase = virtv1.HotplugVolumeDetaching if attachmentPod.DeletionTimestamp != nil { v.Message = fmt.Sprintf("Deleted hotplug attachment pod %s, for volume %s", attachmentPod.Name, k) v.Reason = controller.SuccessfulDeletePodReason c.recorder.Eventf(vmi, k8sv1.EventTypeNormal, v.Reason, v.Message) } // If the pod exists, we keep the status. newStatus = append(newStatus, v) } } sort.SliceStable(newStatus, func(i, j int) bool { return strings.Compare(newStatus[i].Name, newStatus[j].Name) == -1 }) vmi.Status.VolumeStatus = newStatus return nil } func (c *Controller) volumeReady(phase virtv1.VolumePhase) bool { return phase == virtv1.VolumeReady } func (c *Controller) getFilesystemOverhead(pvc *k8sv1.PersistentVolumeClaim) (virtv1.Percent, error) { // To avoid conflicts, we only allow having one CDI instance if cdiInstances := len(c.cdiStore.List()); cdiInstances != 1 { if cdiInstances > 1 { log.Log.V(3).Object(pvc).Reason(storagetypes.ErrMultipleCdiInstances).Infof(storagetypes.FSOverheadMsg) } else { log.Log.V(3).Object(pvc).Reason(storagetypes.ErrFailedToFindCdi).Infof(storagetypes.FSOverheadMsg) } return storagetypes.DefaultFSOverhead, nil } cdiConfigInterface, cdiConfigExists, err := c.cdiConfigStore.GetByKey(storagetypes.ConfigName) if !cdiConfigExists || err != nil { return "0", fmt.Errorf("Failed to find CDIConfig but CDI exists: %w", err) } cdiConfig, ok := cdiConfigInterface.(*cdiv1.CDIConfig) if !ok { return "0", fmt.Errorf("Failed to convert CDIConfig object %v to type CDIConfig", cdiConfigInterface) } return storagetypes.GetFilesystemOverhead(pvc.Spec.VolumeMode, pvc.Spec.StorageClassName, cdiConfig) } func (c *Controller) canMoveToAttachedPhase(currentPhase virtv1.VolumePhase) bool { return currentPhase == "" || currentPhase == virtv1.VolumeBound || currentPhase == virtv1.VolumePending } func (c *Controller) findAttachmentPodByVolumeName(volumeName string, attachmentPods []*k8sv1.Pod) *k8sv1.Pod { for _, pod := range attachmentPods { for _, podVolume := range pod.Spec.Volumes { if podVolume.Name == volumeName { return pod } } } return nil } func (c *Controller) getVolumePhaseMessageReason(volume *virtv1.Volume, namespace string) (virtv1.VolumePhase, string, string) { claimName := storagetypes.PVCNameFromVirtVolume(volume) pvcInterface, pvcExists, _ := c.pvcIndexer.GetByKey(fmt.Sprintf("%s/%s", namespace, claimName)) if !pvcExists { return virtv1.VolumePending, controller.FailedPvcNotFoundReason, "Unable to determine PVC name" } pvc := pvcInterface.(*k8sv1.PersistentVolumeClaim) if pvc.Status.Phase == k8sv1.ClaimPending { return virtv1.VolumePending, controller.PVCNotReadyReason, "PVC is in phase ClaimPending" } else if pvc.Status.Phase == k8sv1.ClaimBound { return virtv1.VolumeBound, controller.PVCNotReadyReason, "PVC is in phase Bound" } return virtv1.VolumePending, controller.PVCNotReadyReason, "PVC is in phase Lost" } func (c *Controller) syncHotplugCondition(vmi 
*virtv1.VirtualMachineInstance, conditionType virtv1.VirtualMachineInstanceConditionType) { vmiConditions := controller.NewVirtualMachineInstanceConditionManager() condition := virtv1.VirtualMachineInstanceCondition{ Type: conditionType, Status: k8sv1.ConditionTrue, } if !vmiConditions.HasCondition(vmi, condition.Type) { vmiConditions.UpdateCondition(vmi, &condition) log.Log.Object(vmi).V(4).Infof("adding hotplug condition %s", conditionType) } } func (c *Controller) requireCPUHotplug(vmi *virtv1.VirtualMachineInstance) bool { if vmi.Status.CurrentCPUTopology == nil || vmi.Spec.Domain.CPU == nil || vmi.Spec.Domain.CPU.MaxSockets == 0 { return false } cpuTopoLogyFromStatus := &virtv1.CPU{ Cores: vmi.Status.CurrentCPUTopology.Cores, Sockets: vmi.Status.CurrentCPUTopology.Sockets, Threads: vmi.Status.CurrentCPUTopology.Threads, } return hardware.GetNumberOfVCPUs(vmi.Spec.Domain.CPU) != hardware.GetNumberOfVCPUs(cpuTopoLogyFromStatus) } func (c *Controller) requireMemoryHotplug(vmi *virtv1.VirtualMachineInstance) bool { if vmi.Status.Memory == nil || vmi.Spec.Domain.Memory == nil || vmi.Spec.Domain.Memory.Guest == nil || vmi.Spec.Domain.Memory.MaxGuest == nil { return false } return vmi.Spec.Domain.Memory.Guest.Value() != vmi.Status.Memory.GuestRequested.Value() } func (c *Controller) syncMemoryHotplug(vmi *virtv1.VirtualMachineInstance) { c.syncHotplugCondition(vmi, virtv1.VirtualMachineInstanceMemoryChange) // store additionalGuestMemoryOverheadRatio overheadRatio := c.clusterConfig.GetConfig().AdditionalGuestMemoryOverheadRatio if overheadRatio != nil { if vmi.Labels == nil { vmi.Labels = map[string]string{} } vmi.Labels[virtv1.MemoryHotplugOverheadRatioLabel] = *overheadRatio } } func (c *Controller) requireVolumesUpdate(vmi *virtv1.VirtualMachineInstance) bool { if len(vmi.Status.MigratedVolumes) < 1 { return false } if controller.NewVirtualMachineInstanceConditionManager().HasCondition(vmi, virtv1.VirtualMachineInstanceVolumesChange) { return false } migVolsMap := make(map[string]string) for _, v := range vmi.Status.MigratedVolumes { migVolsMap[v.SourcePVCInfo.ClaimName] = v.DestinationPVCInfo.ClaimName } for _, v := range vmi.Spec.Volumes { claim := storagetypes.PVCNameFromVirtVolume(&v) if claim == "" { continue } if _, ok := migVolsMap[claim]; !ok { return true } } return false } func (c *Controller) syncVolumesUpdate(vmi *virtv1.VirtualMachineInstance) { vmiConditions := controller.NewVirtualMachineInstanceConditionManager() condition := virtv1.VirtualMachineInstanceCondition{ Type: virtv1.VirtualMachineInstanceVolumesChange, LastTransitionTime: v1.Now(), Status: k8sv1.ConditionTrue, Message: "migrate volumes", } vmiConditions.UpdateCondition(vmi, &condition) } func (c *Controller) aggregateDataVolumesConditions(vmiCopy *virtv1.VirtualMachineInstance, dvs []*cdiv1.DataVolume) { if len(dvs) == 0 { return } dvsReadyCondition := virtv1.VirtualMachineInstanceCondition{ Status: k8sv1.ConditionTrue, Type: virtv1.VirtualMachineInstanceDataVolumesReady, Reason: virtv1.VirtualMachineInstanceReasonAllDVsReady, Message: "All of the VMI's DVs are bound and not running", } for _, dv := range dvs { cStatus := statusOfReadyCondition(dv.Status.Conditions) if cStatus != k8sv1.ConditionTrue { dvsReadyCondition.Reason = virtv1.VirtualMachineInstanceReasonNotAllDVsReady if cStatus == k8sv1.ConditionFalse { dvsReadyCondition.Status = cStatus } else if dvsReadyCondition.Status == k8sv1.ConditionTrue { dvsReadyCondition.Status = cStatus } } } if dvsReadyCondition.Status != k8sv1.ConditionTrue { 
dvsReadyCondition.Message = "Not all of the VMI's DVs are ready" } vmiConditions := controller.NewVirtualMachineInstanceConditionManager() vmiConditions.UpdateCondition(vmiCopy, &dvsReadyCondition) } func statusOfReadyCondition(conditions []cdiv1.DataVolumeCondition) k8sv1.ConditionStatus { for _, condition := range conditions { if condition.Type == cdiv1.DataVolumeReady { return condition.Status } } return k8sv1.ConditionUnknown }
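// allDVsReady is an illustrative helper sketch (hypothetical; not used by the
// controller above). It collapses the per-DataVolume Ready conditions the same
// way aggregateDataVolumesConditions does when deciding whether the
// VirtualMachineInstanceDataVolumesReady condition can stay True: any
// DataVolume whose Ready condition is not an explicit ConditionTrue counts as
// not ready.
func allDVsReady(dvs []*cdiv1.DataVolume) bool {
	for _, dv := range dvs {
		if statusOfReadyCondition(dv.Status.Conditions) != k8sv1.ConditionTrue {
			return false
		}
	}
	return true
}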
package apply import ( "context" "fmt" "reflect" "github.com/openshift/library-go/pkg/operator/resource/resourcemerge" admissionregistrationv1 "k8s.io/api/admissionregistration/v1" admissionregistrationv1beta1 "k8s.io/api/admissionregistration/v1beta1" "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "kubevirt.io/client-go/log" "kubevirt.io/kubevirt/pkg/apimachinery/patch" ) func (r *Reconciler) createOrUpdateValidatingWebhookConfigurations(caBundle []byte) error { for _, webhook := range r.targetStrategy.ValidatingWebhookConfigurations() { err := r.createOrUpdateValidatingWebhookConfiguration(webhook, caBundle) if err != nil { return err } } return nil } func convertV1ValidatingWebhookToV1beta1(from *admissionregistrationv1.ValidatingWebhookConfiguration) (*admissionregistrationv1beta1.ValidatingWebhookConfiguration, error) { var b []byte b, err := from.Marshal() if err != nil { return nil, err } webhookv1beta1 := &admissionregistrationv1beta1.ValidatingWebhookConfiguration{} if err = webhookv1beta1.Unmarshal(b); err != nil { return nil, err } return webhookv1beta1, nil } func convertV1beta1ValidatingWebhookToV1(from *admissionregistrationv1beta1.ValidatingWebhookConfiguration) (*admissionregistrationv1.ValidatingWebhookConfiguration, error) { var b []byte b, err := from.Marshal() if err != nil { return nil, err } webhookv1 := &admissionregistrationv1.ValidatingWebhookConfiguration{} if err = webhookv1.Unmarshal(b); err != nil { return nil, err } return webhookv1, nil } func convertV1MutatingWebhookToV1beta1(from *admissionregistrationv1.MutatingWebhookConfiguration) (*admissionregistrationv1beta1.MutatingWebhookConfiguration, error) { var b []byte b, err := from.Marshal() if err != nil { return nil, err } webhookv1beta1 := &admissionregistrationv1beta1.MutatingWebhookConfiguration{} if err = webhookv1beta1.Unmarshal(b); err != nil { return nil, err } return webhookv1beta1, nil } func convertV1beta1MutatingWebhookToV1(from *admissionregistrationv1beta1.MutatingWebhookConfiguration) (*admissionregistrationv1.MutatingWebhookConfiguration, error) { var b []byte b, err := from.Marshal() if err != nil { return nil, err } webhookv1 := &admissionregistrationv1.MutatingWebhookConfiguration{} if err = webhookv1.Unmarshal(b); err != nil { return nil, err } return webhookv1, nil } func (r *Reconciler) patchValidatingWebhookConfiguration(webhook *admissionregistrationv1.ValidatingWebhookConfiguration, patchBytes []byte) (patchedWebhook *admissionregistrationv1.ValidatingWebhookConfiguration, err error) { switch webhook.APIVersion { case admissionregistrationv1.SchemeGroupVersion.Version, admissionregistrationv1.SchemeGroupVersion.String(): patchedWebhook, err = r.clientset.AdmissionregistrationV1().ValidatingWebhookConfigurations().Patch(context.Background(), webhook.Name, types.JSONPatchType, patchBytes, metav1.PatchOptions{}) case admissionregistrationv1beta1.SchemeGroupVersion.Version, admissionregistrationv1beta1.SchemeGroupVersion.String(): var out *admissionregistrationv1beta1.ValidatingWebhookConfiguration out, err = r.clientset.AdmissionregistrationV1beta1().ValidatingWebhookConfigurations().Patch(context.Background(), webhook.Name, types.JSONPatchType, patchBytes, metav1.PatchOptions{}) if err != nil { return } patchedWebhook, err = convertV1beta1ValidatingWebhookToV1(out) default: err = fmt.Errorf("ValidatingWebhookConfiguration APIVersion %s not supported", webhook.APIVersion) } return 
} func (r *Reconciler) createValidatingWebhookConfiguration(webhook *admissionregistrationv1.ValidatingWebhookConfiguration) (createdWebhook *admissionregistrationv1.ValidatingWebhookConfiguration, err error) { switch webhook.APIVersion { case admissionregistrationv1.SchemeGroupVersion.Version, admissionregistrationv1.SchemeGroupVersion.String(): createdWebhook, err = r.clientset.AdmissionregistrationV1().ValidatingWebhookConfigurations().Create(context.Background(), webhook, metav1.CreateOptions{}) case admissionregistrationv1beta1.SchemeGroupVersion.Version, admissionregistrationv1beta1.SchemeGroupVersion.String(): var webhookv1beta1 *admissionregistrationv1beta1.ValidatingWebhookConfiguration webhookv1beta1, err = convertV1ValidatingWebhookToV1beta1(webhook) if err != nil { return nil, err } webhookv1beta1, err = r.clientset.AdmissionregistrationV1beta1().ValidatingWebhookConfigurations().Create(context.Background(), webhookv1beta1, metav1.CreateOptions{}) if err != nil { return nil, err } createdWebhook, err = convertV1beta1ValidatingWebhookToV1(webhookv1beta1) default: err = fmt.Errorf("ValidatingWebhookConfiguration APIVersion %s not supported", webhook.APIVersion) } return } func (r *Reconciler) patchMutatingWebhookConfiguration(webhook *admissionregistrationv1.MutatingWebhookConfiguration, patchBytes []byte) (patchedWebhook *admissionregistrationv1.MutatingWebhookConfiguration, err error) { switch webhook.APIVersion { case admissionregistrationv1.SchemeGroupVersion.Version, admissionregistrationv1.SchemeGroupVersion.String(): patchedWebhook, err = r.clientset.AdmissionregistrationV1().MutatingWebhookConfigurations().Patch(context.Background(), webhook.Name, types.JSONPatchType, patchBytes, metav1.PatchOptions{}) case admissionregistrationv1beta1.SchemeGroupVersion.Version, admissionregistrationv1beta1.SchemeGroupVersion.String(): var out *admissionregistrationv1beta1.MutatingWebhookConfiguration out, err = r.clientset.AdmissionregistrationV1beta1().MutatingWebhookConfigurations().Patch(context.Background(), webhook.Name, types.JSONPatchType, patchBytes, metav1.PatchOptions{}) if err != nil { return } patchedWebhook, err = convertV1beta1MutatingWebhookToV1(out) default: err = fmt.Errorf("MutatingWebhookConfiguration APIVersion %s not supported", webhook.APIVersion) } return } func (r *Reconciler) createMutatingWebhookConfiguration(webhook *admissionregistrationv1.MutatingWebhookConfiguration) (createdWebhook *admissionregistrationv1.MutatingWebhookConfiguration, err error) { switch webhook.APIVersion { case admissionregistrationv1.SchemeGroupVersion.Version, admissionregistrationv1.SchemeGroupVersion.String(): createdWebhook, err = r.clientset.AdmissionregistrationV1().MutatingWebhookConfigurations().Create(context.Background(), webhook, metav1.CreateOptions{}) case admissionregistrationv1beta1.SchemeGroupVersion.Version, admissionregistrationv1beta1.SchemeGroupVersion.String(): var webhookv1beta1 *admissionregistrationv1beta1.MutatingWebhookConfiguration webhookv1beta1, err = convertV1MutatingWebhookToV1beta1(webhook) if err != nil { return nil, err } webhookv1beta1, err = r.clientset.AdmissionregistrationV1beta1().MutatingWebhookConfigurations().Create(context.Background(), webhookv1beta1, metav1.CreateOptions{}) if err != nil { return nil, err } createdWebhook, err = convertV1beta1MutatingWebhookToV1(webhookv1beta1) default: err = fmt.Errorf("MutatingWebhookConfiguration APIVersion %s not supported", webhook.APIVersion) } return } func (r *Reconciler) 
createOrUpdateValidatingWebhookConfiguration(webhook *admissionregistrationv1.ValidatingWebhookConfiguration, caBundle []byte) error { version, imageRegistry, id := getTargetVersionRegistryID(r.kv) webhook = webhook.DeepCopy() for i := range webhook.Webhooks { webhook.Webhooks[i].ClientConfig.CABundle = caBundle } injectOperatorMetadata(r.kv, &webhook.ObjectMeta, version, imageRegistry, id, true) var cachedWebhook *admissionregistrationv1.ValidatingWebhookConfiguration var err error obj, exists, _ := r.stores.ValidationWebhookCache.Get(webhook) // since these objects was in the past unmanaged, reconcile and pick it up if it exists if !exists { cachedWebhook, err = r.clientset.AdmissionregistrationV1().ValidatingWebhookConfigurations().Get(context.Background(), webhook.Name, metav1.GetOptions{}) if errors.IsNotFound(err) { exists = false } else if err != nil { return err } else { exists = true } } else { cachedWebhook = obj.(*admissionregistrationv1.ValidatingWebhookConfiguration) } certsMatch := true if exists { for _, wh := range cachedWebhook.Webhooks { if !reflect.DeepEqual(wh.ClientConfig.CABundle, caBundle) { certsMatch = false break } } } if !exists { r.expectations.ValidationWebhook.RaiseExpectations(r.kvKey, 1, 0) webhook, err := r.createValidatingWebhookConfiguration(webhook) if err != nil { r.expectations.ValidationWebhook.LowerExpectations(r.kvKey, 1, 0) return fmt.Errorf("unable to create validatingwebhook %+v: %v", webhook, err) } SetGeneration(&r.kv.Status.Generations, webhook) return nil } modified := resourcemerge.BoolPtr(false) existingCopy := cachedWebhook.DeepCopy() expectedGeneration := GetExpectedGeneration(webhook, r.kv.Status.Generations) resourcemerge.EnsureObjectMeta(modified, &existingCopy.ObjectMeta, webhook.ObjectMeta) // there was no change to metadata, the generation was right if !*modified && existingCopy.ObjectMeta.Generation == expectedGeneration && certsMatch { log.Log.V(4).Infof("validatingwebhookconfiguration %v is up-to-date", webhook.GetName()) return nil } // Patch if old version patchBytes, err := generateWebhooksPatch(cachedWebhook.ObjectMeta.Generation, webhook.ObjectMeta, webhook.Webhooks) if err != nil { return err } webhook, err = r.patchValidatingWebhookConfiguration(webhook, patchBytes) if err != nil { return fmt.Errorf("unable to update validatingwebhookconfiguration %+v: %v", webhook, err) } SetGeneration(&r.kv.Status.Generations, webhook) log.Log.V(2).Infof("validatingwebhoookconfiguration %v updated", webhook.Name) return nil } func (r *Reconciler) createOrUpdateMutatingWebhookConfigurations(caBundle []byte) error { for _, webhook := range r.targetStrategy.MutatingWebhookConfigurations() { err := r.createOrUpdateMutatingWebhookConfiguration(webhook, caBundle) if err != nil { return err } } return nil } func generateWebhooksPatch(generation int64, metaData metav1.ObjectMeta, webhooks interface{}) ([]byte, error) { patchSet := patch.New(patch.WithTest("/metadata/generation", generation)) patchSet.AddOption(createLabelsAndAnnotationsPatch(&metaData)...) 
patchSet.AddOption(patch.WithReplace("/webhooks", webhooks)) return patchSet.GeneratePayload() } func (r *Reconciler) createOrUpdateMutatingWebhookConfiguration(webhook *admissionregistrationv1.MutatingWebhookConfiguration, caBundle []byte) error { version, imageRegistry, id := getTargetVersionRegistryID(r.kv) webhook = webhook.DeepCopy() for i := range webhook.Webhooks { webhook.Webhooks[i].ClientConfig.CABundle = caBundle } injectOperatorMetadata(r.kv, &webhook.ObjectMeta, version, imageRegistry, id, true) var cachedWebhook *admissionregistrationv1.MutatingWebhookConfiguration var err error obj, exists, _ := r.stores.MutatingWebhookCache.Get(webhook) // since these objects was in the past unmanaged, reconcile and pick it up if it exists if !exists { cachedWebhook, err = r.clientset.AdmissionregistrationV1().MutatingWebhookConfigurations().Get(context.Background(), webhook.Name, metav1.GetOptions{}) if errors.IsNotFound(err) { exists = false } else if err != nil { return err } else { exists = true } } else { cachedWebhook = obj.(*admissionregistrationv1.MutatingWebhookConfiguration) } certsMatch := true if exists { for _, wh := range cachedWebhook.Webhooks { if !reflect.DeepEqual(wh.ClientConfig.CABundle, caBundle) { certsMatch = false break } } } if !exists { r.expectations.MutatingWebhook.RaiseExpectations(r.kvKey, 1, 0) webhook, err := r.createMutatingWebhookConfiguration(webhook) if err != nil { r.expectations.MutatingWebhook.LowerExpectations(r.kvKey, 1, 0) return fmt.Errorf("unable to create mutatingwebhook %+v: %v", webhook, err) } SetGeneration(&r.kv.Status.Generations, webhook) log.Log.V(2).Infof("mutatingwebhoookconfiguration %v created", webhook.Name) return nil } modified := resourcemerge.BoolPtr(false) existingCopy := cachedWebhook.DeepCopy() expectedGeneration := GetExpectedGeneration(webhook, r.kv.Status.Generations) resourcemerge.EnsureObjectMeta(modified, &existingCopy.ObjectMeta, webhook.ObjectMeta) // there was no change to metadata, the generation was right if !*modified && existingCopy.ObjectMeta.Generation == expectedGeneration && certsMatch { log.Log.V(4).Infof("mutating webhook configuration %v is up-to-date", webhook.GetName()) return nil } patchBytes, err := generateWebhooksPatch(cachedWebhook.ObjectMeta.Generation, webhook.ObjectMeta, webhook.Webhooks) if err != nil { return err } webhook, err = r.patchMutatingWebhookConfiguration(webhook, patchBytes) if err != nil { return fmt.Errorf("unable to update mutatingwebhookconfiguration %+v: %v", webhook, err) } SetGeneration(&r.kv.Status.Generations, webhook) log.Log.V(2).Infof("mutatingwebhoookconfiguration %v updated", webhook.Name) return nil } func (r *Reconciler) createOrUpdateValidatingAdmissionPolicyBindings() error { if !r.config.ValidatingAdmissionPolicyBindingEnabled { return nil } for _, validatingAdmissionPolicyBinding := range r.targetStrategy.ValidatingAdmissionPolicyBindings() { err := r.createOrUpdateValidatingAdmissionPolicyBinding(validatingAdmissionPolicyBinding.DeepCopy()) if err != nil { return err } } return nil } func (r *Reconciler) createOrUpdateValidatingAdmissionPolicyBinding(validatingAdmissionPolicyBinding *admissionregistrationv1.ValidatingAdmissionPolicyBinding) error { admissionRegistrationV1 := r.clientset.AdmissionregistrationV1() version, imageRegistry, id := getTargetVersionRegistryID(r.kv) injectOperatorMetadata(r.kv, &validatingAdmissionPolicyBinding.ObjectMeta, version, imageRegistry, id, true) obj, exists, _ := 
r.stores.ValidatingAdmissionPolicyBindingCache.Get(validatingAdmissionPolicyBinding) if !exists { r.expectations.ValidatingAdmissionPolicyBinding.RaiseExpectations(r.kvKey, 1, 0) _, err := admissionRegistrationV1.ValidatingAdmissionPolicyBindings().Create(context.Background(), validatingAdmissionPolicyBinding, metav1.CreateOptions{}) if err != nil { r.expectations.ValidatingAdmissionPolicyBinding.LowerExpectations(r.kvKey, 1, 0) return fmt.Errorf("unable to create validatingAdmissionPolicyBinding %+v: %v", validatingAdmissionPolicyBinding, err) } return nil } cachedValidatingAdmissionPolicyBinding := obj.(*admissionregistrationv1.ValidatingAdmissionPolicyBinding) patchSet := patch.New() patchSet.AddOption(getObjectMetaPatch(validatingAdmissionPolicyBinding.ObjectMeta, cachedValidatingAdmissionPolicyBinding.ObjectMeta)...) if !equality.Semantic.DeepEqual(cachedValidatingAdmissionPolicyBinding.Spec, validatingAdmissionPolicyBinding.Spec) { patchSet.AddOption(patch.WithReplace("/spec", validatingAdmissionPolicyBinding.Spec)) } if patchSet.IsEmpty() { log.Log.V(4).Infof("validatingAdmissionPolicyBinding %v is up-to-date", validatingAdmissionPolicyBinding.GetName()) return nil } p, err := patchSet.GeneratePayload() if err != nil { return fmt.Errorf("unable to generate validatingAdmissionPolicyBinding patch operations for %+v: %v", validatingAdmissionPolicyBinding, err) } _, err = admissionRegistrationV1.ValidatingAdmissionPolicyBindings().Patch(context.Background(), validatingAdmissionPolicyBinding.Name, types.JSONPatchType, p, metav1.PatchOptions{}) if err != nil { return fmt.Errorf("unable to patch validatingAdmissionPolicyBinding %+v: %v", validatingAdmissionPolicyBinding, err) } log.Log.V(2).Infof("validatingAdmissionPolicyBinding %v patched", validatingAdmissionPolicyBinding.GetName()) return nil } func (r *Reconciler) createOrUpdateValidatingAdmissionPolicies() error { if !r.config.ValidatingAdmissionPolicyEnabled { return nil } for _, validatingAdmissionPolicy := range r.targetStrategy.ValidatingAdmissionPolicies() { err := r.createOrUpdateValidatingAdmissionPolicy(validatingAdmissionPolicy.DeepCopy()) if err != nil { return err } } return nil } func (r *Reconciler) createOrUpdateValidatingAdmissionPolicy(validatingAdmissionPolicy *admissionregistrationv1.ValidatingAdmissionPolicy) error { admissionRegistrationV1 := r.clientset.AdmissionregistrationV1() version, imageRegistry, id := getTargetVersionRegistryID(r.kv) injectOperatorMetadata(r.kv, &validatingAdmissionPolicy.ObjectMeta, version, imageRegistry, id, true) obj, exists, _ := r.stores.ValidatingAdmissionPolicyCache.Get(validatingAdmissionPolicy) if !exists { r.expectations.ValidatingAdmissionPolicy.RaiseExpectations(r.kvKey, 1, 0) _, err := admissionRegistrationV1.ValidatingAdmissionPolicies().Create(context.Background(), validatingAdmissionPolicy, metav1.CreateOptions{}) if err != nil { r.expectations.ValidatingAdmissionPolicy.LowerExpectations(r.kvKey, 1, 0) return fmt.Errorf("unable to create validatingAdmissionPolicy %+v: %v", validatingAdmissionPolicy, err) } return nil } cachedValidatingAdmissionPolicy := obj.(*admissionregistrationv1.ValidatingAdmissionPolicy) patchSet := patch.New() patchSet.AddOption(getObjectMetaPatch(validatingAdmissionPolicy.ObjectMeta, cachedValidatingAdmissionPolicy.ObjectMeta)...) 
if !equality.Semantic.DeepEqual(cachedValidatingAdmissionPolicy.Spec, validatingAdmissionPolicy.Spec) { patchSet.AddOption(patch.WithReplace("/spec", validatingAdmissionPolicy.Spec)) } if patchSet.IsEmpty() { log.Log.V(4).Infof("validatingAdmissionPolicy %v is up-to-date", validatingAdmissionPolicy.GetName()) return nil } p, err := patchSet.GeneratePayload() if err != nil { return fmt.Errorf("unable to generate validatingAdmissionPolicy patch operations for %+v: %v", validatingAdmissionPolicy, err) } _, err = admissionRegistrationV1.ValidatingAdmissionPolicies().Patch(context.Background(), validatingAdmissionPolicy.Name, types.JSONPatchType, p, metav1.PatchOptions{}) if err != nil { return fmt.Errorf("unable to patch validatingAdmissionPolicy %+v: %v", validatingAdmissionPolicy, err) } log.Log.V(2).Infof("validatingAdmissionPolicy %v patched", validatingAdmissionPolicy.GetName()) return nil }
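// Note (illustrative, not part of the sources): the create-or-patch functions
// in this file emit JSON patches that only touch the operator-managed metadata
// and the object's payload field ("/webhooks" for webhook configurations,
// "/spec" for validating admission policies and bindings). Assuming the patch
// package produces standard RFC 6902 operations, a policy whose spec drifted
// would be patched with a document shaped roughly like:
//
//	[
//	  {"op": "replace", "path": "/metadata/labels", "value": {"...": "..."}},
//	  {"op": "replace", "path": "/metadata/annotations", "value": {"...": "..."}},
//	  {"op": "replace", "path": "/spec", "value": {"...": "..."}}
//	]
//
// The webhook variants additionally guard the patch with a
// {"op": "test", "path": "/metadata/generation", ...} operation so the update
// only applies against the generation that was observed in the cache.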
package apply import ( "context" "fmt" "github.com/openshift/library-go/pkg/operator/resource/resourcemerge" "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" apiregv1 "k8s.io/kube-aggregator/pkg/apis/apiregistration/v1" "kubevirt.io/client-go/log" "kubevirt.io/kubevirt/pkg/apimachinery/patch" ) func (r *Reconciler) createOrUpdateAPIServices(caBundle []byte) error { for _, apiService := range r.targetStrategy.APIServices() { err := r.createOrUpdateAPIService(apiService.DeepCopy(), caBundle) if err != nil { return err } } return nil } func (r *Reconciler) createOrUpdateAPIService(apiService *apiregv1.APIService, caBundle []byte) error { version, imageRegistry, id := getTargetVersionRegistryID(r.kv) injectOperatorMetadata(r.kv, &apiService.ObjectMeta, version, imageRegistry, id, true) apiService.Spec.CABundle = caBundle var cachedAPIService *apiregv1.APIService var err error obj, exists, _ := r.stores.APIServiceCache.Get(apiService) // since these objects was in the past unmanaged, reconcile and pick it up if it exists if !exists { cachedAPIService, err = r.aggregatorclient.Get(context.Background(), apiService.Name, metav1.GetOptions{}) if errors.IsNotFound(err) { exists = false } else if err != nil { return err } else { exists = true } } else { cachedAPIService = obj.(*apiregv1.APIService) } if !exists { r.expectations.APIService.RaiseExpectations(r.kvKey, 1, 0) _, err := r.aggregatorclient.Create(context.Background(), apiService, metav1.CreateOptions{}) if err != nil { r.expectations.APIService.LowerExpectations(r.kvKey, 1, 0) return fmt.Errorf("unable to create apiservice %+v: %v", apiService, err) } return nil } modified := resourcemerge.BoolPtr(false) resourcemerge.EnsureObjectMeta(modified, &cachedAPIService.ObjectMeta, apiService.ObjectMeta) serviceSame := equality.Semantic.DeepEqual(cachedAPIService.Spec.Service, apiService.Spec.Service) certsSame := equality.Semantic.DeepEqual(apiService.Spec.CABundle, cachedAPIService.Spec.CABundle) prioritySame := cachedAPIService.Spec.VersionPriority == apiService.Spec.VersionPriority && cachedAPIService.Spec.GroupPriorityMinimum == apiService.Spec.GroupPriorityMinimum insecureSame := cachedAPIService.Spec.InsecureSkipTLSVerify == apiService.Spec.InsecureSkipTLSVerify // there was no change to metadata, the service and priorities were right if !*modified && serviceSame && prioritySame && insecureSame && certsSame { log.Log.V(4).Infof("apiservice %v is up-to-date", apiService.GetName()) return nil } patchBytes, err := patch.New(getPatchWithObjectMetaAndSpec([]patch.PatchOption{}, &apiService.ObjectMeta, apiService.Spec)...).GeneratePayload() if err != nil { return err } _, err = r.aggregatorclient.Patch(context.Background(), apiService.Name, types.JSONPatchType, patchBytes, metav1.PatchOptions{}) if err != nil { return fmt.Errorf("unable to patch apiservice %+v: %v", apiService, err) } log.Log.V(4).Infof("apiservice %v updated", apiService.GetName()) return nil }
package apply import ( "context" "fmt" jsonpatch "github.com/evanphx/json-patch" "github.com/openshift/library-go/pkg/operator/resource/resourcemerge" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" v1 "kubevirt.io/api/core/v1" "kubevirt.io/client-go/kubecli" "kubevirt.io/client-go/log" "kubevirt.io/kubevirt/pkg/apimachinery/patch" "kubevirt.io/kubevirt/pkg/pointer" "kubevirt.io/kubevirt/pkg/virt-operator/resource/generate/components" "kubevirt.io/kubevirt/pkg/virt-operator/util" ) const ( failedUpdateDaemonSetReason = "FailedUpdate" ) var ( daemonSetDefaultMaxUnavailable = intstr.FromInt(1) daemonSetFastMaxUnavailable = intstr.FromString("10%") ) type CanaryUpgradeStatus string const ( CanaryUpgradeStatusStarted CanaryUpgradeStatus = "started" CanaryUpgradeStatusUpgradingDaemonSet CanaryUpgradeStatus = "upgrading daemonset" CanaryUpgradeStatusWaitingDaemonSetRollout CanaryUpgradeStatus = "waiting for daemonset rollout" CanaryUpgradeStatusSuccessful CanaryUpgradeStatus = "successful" CanaryUpgradeStatusFailed CanaryUpgradeStatus = "failed" ) func (r *Reconciler) syncDeployment(origDeployment *appsv1.Deployment) (*appsv1.Deployment, error) { kv := r.kv deployment := origDeployment.DeepCopy() apps := r.clientset.AppsV1() imageTag, imageRegistry, id := getTargetVersionRegistryID(kv) injectOperatorMetadata(kv, &deployment.ObjectMeta, imageTag, imageRegistry, id, true) injectOperatorMetadata(kv, &deployment.Spec.Template.ObjectMeta, imageTag, imageRegistry, id, false) InjectPlacementMetadata(kv.Spec.Infra, &deployment.Spec.Template.Spec, RequireControlPlanePreferNonWorker) if kv.Spec.Infra != nil && kv.Spec.Infra.Replicas != nil { replicas := int32(*kv.Spec.Infra.Replicas) if deployment.Spec.Replicas == nil || *deployment.Spec.Replicas != replicas { deployment.Spec.Replicas = &replicas r.recorder.Eventf(deployment, corev1.EventTypeWarning, "AdvancedFeatureUse", "applying custom number of infra replica. this is an advanced feature that prevents "+ "auto-scaling for core kubevirt components. 
Please use with caution!") } } else if deployment.Name == components.VirtAPIName && !replicasAlreadyPatched(r.kv.Spec.CustomizeComponents.Patches, components.VirtAPIName) { replicas, err := getDesiredApiReplicas(r.clientset) if err != nil { log.Log.Object(deployment).Warningf(err.Error()) } else { deployment.Spec.Replicas = pointer.P(replicas) } } obj, exists, _ := r.stores.DeploymentCache.Get(deployment) if !exists { r.expectations.Deployment.RaiseExpectations(r.kvKey, 1, 0) deployment, err := apps.Deployments(kv.Namespace).Create(context.Background(), deployment, metav1.CreateOptions{}) if err != nil { r.expectations.Deployment.LowerExpectations(r.kvKey, 1, 0) return nil, fmt.Errorf("unable to create deployment %+v: %v", deployment, err) } SetGeneration(&kv.Status.Generations, deployment) return deployment, nil } cachedDeployment := obj.(*appsv1.Deployment) modified := resourcemerge.BoolPtr(false) existingCopy := cachedDeployment.DeepCopy() expectedGeneration := GetExpectedGeneration(deployment, kv.Status.Generations) resourcemerge.EnsureObjectMeta(modified, &existingCopy.ObjectMeta, deployment.ObjectMeta) // there was no change to metadata, the generation matched if !*modified && *existingCopy.Spec.Replicas == *deployment.Spec.Replicas && existingCopy.GetGeneration() == expectedGeneration { log.Log.V(4).Infof("deployment %v is up-to-date", deployment.GetName()) return deployment, nil } const revisionAnnotation = "deployment.kubernetes.io/revision" if val, ok := existingCopy.ObjectMeta.Annotations[revisionAnnotation]; ok { if deployment.ObjectMeta.Annotations == nil { deployment.ObjectMeta.Annotations = map[string]string{} } deployment.ObjectMeta.Annotations[revisionAnnotation] = val } ops, err := patch.New(getPatchWithObjectMetaAndSpec([]patch.PatchOption{ patch.WithTest("/metadata/generation", cachedDeployment.ObjectMeta.Generation)}, &deployment.ObjectMeta, deployment.Spec)...).GeneratePayload() if err != nil { return nil, err } deployment, err = apps.Deployments(kv.Namespace).Patch(context.Background(), deployment.Name, types.JSONPatchType, ops, metav1.PatchOptions{}) if err != nil { return nil, fmt.Errorf("unable to update deployment %+v: %v", deployment, err) } SetGeneration(&kv.Status.Generations, deployment) log.Log.V(2).Infof("deployment %v updated", deployment.GetName()) return deployment, nil } func setMaxUnavailable(daemonSet *appsv1.DaemonSet, maxUnavailable intstr.IntOrString) { daemonSet.Spec.UpdateStrategy.RollingUpdate = &appsv1.RollingUpdateDaemonSet{ MaxUnavailable: &maxUnavailable, } } func generateDaemonSetPatch(oldDs, newDs *appsv1.DaemonSet) ([]byte, error) { return patch.New( getPatchWithObjectMetaAndSpec([]patch.PatchOption{ patch.WithTest("/metadata/generation", oldDs.ObjectMeta.Generation)}, &newDs.ObjectMeta, newDs.Spec)...).GeneratePayload() } func (r *Reconciler) patchDaemonSet(oldDs, newDs *appsv1.DaemonSet) (*appsv1.DaemonSet, error) { patch, err := generateDaemonSetPatch(oldDs, newDs) if err != nil { return nil, err } newDs, err = r.clientset.AppsV1().DaemonSets(r.kv.Namespace).Patch( context.Background(), newDs.Name, types.JSONPatchType, patch, metav1.PatchOptions{}) if err != nil { return nil, fmt.Errorf("unable to update daemonset %+v: %v", oldDs, err) } return newDs, nil } func (r *Reconciler) getCanaryPods(daemonSet *appsv1.DaemonSet) []*corev1.Pod { canaryPods := []*corev1.Pod{} for _, obj := range r.stores.InfrastructurePodCache.List() { pod := obj.(*corev1.Pod) owner := metav1.GetControllerOf(pod) if owner != nil && owner.Name == daemonSet.Name && 
util.PodIsUpToDate(pod, r.kv) { canaryPods = append(canaryPods, pod) } } return canaryPods } func (r *Reconciler) howManyUpdatedAndReadyPods(daemonSet *appsv1.DaemonSet) int32 { var updatedReadyPods int32 for _, obj := range r.stores.InfrastructurePodCache.List() { pod := obj.(*corev1.Pod) owner := metav1.GetControllerOf(pod) if owner != nil && owner.Name == daemonSet.Name && util.PodIsUpToDate(pod, r.kv) && util.PodIsReady(pod) { updatedReadyPods++ } } return updatedReadyPods } func daemonHasDefaultRolloutStrategy(daemonSet *appsv1.DaemonSet) bool { return getMaxUnavailable(daemonSet) == daemonSetDefaultMaxUnavailable.IntValue() } func (r *Reconciler) processCanaryUpgrade(cachedDaemonSet, newDS *appsv1.DaemonSet, forceUpdate bool) (bool, error, CanaryUpgradeStatus) { var updatedAndReadyPods int32 var status CanaryUpgradeStatus done := false isDaemonSetUpdated := util.DaemonSetIsUpToDate(r.kv, cachedDaemonSet) && !forceUpdate desiredReadyPods := cachedDaemonSet.Status.DesiredNumberScheduled if isDaemonSetUpdated { updatedAndReadyPods = r.howManyUpdatedAndReadyPods(cachedDaemonSet) } switch { case updatedAndReadyPods == 0: if !isDaemonSetUpdated { // start canary upgrade setMaxUnavailable(newDS, daemonSetDefaultMaxUnavailable) _, err := r.patchDaemonSet(cachedDaemonSet, newDS) if err != nil { return false, fmt.Errorf("unable to start canary upgrade for daemonset %+v: %v", newDS, err), CanaryUpgradeStatusFailed } } else { // check for a crashed canary pod canaryPods := r.getCanaryPods(cachedDaemonSet) for _, canary := range canaryPods { if canary != nil && util.PodIsCrashLooping(canary) { r.recorder.Eventf(cachedDaemonSet, corev1.EventTypeWarning, failedUpdateDaemonSetReason, "daemonSet %v rollout failed", cachedDaemonSet.Name) return false, fmt.Errorf("daemonSet %s rollout failed", cachedDaemonSet.Name), CanaryUpgradeStatusFailed } } } done, status = false, CanaryUpgradeStatusStarted case updatedAndReadyPods > 0 && updatedAndReadyPods < desiredReadyPods: if daemonHasDefaultRolloutStrategy(cachedDaemonSet) { // canary was ok, start real rollout setMaxUnavailable(newDS, daemonSetFastMaxUnavailable) // start rollout again _, err := r.patchDaemonSet(cachedDaemonSet, newDS) if err != nil { return false, fmt.Errorf("unable to update daemonset %+v: %v", newDS, err), CanaryUpgradeStatusFailed } log.Log.V(2).Infof("daemonSet %v updated", newDS.GetName()) status = CanaryUpgradeStatusUpgradingDaemonSet } else { log.Log.V(4).Infof("waiting for all pods of daemonSet %v to be ready", newDS.GetName()) status = CanaryUpgradeStatusWaitingDaemonSetRollout } done = false case updatedAndReadyPods > 0 && updatedAndReadyPods == desiredReadyPods: // rollout has completed and all virt-handlers are ready // revert maxUnavailable to default value setMaxUnavailable(newDS, daemonSetDefaultMaxUnavailable) newDS, err := r.patchDaemonSet(cachedDaemonSet, newDS) if err != nil { return false, err, CanaryUpgradeStatusFailed } SetGeneration(&r.kv.Status.Generations, newDS) log.Log.V(2).Infof("daemonSet %v is ready", newDS.GetName()) done, status = true, CanaryUpgradeStatusSuccessful } return done, nil, status } func getMaxUnavailable(daemonSet *appsv1.DaemonSet) int { update := daemonSet.Spec.UpdateStrategy.RollingUpdate if update == nil { return 0 } if update.MaxUnavailable != nil { return update.MaxUnavailable.IntValue() } return daemonSetDefaultMaxUnavailable.IntValue() } func (r *Reconciler) syncDaemonSet(daemonSet *appsv1.DaemonSet) (bool, error) { kv := r.kv daemonSet = daemonSet.DeepCopy() apps := r.clientset.AppsV1() 
imageTag, imageRegistry, id := getTargetVersionRegistryID(kv) injectOperatorMetadata(kv, &daemonSet.ObjectMeta, imageTag, imageRegistry, id, true) injectOperatorMetadata(kv, &daemonSet.Spec.Template.ObjectMeta, imageTag, imageRegistry, id, false) InjectPlacementMetadata(kv.Spec.Workloads, &daemonSet.Spec.Template.Spec, AnyNode) if daemonSet.GetName() == "virt-handler" { setMaxDevices(r.kv, daemonSet) } var cachedDaemonSet *appsv1.DaemonSet obj, exists, _ := r.stores.DaemonSetCache.Get(daemonSet) if !exists { r.expectations.DaemonSet.RaiseExpectations(r.kvKey, 1, 0) daemonSet, err := apps.DaemonSets(kv.Namespace).Create(context.Background(), daemonSet, metav1.CreateOptions{}) if err != nil { r.expectations.DaemonSet.LowerExpectations(r.kvKey, 1, 0) return false, fmt.Errorf("unable to create daemonset %+v: %v", daemonSet, err) } SetGeneration(&kv.Status.Generations, daemonSet) return true, nil } cachedDaemonSet = obj.(*appsv1.DaemonSet) modified := resourcemerge.BoolPtr(false) existingCopy := cachedDaemonSet.DeepCopy() expectedGeneration := GetExpectedGeneration(daemonSet, kv.Status.Generations) resourcemerge.EnsureObjectMeta(modified, &existingCopy.ObjectMeta, daemonSet.ObjectMeta) // there was no change to metadata, the generation was right if !*modified && existingCopy.GetGeneration() == expectedGeneration { log.Log.V(4).Infof("daemonset %v is up-to-date", daemonSet.GetName()) return true, nil } // canary pod upgrade // first update virt-handler with maxUnavailable=1 // patch daemonSet with new version // wait for a new virt-handler to be ready // set maxUnavailable=10% // start the rollout of the new virt-handler again // wait for all nodes to complete the rollout // set maxUnavailable back to 1 done, err, _ := r.processCanaryUpgrade(cachedDaemonSet, daemonSet, *modified) return done, err } func setMaxDevices(kv *v1.KubeVirt, vh *appsv1.DaemonSet) { if kv.Spec.Configuration.VirtualMachineInstancesPerNode == nil { return } vh.Spec.Template.Spec.Containers[0].Command = append(vh.Spec.Template.Spec.Containers[0].Command, "--max-devices", fmt.Sprintf("%d", *kv.Spec.Configuration.VirtualMachineInstancesPerNode)) } func (r *Reconciler) syncPodDisruptionBudgetForDeployment(deployment *appsv1.Deployment) error { kv := r.kv podDisruptionBudget := components.NewPodDisruptionBudgetForDeployment(deployment) imageTag, imageRegistry, id := getTargetVersionRegistryID(kv) injectOperatorMetadata(kv, &podDisruptionBudget.ObjectMeta, imageTag, imageRegistry, id, true) pdbClient := r.clientset.PolicyV1().PodDisruptionBudgets(deployment.Namespace) var cachedPodDisruptionBudget *policyv1.PodDisruptionBudget obj, exists, _ := r.stores.PodDisruptionBudgetCache.Get(podDisruptionBudget) if podDisruptionBudget.Spec.MinAvailable.IntValue() == 0 { var err error if exists { err = pdbClient.Delete(context.Background(), podDisruptionBudget.Name, metav1.DeleteOptions{}) } return err } if !exists { r.expectations.PodDisruptionBudget.RaiseExpectations(r.kvKey, 1, 0) podDisruptionBudget, err := pdbClient.Create(context.Background(), podDisruptionBudget, metav1.CreateOptions{}) if err != nil { r.expectations.PodDisruptionBudget.LowerExpectations(r.kvKey, 1, 0) return fmt.Errorf("unable to create poddisruptionbudget %+v: %v", podDisruptionBudget, err) } log.Log.V(2).Infof("poddisruptionbudget %v created", podDisruptionBudget.GetName()) SetGeneration(&kv.Status.Generations, podDisruptionBudget) return nil } cachedPodDisruptionBudget = obj.(*policyv1.PodDisruptionBudget) modified := resourcemerge.BoolPtr(false) existingCopy := 
cachedPodDisruptionBudget.DeepCopy() expectedGeneration := GetExpectedGeneration(podDisruptionBudget, kv.Status.Generations) resourcemerge.EnsureObjectMeta(modified, &existingCopy.ObjectMeta, podDisruptionBudget.ObjectMeta) // there was no change to metadata or minAvailable, the generation was right if !*modified && existingCopy.Spec.MinAvailable.IntValue() == podDisruptionBudget.Spec.MinAvailable.IntValue() && existingCopy.ObjectMeta.Generation == expectedGeneration { log.Log.V(4).Infof("poddisruptionbudget %v is up-to-date", cachedPodDisruptionBudget.GetName()) return nil } patchBytes, err := patch.New(getPatchWithObjectMetaAndSpec([]patch.PatchOption{}, &podDisruptionBudget.ObjectMeta, podDisruptionBudget.Spec)...).GeneratePayload() if err != nil { return err } podDisruptionBudget, err = pdbClient.Patch(context.Background(), podDisruptionBudget.Name, types.JSONPatchType, patchBytes, metav1.PatchOptions{}) if err != nil { return fmt.Errorf("unable to patch/delete poddisruptionbudget %+v: %v", podDisruptionBudget, err) } SetGeneration(&kv.Status.Generations, podDisruptionBudget) log.Log.V(2).Infof("poddisruptionbudget %v patched", podDisruptionBudget.GetName()) return nil } func getDesiredApiReplicas(clientset kubecli.KubevirtClient) (replicas int32, err error) { nodeList, err := clientset.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{}) if err != nil { return 0, fmt.Errorf("failed to get number of nodes to determine virt-api replicas: %v", err) } nodesCount := len(nodeList.Items) // This is a simple heuristic to achieve basic scalability so we could be running on large clusters. // From recent experiments we know that for a 100 nodes cluster, 9 virt-api replicas are enough. // This heuristic is not accurate. It could, and should, be replaced by something more sophisticated and refined // in the future. if nodesCount == 1 { return 1, nil } const minReplicas = 2 replicas = int32(nodesCount) / 10 if replicas < minReplicas { replicas = minReplicas } return replicas, nil } func replicasAlreadyPatched(patches []v1.CustomizeComponentsPatch, deploymentName string) bool { for _, patch := range patches { if patch.ResourceName != deploymentName { continue } decodedPatch, err := jsonpatch.DecodePatch([]byte(patch.Patch)) if err != nil { log.Log.Warningf(err.Error()) continue } for _, operation := range decodedPatch { path, err := operation.Path() if err != nil { log.Log.Warningf(err.Error()) continue } op := operation.Kind() if path == "/spec/replicas" && op == "replace" { return true } } } return false }
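// Worked example (illustrative only) of the getDesiredApiReplicas heuristic
// above, which applies when neither an explicit infra replica count nor a
// customization patch overrides virt-api: a single-node cluster gets exactly
// one replica, any other cluster gets one replica per ten nodes (integer
// division), clamped to a minimum of two:
//
//	nodes:    1   3   10   29   30   100   250
//	replicas: 1   2    2    2    3    10    25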
package apply import ( "time" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" k8sv1 "kubevirt.io/api/core/v1" ) func GetCADuration(config *k8sv1.KubeVirtSelfSignConfiguration) *metav1.Duration { defaultDuration := &metav1.Duration{Duration: Duration7d} if config == nil { return defaultDuration } // deprecated, but takes priority to provide a smooth upgrade path if config.CARotateInterval != nil { return config.CARotateInterval } if config.CA != nil && config.CA.Duration != nil { return config.CA.Duration } return defaultDuration } func GetCARenewBefore(config *k8sv1.KubeVirtSelfSignConfiguration) *metav1.Duration { caDuration := GetCADuration(config) defaultDuration := &metav1.Duration{Duration: time.Duration(float64(caDuration.Duration) * 0.2)} if config == nil { return defaultDuration } // deprecated, but takes priority to provide a smooth upgrade path if config.CAOverlapInterval != nil { return config.CAOverlapInterval } if config.CA != nil && config.CA.RenewBefore != nil { return config.CA.RenewBefore } return defaultDuration } func GetCertDuration(config *k8sv1.KubeVirtSelfSignConfiguration) *metav1.Duration { defaultDuration := &metav1.Duration{Duration: Duration1d} if config == nil { return defaultDuration } // deprecated, but takes priority to provide a smooth upgrade path if config.CertRotateInterval != nil { return config.CertRotateInterval } if config.Server != nil && config.Server.Duration != nil { return config.Server.Duration } return defaultDuration } func GetCertRenewBefore(config *k8sv1.KubeVirtSelfSignConfiguration) *metav1.Duration { certDuration := GetCertDuration(config) defaultDuration := &metav1.Duration{Duration: time.Duration(float64(certDuration.Duration) * 0.2)} if config == nil { return defaultDuration } if config.Server != nil && config.Server.RenewBefore != nil { return config.Server.RenewBefore } return defaultDuration }
package apply import ( "context" "crypto/tls" "encoding/json" "fmt" "time" "github.com/openshift/library-go/pkg/operator/resource/resourcemerge" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" typedv1 "k8s.io/client-go/kubernetes/typed/core/v1" "k8s.io/client-go/util/workqueue" "kubevirt.io/client-go/log" "kubevirt.io/kubevirt/pkg/apimachinery/patch" "kubevirt.io/kubevirt/pkg/certificates/triple/cert" "kubevirt.io/kubevirt/pkg/controller" "kubevirt.io/kubevirt/pkg/virt-operator/resource/generate/components" "kubevirt.io/kubevirt/pkg/virt-operator/util" ) func (r *Reconciler) syncKubevirtNamespaceLabels() error { targetNamespace := r.kv.ObjectMeta.Namespace obj, exists, err := r.stores.NamespaceCache.GetByKey(targetNamespace) if err != nil { log.Log.Errorf("Failed to retrieve kubevirt namespace from store. Error: %s", err.Error()) return err } if !exists { return fmt.Errorf("Could not find namespace in store. Namespace key: %s", targetNamespace) } cachedNamespace := obj.(*corev1.Namespace) // Prepare namespace metadata patch targetLabels := map[string]string{ "openshift.io/cluster-monitoring": "true", } cachedLabels := cachedNamespace.ObjectMeta.Labels labelsToPatch := make(map[string]string) for targetLabelKey, targetLabelValue := range targetLabels { cachedLabelValue, ok := cachedLabels[targetLabelKey] if ok && cachedLabelValue == targetLabelValue { continue } labelsToPatch[targetLabelKey] = targetLabelValue } if len(labelsToPatch) == 0 { log.Log.Infof("Kubevirt namespace (%s) labels are in sync", targetNamespace) return nil } labelsPatch, err := json.Marshal(labelsToPatch) if err != nil { log.Log.Errorf("Failed to marshal namespace labels: %s", err.Error()) return err } log.Log.Infof("Patching namespace %s with %s", targetNamespace, labelsPatch) _, err = r.clientset.CoreV1().Namespaces().Patch(context.Background(), targetNamespace, types.MergePatchType, []byte(fmt.Sprintf(`{"metadata":{"labels": %s}}`, labelsPatch)), metav1.PatchOptions{}, ) if err != nil { log.Log.Errorf("Could not patch kubevirt namespace labels: %s", err.Error()) return err } log.Log.Infof("kubevirt namespace labels patched") return nil } func (r *Reconciler) createOrUpdateServices() (bool, error) { for _, service := range r.targetStrategy.Services() { pending, err := r.createOrUpdateService(service.DeepCopy()) if pending || err != nil { return pending, err } } return false, nil } func (r *Reconciler) createOrUpdateService(service *corev1.Service) (bool, error) { core := r.clientset.CoreV1() version, imageRegistry, id := getTargetVersionRegistryID(r.kv) injectOperatorMetadata(r.kv, &service.ObjectMeta, version, imageRegistry, id, true) obj, exists, _ := r.stores.ServiceCache.Get(service) if !exists { r.expectations.Service.RaiseExpectations(r.kvKey, 1, 0) _, err := core.Services(service.Namespace).Create(context.Background(), service, metav1.CreateOptions{}) if err != nil { r.expectations.Service.LowerExpectations(r.kvKey, 1, 0) return false, fmt.Errorf("unable to create service %+v: %v", service, err) } return false, nil } cachedService := obj.(*corev1.Service) deleteAndReplace := hasImmutableFieldChanged(service, cachedService) if deleteAndReplace { err := deleteService(cachedService, r.kvKey, r.expectations, core) if err != nil { return false, err } // waiting for old service to be deleted, // after which the operator will recreate using new spec return true, nil } patchBytes, err := 
generateServicePatch(cachedService, service) if err != nil { return false, fmt.Errorf("unable to generate service endpoint patch operations for %+v: %v", service, err) } if len(patchBytes) == 0 { log.Log.V(4).Infof("service %v is up-to-date", service.GetName()) return false, nil } _, err = core.Services(service.Namespace).Patch(context.Background(), service.Name, types.JSONPatchType, patchBytes, metav1.PatchOptions{}) if err != nil { return false, fmt.Errorf("unable to patch service %+v: %v", service, err) } log.Log.V(2).Infof("service %v patched", service.GetName()) return false, nil } func (r *Reconciler) getSecret(secret *corev1.Secret) (*corev1.Secret, bool, error) { obj, exists, _ := r.stores.SecretCache.Get(secret) if exists { return obj.(*corev1.Secret), exists, nil } cachedSecret, err := r.clientset.CoreV1().Secrets(secret.Namespace).Get(context.Background(), secret.Name, metav1.GetOptions{}) if err != nil { if errors.IsNotFound(err) { return nil, false, nil } return nil, false, err } return cachedSecret, true, nil } func certificationNeedsRotation(secret *corev1.Secret, duration *metav1.Duration, ca *tls.Certificate, renewBefore *metav1.Duration, caRenewBefore *metav1.Duration) bool { crt, err := components.LoadCertificates(secret) if err != nil { log.DefaultLogger().Reason(err).Infof("Failed to load certificate from secret %s, will rotate it.", secret.Name) return true } if secret.Annotations["kubevirt.io/duration"] != duration.String() { return true } rotationTime := components.NextRotationDeadline(crt, ca, renewBefore, caRenewBefore) // We update the certificate if it has passed its renewal timeout if rotationTime.Before(time.Now()) { return true } return false } func deleteService(service *corev1.Service, kvKey string, expectations *util.Expectations, core typedv1.CoreV1Interface) error { if service.DeletionTimestamp != nil { return nil } key, err := controller.KeyFunc(service) if err != nil { return err } gracePeriod := int64(0) deleteOptions := metav1.DeleteOptions{ GracePeriodSeconds: &gracePeriod, } expectations.Service.AddExpectedDeletion(kvKey, key) err = core.Services(service.Namespace).Delete(context.Background(), service.Name, deleteOptions) if err != nil { expectations.Service.DeletionObserved(kvKey, key) log.Log.Errorf("Failed to delete service %+v: %v", service, err) return err } log.Log.V(2).Infof("service %v deleted. 
It must be re-created", service.GetName()) return nil } func (r *Reconciler) createOrUpdateCertificateSecret(queue workqueue.TypedRateLimitingInterface[string], ca *tls.Certificate, secret *corev1.Secret, duration *metav1.Duration, renewBefore *metav1.Duration, caRenewBefore *metav1.Duration) (*tls.Certificate, error) { var cachedSecret *corev1.Secret var err error secret = secret.DeepCopy() version, imageRegistry, id := getTargetVersionRegistryID(r.kv) injectOperatorMetadata(r.kv, &secret.ObjectMeta, version, imageRegistry, id, true) log.DefaultLogger().V(4).Infof("checking certificate %v", secret.Name) cachedSecret, exists, err := r.getSecret(secret) if err != nil { return nil, err } rotateCertificate := false if exists { rotateCertificate = certificationNeedsRotation(cachedSecret, duration, ca, renewBefore, caRenewBefore) } // populate the secret with correct certificate if !exists || rotateCertificate { if err := components.PopulateSecretWithCertificate(secret, ca, duration); err != nil { return nil, err } } else { secret.Data = cachedSecret.Data } crt, err := components.LoadCertificates(secret) if err != nil { log.DefaultLogger().Reason(err).Infof("Failed to load certificate from secret %s.", secret.Name) return nil, err } // we need to ensure that we revisit certificates before they expire wakeupDeadline := components.NextRotationDeadline(crt, ca, renewBefore, caRenewBefore).Sub(time.Now()) queue.AddAfter(r.kvKey, wakeupDeadline) if !exists { r.expectations.Secrets.RaiseExpectations(r.kvKey, 1, 0) _, err := r.clientset.CoreV1().Secrets(secret.Namespace).Create(context.Background(), secret, metav1.CreateOptions{}) if err != nil { r.expectations.Secrets.LowerExpectations(r.kvKey, 1, 0) return nil, fmt.Errorf("unable to create secret %+v: %v", secret, err) } return crt, nil } modified := resourcemerge.BoolPtr(false) resourcemerge.EnsureObjectMeta(modified, &cachedSecret.ObjectMeta, secret.ObjectMeta) if !*modified && !rotateCertificate { log.Log.V(4).Infof("secret %v is up-to-date", secret.GetName()) return crt, nil } patchBytes, err := createSecretPatch(secret) if err != nil { return nil, err } _, err = r.clientset.CoreV1().Secrets(secret.Namespace).Patch(context.Background(), secret.Name, types.JSONPatchType, patchBytes, metav1.PatchOptions{}) if err != nil { return nil, fmt.Errorf("unable to patch secret %+v: %v", secret, err) } log.Log.V(2).Infof("secret %v updated", secret.GetName()) return crt, nil } func createSecretPatch(secret *corev1.Secret) ([]byte, error) { // Add Labels and Annotations Patches ops := createLabelsAndAnnotationsPatch(&secret.ObjectMeta) ops = append(ops, patch.WithReplace("/data", secret.Data)) return patch.New(ops...).GeneratePayload() } func (r *Reconciler) createOrUpdateCertificateSecrets(queue workqueue.TypedRateLimitingInterface[string], caCert *tls.Certificate, duration *metav1.Duration, renewBefore *metav1.Duration, caRenewBefore *metav1.Duration) error { for _, secret := range r.targetStrategy.CertificateSecrets() { // The CA certificate needs to be handled separately and before other secrets, and ignore export CA if secret.Name == components.KubeVirtCASecretName || secret.Name == components.KubeVirtExportCASecretName { continue } _, err := r.createOrUpdateCertificateSecret(queue, caCert, secret, duration, renewBefore, caRenewBefore) if err != nil { return err } } return nil } func (r *Reconciler) createOrUpdateComponentsWithCertificates(queue workqueue.TypedRateLimitingInterface[string]) error { caDuration := 
GetCADuration(r.kv.Spec.CertificateRotationStrategy.SelfSigned) caExportDuration := GetCADuration(r.kv.Spec.CertificateRotationStrategy.SelfSigned) caRenewBefore := GetCARenewBefore(r.kv.Spec.CertificateRotationStrategy.SelfSigned) certDuration := GetCertDuration(r.kv.Spec.CertificateRotationStrategy.SelfSigned) certRenewBefore := GetCertRenewBefore(r.kv.Spec.CertificateRotationStrategy.SelfSigned) caExportRenewBefore := GetCertRenewBefore(r.kv.Spec.CertificateRotationStrategy.SelfSigned) // create/update CA Certificate secret caCert, err := r.createOrUpdateCACertificateSecret(queue, components.KubeVirtCASecretName, caDuration, caRenewBefore) if err != nil { return err } // create/update CA Certificate secret caExportCert, err := r.createOrUpdateCACertificateSecret(queue, components.KubeVirtExportCASecretName, caExportDuration, caExportRenewBefore) if err != nil { return err } // create/update CA config map caBundle, err := r.createOrUpdateKubeVirtCAConfigMap(queue, caCert, caRenewBefore, findRequiredCAConfigMap(components.KubeVirtCASecretName, r.targetStrategy.ConfigMaps())) if err != nil { return err } // create/update export CA config map _, err = r.createOrUpdateKubeVirtCAConfigMap(queue, caExportCert, caExportRenewBefore, findRequiredCAConfigMap(components.KubeVirtExportCASecretName, r.targetStrategy.ConfigMaps())) if err != nil { return err } // create/update ValidatingWebhookConfiguration err = r.createOrUpdateValidatingWebhookConfigurations(caBundle) if err != nil { return err } // create/update MutatingWebhookConfiguration err = r.createOrUpdateMutatingWebhookConfigurations(caBundle) if err != nil { return err } // create/update APIServices err = r.createOrUpdateAPIServices(caBundle) if err != nil { return err } // create/update Routes err = r.createOrUpdateRoutes(caBundle) if err != nil { return err } // create/update Certificate secrets err = r.createOrUpdateCertificateSecrets(queue, caCert, certDuration, certRenewBefore, caRenewBefore) if err != nil { return err } return nil } func shouldEnforceClusterIP(desired, current string) bool { if desired == "" { return false } return desired != current } func getObjectMetaPatch(desired, current metav1.ObjectMeta) []patch.PatchOption { modified := resourcemerge.BoolPtr(false) existingCopy := current.DeepCopy() resourcemerge.EnsureObjectMeta(modified, existingCopy, desired) if *modified { // labels and/or annotations modified add patch return createLabelsAndAnnotationsPatch(&desired) } return nil } func hasImmutableFieldChanged(service, cachedService *corev1.Service) bool { deleteAndReplace := false typeSame := isServiceClusterIP(cachedService) && isServiceClusterIP(service) if !typeSame || shouldEnforceClusterIP(service.Spec.ClusterIP, cachedService.Spec.ClusterIP) { deleteAndReplace = true } return deleteAndReplace } func generateServicePatch( cachedService *corev1.Service, service *corev1.Service) ([]byte, error) { patchOps := getObjectMetaPatch(service.ObjectMeta, cachedService.ObjectMeta) // set these values in the case they are empty service.Spec.ClusterIP = cachedService.Spec.ClusterIP service.Spec.Type = cachedService.Spec.Type if service.Spec.SessionAffinity == "" { service.Spec.SessionAffinity = cachedService.Spec.SessionAffinity } // If the Specs don't equal each other, replace it if !equality.Semantic.DeepEqual(cachedService.Spec, service.Spec) { patchOps = append(patchOps, patch.WithReplace("/spec", service.Spec)) } patchset := patch.New(patchOps...) 
if patchset.IsEmpty() { return nil, nil } return patchset.GeneratePayload() } func (r *Reconciler) createOrUpdateServiceAccount(sa *corev1.ServiceAccount) error { core := r.clientset.CoreV1() version, imageRegistry, id := getTargetVersionRegistryID(r.kv) injectOperatorMetadata(r.kv, &sa.ObjectMeta, version, imageRegistry, id, true) obj, exists, _ := r.stores.ServiceAccountCache.Get(sa) if !exists { // Create non existent r.expectations.ServiceAccount.RaiseExpectations(r.kvKey, 1, 0) _, err := core.ServiceAccounts(r.kv.Namespace).Create(context.Background(), sa, metav1.CreateOptions{}) if err != nil { r.expectations.ServiceAccount.LowerExpectations(r.kvKey, 1, 0) return fmt.Errorf("unable to create serviceaccount %+v: %v", sa, err) } log.Log.V(2).Infof("serviceaccount %v created", sa.GetName()) return nil } cachedSa := obj.(*corev1.ServiceAccount) modified := resourcemerge.BoolPtr(false) resourcemerge.EnsureObjectMeta(modified, &cachedSa.ObjectMeta, sa.ObjectMeta) // there was no change to metadata if !*modified { // Up to date log.Log.V(4).Infof("serviceaccount %v already exists and is up-to-date", sa.GetName()) return nil } // Patch Labels and Annotations labelAnnotationPatch, err := patch.New(createLabelsAndAnnotationsPatch(&sa.ObjectMeta)...).GeneratePayload() if err != nil { return err } _, err = core.ServiceAccounts(r.kv.Namespace).Patch(context.Background(), sa.Name, types.JSONPatchType, labelAnnotationPatch, metav1.PatchOptions{}) if err != nil { return fmt.Errorf("unable to patch serviceaccount %+v: %v", sa, err) } log.Log.V(2).Infof("serviceaccount %v updated", sa.GetName()) return nil } func (r *Reconciler) createOrUpdateRbac() error { version, imageRegistry, id := getTargetVersionRegistryID(r.kv) // create/update ServiceAccounts for _, sa := range r.targetStrategy.ServiceAccounts() { if err := r.createOrUpdateServiceAccount(sa.DeepCopy()); err != nil { return err } } // create/update ClusterRoles for _, cr := range r.targetStrategy.ClusterRoles() { err := r.createOrUpdateClusterRole(cr, version, imageRegistry, id) if err != nil { return err } } // create/update ClusterRoleBindings for _, crb := range r.targetStrategy.ClusterRoleBindings() { err := r.createOrUpdateClusterRoleBinding(crb, version, imageRegistry, id) if err != nil { return err } } // create/update Roles for _, role := range r.targetStrategy.Roles() { err := r.createOrUpdateRole(role, version, imageRegistry, id) if err != nil { return err } } // create/update RoleBindings for _, rb := range r.targetStrategy.RoleBindings() { err := r.createOrUpdateRoleBinding(rb, version, imageRegistry, id) if err != nil { return err } } return nil } func findRequiredCAConfigMap(name string, configmaps []*corev1.ConfigMap) *corev1.ConfigMap { for _, cm := range configmaps { if cm.Name != name { continue } return cm.DeepCopy() } return nil } func shouldUpdateBundle(required, existing *corev1.ConfigMap, key string, queue workqueue.TypedRateLimitingInterface[string], caCert *tls.Certificate, overlapInterval *metav1.Duration) (bool, error) { bundle, certCount, err := components.MergeCABundle(caCert, []byte(existing.Data[components.CABundleKey]), overlapInterval.Duration) if err != nil { // the only error that can be returned form MergeCABundle is if the CA caBundle // is unable to be parsed. 
If we can not parse it we should update it return true, err } // ensure that we remove the old CA after the overlap period if certCount > 1 { queue.AddAfter(key, overlapInterval.Duration) } updateBundle := false required.Data = map[string]string{components.CABundleKey: string(bundle)} if !equality.Semantic.DeepEqual(required.Data, existing.Data) { updateBundle = true } return updateBundle, nil } func (r *Reconciler) createOrUpdateKubeVirtCAConfigMap(queue workqueue.TypedRateLimitingInterface[string], caCert *tls.Certificate, overlapInterval *metav1.Duration, configMap *corev1.ConfigMap) (caBundle []byte, err error) { if configMap == nil { return nil, nil } log.DefaultLogger().V(4).Infof("checking ca config map %v", configMap.Name) version, imageRegistry, id := getTargetVersionRegistryID(r.kv) injectOperatorMetadata(r.kv, &configMap.ObjectMeta, version, imageRegistry, id, true) obj, exists, _ := r.stores.ConfigMapCache.Get(configMap) if !exists { configMap.Data = map[string]string{components.CABundleKey: string(cert.EncodeCertPEM(caCert.Leaf))} r.expectations.ConfigMap.RaiseExpectations(r.kvKey, 1, 0) _, err := r.clientset.CoreV1().ConfigMaps(configMap.Namespace).Create(context.Background(), configMap, metav1.CreateOptions{}) if err != nil { r.expectations.ConfigMap.LowerExpectations(r.kvKey, 1, 0) return nil, fmt.Errorf("unable to create configMap %+v: %v", configMap, err) } return []byte(configMap.Data[components.CABundleKey]), nil } existing := obj.(*corev1.ConfigMap) updateBundle, err := shouldUpdateBundle(configMap, existing, r.kvKey, queue, caCert, overlapInterval) if err != nil { if !updateBundle { return nil, err } configMap.Data = map[string]string{components.CABundleKey: string(cert.EncodeCertPEM(caCert.Leaf))} log.Log.Reason(err).V(2).Infof("There was an error validating the CA bundle stored in configmap %s. We are updating the bundle.", configMap.GetName()) } modified := resourcemerge.BoolPtr(false) resourcemerge.EnsureObjectMeta(modified, &existing.DeepCopy().ObjectMeta, configMap.ObjectMeta) if !*modified && !updateBundle { log.Log.V(4).Infof("configMap %v is up-to-date", configMap.GetName()) return []byte(configMap.Data[components.CABundleKey]), nil } patchBytes, err := createConfigMapPatch(configMap) if err != nil { return nil, err } _, err = r.clientset.CoreV1().ConfigMaps(configMap.Namespace).Patch(context.Background(), configMap.Name, types.JSONPatchType, patchBytes, metav1.PatchOptions{}) if err != nil { return nil, fmt.Errorf("unable to patch configMap %+v: %v", configMap, err) } log.Log.V(2).Infof("configMap %v updated", configMap.GetName()) return []byte(configMap.Data[components.CABundleKey]), nil } func createConfigMapPatch(configMap *corev1.ConfigMap) ([]byte, error) { // Add Labels and Annotations Patches ops := createLabelsAndAnnotationsPatch(&configMap.ObjectMeta) ops = append(ops, patch.WithReplace("/data", configMap.Data)) return patch.New(ops...).GeneratePayload() } func (r *Reconciler) createOrUpdateCACertificateSecret(queue workqueue.TypedRateLimitingInterface[string], name string, duration *metav1.Duration, renewBefore *metav1.Duration) (caCert *tls.Certificate, err error) { for _, secret := range r.targetStrategy.CertificateSecrets() { // Only work on the ca secrets if secret.Name != name { continue } cert, err := r.createOrUpdateCertificateSecret(queue, nil, secret, duration, renewBefore, nil) if err != nil { return nil, err } caCert = cert } return caCert, nil }
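// The reconciler above schedules itself to run again before a certificate
// expires (components.NextRotationDeadline feeds queue.AddAfter). The
// stand-alone sketch below only illustrates that deadline arithmetic under
// simplified assumptions; nextRotationDeadline is a hypothetical helper, not
// the actual KubeVirt implementation.
package main

import (
	"fmt"
	"time"
)

// nextRotationDeadline returns the time at which a certificate valid in
// [notBefore, notAfter] should be rotated, i.e. renewBefore ahead of expiry.
func nextRotationDeadline(notBefore, notAfter time.Time, renewBefore time.Duration) time.Time {
	deadline := notAfter.Add(-renewBefore)
	if deadline.Before(notBefore) {
		// never schedule a rotation before the certificate becomes valid
		return notBefore
	}
	return deadline
}

func main() {
	notBefore := time.Now()
	notAfter := notBefore.Add(7 * 24 * time.Hour)

	// A controller would requeue with roughly this delay, e.g. queue.AddAfter(key, wakeup).
	wakeup := time.Until(nextRotationDeadline(notBefore, notAfter, 24*time.Hour))
	fmt.Printf("re-queue reconciliation in %v\n", wakeup.Round(time.Minute))
}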
package apply import ( "context" "fmt" "github.com/openshift/library-go/pkg/operator/resource/resourcemerge" extv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "kubevirt.io/client-go/log" "kubevirt.io/kubevirt/pkg/apimachinery/patch" ) func getSubresourcesForVersion(crd *extv1.CustomResourceDefinition, version string) *extv1.CustomResourceSubresources { for _, v := range crd.Spec.Versions { if version == v.Name { return v.Subresources } } return nil } func needsSubresourceStatusEnable(crd, cachedCrd *extv1.CustomResourceDefinition) bool { for _, version := range crd.Spec.Versions { if version.Subresources != nil && version.Subresources.Status != nil { subresource := getSubresourcesForVersion(cachedCrd, version.Name) if subresource == nil || subresource.Status == nil { return true } } } return false } func needsSubresourceStatusDisable(crdTargetVersion *extv1.CustomResourceDefinitionVersion, cachedCrd *extv1.CustomResourceDefinition) bool { // subresource support needs to be introduced carefully after the control plane roll-over // to avoid creating zombie entities which don't get processed due to ignored status updates cachedSubresource := getSubresourcesForVersion(cachedCrd, crdTargetVersion.Name) return (cachedSubresource == nil || cachedSubresource.Status == nil) && (crdTargetVersion.Subresources != nil && crdTargetVersion.Subresources.Status != nil) } func patchCRD(client clientset.Interface, crd *extv1.CustomResourceDefinition, ops []patch.PatchOption) (*extv1.CustomResourceDefinition, error) { name := crd.GetName() ops = append(ops, patch.WithReplace("/spec", crd.Spec)) patchBytes, err := patch.New(ops...).GeneratePayload() if err != nil { return nil, err } crd, err = client.ApiextensionsV1().CustomResourceDefinitions().Patch(context.Background(), name, types.JSONPatchType, patchBytes, metav1.PatchOptions{}) if err != nil { return nil, fmt.Errorf("unable to patch crd %+v: %v", crd, err) } log.Log.V(2).Infof("crd %v updated", name) return crd, nil } func (r *Reconciler) createOrUpdateCrds() error { for _, crd := range r.targetStrategy.CRDs() { err := r.createOrUpdateCrd(crd) if err != nil { return err } } return nil } func (r *Reconciler) createOrUpdateCrd(crd *extv1.CustomResourceDefinition) error { client := r.clientset.ExtensionsClient() version, imageRegistry, id := getTargetVersionRegistryID(r.kv) var cachedCrd *extv1.CustomResourceDefinition crd = crd.DeepCopy() injectOperatorMetadata(r.kv, &crd.ObjectMeta, version, imageRegistry, id, true) obj, exists, _ := r.stores.OperatorCrdCache.Get(crd) if !exists { // Create non existent r.expectations.OperatorCrd.RaiseExpectations(r.kvKey, 1, 0) createdCRD, err := client.ApiextensionsV1().CustomResourceDefinitions().Create(context.Background(), crd, metav1.CreateOptions{}) if err != nil { r.expectations.OperatorCrd.LowerExpectations(r.kvKey, 1, 0) return fmt.Errorf("unable to create crd %+v: %v", crd, err) } SetGeneration(&r.kv.Status.Generations, createdCRD) log.Log.V(2).Infof("crd %v created", crd.GetName()) return nil } cachedCrd = obj.(*extv1.CustomResourceDefinition) modified := resourcemerge.BoolPtr(false) expectedGeneration := GetExpectedGeneration(crd, r.kv.Status.Generations) resourcemerge.EnsureObjectMeta(modified, &cachedCrd.ObjectMeta, crd.ObjectMeta) // there was no change to metadata, the generation was right if !*modified && cachedCrd.GetGeneration() == 
expectedGeneration { log.Log.V(4).Infof("crd %v is up-to-date", crd.GetName()) return nil } // keep the status subresource disabled on versions where the deployed CRD does not have it enabled yet, so the old control plane keeps working during roll-over for i := range crd.Spec.Versions { if needsSubresourceStatusDisable(&crd.Spec.Versions[i], cachedCrd) { crd.Spec.Versions[i].Subresources.Status = nil } } // Add Labels and Annotations Patches crd, err := patchCRD(client, crd, createLabelsAndAnnotationsPatch(&crd.ObjectMeta)) if err != nil { return err } SetGeneration(&r.kv.Status.Generations, crd) return nil } func (r *Reconciler) rolloutNonCompatibleCRDChanges() error { for _, crd := range r.targetStrategy.CRDs() { err := r.rolloutNonCompatibleCRDChange(crd) if err != nil { return err } } return nil } func (r *Reconciler) rolloutNonCompatibleCRDChange(crd *extv1.CustomResourceDefinition) error { client := r.clientset.ExtensionsClient() version, imageRegistry, id := getTargetVersionRegistryID(r.kv) var cachedCrd *extv1.CustomResourceDefinition crd = crd.DeepCopy() obj, exists, err := r.stores.OperatorCrdCache.Get(crd) if !exists { return err } cachedCrd = obj.(*extv1.CustomResourceDefinition) injectOperatorMetadata(r.kv, &crd.ObjectMeta, version, imageRegistry, id, true) if objectMatchesVersion(&cachedCrd.ObjectMeta, version, imageRegistry, id, r.kv.GetGeneration()) { // only patch if the deployed version does not have the status subresource enabled yet if !needsSubresourceStatusEnable(crd, cachedCrd) { return nil } // enable the status subresource now, in case it was disabled before if _, err := patchCRD(client, crd, []patch.PatchOption{}); err != nil { return err } return nil } log.Log.V(4).Infof("crd %v is up-to-date", crd.GetName()) return nil }
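// createOrUpdateCrd and rolloutNonCompatibleCRDChange above stage the status
// subresource across a control-plane roll-over: it stays disabled on versions
// where the deployed CRD does not have it yet, and is only enabled once the
// cached CRD matches the target version. The sketch below models that decision
// with plain maps instead of the apiextensions types; shouldDisableForRollout
// is an illustrative helper, not part of the real code.
package main

import "fmt"

// statusEnabled maps a CRD version name to whether its status subresource is enabled.
type statusEnabled map[string]bool

// shouldDisableForRollout reports whether the status subresource must stay
// disabled for a version: the target wants it, but the deployed CRD does not
// have it enabled yet.
func shouldDisableForRollout(version string, target, deployed statusEnabled) bool {
	return target[version] && !deployed[version]
}

func main() {
	target := statusEnabled{"v1alpha3": true, "v1": true}
	deployed := statusEnabled{"v1alpha3": true, "v1": false} // v1 not rolled out yet

	for _, v := range []string{"v1alpha3", "v1"} {
		fmt.Printf("version %s: keep status subresource disabled: %v\n", v, shouldDisableForRollout(v, target, deployed))
	}
}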
/* * This file is part of the KubeVirt project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * Copyright 2019 Red Hat, Inc. * */ package apply import ( "context" "encoding/json" "fmt" "strings" routev1 "github.com/openshift/api/route/v1" secv1 "github.com/openshift/api/security/v1" promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" admissionregistrationv1 "k8s.io/api/admissionregistration/v1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" rbacv1 "k8s.io/api/rbac/v1" extv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" apiregv1 "k8s.io/kube-aggregator/pkg/apis/apiregistration/v1" v1 "kubevirt.io/api/core/v1" "kubevirt.io/client-go/kubecli" "kubevirt.io/client-go/log" "kubevirt.io/kubevirt/pkg/apimachinery/patch" "kubevirt.io/kubevirt/pkg/controller" "kubevirt.io/kubevirt/pkg/virt-operator/resource/generate/install" "kubevirt.io/kubevirt/pkg/virt-operator/util" ) const ( castFailedFmt = "Cast failed! obj: %+v" deleteFailedFmt = "Failed to delete %s: %v" ) func deleteDummyWebhookValidators(kv *v1.KubeVirt, clientset kubecli.KubevirtClient, stores util.Stores, expectations *util.Expectations) error { kvkey, err := controller.KeyFunc(kv) if err != nil { return err } gracePeriod := int64(0) deleteOptions := metav1.DeleteOptions{ GracePeriodSeconds: &gracePeriod, } objects := stores.ValidationWebhookCache.List() for _, obj := range objects { if webhook, ok := obj.(*admissionregistrationv1.ValidatingWebhookConfiguration); ok { if !strings.HasPrefix(webhook.Name, "virt-operator-tmp-webhook") { continue } if webhook.DeletionTimestamp != nil { continue } if key, err := controller.KeyFunc(webhook); err == nil { expectations.ValidationWebhook.AddExpectedDeletion(kvkey, key) err = clientset.AdmissionregistrationV1().ValidatingWebhookConfigurations().Delete(context.Background(), webhook.Name, deleteOptions) if err != nil { expectations.ValidationWebhook.DeletionObserved(kvkey, key) return fmt.Errorf("unable to delete validation webhook: %v", err) } log.Log.V(2).Infof("Temporary blocking validation webhook %s deleted", webhook.Name) } } } return nil } func DeleteAll(kv *v1.KubeVirt, stores util.Stores, clientset kubecli.KubevirtClient, aggregatorclient install.APIServiceInterface, expectations *util.Expectations) error { kvkey, err := controller.KeyFunc(kv) if err != nil { return err } gracePeriod := int64(0) deleteOptions := metav1.DeleteOptions{ GracePeriodSeconds: &gracePeriod, } // first delete CRDs only err = crdHandleDeletion(kvkey, stores, clientset, expectations) if err != nil { return err } if !util.IsStoreEmpty(stores.OperatorCrdCache) { // wait until CRDs are gone return nil } // delete daemonsets objects := stores.DaemonSetCache.List() for _, obj := range objects { if ds, ok := obj.(*appsv1.DaemonSet); ok && ds.DeletionTimestamp == nil { if key, err := controller.KeyFunc(ds); err == nil { expectations.DaemonSet.AddExpectedDeletion(kvkey, key) 
err := clientset.AppsV1().DaemonSets(ds.Namespace).Delete(context.Background(), ds.Name, deleteOptions) if err != nil { expectations.DaemonSet.DeletionObserved(kvkey, key) log.Log.Errorf(deleteFailedFmt, ds.Name, err) return err } } } else if !ok { log.Log.Errorf(castFailedFmt, obj) return nil } } // delete podDisruptionBudgets objects = stores.PodDisruptionBudgetCache.List() for _, obj := range objects { if pdb, ok := obj.(*policyv1.PodDisruptionBudget); ok && pdb.DeletionTimestamp == nil { if key, err := controller.KeyFunc(pdb); err == nil { pdbClient := clientset.PolicyV1().PodDisruptionBudgets(pdb.Namespace) expectations.PodDisruptionBudget.AddExpectedDeletion(kvkey, key) err = pdbClient.Delete(context.Background(), pdb.Name, metav1.DeleteOptions{}) if err != nil { expectations.PodDisruptionBudget.DeletionObserved(kvkey, key) log.Log.Errorf(deleteFailedFmt, pdb.Name, err) return err } } } else if !ok { log.Log.Errorf(castFailedFmt, obj) return nil } } // delete deployments objects = stores.DeploymentCache.List() for _, obj := range objects { if depl, ok := obj.(*appsv1.Deployment); ok && depl.DeletionTimestamp == nil { if key, err := controller.KeyFunc(depl); err == nil { expectations.Deployment.AddExpectedDeletion(kvkey, key) err = clientset.AppsV1().Deployments(depl.Namespace).Delete(context.Background(), depl.Name, deleteOptions) if err != nil { expectations.Deployment.DeletionObserved(kvkey, key) log.Log.Errorf(deleteFailedFmt, depl.Name, err) return err } } } else if !ok { log.Log.Errorf(castFailedFmt, obj) return nil } } // delete validatingwebhooks objects = stores.ValidationWebhookCache.List() for _, obj := range objects { if webhookConfiguration, ok := obj.(*admissionregistrationv1.ValidatingWebhookConfiguration); ok && webhookConfiguration.DeletionTimestamp == nil { if key, err := controller.KeyFunc(webhookConfiguration); err == nil { expectations.ValidationWebhook.AddExpectedDeletion(kvkey, key) err := clientset.AdmissionregistrationV1().ValidatingWebhookConfigurations().Delete(context.Background(), webhookConfiguration.Name, deleteOptions) if err != nil { expectations.ValidationWebhook.DeletionObserved(kvkey, key) log.Log.Errorf("Failed to delete validatingwebhook %+v: %v", webhookConfiguration, err) return err } } } else if !ok { log.Log.Errorf(castFailedFmt, obj) return nil } } // delete mutatingwebhooks objects = stores.MutatingWebhookCache.List() for _, obj := range objects { if webhookConfiguration, ok := obj.(*admissionregistrationv1.MutatingWebhookConfiguration); ok && webhookConfiguration.DeletionTimestamp == nil { if key, err := controller.KeyFunc(webhookConfiguration); err == nil { expectations.MutatingWebhook.AddExpectedDeletion(kvkey, key) err := clientset.AdmissionregistrationV1().MutatingWebhookConfigurations().Delete(context.Background(), webhookConfiguration.Name, deleteOptions) if err != nil { expectations.MutatingWebhook.DeletionObserved(kvkey, key) log.Log.Errorf("Failed to delete mutatingwebhook %+v: %v", webhookConfiguration, err) return err } } } else if !ok { log.Log.Errorf(castFailedFmt, obj) return nil } } // delete apiservices objects = stores.APIServiceCache.List() for _, obj := range objects { if apiservice, ok := obj.(*apiregv1.APIService); ok && apiservice.DeletionTimestamp == nil { if key, err := controller.KeyFunc(apiservice); err == nil { expectations.APIService.AddExpectedDeletion(kvkey, key) err := aggregatorclient.Delete(context.Background(), apiservice.Name, deleteOptions) if err != nil { expectations.APIService.DeletionObserved(kvkey, 
key) log.Log.Errorf("Failed to delete apiservice %+v: %v", apiservice, err) return err } } } else if !ok { log.Log.Errorf(castFailedFmt, obj) return nil } } // delete services objects = stores.ServiceCache.List() for _, obj := range objects { if svc, ok := obj.(*corev1.Service); ok && svc.DeletionTimestamp == nil { if key, err := controller.KeyFunc(svc); err == nil { expectations.Service.AddExpectedDeletion(kvkey, key) err := clientset.CoreV1().Services(svc.Namespace).Delete(context.Background(), svc.Name, deleteOptions) if err != nil { expectations.Service.DeletionObserved(kvkey, key) log.Log.Errorf("Failed to delete service %+v: %v", svc, err) return err } } } else if !ok { log.Log.Errorf(castFailedFmt, obj) return nil } } // delete serviceMonitor prometheusClient := clientset.PrometheusClient() objects = stores.ServiceMonitorCache.List() for _, obj := range objects { if serviceMonitor, ok := obj.(*promv1.ServiceMonitor); ok && serviceMonitor.DeletionTimestamp == nil { if key, err := controller.KeyFunc(serviceMonitor); err == nil { expectations.ServiceMonitor.AddExpectedDeletion(kvkey, key) err := prometheusClient.MonitoringV1().ServiceMonitors(serviceMonitor.Namespace).Delete(context.Background(), serviceMonitor.Name, deleteOptions) if err != nil { expectations.ServiceMonitor.DeletionObserved(kvkey, key) log.Log.Errorf("Failed to delete serviceMonitor %+v: %v", serviceMonitor, err) return err } expectations.ServiceMonitor.DeletionObserved(kvkey, key) } } else if !ok { log.Log.Errorf(castFailedFmt, obj) return nil } } // delete PrometheusRules objects = stores.PrometheusRuleCache.List() for _, obj := range objects { if prometheusRule, ok := obj.(*promv1.PrometheusRule); ok && prometheusRule.DeletionTimestamp == nil { if key, err := controller.KeyFunc(prometheusRule); err == nil { expectations.PrometheusRule.AddExpectedDeletion(kvkey, key) err := prometheusClient.MonitoringV1().PrometheusRules(prometheusRule.Namespace).Delete(context.Background(), prometheusRule.Name, deleteOptions) if err != nil { log.Log.Errorf("Failed to delete prometheusRule %+v: %v", prometheusRule, err) expectations.PrometheusRule.DeletionObserved(kvkey, key) return err } } } else if !ok { log.Log.Errorf(castFailedFmt, obj) return nil } } // delete RBAC objects = stores.ClusterRoleBindingCache.List() for _, obj := range objects { if crb, ok := obj.(*rbacv1.ClusterRoleBinding); ok && crb.DeletionTimestamp == nil { if key, err := controller.KeyFunc(crb); err == nil { expectations.ClusterRoleBinding.AddExpectedDeletion(kvkey, key) err := clientset.RbacV1().ClusterRoleBindings().Delete(context.Background(), crb.Name, deleteOptions) if err != nil { expectations.ClusterRoleBinding.DeletionObserved(kvkey, key) log.Log.Errorf("Failed to delete crb %+v: %v", crb, err) return err } } } else if !ok { log.Log.Errorf(castFailedFmt, obj) return nil } } objects = stores.ClusterRoleCache.List() for _, obj := range objects { if cr, ok := obj.(*rbacv1.ClusterRole); ok && cr.DeletionTimestamp == nil { if key, err := controller.KeyFunc(cr); err == nil { expectations.ClusterRole.AddExpectedDeletion(kvkey, key) err := clientset.RbacV1().ClusterRoles().Delete(context.Background(), cr.Name, deleteOptions) if err != nil { expectations.ClusterRole.DeletionObserved(kvkey, key) log.Log.Errorf("Failed to delete cr %+v: %v", cr, err) return err } } } else if !ok { log.Log.Errorf(castFailedFmt, obj) return nil } } objects = stores.RoleBindingCache.List() for _, obj := range objects { if rb, ok := obj.(*rbacv1.RoleBinding); ok && 
rb.DeletionTimestamp == nil { if key, err := controller.KeyFunc(rb); err == nil { expectations.RoleBinding.AddExpectedDeletion(kvkey, key) err := clientset.RbacV1().RoleBindings(kv.Namespace).Delete(context.Background(), rb.Name, deleteOptions) if err != nil { expectations.RoleBinding.DeletionObserved(kvkey, key) log.Log.Errorf("Failed to delete rb %+v: %v", rb, err) return err } } } else if !ok { log.Log.Errorf(castFailedFmt, obj) return nil } } objects = stores.RoleCache.List() for _, obj := range objects { if role, ok := obj.(*rbacv1.Role); ok && role.DeletionTimestamp == nil { if key, err := controller.KeyFunc(role); err == nil { expectations.Role.AddExpectedDeletion(kvkey, key) err := clientset.RbacV1().Roles(kv.Namespace).Delete(context.Background(), role.Name, deleteOptions) if err != nil { expectations.Role.DeletionObserved(kvkey, key) log.Log.Errorf("Failed to delete role %+v: %v", role, err) return err } } } else if !ok { log.Log.Errorf(castFailedFmt, obj) return nil } } objects = stores.ServiceAccountCache.List() for _, obj := range objects { if sa, ok := obj.(*corev1.ServiceAccount); ok && sa.DeletionTimestamp == nil { if key, err := controller.KeyFunc(sa); err == nil { expectations.ServiceAccount.AddExpectedDeletion(kvkey, key) err := clientset.CoreV1().ServiceAccounts(kv.Namespace).Delete(context.Background(), sa.Name, deleteOptions) if err != nil { expectations.ServiceAccount.DeletionObserved(kvkey, key) log.Log.Errorf("Failed to delete serviceaccount %+v: %v", sa, err) return err } } } else if !ok { log.Log.Errorf(castFailedFmt, obj) return nil } } objects = stores.SecretCache.List() for _, obj := range objects { if secret, ok := obj.(*corev1.Secret); ok && secret.DeletionTimestamp == nil { if key, err := controller.KeyFunc(secret); err == nil { expectations.Secrets.AddExpectedDeletion(kvkey, key) err := clientset.CoreV1().Secrets(kv.Namespace).Delete(context.Background(), secret.Name, deleteOptions) if err != nil { expectations.Secrets.DeletionObserved(kvkey, key) log.Log.Errorf("Failed to delete secret %+v: %v", secret, err) return err } } } else if !ok { log.Log.Errorf(castFailedFmt, obj) return nil } } objects = stores.ConfigMapCache.List() for _, obj := range objects { if configMap, ok := obj.(*corev1.ConfigMap); ok && configMap.DeletionTimestamp == nil { if key, err := controller.KeyFunc(configMap); err == nil { expectations.ConfigMap.AddExpectedDeletion(kvkey, key) err := clientset.CoreV1().ConfigMaps(kv.Namespace).Delete(context.Background(), configMap.Name, deleteOptions) if err != nil { expectations.ConfigMap.DeletionObserved(kvkey, key) log.Log.Errorf("Failed to delete configMap %+v: %v", configMap, err) return err } } } else if !ok { log.Log.Errorf(castFailedFmt, obj) return nil } } scc := clientset.SecClient() objects = stores.SCCCache.List() for _, obj := range objects { if s, ok := obj.(*secv1.SecurityContextConstraints); ok && s.DeletionTimestamp == nil { // informer watches all SCC objects, it cannot be changed because of kubevirt updates if !util.IsManagedByOperator(s.GetLabels()) { continue } if key, err := controller.KeyFunc(s); err == nil { expectations.SCC.AddExpectedDeletion(kvkey, key) err := scc.SecurityContextConstraints().Delete(context.Background(), s.Name, deleteOptions) if err != nil { expectations.SCC.DeletionObserved(kvkey, key) log.Log.Errorf("Failed to delete SecurityContextConstraints %+v: %v", s, err) return err } } } else if !ok { log.Log.Errorf(castFailedFmt, obj) return nil } } objects = stores.RouteCache.List() for _, obj := range 
objects { if route, ok := obj.(*routev1.Route); ok && route.DeletionTimestamp == nil { if key, err := controller.KeyFunc(route); err == nil { expectations.Route.AddExpectedDeletion(kvkey, key) err := clientset.RouteClient().Routes(kv.Namespace).Delete(context.Background(), route.Name, deleteOptions) if err != nil { expectations.Route.DeletionObserved(kvkey, key) log.Log.Errorf("Failed to delete route %+v: %v", route, err) return err } } } else if !ok { log.Log.Errorf(castFailedFmt, obj) return nil } } objects = stores.ValidatingAdmissionPolicyBindingCache.List() for _, obj := range objects { if validatingAdmissionPolicyBinding, ok := obj.(*admissionregistrationv1.ValidatingAdmissionPolicyBinding); ok && validatingAdmissionPolicyBinding.DeletionTimestamp == nil { if key, err := controller.KeyFunc(validatingAdmissionPolicyBinding); err == nil { expectations.ValidatingAdmissionPolicyBinding.AddExpectedDeletion(kvkey, key) err := clientset.AdmissionregistrationV1().ValidatingAdmissionPolicyBindings().Delete(context.Background(), validatingAdmissionPolicyBinding.Name, deleteOptions) if err != nil { expectations.ValidatingAdmissionPolicyBinding.DeletionObserved(kvkey, key) log.Log.Errorf("Failed to delete validatingAdmissionPolicyBinding %+v: %v", validatingAdmissionPolicyBinding, err) return err } } } else if !ok { log.Log.Errorf(castFailedFmt, obj) return nil } } objects = stores.ValidatingAdmissionPolicyCache.List() for _, obj := range objects { if validatingAdmissionPolicy, ok := obj.(*admissionregistrationv1.ValidatingAdmissionPolicy); ok && validatingAdmissionPolicy.DeletionTimestamp == nil { if key, err := controller.KeyFunc(validatingAdmissionPolicy); err == nil { expectations.ValidatingAdmissionPolicy.AddExpectedDeletion(kvkey, key) err := clientset.AdmissionregistrationV1().ValidatingAdmissionPolicies().Delete(context.Background(), validatingAdmissionPolicy.Name, deleteOptions) if err != nil { expectations.ValidatingAdmissionPolicy.DeletionObserved(kvkey, key) log.Log.Errorf("Failed to delete validatingAdmissionPolicy %+v: %v", validatingAdmissionPolicy, err) return err } } } else if !ok { log.Log.Errorf(castFailedFmt, obj) return nil } } if err = deleteKubeVirtLabelsFromNodes(clientset); err != nil { return err } err = deleteDummyWebhookValidators(kv, clientset, stores, expectations) if err != nil { return err } return nil } func deleteKubeVirtLabelsFromNodes(clientset kubecli.KubevirtClient) error { nodeList, err := clientset.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{LabelSelector: v1.NodeSchedulable}) if err != nil { return fmt.Errorf("failed to list nodes: %v", err) } for _, node := range nodeList.Items { labels := node.GetLabels() if labels == nil { continue } patchSet := patch.New() for labelkey := range labels { if strings.HasPrefix(labelkey, "kubevirt.io/") { patchSet.AddOption(patch.WithRemove(fmt.Sprintf("/metadata/labels/%s", patch.EscapeJSONPointer(labelkey)))) } } if patchSet.IsEmpty() { continue } payload, err := patchSet.GeneratePayload() if err != nil { return fmt.Errorf("failed to generate patch payload: %v", err) } if _, err = clientset.CoreV1().Nodes().Patch(context.Background(), node.Name, types.JSONPatchType, payload, metav1.PatchOptions{}); err != nil { return fmt.Errorf("failed to update labels for node %s: %v", node.Name, err) } log.Log.Infof("removed kubevirt labels from node %s", node.Name) } return nil } func crdInstanceDeletionCompleted(crd *extv1.CustomResourceDefinition) bool { // Below is an example of what is being looked for here. 
// The CRD will have this condition once a CRD which is being // deleted has all instances removed related to this CRD. // // message: removed all instances // reason: InstanceDeletionCompleted // status: "False" // type: Terminating if crd.DeletionTimestamp == nil { return false } for _, condition := range crd.Status.Conditions { if condition.Type == extv1.Terminating && condition.Status == extv1.ConditionFalse && condition.Reason == "InstanceDeletionCompleted" { return true } } return false } func crdFilterNeedFinalizerAdded(crds []*extv1.CustomResourceDefinition) []*extv1.CustomResourceDefinition { filtered := []*extv1.CustomResourceDefinition{} for _, crd := range crds { if crd.DeletionTimestamp == nil && !controller.HasFinalizer(crd, v1.VirtOperatorComponentFinalizer) { filtered = append(filtered, crd) } } return filtered } func crdFilterNeedDeletion(crds []*extv1.CustomResourceDefinition) []*extv1.CustomResourceDefinition { filtered := []*extv1.CustomResourceDefinition{} for _, crd := range crds { if crd.DeletionTimestamp == nil { filtered = append(filtered, crd) } } return filtered } func crdFilterNeedFinalizerRemoved(crds []*extv1.CustomResourceDefinition) []*extv1.CustomResourceDefinition { filtered := []*extv1.CustomResourceDefinition{} for _, crd := range crds { if !crdInstanceDeletionCompleted(crd) { // All crds must have all crs removed before any CRD finalizer can be removed return []*extv1.CustomResourceDefinition{} } else if controller.HasFinalizer(crd, v1.VirtOperatorComponentFinalizer) { filtered = append(filtered, crd) } } return filtered } func crdHandleDeletion(kvkey string, stores util.Stores, clientset kubecli.KubevirtClient, expectations *util.Expectations) error { ext := clientset.ExtensionsClient() objects := stores.OperatorCrdCache.List() finalizerPath := "/metadata/finalizers" crds := []*extv1.CustomResourceDefinition{} for _, obj := range objects { crd, ok := obj.(*extv1.CustomResourceDefinition) if !ok { log.Log.Errorf(castFailedFmt, obj) return nil } crds = append(crds, crd) } needFinalizerAdded := crdFilterNeedFinalizerAdded(crds) needDeletion := crdFilterNeedDeletion(crds) needFinalizerRemoved := crdFilterNeedFinalizerRemoved(crds) for _, crd := range needFinalizerAdded { crdCopy := crd.DeepCopy() controller.AddFinalizer(crdCopy, v1.VirtOperatorComponentFinalizer) patchBytes, err := json.Marshal(crdCopy.Finalizers) if err != nil { return err } ops := fmt.Sprintf(`[{ "op": "add", "path": "%s", "value": %s }]`, finalizerPath, string(patchBytes)) _, err = ext.ApiextensionsV1().CustomResourceDefinitions().Patch(context.Background(), crd.Name, types.JSONPatchType, []byte(ops), metav1.PatchOptions{}) if err != nil { return err } } for _, crd := range needDeletion { key, err := controller.KeyFunc(crd) if err != nil { return err } expectations.OperatorCrd.AddExpectedDeletion(kvkey, key) err = ext.ApiextensionsV1().CustomResourceDefinitions().Delete(context.Background(), crd.Name, metav1.DeleteOptions{}) if err != nil { expectations.OperatorCrd.DeletionObserved(kvkey, key) log.Log.Errorf("Failed to delete crd %+v: %v", crd, err) return err } } for _, crd := range needFinalizerRemoved { var ops string if len(crd.Finalizers) > 1 { crdCopy := crd.DeepCopy() controller.RemoveFinalizer(crdCopy, v1.VirtOperatorComponentFinalizer) newPatchBytes, err := json.Marshal(crdCopy.Finalizers) if err != nil { return err } oldPatchBytes, err := json.Marshal(crd.Finalizers) if err != nil { return err } ops = fmt.Sprintf(`[{ "op": "test", "path": "%s", "value": %s }, { "op": 
"replace", "path": "%s", "value": %s }]`, finalizerPath, string(oldPatchBytes), finalizerPath, string(newPatchBytes)) } else { ops = fmt.Sprintf(`[{ "op": "remove", "path": "%s" }]`, finalizerPath) } _, err := ext.ApiextensionsV1().CustomResourceDefinitions().Patch(context.Background(), crd.Name, types.JSONPatchType, []byte(ops), metav1.PatchOptions{}) if err != nil { return err } } return nil }
package apply import ( "fmt" "k8s.io/apimachinery/pkg/runtime" k6tv1 "kubevirt.io/api/core/v1" appsv1 "k8s.io/api/apps/v1" operatorsv1 "github.com/openshift/api/operator/v1" "github.com/openshift/library-go/pkg/operator/resource/resourcemerge" admissionregistrationv1 "k8s.io/api/admissionregistration/v1" policyv1 "k8s.io/api/policy/v1" extv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" ) func getGroupResource(required runtime.Object) (group string, resource string, err error) { switch required.(type) { case *extv1.CustomResourceDefinition: group = "apiextensions.k8s.io/v1" resource = "customresourcedefinitions" case *admissionregistrationv1.MutatingWebhookConfiguration: group = "admissionregistration.k8s.io" resource = "mutatingwebhookconfigurations" case *admissionregistrationv1.ValidatingWebhookConfiguration: group = "admissionregistration.k8s.io" resource = "validatingwebhookconfigurations" case *policyv1.PodDisruptionBudget: group = "apps" resource = "poddisruptionbudgets" case *appsv1.Deployment: group = "apps" resource = "deployments" case *appsv1.DaemonSet: group = "apps" resource = "daemonsets" default: err = fmt.Errorf("resource type is not known") return } return } func GetExpectedGeneration(required runtime.Object, previousGenerations []k6tv1.GenerationStatus) int64 { group, resource, err := getGroupResource(required) if err != nil { return -1 } operatorGenerations := toOperatorGenerations(previousGenerations) meta := required.(v1.Object) generation := resourcemerge.GenerationFor(operatorGenerations, schema.GroupResource{Group: group, Resource: resource}, meta.GetNamespace(), meta.GetName()) if generation == nil { return -1 } return generation.LastGeneration } func SetGeneration(generations *[]k6tv1.GenerationStatus, actual runtime.Object) { if actual == nil { return } group, resource, err := getGroupResource(actual) if err != nil { return } operatorGenerations := toOperatorGenerations(*generations) meta := actual.(v1.Object) resourcemerge.SetGeneration(&operatorGenerations, operatorsv1.GenerationStatus{ Group: group, Resource: resource, Namespace: meta.GetNamespace(), Name: meta.GetName(), LastGeneration: meta.GetGeneration(), }) newGenerations := toAPIGenerations(operatorGenerations) *generations = newGenerations } func toOperatorGeneration(generation k6tv1.GenerationStatus) operatorsv1.GenerationStatus { return operatorsv1.GenerationStatus{ Group: generation.Group, Resource: generation.Resource, Namespace: generation.Namespace, Name: generation.Name, LastGeneration: generation.LastGeneration, Hash: generation.Hash, } } func toAPIGeneration(generation operatorsv1.GenerationStatus) k6tv1.GenerationStatus { return k6tv1.GenerationStatus{ Group: generation.Group, Resource: generation.Resource, Namespace: generation.Namespace, Name: generation.Name, LastGeneration: generation.LastGeneration, Hash: generation.Hash, } } func toOperatorGenerations(generations []k6tv1.GenerationStatus) (operatorGenerations []operatorsv1.GenerationStatus) { for _, generation := range generations { operatorGenerations = append(operatorGenerations, toOperatorGeneration(generation)) } return operatorGenerations } func toAPIGenerations(generations []operatorsv1.GenerationStatus) (apiGenerations []k6tv1.GenerationStatus) { for _, generation := range generations { apiGenerations = append(apiGenerations, toAPIGeneration(generation)) } return apiGenerations }
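// The generation bookkeeping above lets the operator detect whether a
// resource's spec was changed by someone else since it last applied it. A
// minimal sketch of the same idea, without the library-go GenerationStatus
// types, follows; the key layout and helper names are illustrative only.
package main

import "fmt"

type generationKey struct {
	Group, Resource, Namespace, Name string
}

// generationStore remembers the generation observed when an object was last applied.
type generationStore map[generationKey]int64

func (s generationStore) Set(key generationKey, observed int64) { s[key] = observed }

// UpToDate reports whether the currently observed generation still matches the
// recorded one, i.e. nothing modified the spec behind the operator's back.
func (s generationStore) UpToDate(key generationKey, observed int64) bool {
	last, ok := s[key]
	return ok && last == observed
}

func main() {
	store := generationStore{}
	key := generationKey{Group: "apps", Resource: "deployments", Namespace: "kubevirt", Name: "virt-api"}

	store.Set(key, 3)
	fmt.Println(store.UpToDate(key, 3)) // true: spec unchanged since last apply
	fmt.Println(store.UpToDate(key, 4)) // false: spec was modified out of band
}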
package apply import ( "context" "fmt" "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" v1 "kubevirt.io/api/core/v1" instancetypev1beta1 "kubevirt.io/api/instancetype/v1beta1" "kubevirt.io/client-go/log" ) func (r *Reconciler) createOrUpdateInstancetypes() error { for _, instancetype := range r.targetStrategy.Instancetypes() { if err := r.createOrUpdateInstancetype(instancetype.DeepCopy()); err != nil { return err } } return nil } func (r *Reconciler) findInstancetype(name string) (*instancetypev1beta1.VirtualMachineClusterInstancetype, error) { obj, exists, err := r.stores.ClusterInstancetype.GetByKey(name) if err != nil { return nil, err } if !exists { return nil, errors.NewNotFound(v1.Resource("VirtualMachineClusterInstancetype"), name) } foundObj, ok := obj.(*instancetypev1beta1.VirtualMachineClusterInstancetype) if !ok { return nil, fmt.Errorf("unknown object within VirtualMachineClusterInstancetype store") } return foundObj, nil } func (r *Reconciler) createOrUpdateInstancetype(instancetype *instancetypev1beta1.VirtualMachineClusterInstancetype) error { foundObj, err := r.findInstancetype(instancetype.Name) if err != nil && !errors.IsNotFound(err) { return err } imageTag, imageRegistry, id := getTargetVersionRegistryID(r.kv) injectOperatorMetadata(r.kv, &instancetype.ObjectMeta, imageTag, imageRegistry, id, true) if errors.IsNotFound(err) { if _, err := r.clientset.VirtualMachineClusterInstancetype().Create(context.Background(), instancetype, metav1.CreateOptions{}); err != nil { return fmt.Errorf("unable to create instancetype %+v: %v", instancetype, err) } log.Log.V(2).Infof("instancetype %v created", instancetype.GetName()) return nil } if equality.Semantic.DeepEqual(foundObj.Annotations, instancetype.Annotations) && equality.Semantic.DeepEqual(foundObj.Labels, instancetype.Labels) && equality.Semantic.DeepEqual(foundObj.Spec, instancetype.Spec) { log.Log.V(4).Infof("instancetype %v is up-to-date", instancetype.GetName()) return nil } instancetype.ResourceVersion = foundObj.ResourceVersion if _, err := r.clientset.VirtualMachineClusterInstancetype().Update(context.Background(), instancetype, metav1.UpdateOptions{}); err != nil { return fmt.Errorf("unable to update instancetype %+v: %v", instancetype, err) } log.Log.V(2).Infof("instancetype %v updated", instancetype.GetName()) return nil } func (r *Reconciler) deleteInstancetypes() error { foundInstancetype := false for _, instancetype := range r.targetStrategy.Instancetypes() { _, exists, err := r.stores.ClusterInstancetype.GetByKey(instancetype.Name) if err != nil { return err } if exists { foundInstancetype = true break } } if !foundInstancetype { return nil } ls := labels.Set{ v1.AppComponentLabel: GetAppComponent(r.kv), v1.ManagedByLabel: v1.ManagedByLabelOperatorValue, } if err := r.clientset.VirtualMachineClusterInstancetype().DeleteCollection(context.Background(), metav1.DeleteOptions{}, metav1.ListOptions{ LabelSelector: ls.String(), }); err != nil { return fmt.Errorf("unable to delete preferences: %v", err) } return nil } func (r *Reconciler) createOrUpdatePreferences() error { for _, preference := range r.targetStrategy.Preferences() { if err := r.createOrUpdatePreference(preference.DeepCopy()); err != nil { return err } } return nil } func (r *Reconciler) findPreference(name string) (*instancetypev1beta1.VirtualMachineClusterPreference, error) { obj, exists, err := r.stores.ClusterPreference.GetByKey(name) if err != 
nil { return nil, err } if !exists { return nil, errors.NewNotFound(v1.Resource("VirtualMachineClusterPreference"), name) } foundObj, ok := obj.(*instancetypev1beta1.VirtualMachineClusterPreference) if !ok { return nil, fmt.Errorf("unknown object within VirtualMachineClusterPreference store") } return foundObj, nil } func (r *Reconciler) createOrUpdatePreference(preference *instancetypev1beta1.VirtualMachineClusterPreference) error { foundObj, err := r.findPreference(preference.Name) if err != nil && !errors.IsNotFound(err) { return err } imageTag, imageRegistry, id := getTargetVersionRegistryID(r.kv) injectOperatorMetadata(r.kv, &preference.ObjectMeta, imageTag, imageRegistry, id, true) if errors.IsNotFound(err) { if _, err := r.clientset.VirtualMachineClusterPreference().Create(context.Background(), preference, metav1.CreateOptions{}); err != nil { return fmt.Errorf("unable to create preference %+v: %v", preference, err) } log.Log.V(2).Infof("preference %v created", preference.GetName()) return nil } if equality.Semantic.DeepEqual(foundObj.Annotations, preference.Annotations) && equality.Semantic.DeepEqual(foundObj.Labels, preference.Labels) && equality.Semantic.DeepEqual(foundObj.Spec, preference.Spec) { log.Log.V(4).Infof("preference %v is up-to-date", preference.GetName()) return nil } preference.ResourceVersion = foundObj.ResourceVersion if _, err := r.clientset.VirtualMachineClusterPreference().Update(context.Background(), preference, metav1.UpdateOptions{}); err != nil { return fmt.Errorf("unable to update preference %+v: %v", preference, err) } log.Log.V(2).Infof("preference %v updated", preference.GetName()) return nil } func (r *Reconciler) deletePreferences() error { foundPreference := false for _, preference := range r.targetStrategy.Preferences() { _, exists, err := r.stores.ClusterPreference.GetByKey(preference.Name) if err != nil { return err } if exists { foundPreference = true break } } if !foundPreference { return nil } ls := labels.Set{ v1.AppComponentLabel: GetAppComponent(r.kv), v1.ManagedByLabel: v1.ManagedByLabelOperatorValue, } if err := r.clientset.VirtualMachineClusterPreference().DeleteCollection(context.Background(), metav1.DeleteOptions{}, metav1.ListOptions{ LabelSelector: ls.String(), }); err != nil { return fmt.Errorf("unable to delete preferences: %v", err) } return nil }
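// deleteInstancetypes and deletePreferences above remove operator-managed
// objects in bulk via DeleteCollection with a label selector. The snippet
// below shows how such a selector string is built from apimachinery's
// labels.Set; the concrete label keys and values are illustrative and not
// necessarily the exact constants KubeVirt uses.
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/labels"
)

func main() {
	ls := labels.Set{
		"app.kubernetes.io/component":  "kubevirt",
		"app.kubernetes.io/managed-by": "virt-operator",
	}

	// Passed as metav1.ListOptions{LabelSelector: ls.String()} to DeleteCollection.
	fmt.Println(ls.String())
}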
package apply import ( // #nosec sha1 is used to calculate a hash for patches and not for cryptographic "crypto/sha1" "encoding/hex" "encoding/json" "errors" "fmt" "reflect" "sort" "strings" jsonpatch "github.com/evanphx/json-patch" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/strategicpatch" v1 "kubevirt.io/api/core/v1" "kubevirt.io/kubevirt/pkg/virt-operator/resource/generate/components" "kubevirt.io/kubevirt/pkg/virt-operator/resource/generate/install" ) type Customizer struct { Patches []v1.CustomizeComponentsPatch hash string } func NewCustomizer(customizations v1.CustomizeComponents) (*Customizer, error) { hash, err := getHash(customizations) if err != nil { return &Customizer{}, err } patches := customizations.Patches flagPatches := flagsToPatches(customizations.Flags) patches = append(patches, flagPatches...) return &Customizer{ Patches: patches, hash: hash, }, nil } func flagsToPatches(flags *v1.Flags) []v1.CustomizeComponentsPatch { patches := []v1.CustomizeComponentsPatch{} if flags == nil { return patches } patches = addFlagsPatch(components.VirtAPIName, "Deployment", flags.API, patches) patches = addFlagsPatch(components.VirtControllerName, "Deployment", flags.Controller, patches) patches = addFlagsPatch(components.VirtHandlerName, "DaemonSet", flags.Handler, patches) return patches } func addFlagsPatch(name, resource string, flags map[string]string, patches []v1.CustomizeComponentsPatch) []v1.CustomizeComponentsPatch { if len(flags) == 0 { return patches } return append(patches, v1.CustomizeComponentsPatch{ ResourceName: name, ResourceType: resource, Patch: fmt.Sprintf(`{"spec":{"template":{"spec":{"containers":[{"name":%q,"command":["%s","%s"]}]}}}}`, name, name, strings.Join(flagsToArray(flags), `","`)), Type: v1.StrategicMergePatchType, }) } func flagsToArray(flags map[string]string) []string { farr := make([]string, 0) for flag, v := range flags { farr = append(farr, fmt.Sprintf("--%s", strings.ToLower(flag))) if v != "" { farr = append(farr, v) } } return farr } func (c *Customizer) Hash() string { return c.hash } func (c *Customizer) GenericApplyPatches(objects interface{}) error { switch reflect.TypeOf(objects).Kind() { case reflect.Slice: s := reflect.ValueOf(objects) for i := 0; i < s.Len(); i++ { o := s.Index(i) obj, ok := o.Interface().(runtime.Object) if !ok { return errors.New("Slice must contain objects of type 'runtime.Object'") } kind := obj.GetObjectKind().GroupVersionKind().Kind v := reflect.Indirect(o).FieldByName("ObjectMeta").FieldByName("Name") name := v.String() patches := c.GetPatchesForResource(kind, name) patches = append(patches, v1.CustomizeComponentsPatch{ Patch: fmt.Sprintf(`{"metadata":{"annotations":{"%s":"%s"}}}`, v1.KubeVirtCustomizeComponentAnnotationHash, c.hash), Type: v1.StrategicMergePatchType, }) err := applyPatches(obj, patches) if err != nil { return err } } } return nil } func (c *Customizer) Apply(targetStrategy install.StrategyInterface) error { err := c.GenericApplyPatches(targetStrategy.Deployments()) if err != nil { return err } err = c.GenericApplyPatches(targetStrategy.Services()) if err != nil { return err } err = c.GenericApplyPatches(targetStrategy.DaemonSets()) if err != nil { return err } err = c.GenericApplyPatches(targetStrategy.ValidatingWebhookConfigurations()) if err != nil { return err } err = c.GenericApplyPatches(targetStrategy.MutatingWebhookConfigurations()) if err != nil { return err } err = c.GenericApplyPatches(targetStrategy.APIServices()) if err != nil { return err } err = 
c.GenericApplyPatches(targetStrategy.CertificateSecrets()) if err != nil { return err } return nil } func applyPatches(obj runtime.Object, patches []v1.CustomizeComponentsPatch) error { if len(patches) == 0 { return nil } for _, p := range patches { err := applyPatch(obj, p) if err != nil { return err } } return nil } func applyPatch(obj runtime.Object, patch v1.CustomizeComponentsPatch) error { if obj == nil { return nil } old, err := json.Marshal(obj) if err != nil { return err } // reset the object in preparation to unmarshal, since unmarshal does not guarantee that fields // in obj that are removed by patch are cleared value := reflect.ValueOf(obj) value.Elem().Set(reflect.New(value.Type().Elem()).Elem()) switch patch.Type { case v1.JSONPatchType: patch, err := jsonpatch.DecodePatch([]byte(patch.Patch)) if err != nil { return err } modified, err := patch.Apply(old) if err != nil { return err } if err = json.Unmarshal(modified, obj); err != nil { return err } case v1.MergePatchType: modified, err := jsonpatch.MergePatch(old, []byte(patch.Patch)) if err != nil { return err } if err := json.Unmarshal(modified, obj); err != nil { return err } case v1.StrategicMergePatchType: mergedByte, err := strategicpatch.StrategicMergePatch(old, []byte(patch.Patch), obj) if err != nil { return err } if err = json.Unmarshal(mergedByte, obj); err != nil { return err } default: return fmt.Errorf("PatchType is not supported") } return nil } func (c *Customizer) GetPatches() []v1.CustomizeComponentsPatch { return c.Patches } func (c *Customizer) GetPatchesForResource(resourceType, name string) []v1.CustomizeComponentsPatch { allPatches := c.Patches patches := make([]v1.CustomizeComponentsPatch, 0) for _, p := range allPatches { if valueMatchesKey(p.ResourceType, resourceType) && valueMatchesKey(p.ResourceName, name) { patches = append(patches, p) } } return patches } func valueMatchesKey(value, key string) bool { if value == "*" { return true } return strings.EqualFold(key, value) } func getHash(customizations v1.CustomizeComponents) (string, error) { // #nosec CWE: 326 - Use of weak cryptographic primitive (http://cwe.mitre.org/data/definitions/326.html) // reason: sha1 is not used for encryption but for creating a hash value hasher := sha1.New() sort.SliceStable(customizations.Patches, func(i, j int) bool { return len(customizations.Patches[i].Patch) < len(customizations.Patches[j].Patch) }) values, err := json.Marshal(customizations) if err != nil { return "", err } hasher.Write(values) return hex.EncodeToString(hasher.Sum(nil)), nil }
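// applyPatch above supports JSON, merge and strategic-merge patch types. The
// stand-alone example below exercises the merge-patch flavour with the same
// evanphx/json-patch library; the document and annotation key are arbitrary
// illustrations, not a real KubeVirt manifest or the real customization
// annotation.
package main

import (
	"fmt"

	jsonpatch "github.com/evanphx/json-patch"
)

func main() {
	original := []byte(`{"metadata":{"annotations":{"a":"1"}},"spec":{"replicas":1}}`)
	patch := []byte(`{"metadata":{"annotations":{"example.kubevirt.io/customize-hash":"abc123"}}}`)

	// RFC 7386 merge patch: annotations are merged, everything else stays untouched.
	modified, err := jsonpatch.MergePatch(original, patch)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(modified))
}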
package apply import ( "context" "fmt" "github.com/imdario/mergo" "github.com/openshift/library-go/pkg/operator/resource/resourcemerge" promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" "k8s.io/apimachinery/pkg/api/equality" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "kubevirt.io/client-go/log" "kubevirt.io/kubevirt/pkg/apimachinery/patch" ) func (r *Reconciler) createOrUpdateServiceMonitors() error { if !r.config.ServiceMonitorEnabled { return nil } for _, serviceMonitor := range r.targetStrategy.ServiceMonitors() { if err := r.createOrUpdateServiceMonitor(serviceMonitor.DeepCopy()); err != nil { return err } } return nil } func (r *Reconciler) createOrUpdateServiceMonitor(serviceMonitor *promv1.ServiceMonitor) error { prometheusClient := r.clientset.PrometheusClient() version, imageRegistry, id := getTargetVersionRegistryID(r.kv) obj, exists, _ := r.stores.ServiceMonitorCache.Get(serviceMonitor) injectOperatorMetadata(r.kv, &serviceMonitor.ObjectMeta, version, imageRegistry, id, true) if !exists { // Create non existent r.expectations.ServiceMonitor.RaiseExpectations(r.kvKey, 1, 0) _, err := prometheusClient.MonitoringV1().ServiceMonitors(serviceMonitor.Namespace).Create(context.Background(), serviceMonitor, metav1.CreateOptions{}) if err != nil { r.expectations.ServiceMonitor.LowerExpectations(r.kvKey, 1, 0) return fmt.Errorf("unable to create serviceMonitor %+v: %v", serviceMonitor, err) } log.Log.V(2).Infof("serviceMonitor %v created", serviceMonitor.GetName()) return nil } cachedServiceMonitor := obj.(*promv1.ServiceMonitor) endpointsModified, err := ensureServiceMonitorSpec(serviceMonitor, cachedServiceMonitor) if err != nil { return err } modified := resourcemerge.BoolPtr(false) resourcemerge.EnsureObjectMeta(modified, &cachedServiceMonitor.ObjectMeta, serviceMonitor.ObjectMeta) // there was no change to metadata and the spec fields are equal if !*modified && !endpointsModified { log.Log.V(4).Infof("serviceMonitor %v is up-to-date", serviceMonitor.GetName()) return nil } patchBytes, err := patch.New(getPatchWithObjectMetaAndSpec([]patch.PatchOption{}, &serviceMonitor.ObjectMeta, serviceMonitor.Spec)...).GeneratePayload() if err != nil { return err } _, err = prometheusClient.MonitoringV1().ServiceMonitors(serviceMonitor.Namespace).Patch(context.Background(), serviceMonitor.Name, types.JSONPatchType, patchBytes, metav1.PatchOptions{}) if err != nil { return fmt.Errorf("unable to patch serviceMonitor %+v: %v", serviceMonitor, err) } log.Log.V(2).Infof("serviceMonitor %v updated", serviceMonitor.GetName()) return nil } func ensureServiceMonitorSpec(required, existing *promv1.ServiceMonitor) (bool, error) { if err := mergo.Merge(&existing.Spec, &required.Spec); err != nil { return false, err } if equality.Semantic.DeepEqual(existing.Spec, required.Spec) { return false, nil } return true, nil } func (r *Reconciler) createOrUpdatePrometheusRules() error { if !r.config.PrometheusRulesEnabled { return nil } for _, prometheusRule := range r.targetStrategy.PrometheusRules() { if err := r.createOrUpdatePrometheusRule(prometheusRule.DeepCopy()); err != nil { return err } } return nil } func (r *Reconciler) createOrUpdatePrometheusRule(prometheusRule *promv1.PrometheusRule) error { prometheusClient := r.clientset.PrometheusClient() version, imageRegistry, id := getTargetVersionRegistryID(r.kv) obj, exists, _ := r.stores.PrometheusRuleCache.Get(prometheusRule) injectOperatorMetadata(r.kv, &prometheusRule.ObjectMeta, version, 
imageRegistry, id, true) if !exists { // Create non existent r.expectations.PrometheusRule.RaiseExpectations(r.kvKey, 1, 0) _, err := prometheusClient.MonitoringV1().PrometheusRules(prometheusRule.Namespace).Create(context.Background(), prometheusRule, metav1.CreateOptions{}) if err != nil { r.expectations.PrometheusRule.LowerExpectations(r.kvKey, 1, 0) return fmt.Errorf("unable to create PrometheusRule %+v: %v", prometheusRule, err) } log.Log.V(2).Infof("PrometheusRule %v created", prometheusRule.GetName()) return nil } cachedPrometheusRule := obj.(*promv1.PrometheusRule) modified := resourcemerge.BoolPtr(false) existingCopy := cachedPrometheusRule.DeepCopy() resourcemerge.EnsureObjectMeta(modified, &existingCopy.ObjectMeta, prometheusRule.ObjectMeta) if !*modified && equality.Semantic.DeepEqual(cachedPrometheusRule.Spec, prometheusRule.Spec) { log.Log.V(4).Infof("PrometheusRule %v is up-to-date", prometheusRule.GetName()) return nil } patchBytes, err := patch.New(getPatchWithObjectMetaAndSpec([]patch.PatchOption{}, &prometheusRule.ObjectMeta, prometheusRule.Spec)...).GeneratePayload() if err != nil { return err } _, err = prometheusClient.MonitoringV1().PrometheusRules(prometheusRule.Namespace).Patch(context.Background(), prometheusRule.Name, types.JSONPatchType, patchBytes, metav1.PatchOptions{}) if err != nil { return fmt.Errorf("unable to patch PrometheusRule %+v: %v", prometheusRule, err) } log.Log.V(2).Infof("PrometheusRule %v updated", prometheusRule.GetName()) return nil }
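// ensureServiceMonitorSpec above merges the required spec into the cached one
// with mergo and then compares them to decide whether a patch is needed. The
// example below reproduces that pattern with a small stand-in struct instead
// of the prometheus-operator types; the field names are illustrative.
package main

import (
	"fmt"
	"reflect"

	"github.com/imdario/mergo"
)

type endpointSpec struct {
	Path     string
	Interval string
}

func main() {
	existing := endpointSpec{Path: "/metrics", Interval: "60s"} // what is currently deployed
	required := endpointSpec{Path: "/metrics", Interval: "30s"} // what the operator wants

	// Fill empty fields of existing from required, mirroring
	// mergo.Merge(&existing.Spec, &required.Spec) in the code above.
	if err := mergo.Merge(&existing, required); err != nil {
		panic(err)
	}

	// Interval differs (mergo does not overwrite non-empty fields), so an update is needed.
	needsUpdate := !reflect.DeepEqual(existing, required)
	fmt.Printf("merged: %+v, needs update: %v\n", existing, needsUpdate)
}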
package apply import ( "context" "fmt" "k8s.io/apimachinery/pkg/runtime" "github.com/openshift/library-go/pkg/operator/resource/resourcemerge" "k8s.io/client-go/tools/cache" "kubevirt.io/kubevirt/pkg/controller" rbacv1 "k8s.io/api/rbac/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "kubevirt.io/client-go/log" "kubevirt.io/kubevirt/pkg/virt-operator/resource/generate/rbac" ) func (r *Reconciler) createOrUpdateClusterRole(cr *rbacv1.ClusterRole, imageTag string, imageRegistry string, id string) error { return rbacCreateOrUpdate(r, cr, imageTag, imageRegistry, id) } func (r *Reconciler) createOrUpdateClusterRoleBinding(crb *rbacv1.ClusterRoleBinding, imageTag string, imageRegistry string, id string) error { return rbacCreateOrUpdate(r, crb, imageTag, imageRegistry, id) } func (r *Reconciler) createOrUpdateRole(role *rbacv1.Role, imageTag string, imageRegistry string, id string) error { if !r.config.ServiceMonitorEnabled && (role.Name == rbac.MONITOR_SERVICEACCOUNT_NAME) { return nil } return rbacCreateOrUpdate(r, role, imageTag, imageRegistry, id) } func (r *Reconciler) createOrUpdateRoleBinding(rb *rbacv1.RoleBinding, imageTag string, imageRegistry string, id string) error { if !r.config.ServiceMonitorEnabled && (rb.Name == rbac.MONITOR_SERVICEACCOUNT_NAME) { return nil } return rbacCreateOrUpdate(r, rb, imageTag, imageRegistry, id) } func rbacCreateOrUpdate(r *Reconciler, required runtime.Object, imageTag, imageRegistry, id string) (err error) { roleTypeName := required.GetObjectKind().GroupVersionKind().Kind cachedRoleInterface, exists, _ := getRbacCache(r, required).Get(required) requiredMeta := getRbacMetaObject(required) injectOperatorMetadata(r.kv, requiredMeta, imageTag, imageRegistry, id, true) if !exists { // Create non existent err = getRbacCreateFunction(r, required)() if err != nil { return fmt.Errorf("unable to create %v %+v: %v", roleTypeName, required, err) } log.Log.V(2).Infof("%v %v created", roleTypeName, requiredMeta.GetName()) return nil } metaChanged := resourcemerge.BoolPtr(false) existingCopy := cachedRoleInterface.(runtime.Object).DeepCopyObject() existingCopyMeta := getRbacMetaObject(existingCopy) resourcemerge.EnsureObjectMeta(metaChanged, existingCopyMeta, *requiredMeta) enforceAPIGroup(existingCopy, required) specChanged := changeRbacExistingByRequired(existingCopy, required) if !*metaChanged && !specChanged { log.Log.V(4).Infof("%v %v already exists", roleTypeName, requiredMeta.GetName()) return nil } // Update existing, we don't need to patch for rbac rules. 
err = getRbacUpdateFunction(r, existingCopy)() if err != nil { return fmt.Errorf("unable to update %v %+v: %v", roleTypeName, required, err) } log.Log.V(2).Infof("%v %v updated", roleTypeName, requiredMeta.GetName()) return nil } func getRbacCreateFunction(r *Reconciler, obj runtime.Object) (createFunc func() error) { rbacObj := r.clientset.RbacV1() namespace := r.kv.Namespace raiseExpectation := func(exp *controller.UIDTrackingControllerExpectations) { exp.RaiseExpectations(r.kvKey, 1, 0) } lowerExpectationIfErr := func(exp *controller.UIDTrackingControllerExpectations, err error) { if err != nil { exp.LowerExpectations(r.kvKey, 1, 0) } } switch obj.(type) { case *rbacv1.Role: role := obj.(*rbacv1.Role) createFunc = func() error { raiseExpectation(r.expectations.Role) _, err := rbacObj.Roles(namespace).Create(context.Background(), role, metav1.CreateOptions{}) lowerExpectationIfErr(r.expectations.Role, err) return err } case *rbacv1.ClusterRole: role := obj.(*rbacv1.ClusterRole) createFunc = func() error { raiseExpectation(r.expectations.ClusterRole) _, err := rbacObj.ClusterRoles().Create(context.Background(), role, metav1.CreateOptions{}) lowerExpectationIfErr(r.expectations.ClusterRole, err) return err } case *rbacv1.RoleBinding: roleBinding := obj.(*rbacv1.RoleBinding) createFunc = func() error { raiseExpectation(r.expectations.RoleBinding) _, err := rbacObj.RoleBindings(namespace).Create(context.Background(), roleBinding, metav1.CreateOptions{}) lowerExpectationIfErr(r.expectations.RoleBinding, err) return err } case *rbacv1.ClusterRoleBinding: roleBinding := obj.(*rbacv1.ClusterRoleBinding) createFunc = func() error { raiseExpectation(r.expectations.ClusterRoleBinding) _, err := rbacObj.ClusterRoleBindings().Create(context.Background(), roleBinding, metav1.CreateOptions{}) lowerExpectationIfErr(r.expectations.ClusterRoleBinding, err) return err } } return } func getRbacUpdateFunction(r *Reconciler, obj runtime.Object) (updateFunc func() (err error)) { rbacObj := r.clientset.RbacV1() namespace := r.kv.Namespace switch obj.(type) { case *rbacv1.Role: role := obj.(*rbacv1.Role) updateFunc = func() (err error) { _, err = rbacObj.Roles(namespace).Update(context.Background(), role, metav1.UpdateOptions{}) return err } case *rbacv1.ClusterRole: role := obj.(*rbacv1.ClusterRole) updateFunc = func() (err error) { _, err = rbacObj.ClusterRoles().Update(context.Background(), role, metav1.UpdateOptions{}) return err } case *rbacv1.RoleBinding: roleBinding := obj.(*rbacv1.RoleBinding) updateFunc = func() (err error) { _, err = rbacObj.RoleBindings(namespace).Update(context.Background(), roleBinding, metav1.UpdateOptions{}) return err } case *rbacv1.ClusterRoleBinding: roleBinding := obj.(*rbacv1.ClusterRoleBinding) updateFunc = func() (err error) { _, err = rbacObj.ClusterRoleBindings().Update(context.Background(), roleBinding, metav1.UpdateOptions{}) return err } } return } func getRbacMetaObject(obj runtime.Object) (meta *metav1.ObjectMeta) { switch obj.(type) { case *rbacv1.Role: role := obj.(*rbacv1.Role) meta = &role.ObjectMeta case *rbacv1.ClusterRole: role := obj.(*rbacv1.ClusterRole) meta = &role.ObjectMeta case *rbacv1.RoleBinding: roleBinding := obj.(*rbacv1.RoleBinding) meta = &roleBinding.ObjectMeta case *rbacv1.ClusterRoleBinding: roleBinding := obj.(*rbacv1.ClusterRoleBinding) meta = &roleBinding.ObjectMeta } return } func enforceAPIGroup(existing runtime.Object, required runtime.Object) { var existingRoleRef *rbacv1.RoleRef var requiredRoleRef *rbacv1.RoleRef var existingSubjects 
[]rbacv1.Subject
    var requiredSubjects []rbacv1.Subject

    switch required.(type) {
    case *rbacv1.RoleBinding:
        crExisting := existing.(*rbacv1.RoleBinding)
        crRequired := required.(*rbacv1.RoleBinding)
        existingRoleRef = &crExisting.RoleRef
        requiredRoleRef = &crRequired.RoleRef
        existingSubjects = crExisting.Subjects
        requiredSubjects = crRequired.Subjects
    case *rbacv1.ClusterRoleBinding:
        crbExisting := existing.(*rbacv1.ClusterRoleBinding)
        crbRequired := required.(*rbacv1.ClusterRoleBinding)
        existingRoleRef = &crbExisting.RoleRef
        requiredRoleRef = &crbRequired.RoleRef
        existingSubjects = crbExisting.Subjects
        requiredSubjects = crbRequired.Subjects
    default:
        return
    }

    existingRoleRef.APIGroup = rbacv1.GroupName
    for i := range existingSubjects {
        if existingSubjects[i].Kind == "User" {
            existingSubjects[i].APIGroup = rbacv1.GroupName
        }
    }

    requiredRoleRef.APIGroup = rbacv1.GroupName
    for i := range requiredSubjects {
        if requiredSubjects[i].Kind == "User" {
            requiredSubjects[i].APIGroup = rbacv1.GroupName
        }
    }
}

func changeRbacExistingByRequired(existing runtime.Object, required runtime.Object) (modified bool) {
    // This is to avoid using reflections for performance reasons
    arePolicyRulesEqual := func(pr1, pr2 []rbacv1.PolicyRule) bool {
        if len(pr1) != len(pr2) {
            return false
        }
        areStringListsEqual := func(strList1 []string, strList2 []string) bool {
            if len(strList1) != len(strList2) {
                return false
            }
            for i := range strList1 {
                if strList1[i] != strList2[i] {
                    return false
                }
            }
            return true
        }
        for i := range pr1 {
            if !areStringListsEqual(pr1[i].Verbs, pr2[i].Verbs) ||
                !areStringListsEqual(pr1[i].Resources, pr2[i].Resources) ||
                !areStringListsEqual(pr1[i].APIGroups, pr2[i].APIGroups) ||
                !areStringListsEqual(pr1[i].NonResourceURLs, pr2[i].NonResourceURLs) ||
                !areStringListsEqual(pr1[i].ResourceNames, pr2[i].ResourceNames) {
                return false
            }
        }
        return true
    }

    changeExistingPolicyRulesByRequired := func(existing, required *[]rbacv1.PolicyRule) (modified bool) {
        if !arePolicyRulesEqual(*existing, *required) {
            *existing = *required
            return true
        }
        return false
    }

    changeExistingSubjectsByRequired := func(existingSubjects, requiredSubjects *[]rbacv1.Subject) bool {
        modified := false
        if len(*existingSubjects) != len(*requiredSubjects) {
            // subject count changed, so the binding was modified and must be updated
            *existingSubjects = *requiredSubjects
            return true
        }
        for _, existingSubject := range *existingSubjects {
            found := false
            for _, requiredSubject := range *requiredSubjects {
                if existingSubject == requiredSubject {
                    found = true
                    break
                }
            }
            if !found {
                modified = true
                break
            }
        }
        if modified {
            *existingSubjects = *requiredSubjects
        }
        return modified
    }

    changeExistingRoleRefByRequired := func(existingRoleRef, requiredRoleRef *rbacv1.RoleRef) (modified bool) {
        if *existingRoleRef != *requiredRoleRef {
            *existingRoleRef = *requiredRoleRef
            return true
        }
        return false
    }

    switch existing.(type) {
    case *rbacv1.Role:
        existingRole := existing.(*rbacv1.Role)
        requiredRole := required.(*rbacv1.Role)
        modified = changeExistingPolicyRulesByRequired(&existingRole.Rules, &requiredRole.Rules)
    case *rbacv1.ClusterRole:
        existingClusterRole := existing.(*rbacv1.ClusterRole)
        requiredClusterRole := required.(*rbacv1.ClusterRole)
        modified = changeExistingPolicyRulesByRequired(&existingClusterRole.Rules, &requiredClusterRole.Rules)
    case *rbacv1.RoleBinding:
        existingRoleBinding := existing.(*rbacv1.RoleBinding)
        requiredRoleBinding := required.(*rbacv1.RoleBinding)
        modified = changeExistingSubjectsByRequired(&existingRoleBinding.Subjects, &requiredRoleBinding.Subjects)
        modified = changeExistingRoleRefByRequired(&existingRoleBinding.RoleRef,
&requiredRoleBinding.RoleRef) || modified case *rbacv1.ClusterRoleBinding: existingClusterRoleBinding := existing.(*rbacv1.ClusterRoleBinding) requiredClusterRoleBinding := required.(*rbacv1.ClusterRoleBinding) modified = changeExistingSubjectsByRequired(&existingClusterRoleBinding.Subjects, &requiredClusterRoleBinding.Subjects) modified = changeExistingRoleRefByRequired(&existingClusterRoleBinding.RoleRef, &requiredClusterRoleBinding.RoleRef) || modified } return modified } func getRbacCache(r *Reconciler, obj runtime.Object) (cache cache.Store) { switch obj.(type) { case *rbacv1.Role: cache = r.stores.RoleCache case *rbacv1.ClusterRole: cache = r.stores.ClusterRoleCache case *rbacv1.RoleBinding: cache = r.stores.RoleBindingCache case *rbacv1.ClusterRoleBinding: cache = r.stores.ClusterRoleBindingCache } return cache }
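// Editorial example (illustrative sketch, not production code; any names that do not
// appear in the helpers above are hypothetical): how these helpers are combined by the
// reconcile path at the top of this file for a drifted role or binding.
//
//    existingCopy := cachedObj.DeepCopyObject()
//    enforceAPIGroup(existingCopy, required)                    // normalizes RoleRef/Subject API groups (bindings only)
//    if changeRbacExistingByRequired(existingCopy, required) {  // copies required Rules/Subjects/RoleRef over and reports drift
//        if err := getRbacUpdateFunction(r, existingCopy)(); err != nil {
//            // the caller wraps and returns this error, see the top of this file
//        }
//    }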
package apply

import (
    rbacv1 "k8s.io/api/rbac/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/runtime"
    "k8s.io/client-go/tools/cache"

    v1 "kubevirt.io/api/core/v1"
    "kubevirt.io/client-go/log"
)

func (r *Reconciler) backupRBACs() error {
    // Backup existing ClusterRoles
    objects := r.stores.ClusterRoleCache.List()
    for _, obj := range objects {
        cachedCr, ok := obj.(*rbacv1.ClusterRole)
        if !ok || !needsBackup(r.kv, r.stores.ClusterRoleCache, &cachedCr.ObjectMeta) {
            continue
        }
        imageTag, imageRegistry, id, ok := getInstallStrategyAnnotations(&cachedCr.ObjectMeta)
        if !ok {
            continue
        }
        err := r.backupRBAC(cachedCr.DeepCopy(), cachedCr.Name, string(cachedCr.UID), imageTag, imageRegistry, id)
        if err != nil {
            return err
        }
    }

    // Backup existing ClusterRoleBindings
    objects = r.stores.ClusterRoleBindingCache.List()
    for _, obj := range objects {
        cachedCrb, ok := obj.(*rbacv1.ClusterRoleBinding)
        if !ok || !needsBackup(r.kv, r.stores.ClusterRoleBindingCache, &cachedCrb.ObjectMeta) {
            continue
        }
        imageTag, imageRegistry, id, ok := getInstallStrategyAnnotations(&cachedCrb.ObjectMeta)
        if !ok {
            continue
        }
        err := r.backupRBAC(cachedCrb.DeepCopy(), cachedCrb.Name, string(cachedCrb.UID), imageTag, imageRegistry, id)
        if err != nil {
            return err
        }
    }

    // Backup existing Roles
    objects = r.stores.RoleCache.List()
    for _, obj := range objects {
        cachedCr, ok := obj.(*rbacv1.Role)
        if !ok || !needsBackup(r.kv, r.stores.RoleCache, &cachedCr.ObjectMeta) {
            continue
        }
        imageTag, imageRegistry, id, ok := getInstallStrategyAnnotations(&cachedCr.ObjectMeta)
        if !ok {
            continue
        }
        err := r.backupRBAC(cachedCr.DeepCopy(), cachedCr.Name, string(cachedCr.UID), imageTag, imageRegistry, id)
        if err != nil {
            return err
        }
    }

    // Backup existing RoleBindings
    objects = r.stores.RoleBindingCache.List()
    for _, obj := range objects {
        cachedRb, ok := obj.(*rbacv1.RoleBinding)
        if !ok || !needsBackup(r.kv, r.stores.RoleBindingCache, &cachedRb.ObjectMeta) {
            continue
        }
        imageTag, imageRegistry, id, ok := getInstallStrategyAnnotations(&cachedRb.ObjectMeta)
        if !ok {
            continue
        }
        err := r.backupRBAC(cachedRb.DeepCopy(), cachedRb.Name, string(cachedRb.UID), imageTag, imageRegistry, id)
        if err != nil {
            return err
        }
    }

    return nil
}

func (r *Reconciler) backupRBAC(obj runtime.Object, name, UID, imageTag, imageRegistry, id string) error {
    meta := getRbacMetaObject(obj)
    *meta = metav1.ObjectMeta{
        GenerateName: name,
    }
    injectOperatorMetadata(r.kv, meta, imageTag, imageRegistry, id, true)
    meta.Annotations[v1.EphemeralBackupObject] = UID

    // Create backup
    createRole := getRbacCreateFunction(r, obj)
    err := createRole()
    if err != nil {
        return err
    }

    kind := obj.GetObjectKind().GroupVersionKind().Kind
    log.Log.V(2).Infof("backup %v %v created", kind, name)
    return nil
}

func shouldBackupRBACObject(kv *v1.KubeVirt, objectMeta *metav1.ObjectMeta) bool {
    curVersion, curImageRegistry, curID := getTargetVersionRegistryID(kv)

    if objectMatchesVersion(objectMeta, curVersion, curImageRegistry, curID, kv.GetGeneration()) {
        // matches current target version already, so doesn't need backup
        return false
    }

    if objectMeta.Annotations == nil {
        return false
    }

    _, ok := objectMeta.Annotations[v1.EphemeralBackupObject]
    if ok {
        // ephemeral backup objects don't need to be backed up because
        // they are the backup
        return false
    }

    return true
}

func needsBackup(kv *v1.KubeVirt, cache cache.Store, meta *metav1.ObjectMeta) bool {
    shouldBackup := shouldBackupRBACObject(kv, meta)
    imageTag, imageRegistry, id, ok := getInstallStrategyAnnotations(meta)
    if !shouldBackup || !ok {
        return false
} // loop through cache and determine if there's an ephemeral backup // for this object already objects := cache.List() for _, obj := range objects { cachedObj, ok := obj.(*metav1.ObjectMeta) if !ok || cachedObj.DeletionTimestamp != nil || meta.Annotations == nil { continue } uid, ok := cachedObj.Annotations[v1.EphemeralBackupObject] if !ok { // this is not an ephemeral backup object continue } if uid == string(meta.UID) && objectMatchesVersion(cachedObj, imageTag, imageRegistry, id, kv.GetGeneration()) { // found backup. UID matches and versions match // note, it's possible for a single UID to have multiple backups with // different versions return false } } return true }
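// Editorial example (illustrative sketch; the object name, UID and annotation values
// below are hypothetical): the shape of the metadata that backupRBAC gives an ephemeral
// backup. The original name becomes GenerateName and the EphemeralBackupObject
// annotation records the source UID, which is exactly what needsBackup matches against
// when it scans the cache for an existing backup at the same version.
//
//    backup := original.DeepCopy() // e.g. a ClusterRole named "kubevirt-controller", UID "abc-123"
//    backup.ObjectMeta = metav1.ObjectMeta{
//        GenerateName: "kubevirt-controller",
//        Annotations: map[string]string{
//            v1.EphemeralBackupObject:               "abc-123",
//            v1.InstallStrategyVersionAnnotation:    "v1.2.0",
//            v1.InstallStrategyRegistryAnnotation:   "registry.example.org/kubevirt",
//            v1.InstallStrategyIdentifierAnnotation: "observed-deployment-id",
//        },
//    }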
/* * This file is part of the KubeVirt project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * Copyright 2019 Red Hat, Inc. * */ package apply import ( "context" "fmt" "strconv" "strings" "time" "github.com/coreos/go-semver/semver" secv1 "github.com/openshift/api/security/v1" promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" admissionregistrationv1 "k8s.io/api/admissionregistration/v1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" extv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/client-go/tools/record" "k8s.io/client-go/util/workqueue" apiregv1 "k8s.io/kube-aggregator/pkg/apis/apiregistration/v1" v1 "kubevirt.io/api/core/v1" "kubevirt.io/client-go/kubecli" "kubevirt.io/client-go/log" "kubevirt.io/kubevirt/pkg/apimachinery/patch" "kubevirt.io/kubevirt/pkg/certificates/triple" "kubevirt.io/kubevirt/pkg/certificates/triple/cert" "kubevirt.io/kubevirt/pkg/controller" "kubevirt.io/kubevirt/pkg/virt-config/featuregate" "kubevirt.io/kubevirt/pkg/virt-operator/resource/generate/install" "kubevirt.io/kubevirt/pkg/virt-operator/util" ) const Duration7d = time.Hour * 24 * 7 const Duration1d = time.Hour * 24 type DefaultInfraComponentsNodePlacement int const ( AnyNode DefaultInfraComponentsNodePlacement = iota RequireControlPlanePreferNonWorker ) func objectMatchesVersion(objectMeta *metav1.ObjectMeta, version, imageRegistry, id string, generation int64) bool { if objectMeta.Annotations == nil { return false } foundVersion, foundImageRegistry, foundID, _ := getInstallStrategyAnnotations(objectMeta) foundGeneration, generationExists := objectMeta.Annotations[v1.KubeVirtGenerationAnnotation] foundLabels := util.IsManagedByOperator(objectMeta.Labels) sGeneration := strconv.FormatInt(generation, 10) if generationExists && foundGeneration != sGeneration { return false } if foundVersion == version && foundImageRegistry == imageRegistry && foundID == id && foundLabels { return true } return false } func injectOperatorMetadata(kv *v1.KubeVirt, objectMeta *metav1.ObjectMeta, version string, imageRegistry string, id string, injectCustomizationMetadata bool) { if objectMeta.Labels == nil { objectMeta.Labels = make(map[string]string) } if kv.Spec.ProductVersion != "" && util.IsValidLabel(kv.Spec.ProductVersion) { objectMeta.Labels[v1.AppVersionLabel] = kv.Spec.ProductVersion } if kv.Spec.ProductName != "" && util.IsValidLabel(kv.Spec.ProductName) { objectMeta.Labels[v1.AppPartOfLabel] = kv.Spec.ProductName } objectMeta.Labels[v1.AppComponentLabel] = GetAppComponent(kv) objectMeta.Labels[v1.ManagedByLabel] = v1.ManagedByLabelOperatorValue if objectMeta.Annotations == nil { objectMeta.Annotations = make(map[string]string) } objectMeta.Annotations[v1.InstallStrategyVersionAnnotation] = version objectMeta.Annotations[v1.InstallStrategyRegistryAnnotation] = imageRegistry objectMeta.Annotations[v1.InstallStrategyIdentifierAnnotation] = id if 
injectCustomizationMetadata { objectMeta.Annotations[v1.KubeVirtGenerationAnnotation] = strconv.FormatInt(kv.ObjectMeta.GetGeneration(), 10) } } func GetAppComponent(kv *v1.KubeVirt) string { if kv.Spec.ProductComponent != "" && util.IsValidLabel(kv.Spec.ProductComponent) { return kv.Spec.ProductComponent } return v1.AppComponent } const ( kubernetesOSLabel = corev1.LabelOSStable kubernetesOSLinux = "linux" ) // Merge all Tolerations, Affinity and NodeSelectos from NodePlacement into pod spec func InjectPlacementMetadata(componentConfig *v1.ComponentConfig, podSpec *corev1.PodSpec, nodePlacementOption DefaultInfraComponentsNodePlacement) { if podSpec == nil { podSpec = &corev1.PodSpec{} } if componentConfig == nil || componentConfig.NodePlacement == nil { switch nodePlacementOption { case AnyNode: componentConfig = &v1.ComponentConfig{NodePlacement: &v1.NodePlacement{}} case RequireControlPlanePreferNonWorker: componentConfig = &v1.ComponentConfig{ NodePlacement: &v1.NodePlacement{ Affinity: &corev1.Affinity{ NodeAffinity: &corev1.NodeAffinity{ RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{ NodeSelectorTerms: []corev1.NodeSelectorTerm{ { MatchExpressions: []corev1.NodeSelectorRequirement{ { Key: "node-role.kubernetes.io/control-plane", Operator: corev1.NodeSelectorOpExists, }, }, }, { MatchExpressions: []corev1.NodeSelectorRequirement{ { Key: "node-role.kubernetes.io/master", Operator: corev1.NodeSelectorOpExists, }, }, }, }, }, PreferredDuringSchedulingIgnoredDuringExecution: []corev1.PreferredSchedulingTerm{ { Weight: 100, Preference: corev1.NodeSelectorTerm{ MatchExpressions: []corev1.NodeSelectorRequirement{ { Key: "node-role.kubernetes.io/worker", Operator: corev1.NodeSelectorOpDoesNotExist, }, }, }, }, }, }, }, Tolerations: []corev1.Toleration{ { Key: "node-role.kubernetes.io/control-plane", Operator: corev1.TolerationOpExists, Effect: corev1.TaintEffectNoSchedule, }, { Key: "node-role.kubernetes.io/master", Operator: corev1.TolerationOpExists, Effect: corev1.TaintEffectNoSchedule, }, }, }, } default: log.Log.Errorf("Unknown nodePlacementOption %d provided to InjectPlacementMetadata. Falling back to the AnyNode option", nodePlacementOption) componentConfig = &v1.ComponentConfig{NodePlacement: &v1.NodePlacement{}} } } nodePlacement := componentConfig.NodePlacement if len(nodePlacement.NodeSelector) == 0 { nodePlacement.NodeSelector = make(map[string]string) } if _, ok := nodePlacement.NodeSelector[kubernetesOSLabel]; !ok { nodePlacement.NodeSelector[kubernetesOSLabel] = kubernetesOSLinux } if len(podSpec.NodeSelector) == 0 { podSpec.NodeSelector = make(map[string]string, len(nodePlacement.NodeSelector)) } // podSpec.NodeSelector for nsKey, nsVal := range nodePlacement.NodeSelector { // Favor podSpec over NodePlacement. This prevents cluster admin from clobbering // node selectors that KubeVirt intentionally set. 
if _, ok := podSpec.NodeSelector[nsKey]; !ok { podSpec.NodeSelector[nsKey] = nsVal } } // podSpec.Affinity if nodePlacement.Affinity != nil { if podSpec.Affinity == nil { podSpec.Affinity = nodePlacement.Affinity.DeepCopy() } else { // podSpec.Affinity.NodeAffinity if nodePlacement.Affinity.NodeAffinity != nil { if podSpec.Affinity.NodeAffinity == nil { podSpec.Affinity.NodeAffinity = nodePlacement.Affinity.NodeAffinity.DeepCopy() } else { // need to copy all affinity terms one by one if nodePlacement.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil { if podSpec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil { podSpec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution = nodePlacement.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.DeepCopy() } else { // merge the list of terms from NodePlacement into podSpec for _, term := range nodePlacement.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms { podSpec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms = append(podSpec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms, term) } } } //PreferredDuringSchedulingIgnoredDuringExecution for _, term := range nodePlacement.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution { podSpec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution = append(podSpec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution, term) } } } // podSpec.Affinity.PodAffinity if nodePlacement.Affinity.PodAffinity != nil { if podSpec.Affinity.PodAffinity == nil { podSpec.Affinity.PodAffinity = nodePlacement.Affinity.PodAffinity.DeepCopy() } else { //RequiredDuringSchedulingIgnoredDuringExecution for _, term := range nodePlacement.Affinity.PodAffinity.RequiredDuringSchedulingIgnoredDuringExecution { podSpec.Affinity.PodAffinity.RequiredDuringSchedulingIgnoredDuringExecution = append(podSpec.Affinity.PodAffinity.RequiredDuringSchedulingIgnoredDuringExecution, term) } //PreferredDuringSchedulingIgnoredDuringExecution for _, term := range nodePlacement.Affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution { podSpec.Affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution = append(podSpec.Affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution, term) } } } // podSpec.Affinity.PodAntiAffinity if nodePlacement.Affinity.PodAntiAffinity != nil { if podSpec.Affinity.PodAntiAffinity == nil { podSpec.Affinity.PodAntiAffinity = nodePlacement.Affinity.PodAntiAffinity.DeepCopy() } else { //RequiredDuringSchedulingIgnoredDuringExecution for _, term := range nodePlacement.Affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution { podSpec.Affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution = append(podSpec.Affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution, term) } //PreferredDuringSchedulingIgnoredDuringExecution for _, term := range nodePlacement.Affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution { podSpec.Affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution = append(podSpec.Affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution, term) } } } } } //podSpec.Tolerations if len(nodePlacement.Tolerations) != 0 { if len(podSpec.Tolerations) == 0 { podSpec.Tolerations = []corev1.Toleration{} } for _, toleration := range nodePlacement.Tolerations 
{ podSpec.Tolerations = append(podSpec.Tolerations, toleration) } } } func createLabelsAndAnnotationsPatch(objectMeta *metav1.ObjectMeta) []patch.PatchOption { return []patch.PatchOption{patch.WithAdd("/metadata/labels", objectMeta.Labels), patch.WithAdd("/metadata/annotations", objectMeta.Annotations), patch.WithAdd("/metadata/ownerReferences", objectMeta.OwnerReferences)} } func getPatchWithObjectMetaAndSpec(ops []patch.PatchOption, meta *metav1.ObjectMeta, spec interface{}) []patch.PatchOption { // Add Labels and Annotations Patches ops = append(ops, createLabelsAndAnnotationsPatch(meta)...) // and spec replacement to patch return append(ops, patch.WithReplace("/spec", spec)) } func shouldTakeUpdatePath(targetVersion, currentVersion string) bool { // if no current version, then this can't be an update if currentVersion == "" { return false } // semver doesn't like the 'v' prefix targetVersion = strings.TrimPrefix(targetVersion, "v") currentVersion = strings.TrimPrefix(currentVersion, "v") // our default position is that this is an update. // So if the target and current version do not // adhere to the semver spec, we assume by default the // update path is the correct path. shouldTakeUpdatePath := true target, err := semver.NewVersion(targetVersion) if err == nil { current, err := semver.NewVersion(currentVersion) if err == nil { if target.Compare(*current) <= 0 { shouldTakeUpdatePath = false } } } return shouldTakeUpdatePath } func haveApiDeploymentsRolledOver(targetStrategy install.StrategyInterface, kv *v1.KubeVirt, stores util.Stores) bool { for _, deployment := range targetStrategy.ApiDeployments() { if !util.DeploymentIsReady(kv, deployment, stores) { log.Log.V(2).Infof("Waiting on deployment %v to roll over to latest version", deployment.GetName()) // not rolled out yet return false } } return true } func haveControllerDeploymentsRolledOver(targetStrategy install.StrategyInterface, kv *v1.KubeVirt, stores util.Stores) bool { for _, deployment := range targetStrategy.ControllerDeployments() { if !util.DeploymentIsReady(kv, deployment, stores) { log.Log.V(2).Infof("Waiting on deployment %v to roll over to latest version", deployment.GetName()) // not rolled out yet return false } } return true } func haveExportProxyDeploymentsRolledOver(targetStrategy install.StrategyInterface, kv *v1.KubeVirt, stores util.Stores) bool { for _, deployment := range targetStrategy.ExportProxyDeployments() { if !util.DeploymentIsReady(kv, deployment, stores) { log.Log.V(2).Infof("Waiting on deployment %v to roll over to latest version", deployment.GetName()) // not rolled out yet return false } } return true } func haveDaemonSetsRolledOver(targetStrategy install.StrategyInterface, kv *v1.KubeVirt, stores util.Stores) bool { for _, daemonSet := range targetStrategy.DaemonSets() { if !util.DaemonsetIsReady(kv, daemonSet, stores) { log.Log.V(2).Infof("Waiting on daemonset %v to roll over to latest version", daemonSet.GetName()) // not rolled out yet return false } } return true } func (r *Reconciler) createDummyWebhookValidator() error { var webhooks []admissionregistrationv1.ValidatingWebhook version, imageRegistry, id := getTargetVersionRegistryID(r.kv) // If webhook already exists in cache, then exit. 
objects := r.stores.ValidationWebhookCache.List()
    for _, obj := range objects {
        if webhook, ok := obj.(*admissionregistrationv1.ValidatingWebhookConfiguration); ok {
            if objectMatchesVersion(&webhook.ObjectMeta, version, imageRegistry, id, r.kv.GetGeneration()) {
                // already created blocking webhook for this version
                return nil
            }
        }
    }

    // generate a fake cert. this isn't actually used
    sideEffectNone := admissionregistrationv1.SideEffectClassNone
    failurePolicy := admissionregistrationv1.Fail

    for _, crd := range r.targetStrategy.CRDs() {
        _, exists, _ := r.stores.OperatorCrdCache.Get(crd)
        if exists {
            // this CRD isn't new, it already exists in cache so we don't
            // need a blocking admission webhook to wait until the new
            // apiserver is active
            continue
        }
        path := fmt.Sprintf("/fake-path/%s", crd.Name)
        webhooks = append(webhooks, admissionregistrationv1.ValidatingWebhook{
            Name:                    fmt.Sprintf("%s-tmp-validator", crd.Name),
            AdmissionReviewVersions: []string{"v1", "v1beta1"},
            SideEffects:             &sideEffectNone,
            FailurePolicy:           &failurePolicy,
            Rules: []admissionregistrationv1.RuleWithOperations{{
                Operations: []admissionregistrationv1.OperationType{
                    admissionregistrationv1.Create,
                },
                Rule: admissionregistrationv1.Rule{
                    APIGroups:   []string{crd.Spec.Group},
                    APIVersions: v1.ApiSupportedWebhookVersions,
                    Resources:   []string{crd.Spec.Names.Plural},
                },
            }},
            ClientConfig: admissionregistrationv1.WebhookClientConfig{
                Service: &admissionregistrationv1.ServiceReference{
                    Namespace: r.kv.Namespace,
                    Name:      "fake-validation-service",
                    Path:      &path,
                },
            },
        })
    }

    // nothing to do here if we have no new CRDs to create webhooks for
    if len(webhooks) == 0 {
        return nil
    }

    // Set some fake signing cert bytes in for each rule so the k8s apiserver will
    // allow us to create the webhook.
    caKeyPair, _ := triple.NewCA("fake.kubevirt.io", time.Hour*24)
    signingCertBytes := cert.EncodeCertPEM(caKeyPair.Cert)
    for i := range webhooks {
        // assign through the index so the CA bundle lands on the slice element,
        // not on a loop-local copy
        webhooks[i].ClientConfig.CABundle = signingCertBytes
    }

    validationWebhook := &admissionregistrationv1.ValidatingWebhookConfiguration{
        ObjectMeta: metav1.ObjectMeta{
            GenerateName: "virt-operator-tmp-webhook",
        },
        Webhooks: webhooks,
    }
    injectOperatorMetadata(r.kv, &validationWebhook.ObjectMeta, version, imageRegistry, id, true)

    r.expectations.ValidationWebhook.RaiseExpectations(r.kvKey, 1, 0)
    _, err := r.clientset.AdmissionregistrationV1().ValidatingWebhookConfigurations().Create(context.Background(), validationWebhook, metav1.CreateOptions{})
    if err != nil {
        r.expectations.ValidationWebhook.LowerExpectations(r.kvKey, 1, 0)
        return fmt.Errorf("unable to create validation webhook: %v", err)
    }
    log.Log.V(2).Infof("Validation webhook created for image %s and registry %s", version, imageRegistry)

    return nil
}

func getTargetVersionRegistryID(kv *v1.KubeVirt) (version string, registry string, id string) {
    version = kv.Status.TargetKubeVirtVersion
    registry = kv.Status.TargetKubeVirtRegistry
    id = kv.Status.TargetDeploymentID
    return
}

func isServiceClusterIP(service *corev1.Service) bool {
    if service.Spec.Type == "" || service.Spec.Type == corev1.ServiceTypeClusterIP {
        return true
    }
    return false
}

type Reconciler struct {
    kv               *v1.KubeVirt
    kvKey            string
    targetStrategy   install.StrategyInterface
    stores           util.Stores
    config           util.OperatorConfig
    clientset        kubecli.KubevirtClient
    aggregatorclient install.APIServiceInterface
    expectations     *util.Expectations
    recorder         record.EventRecorder
}

func NewReconciler(kv *v1.KubeVirt, targetStrategy install.StrategyInterface, stores util.Stores, config util.OperatorConfig, clientset kubecli.KubevirtClient,
aggregatorclient install.APIServiceInterface, expectations *util.Expectations, recorder record.EventRecorder) (*Reconciler, error) { kvKey, err := controller.KeyFunc(kv) if err != nil { return nil, err } customizer, err := NewCustomizer(kv.Spec.CustomizeComponents) if err != nil { return nil, err } err = customizer.Apply(targetStrategy) if err != nil { return nil, err } return &Reconciler{ kv: kv, kvKey: kvKey, targetStrategy: targetStrategy, stores: stores, config: config, clientset: clientset, aggregatorclient: aggregatorclient, expectations: expectations, recorder: recorder, }, nil } func (r *Reconciler) Sync(queue workqueue.TypedRateLimitingInterface[string]) (bool, error) { // Avoid log spam by logging this issue once early instead of for once each object created if !util.IsValidLabel(r.kv.Spec.ProductVersion) { log.Log.Errorf("invalid kubevirt.spec.productVersion: labels must be 63 characters or less, begin and end with alphanumeric characters, and contain only dot, hyphen or underscore") } if !util.IsValidLabel(r.kv.Spec.ProductName) { log.Log.Errorf("invalid kubevirt.spec.productName: labels must be 63 characters or less, begin and end with alphanumeric characters, and contain only dot, hyphen or underscore") } if !util.IsValidLabel(r.kv.Spec.ProductComponent) { log.Log.Errorf("invalid kubevirt.spec.productComponent: labels must be 63 characters or less, begin and end with alphanumeric characters, and contain only dot, hyphen or underscore") } targetVersion := r.kv.Status.TargetKubeVirtVersion targetImageRegistry := r.kv.Status.TargetKubeVirtRegistry observedVersion := r.kv.Status.ObservedKubeVirtVersion observedImageRegistry := r.kv.Status.ObservedKubeVirtRegistry apiDeploymentsRolledOver := haveApiDeploymentsRolledOver(r.targetStrategy, r.kv, r.stores) controllerDeploymentsRolledOver := haveControllerDeploymentsRolledOver(r.targetStrategy, r.kv, r.stores) exportProxyEnabled := r.exportProxyEnabled() exportProxyDeploymentsRolledOver := !exportProxyEnabled || haveExportProxyDeploymentsRolledOver(r.targetStrategy, r.kv, r.stores) daemonSetsRolledOver := haveDaemonSetsRolledOver(r.targetStrategy, r.kv, r.stores) infrastructureRolledOver := false if apiDeploymentsRolledOver && controllerDeploymentsRolledOver && exportProxyDeploymentsRolledOver && daemonSetsRolledOver { // infrastructure has rolled over and is available infrastructureRolledOver = true } else if (targetVersion == observedVersion) && (targetImageRegistry == observedImageRegistry) { // infrastructure was observed to have rolled over successfully // in the past infrastructureRolledOver = true } // -------- CREATE AND ROLE OUT UPDATED OBJECTS -------- // creates a blocking webhook for any new CRDs that don't exist previously. // this webhook is removed once the new apiserver is online. 
if !apiDeploymentsRolledOver { err := r.createDummyWebhookValidator() if err != nil { return false, err } } else { err := deleteDummyWebhookValidators(r.kv, r.clientset, r.stores, r.expectations) if err != nil { return false, err } } // create/update CRDs err := r.createOrUpdateCrds() if err != nil { return false, err } // create/update serviceMonitor err = r.createOrUpdateServiceMonitors() if err != nil { return false, err } // create/update PrometheusRules err = r.createOrUpdatePrometheusRules() if err != nil { return false, err } // backup any old RBAC rules that don't match current version if !infrastructureRolledOver { err = r.backupRBACs() if err != nil { return false, err } } // create/update all RBAC rules err = r.createOrUpdateRbac() if err != nil { return false, err } // create/update SCCs err = r.createOrUpdateSCC() if err != nil { return false, err } // create/update Services pending, err := r.createOrUpdateServices() if err != nil { return false, err } else if pending { // waiting on multi step service change. // During an update, if the 'type' of the service changes then // we have to delete the service, wait for the deletion to be observed, // then create the new service. This is because a service's "type" is // not mutatable. return false, nil } err = r.createOrUpdateValidatingAdmissionPolicyBindings() if err != nil { return false, err } err = r.createOrUpdateValidatingAdmissionPolicies() if err != nil { return false, err } err = r.createOrUpdateComponentsWithCertificates(queue) if err != nil { return false, err } if infrastructureRolledOver { err = r.removeKvServiceAccountsFromDefaultSCC(r.kv.Namespace) if err != nil { return false, err } } if shouldTakeUpdatePath(targetVersion, observedVersion) { finished, err := r.updateKubeVirtSystem(controllerDeploymentsRolledOver) if !finished || err != nil { return false, err } } else { finished, err := r.createOrRollBackSystem(apiDeploymentsRolledOver) if !finished || err != nil { return false, err } } err = r.syncKubevirtNamespaceLabels() if err != nil { return false, err } if !infrastructureRolledOver { // still waiting on roll out before cleaning up. return false, nil } // -------- ROLLOUT INCOMPATIBLE CHANGES WHICH REQUIRE A FULL CONTROL PLANE ROLL OVER -------- // some changes can only be done after the control plane rolled over err = r.rolloutNonCompatibleCRDChanges() if err != nil { return false, err } // -------- CLEAN UP OLD UNUSED OBJECTS -------- // outdated webhooks can potentially block deletes of other objects during the cleanup and need to be removed first err = r.deleteObjectsNotInInstallStrategy() if err != nil { return false, err } if r.commonInstancetypesDeploymentEnabled() { if err := r.createOrUpdateInstancetypes(); err != nil { return false, err } if err := r.createOrUpdatePreferences(); err != nil { return false, err } } else { if err := r.deleteInstancetypes(); err != nil { return false, err } if err := r.deletePreferences(); err != nil { return false, err } } return true, nil } func (r *Reconciler) createOrRollBackSystem(apiDeploymentsRolledOver bool) (bool, error) { // CREATE/ROLLBACK PATH IS // 1. apiserver - ensures validation of objects occur before allowing any control plane to act on them. // 2. wait for apiservers to roll over // 3. 
controllers and daemonsets // create/update API Deployments for _, deployment := range r.targetStrategy.ApiDeployments() { deployment, err := r.syncDeployment(deployment) if err != nil { return false, err } err = r.syncPodDisruptionBudgetForDeployment(deployment) if err != nil { return false, err } } // wait on api servers to roll over if !apiDeploymentsRolledOver { // not rolled out yet return false, nil } // create/update Controller Deployments for _, deployment := range r.targetStrategy.ControllerDeployments() { deployment, err := r.syncDeployment(deployment) if err != nil { return false, err } err = r.syncPodDisruptionBudgetForDeployment(deployment) if err != nil { return false, err } } // create/update ExportProxy Deployments for _, deployment := range r.targetStrategy.ExportProxyDeployments() { if r.exportProxyEnabled() { deployment, err := r.syncDeployment(deployment) if err != nil { return false, err } err = r.syncPodDisruptionBudgetForDeployment(deployment) if err != nil { return false, err } } else if err := r.deleteDeployment(deployment); err != nil { return false, err } } // create/update Daemonsets for _, daemonSet := range r.targetStrategy.DaemonSets() { finished, err := r.syncDaemonSet(daemonSet) if !finished || err != nil { return false, err } } return true, nil } func (r *Reconciler) deleteDeployment(deployment *appsv1.Deployment) error { obj, exists, err := r.stores.DeploymentCache.Get(deployment) if err != nil { return err } if !exists || obj.(*appsv1.Deployment).DeletionTimestamp != nil { return nil } key, err := controller.KeyFunc(deployment) if err != nil { return err } r.expectations.Deployment.AddExpectedDeletion(r.kvKey, key) if err := r.clientset.AppsV1().Deployments(deployment.Namespace).Delete(context.Background(), deployment.Name, metav1.DeleteOptions{}); err != nil { r.expectations.Deployment.DeletionObserved(r.kvKey, key) return err } return nil } func (r *Reconciler) deleteObjectsNotInInstallStrategy() error { gracePeriod := int64(0) deleteOptions := metav1.DeleteOptions{ GracePeriodSeconds: &gracePeriod, } client := r.clientset.ExtensionsClient() // -------- CLEAN UP OLD UNUSED OBJECTS -------- // outdated webhooks can potentially block deletes of other objects during the cleanup and need to be removed first // remove unused validating webhooks objects := r.stores.ValidationWebhookCache.List() for _, obj := range objects { if webhook, ok := obj.(*admissionregistrationv1.ValidatingWebhookConfiguration); ok && webhook.DeletionTimestamp == nil { found := false if strings.HasPrefix(webhook.Name, "virt-operator-tmp-webhook") { continue } for _, targetWebhook := range r.targetStrategy.ValidatingWebhookConfigurations() { if targetWebhook.Name == webhook.Name { found = true break } } if !found { if key, err := controller.KeyFunc(webhook); err == nil { r.expectations.ValidationWebhook.AddExpectedDeletion(r.kvKey, key) err := r.clientset.AdmissionregistrationV1().ValidatingWebhookConfigurations().Delete(context.Background(), webhook.Name, deleteOptions) if err != nil { r.expectations.ValidationWebhook.DeletionObserved(r.kvKey, key) log.Log.Errorf("Failed to delete webhook %+v: %v", webhook, err) return err } } } } } // remove unused mutating webhooks objects = r.stores.MutatingWebhookCache.List() for _, obj := range objects { if webhook, ok := obj.(*admissionregistrationv1.MutatingWebhookConfiguration); ok && webhook.DeletionTimestamp == nil { found := false for _, targetWebhook := range r.targetStrategy.MutatingWebhookConfigurations() { if targetWebhook.Name == 
webhook.Name { found = true break } } if !found { if key, err := controller.KeyFunc(webhook); err == nil { r.expectations.MutatingWebhook.AddExpectedDeletion(r.kvKey, key) err := r.clientset.AdmissionregistrationV1().MutatingWebhookConfigurations().Delete(context.Background(), webhook.Name, deleteOptions) if err != nil { r.expectations.MutatingWebhook.DeletionObserved(r.kvKey, key) log.Log.Errorf("Failed to delete webhook %+v: %v", webhook, err) return err } } } } } // remove unused APIServices objects = r.stores.APIServiceCache.List() for _, obj := range objects { if apiService, ok := obj.(*apiregv1.APIService); ok && apiService.DeletionTimestamp == nil { found := false for _, targetAPIService := range r.targetStrategy.APIServices() { if targetAPIService.Name == apiService.Name { found = true break } } if !found { if key, err := controller.KeyFunc(apiService); err == nil { r.expectations.APIService.AddExpectedDeletion(r.kvKey, key) err := r.aggregatorclient.Delete(context.Background(), apiService.Name, deleteOptions) if err != nil { r.expectations.APIService.DeletionObserved(r.kvKey, key) log.Log.Errorf("Failed to delete apiService %+v: %v", apiService, err) return err } } } } } // remove unused Secrets objects = r.stores.SecretCache.List() for _, obj := range objects { if secret, ok := obj.(*corev1.Secret); ok && secret.DeletionTimestamp == nil { found := false for _, targetSecret := range r.targetStrategy.CertificateSecrets() { if targetSecret.Name == secret.Name { found = true break } } if !found { if key, err := controller.KeyFunc(secret); err == nil { r.expectations.Secrets.AddExpectedDeletion(r.kvKey, key) err := r.clientset.CoreV1().Secrets(secret.Namespace).Delete(context.Background(), secret.Name, deleteOptions) if err != nil { r.expectations.Secrets.DeletionObserved(r.kvKey, key) log.Log.Errorf("Failed to delete secret %+v: %v", secret, err) return err } } } } } // remove unused ConfigMaps objects = r.stores.ConfigMapCache.List() for _, obj := range objects { if configMap, ok := obj.(*corev1.ConfigMap); ok && configMap.DeletionTimestamp == nil { found := false for _, targetConfigMap := range r.targetStrategy.ConfigMaps() { if targetConfigMap.Name == configMap.Name { found = true break } } if !found { if key, err := controller.KeyFunc(configMap); err == nil { r.expectations.ConfigMap.AddExpectedDeletion(r.kvKey, key) err := r.clientset.CoreV1().ConfigMaps(configMap.Namespace).Delete(context.Background(), configMap.Name, deleteOptions) if err != nil { r.expectations.ConfigMap.DeletionObserved(r.kvKey, key) log.Log.Errorf("Failed to delete configmap %+v: %v", configMap, err) return err } } } } } // remove unused ValidatingAdmissionPolicyBinding objects = r.stores.ValidatingAdmissionPolicyBindingCache.List() for _, obj := range objects { if validatingAdmissionPolicyBinding, ok := obj.(*admissionregistrationv1.ValidatingAdmissionPolicyBinding); ok && validatingAdmissionPolicyBinding.DeletionTimestamp == nil { found := false for _, targetValidatingAdmissionPolicyBinding := range r.targetStrategy.ValidatingAdmissionPolicyBindings() { if targetValidatingAdmissionPolicyBinding.Name == validatingAdmissionPolicyBinding.Name { found = true break } } if !found { if key, err := controller.KeyFunc(validatingAdmissionPolicyBinding); err == nil { r.expectations.ValidatingAdmissionPolicyBinding.AddExpectedDeletion(r.kvKey, key) err := r.clientset.AdmissionregistrationV1().ValidatingAdmissionPolicyBindings().Delete(context.Background(), validatingAdmissionPolicyBinding.Name, deleteOptions) if err 
!= nil { r.expectations.ValidatingAdmissionPolicyBinding.DeletionObserved(r.kvKey, key) log.Log.Errorf("Failed to delete validatingAdmissionPolicyBinding %+v: %v", validatingAdmissionPolicyBinding, err) return err } } } } } // remove unused ValidatingAdmissionPolicy objects = r.stores.ValidatingAdmissionPolicyCache.List() for _, obj := range objects { if validatingAdmissionPolicy, ok := obj.(*admissionregistrationv1.ValidatingAdmissionPolicy); ok && validatingAdmissionPolicy.DeletionTimestamp == nil { found := false for _, targetValidatingAdmissionPolicy := range r.targetStrategy.ValidatingAdmissionPolicies() { if targetValidatingAdmissionPolicy.Name == validatingAdmissionPolicy.Name { found = true break } } if !found { if key, err := controller.KeyFunc(validatingAdmissionPolicy); err == nil { r.expectations.ValidatingAdmissionPolicy.AddExpectedDeletion(r.kvKey, key) err := r.clientset.AdmissionregistrationV1().ValidatingAdmissionPolicies().Delete(context.Background(), validatingAdmissionPolicy.Name, deleteOptions) if err != nil { r.expectations.ValidatingAdmissionPolicy.DeletionObserved(r.kvKey, key) log.Log.Errorf("Failed to delete validatingAdmissionPolicy %+v: %v", validatingAdmissionPolicy, err) return err } } } } } // remove unused crds objects = r.stores.OperatorCrdCache.List() for _, obj := range objects { if crd, ok := obj.(*extv1.CustomResourceDefinition); ok && crd.DeletionTimestamp == nil { found := false for _, targetCrd := range r.targetStrategy.CRDs() { if targetCrd.Name == crd.Name { found = true break } } if !found { if key, err := controller.KeyFunc(crd); err == nil { r.expectations.OperatorCrd.AddExpectedDeletion(r.kvKey, key) err := client.ApiextensionsV1().CustomResourceDefinitions().Delete(context.Background(), crd.Name, deleteOptions) if err != nil { r.expectations.OperatorCrd.DeletionObserved(r.kvKey, key) log.Log.Errorf("Failed to delete crd %+v: %v", crd, err) return err } } } } } // remove unused daemonsets objects = r.stores.DaemonSetCache.List() for _, obj := range objects { if ds, ok := obj.(*appsv1.DaemonSet); ok && ds.DeletionTimestamp == nil { found := false for _, targetDs := range r.targetStrategy.DaemonSets() { if targetDs.Name == ds.Name && targetDs.Namespace == ds.Namespace { found = true break } } if !found { if key, err := controller.KeyFunc(ds); err == nil { r.expectations.DaemonSet.AddExpectedDeletion(r.kvKey, key) err := r.clientset.AppsV1().DaemonSets(ds.Namespace).Delete(context.Background(), ds.Name, deleteOptions) if err != nil { r.expectations.DaemonSet.DeletionObserved(r.kvKey, key) log.Log.Errorf("Failed to delete daemonset: %v", err) return err } } } } } // remove unused deployments objects = r.stores.DeploymentCache.List() for _, obj := range objects { if deployment, ok := obj.(*appsv1.Deployment); ok && deployment.DeletionTimestamp == nil { found := false for _, targetDeployment := range r.targetStrategy.Deployments() { if targetDeployment.Name == deployment.Name && targetDeployment.Namespace == deployment.Namespace { found = true break } } if !found { if key, err := controller.KeyFunc(deployment); err == nil { r.expectations.Deployment.AddExpectedDeletion(r.kvKey, key) err := r.clientset.AppsV1().Deployments(deployment.Namespace).Delete(context.Background(), deployment.Name, deleteOptions) if err != nil { r.expectations.Deployment.DeletionObserved(r.kvKey, key) log.Log.Errorf("Failed to delete deployment: %v", err) return err } } } } } // remove unused services objects = r.stores.ServiceCache.List() for _, obj := range objects { if svc, 
ok := obj.(*corev1.Service); ok && svc.DeletionTimestamp == nil { found := false for _, targetSvc := range r.targetStrategy.Services() { if targetSvc.Name == svc.Name && targetSvc.Namespace == svc.Namespace { found = true break } } if !found { if key, err := controller.KeyFunc(svc); err == nil { r.expectations.Service.AddExpectedDeletion(r.kvKey, key) err := r.clientset.CoreV1().Services(svc.Namespace).Delete(context.Background(), svc.Name, deleteOptions) if err != nil { r.expectations.Service.DeletionObserved(r.kvKey, key) log.Log.Errorf("Failed to delete service %+v: %v", svc, err) return err } } } } } // remove unused clusterrolebindings objects = r.stores.ClusterRoleBindingCache.List() for _, obj := range objects { if crb, ok := obj.(*rbacv1.ClusterRoleBinding); ok && crb.DeletionTimestamp == nil { found := false for _, targetCrb := range r.targetStrategy.ClusterRoleBindings() { if targetCrb.Name == crb.Name && targetCrb.Namespace == crb.Namespace { found = true break } } if !found { if key, err := controller.KeyFunc(crb); err == nil { r.expectations.ClusterRoleBinding.AddExpectedDeletion(r.kvKey, key) err := r.clientset.RbacV1().ClusterRoleBindings().Delete(context.Background(), crb.Name, deleteOptions) if err != nil { r.expectations.ClusterRoleBinding.DeletionObserved(r.kvKey, key) log.Log.Errorf("Failed to delete crb %+v: %v", crb, err) return err } } } } } // remove unused clusterroles objects = r.stores.ClusterRoleCache.List() for _, obj := range objects { if cr, ok := obj.(*rbacv1.ClusterRole); ok && cr.DeletionTimestamp == nil { found := false for _, targetCr := range r.targetStrategy.ClusterRoles() { if targetCr.Name == cr.Name && targetCr.Namespace == cr.Namespace { found = true break } } if !found { if key, err := controller.KeyFunc(cr); err == nil { r.expectations.ClusterRole.AddExpectedDeletion(r.kvKey, key) err := r.clientset.RbacV1().ClusterRoles().Delete(context.Background(), cr.Name, deleteOptions) if err != nil { r.expectations.ClusterRole.DeletionObserved(r.kvKey, key) log.Log.Errorf("Failed to delete cr %+v: %v", cr, err) return err } } } } } // remove unused rolebindings objects = r.stores.RoleBindingCache.List() for _, obj := range objects { if rb, ok := obj.(*rbacv1.RoleBinding); ok && rb.DeletionTimestamp == nil { found := false for _, targetRb := range r.targetStrategy.RoleBindings() { if targetRb.Name == rb.Name && targetRb.Namespace == rb.Namespace { found = true break } } if !found { if key, err := controller.KeyFunc(rb); err == nil { r.expectations.RoleBinding.AddExpectedDeletion(r.kvKey, key) err := r.clientset.RbacV1().RoleBindings(rb.Namespace).Delete(context.Background(), rb.Name, deleteOptions) if err != nil { r.expectations.RoleBinding.DeletionObserved(r.kvKey, key) log.Log.Errorf("Failed to delete rb %+v: %v", rb, err) return err } } } } } // remove unused roles objects = r.stores.RoleCache.List() for _, obj := range objects { if role, ok := obj.(*rbacv1.Role); ok && role.DeletionTimestamp == nil { found := false for _, targetR := range r.targetStrategy.Roles() { if targetR.Name == role.Name && targetR.Namespace == role.Namespace { found = true break } } if !found { if key, err := controller.KeyFunc(role); err == nil { r.expectations.Role.AddExpectedDeletion(r.kvKey, key) err := r.clientset.RbacV1().Roles(role.Namespace).Delete(context.Background(), role.Name, deleteOptions) if err != nil { r.expectations.Role.DeletionObserved(r.kvKey, key) log.Log.Errorf("Failed to delete role %+v: %v", role, err) return err } } } } } // remove unused serviceaccounts 
objects = r.stores.ServiceAccountCache.List() for _, obj := range objects { if sa, ok := obj.(*corev1.ServiceAccount); ok && sa.DeletionTimestamp == nil { found := false for _, targetSa := range r.targetStrategy.ServiceAccounts() { if targetSa.Name == sa.Name && targetSa.Namespace == sa.Namespace { found = true break } } if !found { if key, err := controller.KeyFunc(sa); err == nil { r.expectations.ServiceAccount.AddExpectedDeletion(r.kvKey, key) err := r.clientset.CoreV1().ServiceAccounts(sa.Namespace).Delete(context.Background(), sa.Name, deleteOptions) if err != nil { r.expectations.ServiceAccount.DeletionObserved(r.kvKey, key) log.Log.Errorf("Failed to delete serviceaccount %+v: %v", sa, err) return err } } } } } // remove unused sccs objects = r.stores.SCCCache.List() for _, obj := range objects { if scc, ok := obj.(*secv1.SecurityContextConstraints); ok && scc.DeletionTimestamp == nil { // informer watches all SCC objects, it cannot be changed because of kubevirt updates if !util.IsManagedByOperator(scc.GetLabels()) { continue } found := false for _, targetScc := range r.targetStrategy.SCCs() { if targetScc.Name == scc.Name { found = true break } } if !found { if key, err := controller.KeyFunc(scc); err == nil { r.expectations.SCC.AddExpectedDeletion(r.kvKey, key) err := r.clientset.SecClient().SecurityContextConstraints().Delete(context.Background(), scc.Name, deleteOptions) if err != nil { r.expectations.SCC.DeletionObserved(r.kvKey, key) log.Log.Errorf("Failed to delete SecurityContextConstraints %+v: %v", scc, err) return err } } } } } // remove unused prometheus rules objects = r.stores.PrometheusRuleCache.List() for _, obj := range objects { if cachePromRule, ok := obj.(*promv1.PrometheusRule); ok && cachePromRule.DeletionTimestamp == nil { found := false for _, targetPromRule := range r.targetStrategy.PrometheusRules() { if targetPromRule.Name == cachePromRule.Name && targetPromRule.Namespace == cachePromRule.Namespace { found = true break } } if !found { if key, err := controller.KeyFunc(cachePromRule); err == nil { r.expectations.PrometheusRule.AddExpectedDeletion(r.kvKey, key) err := r.clientset.PrometheusClient(). MonitoringV1(). PrometheusRules(cachePromRule.Namespace). Delete(context.Background(), cachePromRule.Name, deleteOptions) if err != nil { r.expectations.PrometheusRule.DeletionObserved(r.kvKey, key) log.Log.Errorf("Failed to delete prometheusrule %+v: %v", cachePromRule, err) return err } } } } } // remove unused prometheus serviceMonitor obejcts objects = r.stores.ServiceMonitorCache.List() for _, obj := range objects { if cacheServiceMonitor, ok := obj.(*promv1.ServiceMonitor); ok && cacheServiceMonitor.DeletionTimestamp == nil { found := false for _, targetServiceMonitor := range r.targetStrategy.ServiceMonitors() { if targetServiceMonitor.Name == cacheServiceMonitor.Name && targetServiceMonitor.Namespace == cacheServiceMonitor.Namespace { found = true break } } if !found { if key, err := controller.KeyFunc(cacheServiceMonitor); err == nil { r.expectations.ServiceMonitor.AddExpectedDeletion(r.kvKey, key) err := r.clientset.PrometheusClient(). MonitoringV1(). ServiceMonitors(cacheServiceMonitor.Namespace). 
Delete(context.Background(), cacheServiceMonitor.Name, deleteOptions)
                    if err != nil {
                        r.expectations.ServiceMonitor.DeletionObserved(r.kvKey, key)
                        log.Log.Errorf("Failed to delete prometheusServiceMonitor %+v: %v", cacheServiceMonitor, err)
                        return err
                    }
                }
            }
        }
    }

    managedByVirtOperatorLabelSet := labels.Set{
        v1.AppComponentLabel: GetAppComponent(r.kv),
        v1.ManagedByLabel:    v1.ManagedByLabelOperatorValue,
    }

    // remove unused instancetype objects
    instancetypes, err := r.clientset.VirtualMachineClusterInstancetype().List(context.Background(), metav1.ListOptions{LabelSelector: managedByVirtOperatorLabelSet.String()})
    if err != nil {
        log.Log.Errorf("Failed to get instancetypes: %v", err)
    }
    for _, instancetype := range instancetypes.Items {
        if instancetype.DeletionTimestamp == nil {
            found := false
            for _, targetInstancetype := range r.targetStrategy.Instancetypes() {
                if targetInstancetype.Name == instancetype.Name {
                    found = true
                    break
                }
            }
            if !found {
                if err := r.clientset.VirtualMachineClusterInstancetype().Delete(context.Background(), instancetype.Name, metav1.DeleteOptions{}); err != nil {
                    log.Log.Errorf("Failed to delete instancetype %+v: %v", instancetype, err)
                    return err
                }
            }
        }
    }

    // remove unused preference objects
    preferences, err := r.clientset.VirtualMachineClusterPreference().List(context.Background(), metav1.ListOptions{LabelSelector: managedByVirtOperatorLabelSet.String()})
    if err != nil {
        log.Log.Errorf("Failed to get preferences: %v", err)
    }
    for _, preference := range preferences.Items {
        if preference.DeletionTimestamp == nil {
            found := false
            for _, targetPreference := range r.targetStrategy.Preferences() {
                if targetPreference.Name == preference.Name {
                    found = true
                    break
                }
            }
            if !found {
                if err := r.clientset.VirtualMachineClusterPreference().Delete(context.Background(), preference.Name, metav1.DeleteOptions{}); err != nil {
                    log.Log.Errorf("Failed to delete preference %+v: %v", preference, err)
                    return err
                }
            }
        }
    }

    return nil
}

func (r *Reconciler) isFeatureGateEnabled(featureGate string) bool {
    if r.kv.Spec.Configuration.DeveloperConfiguration == nil {
        return false
    }
    for _, fg := range r.kv.Spec.Configuration.DeveloperConfiguration.FeatureGates {
        if fg == featureGate {
            return true
        }
    }
    return false
}

func (r *Reconciler) exportProxyEnabled() bool {
    return r.isFeatureGateEnabled(featuregate.VMExportGate)
}

func (r *Reconciler) commonInstancetypesDeploymentEnabled() bool {
    config := r.kv.Spec.Configuration.CommonInstancetypesDeployment
    if config != nil && config.Enabled != nil {
        return *config.Enabled
    }
    return true
}

func getInstallStrategyAnnotations(meta *metav1.ObjectMeta) (imageTag, imageRegistry, id string, ok bool) {
    var exists bool
    // ok starts out true and is only cleared when one of the annotations is missing
    ok = true
    imageTag, exists = meta.Annotations[v1.InstallStrategyVersionAnnotation]
    if !exists {
        ok = false
    }
    imageRegistry, exists = meta.Annotations[v1.InstallStrategyRegistryAnnotation]
    if !exists {
        ok = false
    }
    id, exists = meta.Annotations[v1.InstallStrategyIdentifierAnnotation]
    if !exists {
        ok = false
    }
    return
}
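// Editorial example (illustrative sketch): the behaviour of shouldTakeUpdatePath as
// implemented above - the "v" prefix is stripped, a strictly newer target is treated as
// an update, an equal or older target is not, values that fail semver parsing fall back
// to the update path, and an empty current version means a fresh install.
//
//    shouldTakeUpdatePath("v1.2.0", "v1.1.0") // true  (newer target)
//    shouldTakeUpdatePath("v1.1.0", "v1.1.0") // false (same version)
//    shouldTakeUpdatePath("v1.0.0", "v1.1.0") // false (rollback)
//    shouldTakeUpdatePath("devel", "v1.1.0")  // true  (not semver, default to update path)
//    shouldTakeUpdatePath("v1.2.0", "")       // false (no current version)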
package apply import ( "context" "fmt" routev1 "github.com/openshift/api/route/v1" "github.com/openshift/library-go/pkg/operator/resource/resourcemerge" "k8s.io/apimachinery/pkg/api/equality" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "kubevirt.io/client-go/log" "kubevirt.io/kubevirt/pkg/apimachinery/patch" "kubevirt.io/kubevirt/pkg/controller" "kubevirt.io/kubevirt/pkg/virt-operator/resource/generate/components" ) func (r *Reconciler) createOrUpdateRoutes(caBundle []byte) error { if !r.config.IsOnOpenshift { return nil } for _, route := range r.targetStrategy.Routes() { switch route.Name { case components.VirtExportProxyName: return r.syncExportProxyRoute(route.DeepCopy(), caBundle) default: return fmt.Errorf("unknown route %s", route.Name) } } return nil } func (r *Reconciler) syncExportProxyRoute(route *routev1.Route, caBundle []byte) error { if !r.exportProxyEnabled() { return r.deleteRoute(route) } return r.syncRoute(route, caBundle) } func (r *Reconciler) syncRoute(route *routev1.Route, caBundle []byte) error { version, imageRegistry, id := getTargetVersionRegistryID(r.kv) injectOperatorMetadata(r.kv, &route.ObjectMeta, version, imageRegistry, id, true) route.Spec.TLS.DestinationCACertificate = string(caBundle) var cachedRoute *routev1.Route obj, exists, err := r.stores.RouteCache.Get(route) if err != nil { return err } if !exists { r.expectations.Route.RaiseExpectations(r.kvKey, 1, 0) _, err := r.clientset.RouteClient().Routes(route.Namespace).Create(context.Background(), route, metav1.CreateOptions{}) if err != nil { r.expectations.Route.LowerExpectations(r.kvKey, 1, 0) return fmt.Errorf("unable to create route %+v: %v", route, err) } return nil } cachedRoute = obj.(*routev1.Route).DeepCopy() modified := resourcemerge.BoolPtr(false) resourcemerge.EnsureObjectMeta(modified, &cachedRoute.ObjectMeta, route.ObjectMeta) kindSame := equality.Semantic.DeepEqual(cachedRoute.Spec.To.Kind, route.Spec.To.Kind) nameSame := equality.Semantic.DeepEqual(cachedRoute.Spec.To.Name, route.Spec.To.Name) terminationSame := equality.Semantic.DeepEqual(cachedRoute.Spec.TLS.Termination, route.Spec.TLS.Termination) certSame := equality.Semantic.DeepEqual(cachedRoute.Spec.TLS.DestinationCACertificate, route.Spec.TLS.DestinationCACertificate) if !*modified && kindSame && nameSame && terminationSame && certSame { log.Log.V(4).Infof("route %v is up-to-date", route.GetName()) return nil } patchBytes, err := patch.New(getPatchWithObjectMetaAndSpec([]patch.PatchOption{}, &route.ObjectMeta, route.Spec)...).GeneratePayload() if err != nil { return err } _, err = r.clientset.RouteClient().Routes(route.Namespace).Patch(context.Background(), route.Name, types.JSONPatchType, patchBytes, metav1.PatchOptions{}) if err != nil { return fmt.Errorf("unable to patch route %+v: %v", route, err) } log.Log.V(4).Infof("route %v updated", route.GetName()) return nil } func (r *Reconciler) deleteRoute(route *routev1.Route) error { obj, exists, err := r.stores.RouteCache.Get(route) if err != nil { return err } if !exists || obj.(*routev1.Route).DeletionTimestamp != nil { return nil } key, err := controller.KeyFunc(route) if err != nil { return err } r.expectations.Route.AddExpectedDeletion(r.kvKey, key) if err := r.clientset.RouteClient().Routes(route.Namespace).Delete(context.Background(), route.Name, metav1.DeleteOptions{}); err != nil { r.expectations.Route.DeletionObserved(r.kvKey, key) return err } return nil }
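// Editorial example (illustrative sketch; values abbreviated): the payload that
// syncRoute submits is built by getPatchWithObjectMetaAndSpec and conceptually expands
// to a JSON patch of the following form - three "add" operations for labels,
// annotations and ownerReferences plus a "replace" of the whole spec:
//
//    [
//        { "op": "add", "path": "/metadata/labels", "value": { ... } },
//        { "op": "add", "path": "/metadata/annotations", "value": { ... } },
//        { "op": "add", "path": "/metadata/ownerReferences", "value": [ ... ] },
//        { "op": "replace", "path": "/spec", "value": { ... } }
//    ]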
package apply import ( "context" "encoding/json" "fmt" secv1 "github.com/openshift/api/security/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "kubevirt.io/client-go/log" "kubevirt.io/kubevirt/pkg/virt-operator/resource/generate/rbac" ) func (r *Reconciler) createOrUpdateSCC() error { sec := r.clientset.SecClient() if !r.config.IsOnOpenshift { return nil } version, imageRegistry, id := getTargetVersionRegistryID(r.kv) for _, scc := range r.targetStrategy.SCCs() { var cachedSCC *secv1.SecurityContextConstraints scc := scc.DeepCopy() obj, exists, _ := r.stores.SCCCache.GetByKey(scc.Name) if exists { cachedSCC = obj.(*secv1.SecurityContextConstraints) } injectOperatorMetadata(r.kv, &scc.ObjectMeta, version, imageRegistry, id, true) if !exists { r.expectations.SCC.RaiseExpectations(r.kvKey, 1, 0) _, err := sec.SecurityContextConstraints().Create(context.Background(), scc, metav1.CreateOptions{}) if err != nil { r.expectations.SCC.LowerExpectations(r.kvKey, 1, 0) return fmt.Errorf("unable to create SCC %+v: %v", scc, err) } log.Log.V(2).Infof("SCC %v created", scc.Name) } else if !objectMatchesVersion(&cachedSCC.ObjectMeta, version, imageRegistry, id, r.kv.GetGeneration()) { scc.ObjectMeta = *cachedSCC.ObjectMeta.DeepCopy() injectOperatorMetadata(r.kv, &scc.ObjectMeta, version, imageRegistry, id, true) _, err := sec.SecurityContextConstraints().Update(context.Background(), scc, metav1.UpdateOptions{}) if err != nil { return fmt.Errorf("Unable to update %s SecurityContextConstraints", scc.Name) } log.Log.V(2).Infof("SecurityContextConstraints %s updated", scc.Name) } else { log.Log.V(4).Infof("SCC %s is up to date", scc.Name) } } return nil } func (r *Reconciler) removeKvServiceAccountsFromDefaultSCC(targetNamespace string) error { var remainedUsersList []string SCCObj, exists, err := r.stores.SCCCache.GetByKey("privileged") if err != nil { return err } else if !exists { return nil } SCC, ok := SCCObj.(*secv1.SecurityContextConstraints) if !ok { return fmt.Errorf("couldn't cast object to SecurityContextConstraints: %+v", SCCObj) } modified := false kvServiceAccounts := rbac.GetKubevirtComponentsServiceAccounts(targetNamespace) for _, acc := range SCC.Users { if _, ok := kvServiceAccounts[acc]; !ok { remainedUsersList = append(remainedUsersList, acc) } else { modified = true } } if modified { oldUserBytes, err := json.Marshal(SCC.Users) if err != nil { return err } userBytes, err := json.Marshal(remainedUsersList) if err != nil { return err } test := fmt.Sprintf(`{ "op": "test", "path": "/users", "value": %s }`, string(oldUserBytes)) patch := fmt.Sprintf(`{ "op": "replace", "path": "/users", "value": %s }`, string(userBytes)) _, err = r.clientset.SecClient().SecurityContextConstraints().Patch(context.Background(), "privileged", types.JSONPatchType, []byte(fmt.Sprintf("[ %s, %s ]", test, patch)), metav1.PatchOptions{}) if err != nil { return fmt.Errorf("unable to patch scc: %v", err) } } return nil }
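// Editorial example (illustrative sketch; the user names below are hypothetical): the
// payload that removeKvServiceAccountsFromDefaultSCC sends to the "privileged" SCC is a
// two-operation JSON patch - the "test" operation fails the request if /users changed
// concurrently, and the "replace" operation keeps only the non-KubeVirt entries:
//
//    [
//        { "op": "test", "path": "/users", "value": ["system:admin", "system:serviceaccount:kubevirt:kubevirt-handler"] },
//        { "op": "replace", "path": "/users", "value": ["system:admin"] }
//    ]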
package apply

func (r *Reconciler) updateKubeVirtSystem(controllerDeploymentsRolledOver bool) (bool, error) {
    // UPDATE PATH IS
    // 1. daemonsets - ensures all compute nodes are updated to handle new features
    // 2. wait for daemonsets to roll over
    // 3. controllers - ensures control plane is ready for new features
    // 4. wait for controllers to roll over
    // 5. apiserver - toggles on new features.

    // create/update Daemonsets
    for _, daemonSet := range r.targetStrategy.DaemonSets() {
        finished, err := r.syncDaemonSet(daemonSet)
        if !finished || err != nil {
            return false, err
        }
    }

    // create/update Controller Deployments
    for _, deployment := range r.targetStrategy.ControllerDeployments() {
        deployment, err := r.syncDeployment(deployment)
        if err != nil {
            return false, err
        }
        err = r.syncPodDisruptionBudgetForDeployment(deployment)
        if err != nil {
            return false, err
        }
    }

    // wait for controllers
    if !controllerDeploymentsRolledOver {
        // not rolled out yet
        return false, nil
    }

    // create/update ExportProxy Deployments
    for _, deployment := range r.targetStrategy.ExportProxyDeployments() {
        if r.exportProxyEnabled() {
            deployment, err := r.syncDeployment(deployment)
            if err != nil {
                return false, err
            }
            err = r.syncPodDisruptionBudgetForDeployment(deployment)
            if err != nil {
                return false, err
            }
        } else if err := r.deleteDeployment(deployment); err != nil {
            return false, err
        }
    }

    // create/update API Deployments
    for _, deployment := range r.targetStrategy.ApiDeployments() {
        deployment, err := r.syncDeployment(deployment)
        if err != nil {
            return false, err
        }
        err = r.syncPodDisruptionBudgetForDeployment(deployment)
        if err != nil {
            return false, err
        }
    }

    return true, nil
}
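// Illustrative sketch, not part of the operator sources: it mimics the staged
// rollout in updateKubeVirtSystem above - early steps (daemonsets, controller
// deployments) are synced on every reconcile pass, but the late steps (API
// deployments) are only touched once the controller deployments have rolled
// over, so repeated passes converge in waves. All names here are invented.
package main

import "fmt"

type step struct {
    name string
    sync func() error
}

// applyStaged syncs the early steps on every call and returns false (not done)
// until the gate is open; only then does it sync the late steps.
func applyStaged(early, late []step, gateOpen bool) (bool, error) {
    for _, s := range early {
        if err := s.sync(); err != nil {
            return false, fmt.Errorf("sync %s: %w", s.name, err)
        }
    }
    if !gateOpen {
        return false, nil // wait for the next reconcile pass
    }
    for _, s := range late {
        if err := s.sync(); err != nil {
            return false, fmt.Errorf("sync %s: %w", s.name, err)
        }
    }
    return true, nil
}

func main() {
    noop := func() error { return nil }
    early := []step{{"virt-handler daemonset", noop}, {"virt-controller deployment", noop}}
    late := []step{{"virt-api deployment", noop}}

    done, _ := applyStaged(early, late, false)
    fmt.Println(done) // false: controllers not rolled over yet
    done, _ = applyStaged(early, late, true)
    fmt.Println(done) // true: API servers updated as well
}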
// Automatically generated by MockGen. DO NOT EDIT! // Source: strategy.go package install import ( context "context" gomock "github.com/golang/mock/gomock" v1 "github.com/openshift/api/route/v1" v10 "github.com/openshift/api/security/v1" v11 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" v12 "k8s.io/api/admissionregistration/v1" v13 "k8s.io/api/apps/v1" v14 "k8s.io/api/core/v1" v15 "k8s.io/api/rbac/v1" v16 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" v17 "k8s.io/apimachinery/pkg/apis/meta/v1" types "k8s.io/apimachinery/pkg/types" v18 "k8s.io/kube-aggregator/pkg/apis/apiregistration/v1" v1beta1 "kubevirt.io/api/instancetype/v1beta1" ) // Mock of APIServiceInterface interface type MockAPIServiceInterface struct { ctrl *gomock.Controller recorder *_MockAPIServiceInterfaceRecorder } // Recorder for MockAPIServiceInterface (not exported) type _MockAPIServiceInterfaceRecorder struct { mock *MockAPIServiceInterface } func NewMockAPIServiceInterface(ctrl *gomock.Controller) *MockAPIServiceInterface { mock := &MockAPIServiceInterface{ctrl: ctrl} mock.recorder = &_MockAPIServiceInterfaceRecorder{mock} return mock } func (_m *MockAPIServiceInterface) EXPECT() *_MockAPIServiceInterfaceRecorder { return _m.recorder } func (_m *MockAPIServiceInterface) Get(ctx context.Context, name string, options v17.GetOptions) (*v18.APIService, error) { ret := _m.ctrl.Call(_m, "Get", ctx, name, options) ret0, _ := ret[0].(*v18.APIService) ret1, _ := ret[1].(error) return ret0, ret1 } func (_mr *_MockAPIServiceInterfaceRecorder) Get(arg0, arg1, arg2 interface{}) *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "Get", arg0, arg1, arg2) } func (_m *MockAPIServiceInterface) Create(ctx context.Context, apiService *v18.APIService, opts v17.CreateOptions) (*v18.APIService, error) { ret := _m.ctrl.Call(_m, "Create", ctx, apiService, opts) ret0, _ := ret[0].(*v18.APIService) ret1, _ := ret[1].(error) return ret0, ret1 } func (_mr *_MockAPIServiceInterfaceRecorder) Create(arg0, arg1, arg2 interface{}) *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "Create", arg0, arg1, arg2) } func (_m *MockAPIServiceInterface) Delete(ctx context.Context, name string, options v17.DeleteOptions) error { ret := _m.ctrl.Call(_m, "Delete", ctx, name, options) ret0, _ := ret[0].(error) return ret0 } func (_mr *_MockAPIServiceInterfaceRecorder) Delete(arg0, arg1, arg2 interface{}) *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "Delete", arg0, arg1, arg2) } func (_m *MockAPIServiceInterface) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v17.PatchOptions, subresources ...string) (*v18.APIService, error) { _s := []interface{}{ctx, name, pt, data, opts} for _, _x := range subresources { _s = append(_s, _x) } ret := _m.ctrl.Call(_m, "Patch", _s...) ret0, _ := ret[0].(*v18.APIService) ret1, _ := ret[1].(error) return ret0, ret1 } func (_mr *_MockAPIServiceInterfaceRecorder) Patch(arg0, arg1, arg2, arg3, arg4 interface{}, arg5 ...interface{}) *gomock.Call { _s := append([]interface{}{arg0, arg1, arg2, arg3, arg4}, arg5...) return _mr.mock.ctrl.RecordCall(_mr.mock, "Patch", _s...) 
} // Mock of StrategyInterface interface type MockStrategyInterface struct { ctrl *gomock.Controller recorder *_MockStrategyInterfaceRecorder } // Recorder for MockStrategyInterface (not exported) type _MockStrategyInterfaceRecorder struct { mock *MockStrategyInterface } func NewMockStrategyInterface(ctrl *gomock.Controller) *MockStrategyInterface { mock := &MockStrategyInterface{ctrl: ctrl} mock.recorder = &_MockStrategyInterfaceRecorder{mock} return mock } func (_m *MockStrategyInterface) EXPECT() *_MockStrategyInterfaceRecorder { return _m.recorder } func (_m *MockStrategyInterface) ServiceAccounts() []*v14.ServiceAccount { ret := _m.ctrl.Call(_m, "ServiceAccounts") ret0, _ := ret[0].([]*v14.ServiceAccount) return ret0 } func (_mr *_MockStrategyInterfaceRecorder) ServiceAccounts() *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "ServiceAccounts") } func (_m *MockStrategyInterface) ClusterRoles() []*v15.ClusterRole { ret := _m.ctrl.Call(_m, "ClusterRoles") ret0, _ := ret[0].([]*v15.ClusterRole) return ret0 } func (_mr *_MockStrategyInterfaceRecorder) ClusterRoles() *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "ClusterRoles") } func (_m *MockStrategyInterface) ClusterRoleBindings() []*v15.ClusterRoleBinding { ret := _m.ctrl.Call(_m, "ClusterRoleBindings") ret0, _ := ret[0].([]*v15.ClusterRoleBinding) return ret0 } func (_mr *_MockStrategyInterfaceRecorder) ClusterRoleBindings() *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "ClusterRoleBindings") } func (_m *MockStrategyInterface) Roles() []*v15.Role { ret := _m.ctrl.Call(_m, "Roles") ret0, _ := ret[0].([]*v15.Role) return ret0 } func (_mr *_MockStrategyInterfaceRecorder) Roles() *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "Roles") } func (_m *MockStrategyInterface) RoleBindings() []*v15.RoleBinding { ret := _m.ctrl.Call(_m, "RoleBindings") ret0, _ := ret[0].([]*v15.RoleBinding) return ret0 } func (_mr *_MockStrategyInterfaceRecorder) RoleBindings() *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "RoleBindings") } func (_m *MockStrategyInterface) Services() []*v14.Service { ret := _m.ctrl.Call(_m, "Services") ret0, _ := ret[0].([]*v14.Service) return ret0 } func (_mr *_MockStrategyInterfaceRecorder) Services() *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "Services") } func (_m *MockStrategyInterface) Deployments() []*v13.Deployment { ret := _m.ctrl.Call(_m, "Deployments") ret0, _ := ret[0].([]*v13.Deployment) return ret0 } func (_mr *_MockStrategyInterfaceRecorder) Deployments() *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "Deployments") } func (_m *MockStrategyInterface) ApiDeployments() []*v13.Deployment { ret := _m.ctrl.Call(_m, "ApiDeployments") ret0, _ := ret[0].([]*v13.Deployment) return ret0 } func (_mr *_MockStrategyInterfaceRecorder) ApiDeployments() *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "ApiDeployments") } func (_m *MockStrategyInterface) ControllerDeployments() []*v13.Deployment { ret := _m.ctrl.Call(_m, "ControllerDeployments") ret0, _ := ret[0].([]*v13.Deployment) return ret0 } func (_mr *_MockStrategyInterfaceRecorder) ControllerDeployments() *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "ControllerDeployments") } func (_m *MockStrategyInterface) ExportProxyDeployments() []*v13.Deployment { ret := _m.ctrl.Call(_m, "ExportProxyDeployments") ret0, _ := ret[0].([]*v13.Deployment) return ret0 } func (_mr *_MockStrategyInterfaceRecorder) ExportProxyDeployments() *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, 
"ExportProxyDeployments") } func (_m *MockStrategyInterface) DaemonSets() []*v13.DaemonSet { ret := _m.ctrl.Call(_m, "DaemonSets") ret0, _ := ret[0].([]*v13.DaemonSet) return ret0 } func (_mr *_MockStrategyInterfaceRecorder) DaemonSets() *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "DaemonSets") } func (_m *MockStrategyInterface) ValidatingWebhookConfigurations() []*v12.ValidatingWebhookConfiguration { ret := _m.ctrl.Call(_m, "ValidatingWebhookConfigurations") ret0, _ := ret[0].([]*v12.ValidatingWebhookConfiguration) return ret0 } func (_mr *_MockStrategyInterfaceRecorder) ValidatingWebhookConfigurations() *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "ValidatingWebhookConfigurations") } func (_m *MockStrategyInterface) MutatingWebhookConfigurations() []*v12.MutatingWebhookConfiguration { ret := _m.ctrl.Call(_m, "MutatingWebhookConfigurations") ret0, _ := ret[0].([]*v12.MutatingWebhookConfiguration) return ret0 } func (_mr *_MockStrategyInterfaceRecorder) MutatingWebhookConfigurations() *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "MutatingWebhookConfigurations") } func (_m *MockStrategyInterface) APIServices() []*v18.APIService { ret := _m.ctrl.Call(_m, "APIServices") ret0, _ := ret[0].([]*v18.APIService) return ret0 } func (_mr *_MockStrategyInterfaceRecorder) APIServices() *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "APIServices") } func (_m *MockStrategyInterface) CertificateSecrets() []*v14.Secret { ret := _m.ctrl.Call(_m, "CertificateSecrets") ret0, _ := ret[0].([]*v14.Secret) return ret0 } func (_mr *_MockStrategyInterfaceRecorder) CertificateSecrets() *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "CertificateSecrets") } func (_m *MockStrategyInterface) SCCs() []*v10.SecurityContextConstraints { ret := _m.ctrl.Call(_m, "SCCs") ret0, _ := ret[0].([]*v10.SecurityContextConstraints) return ret0 } func (_mr *_MockStrategyInterfaceRecorder) SCCs() *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "SCCs") } func (_m *MockStrategyInterface) ServiceMonitors() []*v11.ServiceMonitor { ret := _m.ctrl.Call(_m, "ServiceMonitors") ret0, _ := ret[0].([]*v11.ServiceMonitor) return ret0 } func (_mr *_MockStrategyInterfaceRecorder) ServiceMonitors() *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "ServiceMonitors") } func (_m *MockStrategyInterface) PrometheusRules() []*v11.PrometheusRule { ret := _m.ctrl.Call(_m, "PrometheusRules") ret0, _ := ret[0].([]*v11.PrometheusRule) return ret0 } func (_mr *_MockStrategyInterfaceRecorder) PrometheusRules() *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "PrometheusRules") } func (_m *MockStrategyInterface) ConfigMaps() []*v14.ConfigMap { ret := _m.ctrl.Call(_m, "ConfigMaps") ret0, _ := ret[0].([]*v14.ConfigMap) return ret0 } func (_mr *_MockStrategyInterfaceRecorder) ConfigMaps() *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "ConfigMaps") } func (_m *MockStrategyInterface) CRDs() []*v16.CustomResourceDefinition { ret := _m.ctrl.Call(_m, "CRDs") ret0, _ := ret[0].([]*v16.CustomResourceDefinition) return ret0 } func (_mr *_MockStrategyInterfaceRecorder) CRDs() *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "CRDs") } func (_m *MockStrategyInterface) Routes() []*v1.Route { ret := _m.ctrl.Call(_m, "Routes") ret0, _ := ret[0].([]*v1.Route) return ret0 } func (_mr *_MockStrategyInterfaceRecorder) Routes() *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "Routes") } func (_m *MockStrategyInterface) Instancetypes() 
[]*v1beta1.VirtualMachineClusterInstancetype { ret := _m.ctrl.Call(_m, "Instancetypes") ret0, _ := ret[0].([]*v1beta1.VirtualMachineClusterInstancetype) return ret0 } func (_mr *_MockStrategyInterfaceRecorder) Instancetypes() *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "Instancetypes") } func (_m *MockStrategyInterface) Preferences() []*v1beta1.VirtualMachineClusterPreference { ret := _m.ctrl.Call(_m, "Preferences") ret0, _ := ret[0].([]*v1beta1.VirtualMachineClusterPreference) return ret0 } func (_mr *_MockStrategyInterfaceRecorder) Preferences() *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "Preferences") } func (_m *MockStrategyInterface) ValidatingAdmissionPolicyBindings() []*v12.ValidatingAdmissionPolicyBinding { ret := _m.ctrl.Call(_m, "ValidatingAdmissionPolicyBindings") ret0, _ := ret[0].([]*v12.ValidatingAdmissionPolicyBinding) return ret0 } func (_mr *_MockStrategyInterfaceRecorder) ValidatingAdmissionPolicyBindings() *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "ValidatingAdmissionPolicyBindings") } func (_m *MockStrategyInterface) ValidatingAdmissionPolicies() []*v12.ValidatingAdmissionPolicy { ret := _m.ctrl.Call(_m, "ValidatingAdmissionPolicies") ret0, _ := ret[0].([]*v12.ValidatingAdmissionPolicy) return ret0 } func (_mr *_MockStrategyInterfaceRecorder) ValidatingAdmissionPolicies() *gomock.Call { return _mr.mock.ctrl.RecordCall(_mr.mock, "ValidatingAdmissionPolicies") }
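// Illustrative sketch, not part of the operator sources: it shows how the
// generated MockStrategyInterface is typically consumed in tests - program an
// expectation through EXPECT(), then hand the mock to code that only sees
// StrategyInterface. The test name and the empty return value are chosen for
// this example only.
package install

import (
    "testing"

    "github.com/golang/mock/gomock"
    appsv1 "k8s.io/api/apps/v1"
)

func TestMockStrategyApiDeployments(t *testing.T) {
    ctrl := gomock.NewController(t)
    defer ctrl.Finish()

    strategy := NewMockStrategyInterface(ctrl)
    // Expect exactly one call to ApiDeployments and return an empty slice.
    strategy.EXPECT().ApiDeployments().Return([]*appsv1.Deployment{})

    var s StrategyInterface = strategy
    if got := s.ApiDeployments(); len(got) != 0 {
        t.Fatalf("expected no API deployments, got %d", len(got))
    }
}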
package install /* * This file is part of the KubeVirt project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * Copyright 2019 Red Hat, Inc. * */ import ( "bufio" "bytes" "compress/gzip" "context" "encoding/base64" "fmt" "io" "strings" "github.com/golang/glog" routev1 "github.com/openshift/api/route/v1" secv1 "github.com/openshift/api/security/v1" promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" admissionregistrationv1 "k8s.io/api/admissionregistration/v1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" ext "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions" extv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" extv1beta1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" k8coresv1 "k8s.io/client-go/kubernetes/typed/core/v1" apiregv1 "k8s.io/kube-aggregator/pkg/apis/apiregistration/v1" "sigs.k8s.io/yaml" v1 "kubevirt.io/api/core/v1" instancetypev1beta1 "kubevirt.io/api/instancetype/v1beta1" "kubevirt.io/client-go/kubecli" "kubevirt.io/client-go/log" "kubevirt.io/kubevirt/pkg/monitoring/rules" "kubevirt.io/kubevirt/pkg/virt-operator/resource/generate/components" "kubevirt.io/kubevirt/pkg/virt-operator/resource/generate/rbac" operatorutil "kubevirt.io/kubevirt/pkg/virt-operator/util" marshalutil "kubevirt.io/kubevirt/tools/util" ) const ManifestsEncodingGzipBase64 = "gzip+base64" //go:generate mockgen -source $GOFILE -imports "libvirt=libvirt.org/go/libvirt" -package=$GOPACKAGE -destination=generated_mock_$GOFILE type APIServiceInterface interface { Get(ctx context.Context, name string, options metav1.GetOptions) (*apiregv1.APIService, error) Create(ctx context.Context, apiService *apiregv1.APIService, opts metav1.CreateOptions) (*apiregv1.APIService, error) Delete(ctx context.Context, name string, options metav1.DeleteOptions) error Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result *apiregv1.APIService, err error) } type StrategyInterface interface { ServiceAccounts() []*corev1.ServiceAccount ClusterRoles() []*rbacv1.ClusterRole ClusterRoleBindings() []*rbacv1.ClusterRoleBinding Roles() []*rbacv1.Role RoleBindings() []*rbacv1.RoleBinding Services() []*corev1.Service Deployments() []*appsv1.Deployment ApiDeployments() []*appsv1.Deployment ControllerDeployments() []*appsv1.Deployment ExportProxyDeployments() []*appsv1.Deployment DaemonSets() []*appsv1.DaemonSet ValidatingWebhookConfigurations() []*admissionregistrationv1.ValidatingWebhookConfiguration MutatingWebhookConfigurations() []*admissionregistrationv1.MutatingWebhookConfiguration APIServices() []*apiregv1.APIService CertificateSecrets() []*corev1.Secret SCCs() []*secv1.SecurityContextConstraints ServiceMonitors() []*promv1.ServiceMonitor PrometheusRules() []*promv1.PrometheusRule ConfigMaps() []*corev1.ConfigMap CRDs() 
[]*extv1.CustomResourceDefinition Routes() []*routev1.Route Instancetypes() []*instancetypev1beta1.VirtualMachineClusterInstancetype Preferences() []*instancetypev1beta1.VirtualMachineClusterPreference ValidatingAdmissionPolicyBindings() []*admissionregistrationv1.ValidatingAdmissionPolicyBinding ValidatingAdmissionPolicies() []*admissionregistrationv1.ValidatingAdmissionPolicy } type Strategy struct { serviceAccounts []*corev1.ServiceAccount clusterRoles []*rbacv1.ClusterRole clusterRoleBindings []*rbacv1.ClusterRoleBinding roles []*rbacv1.Role roleBindings []*rbacv1.RoleBinding crds []*extv1.CustomResourceDefinition services []*corev1.Service deployments []*appsv1.Deployment daemonSets []*appsv1.DaemonSet validatingWebhookConfigurations []*admissionregistrationv1.ValidatingWebhookConfiguration mutatingWebhookConfigurations []*admissionregistrationv1.MutatingWebhookConfiguration apiServices []*apiregv1.APIService certificateSecrets []*corev1.Secret sccs []*secv1.SecurityContextConstraints serviceMonitors []*promv1.ServiceMonitor prometheusRules []*promv1.PrometheusRule configMaps []*corev1.ConfigMap routes []*routev1.Route instancetypes []*instancetypev1beta1.VirtualMachineClusterInstancetype preferences []*instancetypev1beta1.VirtualMachineClusterPreference validatingAdmissionPolicyBindings []*admissionregistrationv1.ValidatingAdmissionPolicyBinding validatingAdmissionPolicies []*admissionregistrationv1.ValidatingAdmissionPolicy } func (ins *Strategy) ServiceAccounts() []*corev1.ServiceAccount { return ins.serviceAccounts } func (ins *Strategy) ClusterRoles() []*rbacv1.ClusterRole { return ins.clusterRoles } func (ins *Strategy) ClusterRoleBindings() []*rbacv1.ClusterRoleBinding { return ins.clusterRoleBindings } func (ins *Strategy) Roles() []*rbacv1.Role { return ins.roles } func (ins *Strategy) RoleBindings() []*rbacv1.RoleBinding { return ins.roleBindings } func (ins *Strategy) Services() []*corev1.Service { return ins.services } func (ins *Strategy) Deployments() []*appsv1.Deployment { return ins.deployments } func (ins *Strategy) ApiDeployments() []*appsv1.Deployment { var deployments []*appsv1.Deployment for _, deployment := range ins.deployments { if !strings.Contains(deployment.Name, "virt-api") { continue } deployments = append(deployments, deployment) } return deployments } func (ins *Strategy) ControllerDeployments() []*appsv1.Deployment { var deployments []*appsv1.Deployment for _, deployment := range ins.deployments { if !strings.Contains(deployment.Name, "virt-controller") { continue } deployments = append(deployments, deployment) } return deployments } func (ins *Strategy) ExportProxyDeployments() []*appsv1.Deployment { var deployments []*appsv1.Deployment for _, deployment := range ins.deployments { if !strings.Contains(deployment.Name, "virt-exportproxy") { continue } deployments = append(deployments, deployment) } return deployments } func (ins *Strategy) DaemonSets() []*appsv1.DaemonSet { return ins.daemonSets } func (ins *Strategy) ValidatingWebhookConfigurations() []*admissionregistrationv1.ValidatingWebhookConfiguration { return ins.validatingWebhookConfigurations } func (ins *Strategy) MutatingWebhookConfigurations() []*admissionregistrationv1.MutatingWebhookConfiguration { return ins.mutatingWebhookConfigurations } func (ins *Strategy) APIServices() []*apiregv1.APIService { return ins.apiServices } func (ins *Strategy) CertificateSecrets() []*corev1.Secret { return ins.certificateSecrets } func (ins *Strategy) SCCs() []*secv1.SecurityContextConstraints { return 
ins.sccs } func (ins *Strategy) ServiceMonitors() []*promv1.ServiceMonitor { return ins.serviceMonitors } func (ins *Strategy) PrometheusRules() []*promv1.PrometheusRule { return ins.prometheusRules } func (ins *Strategy) ConfigMaps() []*corev1.ConfigMap { return ins.configMaps } func (ins *Strategy) CRDs() []*extv1.CustomResourceDefinition { return ins.crds } func (ins *Strategy) Routes() []*routev1.Route { return ins.routes } func (ins *Strategy) Instancetypes() []*instancetypev1beta1.VirtualMachineClusterInstancetype { return ins.instancetypes } func (ins *Strategy) Preferences() []*instancetypev1beta1.VirtualMachineClusterPreference { return ins.preferences } func (ins *Strategy) ValidatingAdmissionPolicyBindings() []*admissionregistrationv1.ValidatingAdmissionPolicyBinding { return ins.validatingAdmissionPolicyBindings } func (ins *Strategy) ValidatingAdmissionPolicies() []*admissionregistrationv1.ValidatingAdmissionPolicy { return ins.validatingAdmissionPolicies } func encodeManifests(manifests []byte) (string, error) { var buf bytes.Buffer zw := gzip.NewWriter(&buf) _, err := zw.Write(manifests) if err != nil { return "", err } if err = zw.Close(); err != nil { return "", err } base64Strategy := base64.StdEncoding.EncodeToString(buf.Bytes()) return base64Strategy, nil } func decodeManifests(strategy []byte) (string, error) { var decodedStrategy strings.Builder gzippedStrategy, err := base64.StdEncoding.DecodeString(string(strategy)) if err != nil { return "", err } buf := bytes.NewBuffer(gzippedStrategy) zr, err := gzip.NewReader(buf) if err != nil { return "", err } if _, err := io.Copy(&decodedStrategy, zr); err != nil { return "", err } return decodedStrategy.String(), nil } func NewInstallStrategyConfigMap(config *operatorutil.KubeVirtDeploymentConfig, monitorNamespace string, operatorNamespace string) (*corev1.ConfigMap, error) { strategy, err := GenerateCurrentInstallStrategy(config, monitorNamespace, operatorNamespace) if err != nil { return nil, err } manifests, err := encodeManifests(dumpInstallStrategyToBytes(strategy)) if err != nil { return nil, err } configMap := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ GenerateName: "kubevirt-install-strategy-", Namespace: config.GetNamespace(), Labels: map[string]string{ v1.ManagedByLabel: v1.ManagedByLabelOperatorValue, v1.InstallStrategyLabel: "", }, Annotations: map[string]string{ v1.InstallStrategyVersionAnnotation: config.GetKubeVirtVersion(), v1.InstallStrategyRegistryAnnotation: config.GetImageRegistry(), v1.InstallStrategyIdentifierAnnotation: config.GetDeploymentID(), v1.InstallStrategyConfigMapEncoding: ManifestsEncodingGzipBase64, }, }, Data: map[string]string{ "manifests": manifests, }, } return configMap, nil } func getMonitorNamespace(clientset k8coresv1.CoreV1Interface, config *operatorutil.KubeVirtDeploymentConfig) (namespace string, err error) { for _, ns := range config.GetPotentialMonitorNamespaces() { if nsExists, err := isNamespaceExist(clientset, ns); nsExists { // the monitoring service account must be in the monitoring namespace otherwise // we won't be able to create roleBinding for prometheus operator pods if saExists, err := isServiceAccountExist(clientset, ns, config.GetMonitorServiceAccountName()); saExists { return ns, nil } else if err != nil { return "", err } } else if err != nil { return "", err } } return "", nil } func DumpInstallStrategyToConfigMap(clientset kubecli.KubevirtClient, operatorNamespace string) error { config, err := operatorutil.GetConfigFromEnv() if err != nil { return err } 
monitorNamespace, err := getMonitorNamespace(clientset.CoreV1(), config) if err != nil { return err } configMap, err := NewInstallStrategyConfigMap(config, monitorNamespace, operatorNamespace) if err != nil { return err } _, err = clientset.CoreV1().ConfigMaps(config.GetNamespace()).Create(context.Background(), configMap, metav1.CreateOptions{}) if err != nil { if errors.IsAlreadyExists(err) { // force update if already exists _, err = clientset.CoreV1().ConfigMaps(config.GetNamespace()).Update(context.Background(), configMap, metav1.UpdateOptions{}) if err != nil { return err } } else { return err } } return nil } func dumpInstallStrategyToBytes(strategy *Strategy) []byte { var b bytes.Buffer writer := bufio.NewWriter(&b) for _, entry := range strategy.serviceAccounts { marshalutil.MarshallObject(entry, writer) } for _, entry := range strategy.clusterRoles { marshalutil.MarshallObject(entry, writer) } for _, entry := range strategy.clusterRoleBindings { marshalutil.MarshallObject(entry, writer) } for _, entry := range strategy.roles { marshalutil.MarshallObject(entry, writer) } for _, entry := range strategy.roleBindings { marshalutil.MarshallObject(entry, writer) } for _, entry := range strategy.crds { b, _ := yaml.Marshal(entry) writer.Write([]byte("---\n")) writer.Write(b) } for _, entry := range strategy.services { marshalutil.MarshallObject(entry, writer) } for _, entry := range strategy.certificateSecrets { marshalutil.MarshallObject(entry, writer) } for _, entry := range strategy.validatingWebhookConfigurations { marshalutil.MarshallObject(entry, writer) } for _, entry := range strategy.mutatingWebhookConfigurations { marshalutil.MarshallObject(entry, writer) } for _, entry := range strategy.validatingAdmissionPolicyBindings { marshalutil.MarshallObject(entry, writer) } for _, entry := range strategy.validatingAdmissionPolicies { marshalutil.MarshallObject(entry, writer) } for _, entry := range strategy.apiServices { marshalutil.MarshallObject(entry, writer) } for _, entry := range strategy.deployments { marshalutil.MarshallObject(entry, writer) } for _, entry := range strategy.daemonSets { marshalutil.MarshallObject(entry, writer) } for _, entry := range strategy.sccs { marshalutil.MarshallObject(entry, writer) } for _, entry := range strategy.serviceMonitors { marshalutil.MarshallObject(entry, writer) } for _, entry := range strategy.prometheusRules { marshalutil.MarshallObject(entry, writer) } for _, entry := range strategy.configMaps { marshalutil.MarshallObject(entry, writer) } for _, entry := range strategy.routes { marshalutil.MarshallObject(entry, writer) } for _, entry := range strategy.instancetypes { marshalutil.MarshallObject(entry, writer) } for _, entry := range strategy.preferences { marshalutil.MarshallObject(entry, writer) } writer.Flush() return b.Bytes() } func GenerateCurrentInstallStrategy(config *operatorutil.KubeVirtDeploymentConfig, monitorNamespace string, operatorNamespace string) (*Strategy, error) { strategy := &Strategy{} functions := []func() (*extv1.CustomResourceDefinition, error){ components.NewVirtualMachineInstanceCrd, components.NewPresetCrd, components.NewReplicaSetCrd, components.NewVirtualMachineCrd, components.NewVirtualMachineInstanceMigrationCrd, components.NewVirtualMachineSnapshotCrd, components.NewVirtualMachineSnapshotContentCrd, components.NewVirtualMachineRestoreCrd, components.NewVirtualMachineInstancetypeCrd, components.NewVirtualMachineClusterInstancetypeCrd, components.NewVirtualMachinePoolCrd, components.NewMigrationPolicyCrd, 
components.NewVirtualMachinePreferenceCrd, components.NewVirtualMachineClusterPreferenceCrd, components.NewVirtualMachineExportCrd, components.NewVirtualMachineCloneCrd, } for _, f := range functions { crd, err := f() if err != nil { return nil, err } strategy.crds = append(strategy.crds, crd) } rbaclist := make([]runtime.Object, 0) rbaclist = append(rbaclist, rbac.GetAllCluster()...) rbaclist = append(rbaclist, rbac.GetAllApiServer(config.GetNamespace())...) rbaclist = append(rbaclist, rbac.GetAllController(config.GetNamespace())...) rbaclist = append(rbaclist, rbac.GetAllHandler(config.GetNamespace())...) rbaclist = append(rbaclist, rbac.GetAllExportProxy(config.GetNamespace())...) monitorServiceAccount := config.GetMonitorServiceAccountName() isServiceAccountFound := monitorNamespace != "" if isServiceAccountFound { serviceMonitorNamespace := config.GetServiceMonitorNamespace() if serviceMonitorNamespace == "" { serviceMonitorNamespace = monitorNamespace } rbaclist = append(rbaclist, rbac.GetAllServiceMonitor(config.GetNamespace(), monitorNamespace, monitorServiceAccount)...) strategy.serviceMonitors = append(strategy.serviceMonitors, components.NewServiceMonitorCR(config.GetNamespace(), serviceMonitorNamespace, true)) err := rules.SetupRules(config.GetNamespace()) if err != nil { return nil, err } prometheusRule, err := rules.BuildPrometheusRule(config.GetNamespace()) if err != nil { return nil, err } strategy.prometheusRules = append(strategy.prometheusRules, prometheusRule) } else { glog.Warningf("failed to create ServiceMonitor resources because couldn't find ServiceAccount %v in any monitoring namespaces : %v", monitorServiceAccount, strings.Join(config.GetPotentialMonitorNamespaces(), ", ")) } for _, entry := range rbaclist { cr, ok := entry.(*rbacv1.ClusterRole) if ok { strategy.clusterRoles = append(strategy.clusterRoles, cr) } crb, ok := entry.(*rbacv1.ClusterRoleBinding) if ok { strategy.clusterRoleBindings = append(strategy.clusterRoleBindings, crb) } r, ok := entry.(*rbacv1.Role) if ok { strategy.roles = append(strategy.roles, r) } rb, ok := entry.(*rbacv1.RoleBinding) if ok { strategy.roleBindings = append(strategy.roleBindings, rb) } sa, ok := entry.(*corev1.ServiceAccount) if ok { strategy.serviceAccounts = append(strategy.serviceAccounts, sa) } } var productName string var productVersion string var productComponent string invalidLabelPatternErrorMessage := "invalid %s: labels must be 63 characters or less, begin and end with alphanumeric characters, and contain only dot, hyphen or dash" if operatorutil.IsValidLabel(config.GetProductName()) { productName = config.GetProductName() } else { log.Log.Errorf(fmt.Sprintf(invalidLabelPatternErrorMessage, "kubevirt.spec.productName")) } if operatorutil.IsValidLabel(config.GetProductVersion()) { productVersion = config.GetProductVersion() } else { log.Log.Errorf(fmt.Sprintf(invalidLabelPatternErrorMessage, "kubevirt.spec.productVersion")) } if operatorutil.IsValidLabel(config.GetProductComponent()) { productComponent = config.GetProductComponent() } else { log.Log.Errorf(fmt.Sprintf(invalidLabelPatternErrorMessage, "kubevirt.spec.productComponent")) } strategy.validatingWebhookConfigurations = append(strategy.validatingWebhookConfigurations, components.NewOpertorValidatingWebhookConfiguration(operatorNamespace)) strategy.validatingWebhookConfigurations = append(strategy.validatingWebhookConfigurations, components.NewVirtAPIValidatingWebhookConfiguration(config.GetNamespace())) strategy.mutatingWebhookConfigurations = 
append(strategy.mutatingWebhookConfigurations, components.NewVirtAPIMutatingWebhookConfiguration(config.GetNamespace())) strategy.services = append(strategy.services, components.NewPrometheusService(config.GetNamespace())) strategy.services = append(strategy.services, components.NewApiServerService(config.GetNamespace())) strategy.services = append(strategy.services, components.NewOperatorWebhookService(operatorNamespace)) strategy.services = append(strategy.services, components.NewExportProxyService(config.GetNamespace())) apiDeployment := components.NewApiServerDeployment(config.GetNamespace(), config.GetImageRegistry(), config.GetImagePrefix(), config.GetApiVersion(), productName, productVersion, productComponent, config.VirtApiImage, config.GetImagePullPolicy(), config.GetImagePullSecrets(), config.GetVerbosity(), config.GetExtraEnv()) strategy.deployments = append(strategy.deployments, apiDeployment) controller := components.NewControllerDeployment(config.GetNamespace(), config.GetImageRegistry(), config.GetImagePrefix(), config.GetControllerVersion(), config.GetLauncherVersion(), config.GetExportServerVersion(), config.GetSidecarShimVersion(), productName, productVersion, productComponent, config.VirtControllerImage, config.VirtLauncherImage, config.VirtExportServerImage, config.SidecarShimImage, config.GetImagePullPolicy(), config.GetImagePullSecrets(), config.GetVerbosity(), config.GetExtraEnv()) strategy.deployments = append(strategy.deployments, controller) strategy.configMaps = append(strategy.configMaps, components.NewCAConfigMaps(operatorNamespace)...) exportProxyDeployment := components.NewExportProxyDeployment(config.GetNamespace(), config.GetImageRegistry(), config.GetImagePrefix(), config.GetExportProxyVersion(), productName, productVersion, productComponent, config.VirtExportProxyImage, config.GetImagePullPolicy(), config.GetImagePullSecrets(), config.GetVerbosity(), config.GetExtraEnv()) strategy.deployments = append(strategy.deployments, exportProxyDeployment) handler := components.NewHandlerDaemonSet(config.GetNamespace(), config.GetImageRegistry(), config.GetImagePrefix(), config.GetHandlerVersion(), config.GetLauncherVersion(), config.GetPrHelperVersion(), config.GetSidecarShimVersion(), productName, productVersion, productComponent, config.VirtHandlerImage, config.VirtLauncherImage, config.PrHelperImage, config.SidecarShimImage, config.GetImagePullPolicy(), config.GetImagePullSecrets(), config.GetMigrationNetwork(), config.GetVerbosity(), config.GetExtraEnv(), config.PersistentReservationEnabled()) strategy.daemonSets = append(strategy.daemonSets, handler) strategy.sccs = append(strategy.sccs, components.GetAllSCC(config.GetNamespace())...) strategy.apiServices = components.NewVirtAPIAPIServices(config.GetNamespace()) strategy.certificateSecrets = components.NewCertSecrets(config.GetNamespace(), operatorNamespace) strategy.certificateSecrets = append(strategy.certificateSecrets, components.NewCACertSecrets(operatorNamespace)...) strategy.configMaps = append(strategy.configMaps, components.NewCAConfigMaps(operatorNamespace)...) strategy.routes = append(strategy.routes, components.GetAllRoutes(operatorNamespace)...) 
strategy.validatingAdmissionPolicyBindings = append(strategy.validatingAdmissionPolicyBindings, components.NewHandlerV1ValidatingAdmissionPolicyBinding()) virtHandlerServiceAccount := getVirtHandlerServiceAccount(config.GetNamespace()) strategy.validatingAdmissionPolicies = append(strategy.validatingAdmissionPolicies, components.NewHandlerV1ValidatingAdmissionPolicy(virtHandlerServiceAccount)) instancetypes, err := components.NewClusterInstancetypes() if err != nil { return nil, fmt.Errorf("error generating instancetypes for environment %v", err) } strategy.instancetypes = instancetypes preferences, err := components.NewClusterPreferences() if err != nil { return nil, fmt.Errorf("error generating preferences for environment %v", err) } strategy.preferences = preferences return strategy, nil } func getVirtHandlerServiceAccount(namespace string) string { prefix := fmt.Sprintf("system:serviceaccount:%s", namespace) return fmt.Sprintf("%s:%s", prefix, components.HandlerServiceAccountName) } func mostRecentConfigMap(configMaps []*corev1.ConfigMap) *corev1.ConfigMap { var configMap *corev1.ConfigMap // choose the most recent configmap if multiple match. mostRecentTime := metav1.Time{} for _, config := range configMaps { if configMap == nil || mostRecentTime.Before(&config.ObjectMeta.CreationTimestamp) { configMap = config mostRecentTime = config.ObjectMeta.CreationTimestamp } } return configMap } func isEncoded(configMap *corev1.ConfigMap) bool { _, ok := configMap.Annotations[v1.InstallStrategyConfigMapEncoding] return ok } func getManifests(configMap *corev1.ConfigMap) (string, error) { manifests, ok := configMap.Data["manifests"] if !ok { return "", fmt.Errorf("install strategy configmap %s does not contain 'manifests' key", configMap.Name) } if isEncoded(configMap) { var err error manifests, err = decodeManifests([]byte(manifests)) if err != nil { return "", err } } return manifests, nil } func LoadInstallStrategyFromCache(stores operatorutil.Stores, config *operatorutil.KubeVirtDeploymentConfig) (*Strategy, error) { var matchingConfigMaps []*corev1.ConfigMap for _, obj := range stores.InstallStrategyConfigMapCache.List() { cm, ok := obj.(*corev1.ConfigMap) if !ok { continue } else if cm.ObjectMeta.Annotations == nil { continue } else if cm.ObjectMeta.Namespace != config.GetNamespace() { continue } // deprecated, keep it for backwards compatibility version, _ := cm.ObjectMeta.Annotations[v1.InstallStrategyVersionAnnotation] // deprecated, keep it for backwards compatibility registry, _ := cm.ObjectMeta.Annotations[v1.InstallStrategyRegistryAnnotation] id, _ := cm.ObjectMeta.Annotations[v1.InstallStrategyIdentifierAnnotation] if id == config.GetDeploymentID() || (id == "" && version == config.GetKubeVirtVersion() && registry == config.GetImageRegistry()) { matchingConfigMaps = append(matchingConfigMaps, cm) } } if len(matchingConfigMaps) == 0 { return nil, fmt.Errorf("no install strategy configmap found for version %s with registry %s", config.GetKubeVirtVersion(), config.GetImageRegistry()) } manifests, err := getManifests(mostRecentConfigMap(matchingConfigMaps)) if err != nil { return nil, err } strategy, err := loadInstallStrategyFromBytes(manifests) if err != nil { return nil, err } return strategy, nil } func loadInstallStrategyFromBytes(data string) (*Strategy, error) { strategy := &Strategy{} entries := strings.Split(data, "---") for _, entry := range entries { entry := strings.TrimSpace(entry) if entry == "" { continue } var obj metav1.TypeMeta if err := yaml.Unmarshal([]byte(entry), 
&obj); err != nil { return nil, err } switch obj.Kind { case "ValidatingWebhookConfiguration": webhook := &admissionregistrationv1.ValidatingWebhookConfiguration{} if err := yaml.Unmarshal([]byte(entry), &webhook); err != nil { return nil, err } webhook.TypeMeta = obj strategy.validatingWebhookConfigurations = append(strategy.validatingWebhookConfigurations, webhook) case "MutatingWebhookConfiguration": webhook := &admissionregistrationv1.MutatingWebhookConfiguration{} if err := yaml.Unmarshal([]byte(entry), &webhook); err != nil { return nil, err } webhook.TypeMeta = obj strategy.mutatingWebhookConfigurations = append(strategy.mutatingWebhookConfigurations, webhook) case "ValidatingAdmissionPolicyBinding": validatingAdmissionPolicyBinding := &admissionregistrationv1.ValidatingAdmissionPolicyBinding{} if err := yaml.Unmarshal([]byte(entry), &validatingAdmissionPolicyBinding); err != nil { return nil, err } validatingAdmissionPolicyBinding.TypeMeta = obj strategy.validatingAdmissionPolicyBindings = append(strategy.validatingAdmissionPolicyBindings, validatingAdmissionPolicyBinding) case "ValidatingAdmissionPolicy": validatingAdmissionPolicy := &admissionregistrationv1.ValidatingAdmissionPolicy{} if err := yaml.Unmarshal([]byte(entry), &validatingAdmissionPolicy); err != nil { return nil, err } validatingAdmissionPolicy.TypeMeta = obj strategy.validatingAdmissionPolicies = append(strategy.validatingAdmissionPolicies, validatingAdmissionPolicy) case "APIService": apiService := &apiregv1.APIService{} if err := yaml.Unmarshal([]byte(entry), &apiService); err != nil { return nil, err } strategy.apiServices = append(strategy.apiServices, apiService) case "Secret": secret := &corev1.Secret{} if err := yaml.Unmarshal([]byte(entry), &secret); err != nil { return nil, err } strategy.certificateSecrets = append(strategy.certificateSecrets, secret) case "ServiceAccount": sa := &corev1.ServiceAccount{} if err := yaml.Unmarshal([]byte(entry), &sa); err != nil { return nil, err } strategy.serviceAccounts = append(strategy.serviceAccounts, sa) case "ClusterRole": cr := &rbacv1.ClusterRole{} if err := yaml.Unmarshal([]byte(entry), &cr); err != nil { return nil, err } strategy.clusterRoles = append(strategy.clusterRoles, cr) case "ClusterRoleBinding": crb := &rbacv1.ClusterRoleBinding{} if err := yaml.Unmarshal([]byte(entry), &crb); err != nil { return nil, err } strategy.clusterRoleBindings = append(strategy.clusterRoleBindings, crb) case "Role": r := &rbacv1.Role{} if err := yaml.Unmarshal([]byte(entry), &r); err != nil { return nil, err } strategy.roles = append(strategy.roles, r) case "RoleBinding": rb := &rbacv1.RoleBinding{} if err := yaml.Unmarshal([]byte(entry), &rb); err != nil { return nil, err } strategy.roleBindings = append(strategy.roleBindings, rb) case "Service": s := &corev1.Service{} if err := yaml.Unmarshal([]byte(entry), &s); err != nil { return nil, err } strategy.services = append(strategy.services, s) case "Deployment": d := &appsv1.Deployment{} if err := yaml.Unmarshal([]byte(entry), &d); err != nil { return nil, err } strategy.deployments = append(strategy.deployments, d) case "DaemonSet": d := &appsv1.DaemonSet{} if err := yaml.Unmarshal([]byte(entry), &d); err != nil { return nil, err } strategy.daemonSets = append(strategy.daemonSets, d) case "CustomResourceDefinition": crdv1 := &extv1.CustomResourceDefinition{} switch obj.APIVersion { case extv1beta1.SchemeGroupVersion.String(): crd := &ext.CustomResourceDefinition{} crdv1beta1 := &extv1beta1.CustomResourceDefinition{} if err := 
yaml.Unmarshal([]byte(entry), &crdv1beta1); err != nil { return nil, err } err := extv1beta1.Convert_v1beta1_CustomResourceDefinition_To_apiextensions_CustomResourceDefinition(crdv1beta1, crd, nil) if err != nil { return nil, err } err = extv1.Convert_apiextensions_CustomResourceDefinition_To_v1_CustomResourceDefinition(crd, crdv1, nil) if err != nil { return nil, err } case extv1.SchemeGroupVersion.String(): if err := yaml.Unmarshal([]byte(entry), &crdv1); err != nil { return nil, err } default: return nil, fmt.Errorf("crd ApiVersion %s not supported", obj.APIVersion) } strategy.crds = append(strategy.crds, crdv1) case "SecurityContextConstraints": s := &secv1.SecurityContextConstraints{} if err := yaml.Unmarshal([]byte(entry), &s); err != nil { return nil, err } strategy.sccs = append(strategy.sccs, s) case "ServiceMonitor": sm := &promv1.ServiceMonitor{} if err := yaml.Unmarshal([]byte(entry), &sm); err != nil { return nil, err } strategy.serviceMonitors = append(strategy.serviceMonitors, sm) case "PrometheusRule": pr := &promv1.PrometheusRule{} if err := yaml.Unmarshal([]byte(entry), &pr); err != nil { return nil, err } strategy.prometheusRules = append(strategy.prometheusRules, pr) case "ConfigMap": configMap := &corev1.ConfigMap{} if err := yaml.Unmarshal([]byte(entry), &configMap); err != nil { return nil, err } strategy.configMaps = append(strategy.configMaps, configMap) case "Route": route := &routev1.Route{} if err := yaml.Unmarshal([]byte(entry), &route); err != nil { return nil, err } strategy.routes = append(strategy.routes, route) case "VirtualMachineClusterInstancetype": instancetype := &instancetypev1beta1.VirtualMachineClusterInstancetype{} if err := yaml.Unmarshal([]byte(entry), &instancetype); err != nil { return nil, err } strategy.instancetypes = append(strategy.instancetypes, instancetype) case "VirtualMachineClusterPreference": preference := &instancetypev1beta1.VirtualMachineClusterPreference{} if err := yaml.Unmarshal([]byte(entry), &preference); err != nil { return nil, err } strategy.preferences = append(strategy.preferences, preference) default: return nil, fmt.Errorf("UNKNOWN TYPE %s detected", obj.Kind) } log.Log.Infof("%s loaded", obj.Kind) } return strategy, nil } func isNamespaceExist(clientset k8coresv1.CoreV1Interface, ns string) (bool, error) { _, err := clientset.Namespaces().Get(context.Background(), ns, metav1.GetOptions{}) if err == nil { return true, nil } if errors.IsNotFound(err) { return false, nil } return false, err } func isServiceAccountExist(clientset k8coresv1.CoreV1Interface, ns string, serviceAccount string) (bool, error) { _, err := clientset.ServiceAccounts(ns).Get(context.Background(), serviceAccount, metav1.GetOptions{}) if err == nil { return true, nil } if errors.IsNotFound(err) { return false, nil } return false, err }
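// Illustrative sketch, not part of the operator sources: a stdlib-only round
// trip of the "gzip+base64" encoding that encodeManifests/decodeManifests
// above use for the install strategy ConfigMap data. The sample manifest is
// invented for the example.
package main

import (
    "bytes"
    "compress/gzip"
    "encoding/base64"
    "fmt"
    "io"
    "strings"
)

func encode(manifests []byte) (string, error) {
    var buf bytes.Buffer
    zw := gzip.NewWriter(&buf)
    if _, err := zw.Write(manifests); err != nil {
        return "", err
    }
    if err := zw.Close(); err != nil {
        return "", err
    }
    return base64.StdEncoding.EncodeToString(buf.Bytes()), nil
}

func decode(encoded string) (string, error) {
    gzipped, err := base64.StdEncoding.DecodeString(encoded)
    if err != nil {
        return "", err
    }
    zr, err := gzip.NewReader(bytes.NewBuffer(gzipped))
    if err != nil {
        return "", err
    }
    var out strings.Builder
    if _, err := io.Copy(&out, zr); err != nil {
        return "", err
    }
    return out.String(), nil
}

func main() {
    manifest := "---\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: kubevirt-operator\n"
    encoded, err := encode([]byte(manifest))
    if err != nil {
        panic(err)
    }
    decoded, err := decode(encoded)
    if err != nil {
        panic(err)
    }
    fmt.Println(decoded == manifest) // true: the round trip is lossless
}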