diff --git a/cmd/containerd-shim-lcow-v2/service/service_task_internal.go b/cmd/containerd-shim-lcow-v2/service/service_task_internal.go index 9e417ba741..6cfd4fdd99 100644 --- a/cmd/containerd-shim-lcow-v2/service/service_task_internal.go +++ b/cmd/containerd-shim-lcow-v2/service/service_task_internal.go @@ -178,9 +178,11 @@ func (s *Service) createInternal(ctx context.Context, request *task.CreateTaskRe // Get EnableScratchEncryption option. var enableScratchEncryption bool + var liveMigrationAllowed bool sandboxOpts := s.vmController.SandboxOptions() if sandboxOpts != nil { enableScratchEncryption = sandboxOpts.EnableScratchEncryption + liveMigrationAllowed = sandboxOpts.LiveMigrationAllowed } // Call Create on the container controller. @@ -190,6 +192,7 @@ func (s *Service) createInternal(ctx context.Context, request *task.CreateTaskRe request, &container.CreateOpts{ IsScratchEncryptionEnabled: enableScratchEncryption, + LiveMigrationAllowed: liveMigrationAllowed, }, ); err != nil { return nil, fmt.Errorf("failed to create container %s: %w", request.ID, err) diff --git a/internal/builder/vm/lcow/kernel_args.go b/internal/builder/vm/lcow/kernel_args.go index 21222761e1..de3e974b5b 100644 --- a/internal/builder/vm/lcow/kernel_args.go +++ b/internal/builder/vm/lcow/kernel_args.go @@ -26,6 +26,7 @@ func buildKernelArgs( kernelDirect bool, hasConsole bool, rootFsFile string, + liveMigrationAllowed bool, ) (string, error) { log.G(ctx).WithField("rootFsFile", rootFsFile).Debug("buildKernelArgs: starting kernel arguments construction") @@ -80,7 +81,7 @@ func buildKernelArgs( args = append(args, "brd.rd_nr=0", "pmtmr=0") // 8. Init arguments (passed after "--" separator) - initArgs := buildInitArgs(ctx, opts, writableOverlayDirs, disableTimeSyncService, processDumpLocation, rootFsFile, hasConsole) + initArgs := buildInitArgs(ctx, opts, writableOverlayDirs, disableTimeSyncService, processDumpLocation, rootFsFile, hasConsole, liveMigrationAllowed) args = append(args, "--", initArgs) result := strings.Join(args, " ") @@ -149,6 +150,7 @@ func buildInitArgs( processDumpLocation string, rootFsFile string, hasConsole bool, + liveMigrationAllowed bool, ) string { log.G(ctx).WithFields(logrus.Fields{ "rootFsFile": rootFsFile, @@ -158,7 +160,7 @@ func buildInitArgs( entropyArgs := fmt.Sprintf("-e %d", vmutils.LinuxEntropyVsockPort) // Build GCS execution command - gcsCmd := buildGCSCommand(opts, disableTimeSyncService, processDumpLocation) + gcsCmd := buildGCSCommand(opts, disableTimeSyncService, processDumpLocation, liveMigrationAllowed) // Construct init arguments var initArgsList []string @@ -192,14 +194,8 @@ func buildGCSCommand( opts *runhcsoptions.Options, disableTimeSyncService bool, processDumpLocation string, + liveMigrationAllowed bool, ) string { - // Start with vsockexec wrapper - var cmdParts []string - cmdParts = append(cmdParts, "/bin/vsockexec") - - // Add logging vsock port - cmdParts = append(cmdParts, fmt.Sprintf("-e %d", vmutils.LinuxLogVsockPort)) - // Determine log level logLevel := "info" if opts != nil && opts.LogLevel != "" { @@ -227,8 +223,15 @@ func buildGCSCommand( gcsParts = append(gcsParts, "-core-dump-location", processDumpLocation) } - // Combine vsockexec and GCS command - cmdParts = append(cmdParts, strings.Join(gcsParts, " ")) + gcsCmd := strings.Join(gcsParts, " ") + + // Live-migratable pods skip vsockexec: the host does not run a log + // listener, so connect on LinuxLogVsockPort would stall init. + if liveMigrationAllowed { + return gcsCmd + } - return strings.Join(cmdParts, " ") + // vsockexec `-e ` wires gcs's stderr to LinuxLogVsockPort, which + // the host listener reads and republishes. + return fmt.Sprintf("/bin/vsockexec -e %d %s", vmutils.LinuxLogVsockPort, gcsCmd) } diff --git a/internal/builder/vm/lcow/migration.go b/internal/builder/vm/lcow/migration.go new file mode 100644 index 0000000000..1e0698793d --- /dev/null +++ b/internal/builder/vm/lcow/migration.go @@ -0,0 +1,130 @@ +//go:build windows && lcow + +package lcow + +import ( + "fmt" + "strings" + + iannotations "github.com/Microsoft/hcsshim/internal/annotations" + shimannotations "github.com/Microsoft/hcsshim/pkg/annotations" +) + +// vmAnnotationPrefix scopes the live-migration policy check to UVM-shape +// annotations. Anything under this prefix that is not in one of the two +// allow-lists below is rejected when [shimannotations.LiveMigrationAllowed] +// is set on the sandbox. +const vmAnnotationPrefix = "io.microsoft.virtualmachine" + +// liveMigrationAllowAnyVMAnnotations is the set of UVM-prefixed annotations +// that may be set to any value when LM is enabled. +var liveMigrationAllowAnyVMAnnotations = map[string]struct{}{ + // Boot/kernel selection: pure host-side knobs that don't change the UVM's + // migratable shape. + shimannotations.BootFilesRootPath: {}, + // Additional kernel command-line options: applied at boot inside the + // guest; doesn't bind the UVM to host-physical resources. + shimannotations.KernelBootOptions: {}, + // CPU/memory shaping. + shimannotations.AllowOvercommit: {}, + shimannotations.ProcessorCount: {}, + shimannotations.ProcessorLimit: {}, + shimannotations.ProcessorWeight: {}, + shimannotations.MemorySizeInMB: {}, + shimannotations.MemoryLowMMIOGapInMB: {}, + shimannotations.MemoryHighMMIOBaseInMB: {}, + shimannotations.MemoryHighMMIOGapInMB: {}, + // Storage QoS knobs are pure per-host rate limits; + shimannotations.StorageQoSIopsMaximum: {}, + shimannotations.StorageQoSBandwidthMaximum: {}, + // Scratch-disk encryption: a per-UVM crypto setting applied at scratch + // creation; identical on both sides of a migration. + shimannotations.LCOWEncryptedScratchDisk: {}, + // In-guest networking policy: applied inside the UVM, host-agnostic. + iannotations.NetworkingPolicyBasedRouting: {}, + // In-guest chronyd toggle: pure guest-side service control. + shimannotations.DisableLCOWTimeSyncService: {}, + // Writable overlay dirs are an in-guest tmpfs overlay; they don't bind + // the UVM to host-backed writable file shares. + iannotations.WritableOverlayDirs: {}, +} + +// liveMigrationLockedVMAnnotations is the map of UVM-prefixed annotations that +// may be set under LM, but only to one specific raw value. +var liveMigrationLockedVMAnnotations = map[string]string{ + // LM-capable UVMs must boot from an initrd rootfs. + shimannotations.PreferredRootFSType: "initrd", + // LM-capable UVMs must use direct kernel boot. + shimannotations.KernelDirectBoot: "true", + // VPCI passthrough binds the UVM to a host-physical device. + shimannotations.VPCIEnabled: "false", + // VPMem-backed layers can't be re-attached identically post-migration; + // VPMemCount=0 forces SCSI for layers. + shimannotations.VPMemCount: "0", + // Writable file shares can't be re-mapped identically on the destination + // host; they must be disabled for an LM-capable UVM. + shimannotations.DisableWritableFileShares: "true", +} + +// validateLiveMigrationAnnotations enforces the live-migration annotation +// policy on a sandbox spec. It must only be invoked when the sandbox has +// opted into live migration via [shimannotations.LiveMigrationAllowed]. +// +// Policy: +// +// - Annotations outside "io.microsoft.virtualmachine.*" scope are allowed. +// - Annotations matching [iannotations.UVMHyperVSocketConfigPrefix] are +// always rejected: each entry binds the UVM to a host-side service +// registration that cannot move with the VM. +// - Annotations in [liveMigrationAllowAnyVMAnnotations] pass with any value. +// - Annotations in [liveMigrationLockedVMAnnotations] pass only when their +// raw value equals the locked value. +// - All other annotations under "io.microsoft.virtualmachine.*" are rejected +// (default-deny). +// +// Map iteration order is non-deterministic, so when multiple annotations are +// in violation the returned error names only one of them. +func validateLiveMigrationAnnotations(annotations map[string]string) error { + for key, val := range annotations { + // Reject per-GUID HvSocket service-table entries explicitly so the + // rationale lives next to the policy. They would also be caught by + // default-deny below. + if strings.HasPrefix(key, iannotations.UVMHyperVSocketConfigPrefix) { + return fmt.Errorf("annotation %q is not supported when %s is enabled", + key, shimannotations.LiveMigrationAllowed) + } + if !strings.HasPrefix(key, vmAnnotationPrefix) { + continue + } + if _, ok := liveMigrationAllowAnyVMAnnotations[key]; ok { + continue + } + if want, ok := liveMigrationLockedVMAnnotations[key]; ok { + if strings.ToLower(val) != want { + return fmt.Errorf( + "annotation %q has an unsupported value when %s is enabled: must be %q, got %q", + key, shimannotations.LiveMigrationAllowed, want, val, + ) + } + continue + } + return fmt.Errorf("annotation %q is not supported when %s is enabled", + key, shimannotations.LiveMigrationAllowed) + } + return nil +} + +// applyLiveMigrationLockedDefaults fills in any annotation in +// [liveMigrationLockedVMAnnotations] that is not already present on the +// sandbox with its required value. +func applyLiveMigrationLockedDefaults(annotations map[string]string) map[string]string { + if annotations == nil { + annotations = make(map[string]string, len(liveMigrationLockedVMAnnotations)) + } + for key, val := range liveMigrationLockedVMAnnotations { + if _, ok := annotations[key]; !ok { + annotations[key] = val + } + } + return annotations +} diff --git a/internal/builder/vm/lcow/migration_test.go b/internal/builder/vm/lcow/migration_test.go new file mode 100644 index 0000000000..880eaeec84 --- /dev/null +++ b/internal/builder/vm/lcow/migration_test.go @@ -0,0 +1,496 @@ +//go:build windows && lcow + +package lcow + +import ( + "strings" + "testing" + + iannotations "github.com/Microsoft/hcsshim/internal/annotations" + shimannotations "github.com/Microsoft/hcsshim/pkg/annotations" +) + +func TestValidateLiveMigrationAnnotations(t *testing.T) { + tests := []struct { + name string + annotations map[string]string + wantErr bool + // errSubstr, when wantErr is true, must appear in the returned error. + errSubstr string + }{ + // ----- happy paths ------------------------------------------------ + { + name: "empty annotations", + annotations: map[string]string{}, + wantErr: false, + }, + { + name: "non-VM-prefixed annotations are ignored", + annotations: map[string]string{ + // migration namespace is outside io.microsoft.virtualmachine + shimannotations.LiveMigrationAllowed: "true", + // io.microsoft.container.* is outside the VM prefix and is + // therefore implicitly permitted. + shimannotations.ContainerProcessDumpLocation: `C:\dumps`, + // arbitrary user annotations outside the VM namespace + "io.kubernetes.cri.sandbox-id": "abc123", + "foo.bar": "baz", + }, + wantErr: false, + }, + { + name: "all allow-any annotations accepted with arbitrary values", + annotations: map[string]string{ + shimannotations.BootFilesRootPath: `C:\boot`, + shimannotations.KernelBootOptions: "console=ttyS0", + shimannotations.AllowOvercommit: "false", + shimannotations.ProcessorCount: "4", + shimannotations.ProcessorLimit: "75000", + shimannotations.ProcessorWeight: "200", + shimannotations.MemorySizeInMB: "2048", + shimannotations.MemoryLowMMIOGapInMB: "128", + shimannotations.MemoryHighMMIOBaseInMB: "65536", + shimannotations.MemoryHighMMIOGapInMB: "1024", + shimannotations.StorageQoSIopsMaximum: "5000", + shimannotations.StorageQoSBandwidthMaximum: "1000000", + shimannotations.LCOWEncryptedScratchDisk: "true", + shimannotations.DisableLCOWTimeSyncService: "true", + iannotations.NetworkingPolicyBasedRouting: "true", + iannotations.WritableOverlayDirs: "true", + }, + wantErr: false, + }, + { + name: "locked-value annotations accepted with required values", + annotations: map[string]string{ + shimannotations.PreferredRootFSType: "initrd", + shimannotations.KernelDirectBoot: "true", + shimannotations.VPCIEnabled: "false", + shimannotations.VPMemCount: "0", + shimannotations.DisableWritableFileShares: "true", + }, + wantErr: false, + }, + + // ----- locked-value violations ------------------------------------ + { + name: "PreferredRootFSType=vhd is rejected", + annotations: map[string]string{ + shimannotations.PreferredRootFSType: "vhd", + }, + wantErr: true, + errSubstr: shimannotations.PreferredRootFSType, + }, + { + name: "PreferredRootFSType empty is rejected", + annotations: map[string]string{ + shimannotations.PreferredRootFSType: "", + }, + wantErr: true, + errSubstr: shimannotations.PreferredRootFSType, + }, + { + name: "KernelDirectBoot=false is rejected", + annotations: map[string]string{ + shimannotations.KernelDirectBoot: "false", + }, + wantErr: true, + errSubstr: shimannotations.KernelDirectBoot, + }, + { + name: "KernelDirectBoot empty is rejected", + annotations: map[string]string{ + shimannotations.KernelDirectBoot: "", + }, + wantErr: true, + errSubstr: shimannotations.KernelDirectBoot, + }, + { + name: "VPCIEnabled=true is rejected", + annotations: map[string]string{ + shimannotations.VPCIEnabled: "true", + }, + wantErr: true, + errSubstr: shimannotations.VPCIEnabled, + }, + { + name: "DisableWritableFileShares=false is rejected", + annotations: map[string]string{ + shimannotations.DisableWritableFileShares: "false", + }, + wantErr: true, + errSubstr: shimannotations.DisableWritableFileShares, + }, + { + name: "DisableWritableFileShares empty is rejected", + annotations: map[string]string{ + shimannotations.DisableWritableFileShares: "", + }, + wantErr: true, + errSubstr: shimannotations.DisableWritableFileShares, + }, + { + name: "VPMemCount > 0 is rejected", + annotations: map[string]string{ + shimannotations.VPMemCount: "4", + }, + wantErr: true, + errSubstr: shimannotations.VPMemCount, + }, + + // ----- locked values use case-insensitive match ------------------ + // strings.ToLower is applied to the user-supplied value before + // comparison, so non-canonical casing is accepted, but values that + // don't match after lowercasing are rejected. + { + name: "VPCIEnabled=False (capitalized) is accepted", + annotations: map[string]string{ + shimannotations.VPCIEnabled: "False", + }, + wantErr: false, + }, + { + name: "PreferredRootFSType=INITRD (uppercased) is accepted", + annotations: map[string]string{ + shimannotations.PreferredRootFSType: "INITRD", + }, + wantErr: false, + }, + { + name: "KernelDirectBoot=True (capitalized) is accepted", + annotations: map[string]string{ + shimannotations.KernelDirectBoot: "True", + }, + wantErr: false, + }, + { + name: "VPCIEnabled with garbage value is rejected", + annotations: map[string]string{ + shimannotations.VPCIEnabled: "maybe", + }, + wantErr: true, + errSubstr: shimannotations.VPCIEnabled, + }, + { + name: "VPMemCount with non-numeric value is rejected", + annotations: map[string]string{ + shimannotations.VPMemCount: "lots", + }, + wantErr: true, + errSubstr: shimannotations.VPMemCount, + }, + + // ----- previously "disable to default" annotations are now ------- + // ----- rejected outright (default-deny). To disable a feature, ---- + // ----- omit its annotation. --------------------------------------- + { + name: "EnableDeferredCommit set at all is rejected (even =false)", + annotations: map[string]string{ + shimannotations.EnableDeferredCommit: "false", + }, + wantErr: true, + errSubstr: shimannotations.EnableDeferredCommit, + }, + { + name: "EnableDeferredCommit=true is rejected", + annotations: map[string]string{ + shimannotations.EnableDeferredCommit: "true", + }, + wantErr: true, + errSubstr: shimannotations.EnableDeferredCommit, + }, + { + name: "EnableColdDiscardHint set at all is rejected (even =false)", + annotations: map[string]string{ + shimannotations.EnableColdDiscardHint: "false", + }, + wantErr: true, + errSubstr: shimannotations.EnableColdDiscardHint, + }, + { + name: "NumaMaximumProcessorsPerNode set at all is rejected (even =0)", + annotations: map[string]string{ + shimannotations.NumaMaximumProcessorsPerNode: "0", + }, + wantErr: true, + errSubstr: shimannotations.NumaMaximumProcessorsPerNode, + }, + { + name: "NumaMaximumProcessorsPerNode > 0 rejected", + annotations: map[string]string{ + shimannotations.NumaMaximumProcessorsPerNode: "8", + }, + wantErr: true, + errSubstr: shimannotations.NumaMaximumProcessorsPerNode, + }, + { + name: "NumaMaximumMemorySizePerNode set at all is rejected", + annotations: map[string]string{ + shimannotations.NumaMaximumMemorySizePerNode: "0", + }, + wantErr: true, + errSubstr: shimannotations.NumaMaximumMemorySizePerNode, + }, + { + name: "NumaCountOfProcessors set at all is rejected (even empty)", + annotations: map[string]string{ + shimannotations.NumaCountOfProcessors: "", + }, + wantErr: true, + errSubstr: shimannotations.NumaCountOfProcessors, + }, + { + name: "non-empty NUMA list rejected", + annotations: map[string]string{ + shimannotations.NumaCountOfProcessors: "2,2", + }, + wantErr: true, + errSubstr: shimannotations.NumaCountOfProcessors, + }, + { + name: "NumaPreferredPhysicalNodes rejected", + annotations: map[string]string{ + shimannotations.NumaPreferredPhysicalNodes: "0,1", + }, + wantErr: true, + errSubstr: shimannotations.NumaPreferredPhysicalNodes, + }, + + // ----- explicitly disallowed VM annotations ----------------------- + { + name: "VirtualMachineKernelDrivers rejected", + annotations: map[string]string{ + shimannotations.VirtualMachineKernelDrivers: `C:\drivers`, + }, + wantErr: true, + errSubstr: shimannotations.VirtualMachineKernelDrivers, + }, + { + name: "VPMemNoMultiMapping rejected", + annotations: map[string]string{ + shimannotations.VPMemNoMultiMapping: "true", + }, + wantErr: true, + errSubstr: shimannotations.VPMemNoMultiMapping, + }, + { + name: "UVMConsolePipe rejected", + annotations: map[string]string{ + iannotations.UVMConsolePipe: `\\.\pipe\foo`, + }, + wantErr: true, + errSubstr: iannotations.UVMConsolePipe, + }, + { + name: "CPUGroupID rejected", + annotations: map[string]string{ + shimannotations.CPUGroupID: "00000000-0000-0000-0000-000000000000", + }, + wantErr: true, + errSubstr: shimannotations.CPUGroupID, + }, + { + name: "ResourcePartitionID rejected", + annotations: map[string]string{ + shimannotations.ResourcePartitionID: "00000000-0000-0000-0000-000000000000", + }, + wantErr: true, + errSubstr: shimannotations.ResourcePartitionID, + }, + { + name: "FullyPhysicallyBacked default-denied", + annotations: map[string]string{ + shimannotations.FullyPhysicallyBacked: "true", + }, + wantErr: true, + errSubstr: shimannotations.FullyPhysicallyBacked, + }, + + // ----- confidential annotations all rejected ---------------------- + { + name: "LCOWSecurityPolicy rejected", + annotations: map[string]string{ + shimannotations.LCOWSecurityPolicy: "policy-blob", + }, + wantErr: true, + errSubstr: shimannotations.LCOWSecurityPolicy, + }, + { + name: "LCOWSecurityPolicyEnforcer rejected", + annotations: map[string]string{ + shimannotations.LCOWSecurityPolicyEnforcer: "rego", + }, + wantErr: true, + errSubstr: shimannotations.LCOWSecurityPolicyEnforcer, + }, + { + name: "LCOWGuestStateFile rejected", + annotations: map[string]string{ + shimannotations.LCOWGuestStateFile: `C:\path\to.vmgs`, + }, + wantErr: true, + errSubstr: shimannotations.LCOWGuestStateFile, + }, + { + name: "LCOWHclEnabled rejected", + annotations: map[string]string{ + shimannotations.LCOWHclEnabled: "true", + }, + wantErr: true, + errSubstr: shimannotations.LCOWHclEnabled, + }, + { + name: "LCOWReferenceInfoFile rejected", + annotations: map[string]string{ + shimannotations.LCOWReferenceInfoFile: `C:\info.bin`, + }, + wantErr: true, + errSubstr: shimannotations.LCOWReferenceInfoFile, + }, + { + name: "NoSecurityHardware rejected", + annotations: map[string]string{ + shimannotations.NoSecurityHardware: "true", + }, + wantErr: true, + errSubstr: shimannotations.NoSecurityHardware, + }, + { + name: "DmVerityMode rejected", + annotations: map[string]string{ + shimannotations.DmVerityMode: "true", + }, + wantErr: true, + errSubstr: shimannotations.DmVerityMode, + }, + { + name: "DmVerityCreateArgs rejected", + annotations: map[string]string{ + shimannotations.DmVerityCreateArgs: "args", + }, + wantErr: true, + errSubstr: shimannotations.DmVerityCreateArgs, + }, + { + name: "DmVerityRootFsVhd rejected", + annotations: map[string]string{ + shimannotations.DmVerityRootFsVhd: `C:\rootfs.vhd`, + }, + wantErr: true, + errSubstr: shimannotations.DmVerityRootFsVhd, + }, + { + name: "ExtraVSockPorts rejected", + annotations: map[string]string{ + iannotations.ExtraVSockPorts: "5000,5001", + }, + wantErr: true, + errSubstr: iannotations.ExtraVSockPorts, + }, + + // ----- HvSocket service-table prefix ------------------------------ + { + name: "UVMHyperVSocketConfigPrefix entry rejected", + annotations: map[string]string{ + iannotations.UVMHyperVSocketConfigPrefix + "00000000-0000-0000-0000-000000000000": `{"BindSecurityDescriptor":"D:P"}`, + }, + wantErr: true, + errSubstr: iannotations.UVMHyperVSocketConfigPrefix, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + err := validateLiveMigrationAnnotations(tc.annotations) + if tc.wantErr { + if err == nil { + t.Fatalf("expected error, got nil") + } + if tc.errSubstr != "" && !strings.Contains(err.Error(), tc.errSubstr) { + t.Fatalf("error %q does not mention offending annotation %q", err, tc.errSubstr) + } + if !strings.Contains(err.Error(), shimannotations.LiveMigrationAllowed) { + t.Fatalf("error %q does not reference %s", err, shimannotations.LiveMigrationAllowed) + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + }) + } +} + +func TestApplyLiveMigrationLockedDefaults(t *testing.T) { + t.Run("nil map is replaced and populated with all locked defaults", func(t *testing.T) { + got := applyLiveMigrationLockedDefaults(nil) + if got == nil { + t.Fatalf("expected non-nil map, got nil") + } + if len(got) != len(liveMigrationLockedVMAnnotations) { + t.Fatalf("expected %d entries, got %d (%v)", + len(liveMigrationLockedVMAnnotations), len(got), got) + } + for key, want := range liveMigrationLockedVMAnnotations { + if got[key] != want { + t.Errorf("key %q: want %q, got %q", key, want, got[key]) + } + } + }) + + t.Run("empty map is populated with all locked defaults", func(t *testing.T) { + in := map[string]string{} + got := applyLiveMigrationLockedDefaults(in) + for key, want := range liveMigrationLockedVMAnnotations { + if got[key] != want { + t.Errorf("key %q: want %q, got %q", key, want, got[key]) + } + } + }) + + t.Run("missing locked annotations are filled in", func(t *testing.T) { + in := map[string]string{ + // Only set one of the locked annotations; the rest must be defaulted. + shimannotations.PreferredRootFSType: "initrd", + // Plus an unrelated annotation that must be preserved untouched. + "io.kubernetes.cri.sandbox-id": "abc123", + } + got := applyLiveMigrationLockedDefaults(in) + for key, want := range liveMigrationLockedVMAnnotations { + if got[key] != want { + t.Errorf("key %q: want %q, got %q", key, want, got[key]) + } + } + if got["io.kubernetes.cri.sandbox-id"] != "abc123" { + t.Errorf("unrelated annotation was modified: got %q", got["io.kubernetes.cri.sandbox-id"]) + } + }) + + t.Run("user-supplied locked values are not overwritten", func(t *testing.T) { + // Validation is case-insensitive and accepts non-canonical casing, + // so a user could legitimately supply e.g. "True" or "INITRD". The + // defaulter must not clobber such values. + in := map[string]string{ + shimannotations.KernelDirectBoot: "True", + shimannotations.PreferredRootFSType: "INITRD", + } + got := applyLiveMigrationLockedDefaults(in) + if got[shimannotations.KernelDirectBoot] != "True" { + t.Errorf("KernelDirectBoot: want %q (preserved), got %q", + "True", got[shimannotations.KernelDirectBoot]) + } + if got[shimannotations.PreferredRootFSType] != "INITRD" { + t.Errorf("PreferredRootFSType: want %q (preserved), got %q", + "INITRD", got[shimannotations.PreferredRootFSType]) + } + }) + + t.Run("output passes validation", func(t *testing.T) { + // The defaulter's output must round-trip through validation: starting + // from an empty map, applying defaults, then validating must succeed. + got := applyLiveMigrationLockedDefaults(nil) + if err := validateLiveMigrationAnnotations(got); err != nil { + t.Fatalf("defaulted map failed validation: %v", err) + } + }) +} diff --git a/internal/builder/vm/lcow/sandbox_options.go b/internal/builder/vm/lcow/sandbox_options.go index b0b71e00c7..4e528441da 100644 --- a/internal/builder/vm/lcow/sandbox_options.go +++ b/internal/builder/vm/lcow/sandbox_options.go @@ -32,6 +32,13 @@ type SandboxOptions struct { // ConfidentialConfig carries confidential computing fields that are not // part of the HCS document but are needed for confidential VM setup. ConfidentialConfig *ConfidentialConfig + + // LiveMigrationAllowed is a sandbox-scoped gate indicating that the sandbox is + // intended to be live-migratable. When true, any container subsequently + // created in the pod that requests a feature incompatible with live + // migration (e.g. host-backed mounts via Plan9 or SCSI hot-add, or vPCI + // device assignment) will be rejected at create time. + LiveMigrationAllowed bool } // ConfidentialConfig carries confidential computing configuration that is not diff --git a/internal/builder/vm/lcow/specs.go b/internal/builder/vm/lcow/specs.go index 792e80ca43..4736c176f6 100644 --- a/internal/builder/vm/lcow/specs.go +++ b/internal/builder/vm/lcow/specs.go @@ -66,6 +66,19 @@ func BuildSandboxConfig( return nil, nil, fmt.Errorf("failed to parse sandbox options: %w", err) } + // If the sandbox has opted into live migration, enforce the allow-list of + // UVM-shape annotations up front so that we fail fast (before any boot/ + // device/kernel-args parsing) rather than producing a non-migratable UVM. + // After validation succeeds, fill in any locked annotation that the caller + // did not specify with its required value, so that downstream parsing + // always sees the LM-mandated configuration. + if sandboxOptions.LiveMigrationAllowed { + if err := validateLiveMigrationAnnotations(spec.Annotations); err != nil { + return nil, nil, fmt.Errorf("live-migration annotation validation failed: %w", err) + } + spec.Annotations = applyLiveMigrationLockedDefaults(spec.Annotations) + } + // ================== Parse Topology (CPU, Memory, NUMA) options ================= // =============================================================================== @@ -210,6 +223,7 @@ func BuildSandboxConfig( bootOptions.LinuxKernelDirect != nil, // isKernelDirectBoot comPorts != nil, // hasConsole filepath.Base(rootFsFullPath), + sandboxOptions.LiveMigrationAllowed, ) if err != nil { return nil, nil, fmt.Errorf("failed to build kernel args: %w", err) @@ -312,6 +326,10 @@ func parseSandboxOptions(ctx context.Context, platform string, annotations map[s NoWritableFileShares: oci.ParseAnnotationsBool(ctx, annotations, shimannotations.DisableWritableFileShares, false), // Multi-mapping is enabled by default on 19H1+, can be disabled via annotation. VPMEMMultiMapping: !(oci.ParseAnnotationsBool(ctx, annotations, shimannotations.VPMemNoMultiMapping, osversion.Build() < osversion.V19H1)), + // LiveMigrationAllowed gates per-container creation: when true, any + // container in the sandbox requesting LM-incompatible features (host + // mounts, vPCI devices) is rejected. + LiveMigrationAllowed: oci.ParseAnnotationsBool(ctx, annotations, shimannotations.LiveMigrationAllowed, false), } // Parse the list of additional kernel drivers to be injected into the VM. diff --git a/internal/builder/vm/lcow/specs_test.go b/internal/builder/vm/lcow/specs_test.go index 1c168b32df..c50bba07e5 100644 --- a/internal/builder/vm/lcow/specs_test.go +++ b/internal/builder/vm/lcow/specs_test.go @@ -323,6 +323,37 @@ func TestBuildSandboxConfig(t *testing.T) { } }, }, + { + name: "live migration allowed annotation parsed", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + spec: &vm.Spec{ + Annotations: map[string]string{ + shimannotations.LiveMigrationAllowed: "true", + }, + }, + validate: func(t *testing.T, doc *hcsschema.ComputeSystem, sandboxOpts *SandboxOptions) { + t.Helper() + if !sandboxOpts.LiveMigrationAllowed { + t.Errorf("expected LiveMigrationAllowed true, got %v", sandboxOpts.LiveMigrationAllowed) + } + }, + }, + { + name: "live migration defaults to false", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + validate: func(t *testing.T, doc *hcsschema.ComputeSystem, sandboxOpts *SandboxOptions) { + t.Helper() + if sandboxOpts.LiveMigrationAllowed { + t.Errorf("expected LiveMigrationAllowed false by default, got %v", sandboxOpts.LiveMigrationAllowed) + } + }, + }, { name: "boot options with kernel direct", opts: &runhcsoptions.Options{ @@ -1404,6 +1435,20 @@ func TestBuildSandboxConfig_BootOptions(t *testing.T) { } } + // Configuration 5: Live-migration capable boot files (kernel direct + + // initrd). LM forces PreferredRootFSType=initrd and KernelDirectBoot=true, + // so the boot files path must contain both the (uncompressed) kernel and + // the initrd. + lmBootPath := filepath.Join(tempDir, "lm_boot") + if err := os.MkdirAll(lmBootPath, 0755); err != nil { + t.Fatalf("failed to create lm boot dir: %v", err) + } + for _, f := range []string{vmutils.KernelFile, vmutils.UncompressedKernelFile, vmutils.InitrdFile, vmutils.VhdFile} { + if err := os.WriteFile(filepath.Join(lmBootPath, f), []byte("test"), 0644); err != nil { + t.Fatalf("failed to create file %s: %v", f, err) + } + } + tests := []specTestCase{ { name: "boot with VHD only (no initrd)", @@ -1651,6 +1696,136 @@ func TestBuildSandboxConfig_BootOptions(t *testing.T) { } }, }, + { + // Default (non-LM) pod: gcs is wrapped by /bin/vsockexec on + // LinuxLogVsockPort so its stderr is forwarded to the host log + // listener. + name: "default vsockexec wraps gcs on log vsock port", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: vhdOnlyPath, + }, + validate: func(t *testing.T, doc *hcsschema.ComputeSystem, sandboxOpts *SandboxOptions) { + t.Helper() + wantWrapper := fmt.Sprintf("/bin/vsockexec -e %d /bin/gcs", vmutils.LinuxLogVsockPort) + if !strings.Contains(getKernelArgs(doc), wantWrapper) { + t.Errorf("expected %q in kernel args, got %q", wantWrapper, getKernelArgs(doc)) + } + }, + }, + { + // Live-migratable pod: vsockexec wrapping must be dropped because + // the host does not run a log listener for migratable pods (a + // connect on LinuxLogVsockPort would stall init). Entropy + // injection is unrelated to log forwarding and must remain. + name: "live migration drops vsockexec wrapper", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: lmBootPath, + }, + spec: &vm.Spec{ + Annotations: map[string]string{ + shimannotations.LiveMigrationAllowed: "true", + }, + }, + validate: func(t *testing.T, doc *hcsschema.ComputeSystem, sandboxOpts *SandboxOptions) { + t.Helper() + args := getKernelArgs(doc) + if !sandboxOpts.LiveMigrationAllowed { + t.Errorf("expected sandboxOpts.LiveMigrationAllowed=true") + } + if strings.Contains(args, "vsockexec") { + t.Errorf("LM kernel args must not contain vsockexec, got %q", args) + } + if strings.Contains(args, fmt.Sprintf("%d", vmutils.LinuxLogVsockPort)) { + t.Errorf("LM kernel args must not reference LinuxLogVsockPort, got %q", args) + } + if !strings.Contains(args, "/bin/gcs") { + t.Errorf("expected bare /bin/gcs in LM kernel args, got %q", args) + } + // Entropy is one-shot at boot; LM must keep it. + wantEntropy := fmt.Sprintf("-e %d", vmutils.LinuxEntropyVsockPort) + if !strings.Contains(args, wantEntropy) { + t.Errorf("expected entropy arg %q in LM kernel args, got %q", wantEntropy, args) + } + }, + }, + { + // LM only drops the vsockexec wrapper, not the gcs flags + // themselves: -loglevel, -scrub-logs, etc. must still flow into + // the bare /bin/gcs invocation. + name: "live migration preserves gcs flags", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: lmBootPath, + LogLevel: "debug", + ScrubLogs: true, + }, + spec: &vm.Spec{ + Annotations: map[string]string{ + shimannotations.LiveMigrationAllowed: "true", + }, + }, + validate: func(t *testing.T, doc *hcsschema.ComputeSystem, sandboxOpts *SandboxOptions) { + t.Helper() + args := getKernelArgs(doc) + for _, sub := range []string{"-loglevel debug", "-scrub-logs"} { + if !strings.Contains(args, sub) { + t.Errorf("expected %q in LM kernel args, got %q", sub, args) + } + } + if strings.Contains(args, "vsockexec") { + t.Errorf("LM kernel args must not contain vsockexec, got %q", args) + } + }, + }, + { + // User-supplied KernelBootOptions must be appended verbatim and + // must appear before the `--` init-args separator (the host-side + // kernel cmdline section, not the init args section). + name: "kernel boot options appended verbatim before separator", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: vhdOnlyPath, + }, + spec: &vm.Spec{ + Annotations: map[string]string{ + shimannotations.KernelBootOptions: "extra=foo other=bar", + }, + }, + validate: func(t *testing.T, doc *hcsschema.ComputeSystem, sandboxOpts *SandboxOptions) { + t.Helper() + args := getKernelArgs(doc) + if !strings.Contains(args, "extra=foo other=bar") { + t.Errorf("expected user kernel boot options verbatim, got %q", args) + } + sepIdx := strings.Index(args, " -- ") + userIdx := strings.Index(args, "extra=foo") + if sepIdx < 0 || userIdx < 0 || userIdx > sepIdx { + t.Errorf("user kernel boot options must appear before `--`, got %q", args) + } + }, + }, + { + // nr_cpus reflects the resolved processor count (here pinned via + // annotation rather than relying on host CPU count). + name: "nr_cpus reflects processor count", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: vhdOnlyPath, + }, + spec: &vm.Spec{ + Annotations: map[string]string{ + shimannotations.ProcessorCount: "3", + }, + }, + validate: func(t *testing.T, doc *hcsschema.ComputeSystem, sandboxOpts *SandboxOptions) { + t.Helper() + if !strings.Contains(getKernelArgs(doc), "nr_cpus=3") { + t.Errorf("expected nr_cpus=3 in kernel args, got %q", getKernelArgs(doc)) + } + }, + }, } runTestCases(t, ctx, nil, tests) diff --git a/internal/controller/linuxcontainer/container.go b/internal/controller/linuxcontainer/container.go index 5fb7cc67f3..ced6e12cb9 100644 --- a/internal/controller/linuxcontainer/container.go +++ b/internal/controller/linuxcontainer/container.go @@ -90,6 +90,13 @@ type Controller struct { // ioRetryTimeout is the duration to retry IO relay operations before giving up. ioRetryTimeout time.Duration + + // liveMigrationAllowed mirrors the pod-scoped LM gate. When true, the + // container has been validated to use only LM-compatible features + // (no host-backed mounts, no vPCI devices). It is captured at create + // time and used by downstream code that needs to know whether the + // container can participate in live migration. + liveMigrationAllowed bool } // New creates a ready-to-use Controller. @@ -132,6 +139,10 @@ func (c *Controller) Create(ctx context.Context, spec *specs.Spec, opts *task.Cr return fmt.Errorf("container %s is in state %s; cannot create: %w", c.containerID, c.state, errdefs.ErrFailedPrecondition) } + // Capture the pod-scoped live-migration gate so downstream code can + // query it via the container controller. + c.liveMigrationAllowed = copts.LiveMigrationAllowed + // Parse the runtime options from the request. shimOpts, err := vmutils.UnmarshalRuntimeOptions(ctx, opts.Options) if err != nil { diff --git a/internal/controller/linuxcontainer/devices.go b/internal/controller/linuxcontainer/devices.go index 38bf8e29f5..f818000db3 100644 --- a/internal/controller/linuxcontainer/devices.go +++ b/internal/controller/linuxcontainer/devices.go @@ -10,12 +10,20 @@ import ( "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/logfields" + "github.com/containerd/errdefs" "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" ) // allocateDevices reserves and maps vPCI devices for the container. func (c *Controller) allocateDevices(ctx context.Context, spec *specs.Spec) error { + // vPCI assignments are hot-attached to the source VM and cannot be + // transferred, so reject them up-front when the pod is + // gated for live migration. + if c.liveMigrationAllowed && len(spec.Windows.Devices) > 0 { + return fmt.Errorf("vpci device assignment not allowed in live-migratable pod: %w", errdefs.ErrFailedPrecondition) + } + for idx := range spec.Windows.Devices { device := &spec.Windows.Devices[idx] diff --git a/internal/controller/linuxcontainer/devices_test.go b/internal/controller/linuxcontainer/devices_test.go index da17a813d0..14ce15cb02 100644 --- a/internal/controller/linuxcontainer/devices_test.go +++ b/internal/controller/linuxcontainer/devices_test.go @@ -10,6 +10,7 @@ import ( "github.com/Microsoft/hcsshim/internal/controller/linuxcontainer/mocks" "github.com/Microsoft/go-winio/pkg/guid" + "github.com/containerd/errdefs" "github.com/opencontainers/runtime-spec/specs-go" "go.uber.org/mock/gomock" ) @@ -353,3 +354,41 @@ func TestAllocateDevices_MultipleDevicesPartialFailure(t *testing.T) { }) } } + +// --- live-migration gating --- + +// TestAllocateDevices_LiveMigrationRejectsAnyDevice verifies that any vPCI +// device assignment is rejected when the pod is gated for live migration, +// and that no Reserve/AddToVM call is attempted. +func TestAllocateDevices_LiveMigrationRejectsAnyDevice(t *testing.T) { + t.Parallel() + c, spec, _ := newTestControllerAndSpec(t, + specs.WindowsDevice{ID: `PCI\VEN_1234&DEV_5678\0`, IDType: vpci.DeviceIDType}, + ) + c.liveMigrationAllowed = true + + // No Reserve / AddToVM calls expected. + + err := c.allocateDevices(t.Context(), spec) + if err == nil { + t.Fatal("expected error rejecting vPCI device in LM-gated pod") + } + if !errors.Is(err, errdefs.ErrFailedPrecondition) { + t.Errorf("error = %v, want ErrFailedPrecondition", err) + } + if len(c.devices) != 0 { + t.Errorf("expected 0 tracked devices, got %d", len(c.devices)) + } +} + +// TestAllocateDevices_LiveMigrationAllowsNoDevices verifies that a spec with +// no vPCI devices is accepted even when the pod is gated for live migration. +func TestAllocateDevices_LiveMigrationAllowsNoDevices(t *testing.T) { + t.Parallel() + c, spec, _ := newTestControllerAndSpec(t) + c.liveMigrationAllowed = true + + if err := c.allocateDevices(t.Context(), spec); err != nil { + t.Fatalf("unexpected error for empty device list in LM-gated pod: %v", err) + } +} diff --git a/internal/controller/linuxcontainer/mounts.go b/internal/controller/linuxcontainer/mounts.go index feaa791a11..fa4d726364 100644 --- a/internal/controller/linuxcontainer/mounts.go +++ b/internal/controller/linuxcontainer/mounts.go @@ -15,6 +15,8 @@ import ( scsiMount "github.com/Microsoft/hcsshim/internal/controller/device/scsi/mount" "github.com/Microsoft/hcsshim/internal/guestpath" "github.com/Microsoft/hcsshim/internal/log" + + "github.com/containerd/errdefs" "github.com/opencontainers/runtime-spec/specs-go" ) @@ -53,6 +55,11 @@ func (c *Controller) allocateMounts(ctx context.Context, spec *specs.Spec) error // Dispatch to a mount-type-specific handler. switch mount.Type { case mountTypeVirtualDisk, mountTypePhysicalDisk, mountTypeExtensibleVirtualDisk: + // SCSI hot-add of a host disk is not transferable across hosts. + if c.liveMigrationAllowed { + return fmt.Errorf("scsi mount %s not allowed in live-migratable pod: %w", mount.Source, errdefs.ErrFailedPrecondition) + } + if err := c.allocateSCSIMount(ctx, mount, isReadOnly); err != nil { return err } @@ -71,13 +78,20 @@ func (c *Controller) allocateMounts(ctx context.Context, spec *specs.Spec) error } // All remaining bind mounts are host directories served via Plan9. - // Allocate them. + // A Plan9 share holds host-side state that cannot be transferred, + // so reject them when the pod is gated for live migration. + if c.liveMigrationAllowed { + return fmt.Errorf("host bind mount %s not allowed in live-migratable pod: %w", mount.Source, errdefs.ErrFailedPrecondition) + } + if err := c.allocatePlan9Mount(ctx, mount, isReadOnly); err != nil { return err } default: // Unknown mount types (e.g. tmpfs, devpts, proc) are passed through // to the guest without host-side resource reservation/allocation. + // These are LM-safe because they are resolved entirely by the + // guest kernel. } } diff --git a/internal/controller/linuxcontainer/mounts_test.go b/internal/controller/linuxcontainer/mounts_test.go index 200519d169..de8b34d4bc 100644 --- a/internal/controller/linuxcontainer/mounts_test.go +++ b/internal/controller/linuxcontainer/mounts_test.go @@ -17,6 +17,7 @@ import ( "github.com/Microsoft/hcsshim/internal/guestpath" "github.com/Microsoft/go-winio/pkg/guid" + "github.com/containerd/errdefs" "github.com/opencontainers/runtime-spec/specs-go" "go.uber.org/mock/gomock" ) @@ -1020,3 +1021,100 @@ func TestAllocateMounts_EVDInvalidPath(t *testing.T) { t.Fatal("expected error for invalid EVD path") } } + +// --- allocateMounts: live-migration gating --- + +// TestAllocateMounts_LiveMigrationRejectsSCSI verifies that a SCSI-backed +// mount (virtual-disk / physical-disk / extensible-virtual-disk) is rejected +// when the pod is gated for live migration, and that no host-side allocation +// is attempted. +func TestAllocateMounts_LiveMigrationRejectsSCSI(t *testing.T) { + t.Parallel() + for _, mountType := range []string{ + mountTypeVirtualDisk, + mountTypePhysicalDisk, + mountTypeExtensibleVirtualDisk, + } { + t.Run(mountType, func(t *testing.T) { + t.Parallel() + c, _, _ := newMountsTestController(t) + c.liveMigrationAllowed = true + spec := &specs.Spec{ + Mounts: []specs.Mount{ + {Source: `C:\disk.vhd`, Destination: "/mnt", Type: mountType}, + }, + } + + // No mock calls expected — the LM gate must short-circuit + // before any Reserve/MapToGuest call. + + err := c.allocateMounts(t.Context(), spec) + if err == nil { + t.Fatalf("expected error for %s mount in LM-gated pod", mountType) + } + if !errors.Is(err, errdefs.ErrFailedPrecondition) { + t.Errorf("error = %v, want ErrFailedPrecondition", err) + } + if len(c.scsiResources) != 0 { + t.Errorf("expected 0 SCSI reservations, got %d", len(c.scsiResources)) + } + }) + } +} + +// TestAllocateMounts_LiveMigrationRejectsHostBindMount verifies that a +// host-backed bind mount (which would become a Plan9 share) is rejected +// when the pod is gated for live migration. +func TestAllocateMounts_LiveMigrationRejectsHostBindMount(t *testing.T) { + t.Parallel() + c, _, _ := newMountsTestController(t) + c.liveMigrationAllowed = true + spec := &specs.Spec{ + Mounts: []specs.Mount{ + {Source: `C:\hostdir`, Destination: "/mnt", Type: mountTypeBind}, + }, + } + + // No Plan9 mock calls expected. + + err := c.allocateMounts(t.Context(), spec) + if err == nil { + t.Fatal("expected error for host bind mount in LM-gated pod") + } + if !errors.Is(err, errdefs.ErrFailedPrecondition) { + t.Errorf("error = %v, want ErrFailedPrecondition", err) + } + if len(c.plan9Resources) != 0 { + t.Errorf("expected 0 Plan9 reservations, got %d", len(c.plan9Resources)) + } +} + +// TestAllocateMounts_LiveMigrationAllowsUVMInternal verifies that mounts whose +// sources are entirely UVM-internal (sandbox://, sandbox-tmp://, uvm://, +// hugepages://) and kernel-only mount types do NOT get rejected when the +// pod is gated for live migration. None of them require host-side +// allocations, so no mock calls are expected either. +func TestAllocateMounts_LiveMigrationAllowsUVMInternal(t *testing.T) { + t.Parallel() + c, _, _ := newMountsTestController(t) + c.liveMigrationAllowed = true + spec := &specs.Spec{ + Mounts: []specs.Mount{ + {Source: guestpath.SandboxMountPrefix + "a", Destination: "/sb", Type: mountTypeBind}, + {Source: guestpath.SandboxTmpfsMountPrefix + "b", Destination: "/tmp/sb", Type: mountTypeBind}, + {Source: guestpath.UVMMountPrefix + "c", Destination: "/uvm", Type: mountTypeBind}, + {Source: guestpath.HugePagesMountPrefix + "2M/d", Destination: "/hp", Type: mountTypeBind}, + {Source: "tmpfs", Destination: "/tmp", Type: "tmpfs"}, + {Source: "proc", Destination: "/proc", Type: "proc"}, + {Source: "sysfs", Destination: "/sys", Type: "sysfs"}, + }, + } + + if err := c.allocateMounts(t.Context(), spec); err != nil { + t.Fatalf("UVM-internal/kernel-only mounts should not be rejected: %v", err) + } + if len(c.scsiResources) != 0 || len(c.plan9Resources) != 0 { + t.Errorf("expected no host-side reservations, got scsi=%d plan9=%d", + len(c.scsiResources), len(c.plan9Resources)) + } +} diff --git a/internal/controller/linuxcontainer/types.go b/internal/controller/linuxcontainer/types.go index d49a959ecc..4f530b8310 100644 --- a/internal/controller/linuxcontainer/types.go +++ b/internal/controller/linuxcontainer/types.go @@ -19,6 +19,11 @@ import ( // CreateOpts holds additional options for container creation. type CreateOpts struct { IsScratchEncryptionEnabled bool + + // LiveMigrationAllowed propagates the sandbox-scoped LM gate. When true, the + // container creation must reject any feature that is incompatible with + // live migration (host-backed mounts, vPCI device assignment, etc.). + LiveMigrationAllowed bool } // guest abstracts the UVM guest connection for container lifecycle operations. diff --git a/internal/controller/vm/vm_lcow.go b/internal/controller/vm/vm_lcow.go index 72b8edbf87..abb26f49f1 100644 --- a/internal/controller/vm/vm_lcow.go +++ b/internal/controller/vm/vm_lcow.go @@ -12,6 +12,7 @@ import ( "github.com/Microsoft/hcsshim/internal/controller/device/plan9" "github.com/Microsoft/hcsshim/internal/controller/network" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/protocol/guestresource" "github.com/Microsoft/hcsshim/internal/vm/vmmanager" "github.com/Microsoft/hcsshim/internal/vm/vmutils" @@ -150,6 +151,13 @@ func (c *Controller) setupEntropyListener(ctx context.Context, group *errgroup.G // running inside the Linux VM. The logs are parsed and // forwarded to the host's logging system for monitoring and debugging. func (c *Controller) setupLoggingListener(ctx context.Context, group *errgroup.Group) { + // For live-migratable sandboxes, we skip logging socket. + if c.sandboxOptions != nil && c.sandboxOptions.LiveMigrationAllowed { + log.G(ctx).Info("skipping GCS log listener: pod is live-migratable") + close(c.logOutputDone) + return + } + group.Go(func() error { // The GCS will connect to this port to stream log output. logConn, err := winio.ListenHvsock(&winio.HvsockAddr{