From cf50569e31c509af8edc36961e005f2429f84134 Mon Sep 17 00:00:00 2001 From: Harsh Rawat Date: Wed, 17 Jun 2026 01:10:28 +0530 Subject: [PATCH] [internal/hcs] Migrate package from HCS V1 to V2 Presently, we were using HCS V1 (vmcompute) within internal/hcs but it has been deprecated and therefore, we are moving to use computecore within hcs v2 package. Signed-off-by: Harsh Rawat --- cmd/containerd-shim-lcow-v2/manager.go | 2 +- .../service/service_task_internal.go | 2 +- cmd/containerd-shim-runhcs-v1/delete.go | 2 +- cmd/containerd-shim-runhcs-v1/exec_hcs.go | 2 +- cmd/containerd-shim-runhcs-v1/task.go | 2 +- cmd/containerd-shim-runhcs-v1/task_hcs.go | 2 +- internal/computecore/computecore.go | 58 +- .../controller/device/plan9/controller.go | 2 +- .../linuxcontainer/container_test.go | 2 +- .../controller/network/network_lcow_test.go | 2 +- .../controller/network/network_wcow_test.go | 2 +- internal/controller/process/process.go | 2 +- internal/controller/process/process_test.go | 2 +- internal/cpugroup/cpugroup.go | 2 +- internal/credentials/credentials.go | 2 +- internal/gcs/process.go | 8 +- internal/hcs/migration.go | 396 ------- internal/hcs/system.go | 48 - internal/hcs/v2/doc.go | 1 + internal/hcs/v2/errors.go | 369 ++++++ internal/hcs/v2/errors_test.go | 152 +++ internal/hcs/v2/migration.go | 254 +++++ internal/hcs/{ => v2}/migration_test.go | 154 ++- internal/hcs/v2/notification.go | 176 +++ internal/hcs/v2/operation.go | 120 ++ internal/hcs/v2/process.go | 550 +++++++++ internal/hcs/v2/service.go | 51 + internal/hcs/v2/system.go | 1015 +++++++++++++++++ internal/hcs/v2/utils.go | 72 ++ internal/hcsoci/cpuaffinity.go | 2 +- internal/hcsoci/create.go | 2 +- internal/jobcontainers/jobcontainer.go | 2 +- internal/jobcontainers/process.go | 2 +- internal/processorinfo/host_information.go | 2 +- internal/uvm/create.go | 2 +- internal/uvm/plan9.go | 2 +- internal/uvm/scsi/backend.go | 2 +- internal/uvm/types.go | 2 +- internal/uvm/vsmb.go | 2 +- internal/vm/vmmanager/uvm.go | 2 +- internal/vm/vmutils/gcs_logs.go | 2 +- internal/vm/vmutils/utils.go | 2 +- test/functional/uvm_plannine_test.go | 2 +- test/functional/uvm_vsmb_test.go | 2 +- test/internal/scratch.go | 2 +- test/pkg/definitions/hcs/hcs.go | 2 +- 46 files changed, 2919 insertions(+), 567 deletions(-) delete mode 100644 internal/hcs/migration.go create mode 100644 internal/hcs/v2/doc.go create mode 100644 internal/hcs/v2/errors.go create mode 100644 internal/hcs/v2/errors_test.go create mode 100644 internal/hcs/v2/migration.go rename internal/hcs/{ => v2}/migration_test.go (62%) create mode 100644 internal/hcs/v2/notification.go create mode 100644 internal/hcs/v2/operation.go create mode 100644 internal/hcs/v2/process.go create mode 100644 internal/hcs/v2/service.go create mode 100644 internal/hcs/v2/system.go create mode 100644 internal/hcs/v2/utils.go diff --git a/cmd/containerd-shim-lcow-v2/manager.go b/cmd/containerd-shim-lcow-v2/manager.go index d17397a8da..1ec68ff63e 100644 --- a/cmd/containerd-shim-lcow-v2/manager.go +++ b/cmd/containerd-shim-lcow-v2/manager.go @@ -14,7 +14,7 @@ import ( "time" runhcsopts "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" - "github.com/Microsoft/hcsshim/internal/hcs" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/internal/memory" "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/shim" diff --git a/cmd/containerd-shim-lcow-v2/service/service_task_internal.go b/cmd/containerd-shim-lcow-v2/service/service_task_internal.go index 677b52f4c5..0ae2ab31e5 100644 --- a/cmd/containerd-shim-lcow-v2/service/service_task_internal.go +++ b/cmd/containerd-shim-lcow-v2/service/service_task_internal.go @@ -13,8 +13,8 @@ import ( "github.com/Microsoft/hcsshim/internal/controller/pod" "github.com/Microsoft/hcsshim/internal/controller/process" "github.com/Microsoft/hcsshim/internal/controller/vm" - "github.com/Microsoft/hcsshim/internal/hcs" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/memory" diff --git a/cmd/containerd-shim-runhcs-v1/delete.go b/cmd/containerd-shim-runhcs-v1/delete.go index 5c8f8313e4..c1b36416f8 100644 --- a/cmd/containerd-shim-runhcs-v1/delete.go +++ b/cmd/containerd-shim-runhcs-v1/delete.go @@ -16,7 +16,7 @@ import ( "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/timestamppb" - "github.com/Microsoft/hcsshim/internal/hcs" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/internal/memory" "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/winapi" diff --git a/cmd/containerd-shim-runhcs-v1/exec_hcs.go b/cmd/containerd-shim-runhcs-v1/exec_hcs.go index 034e554802..e3c1804ea4 100644 --- a/cmd/containerd-shim-runhcs-v1/exec_hcs.go +++ b/cmd/containerd-shim-runhcs-v1/exec_hcs.go @@ -20,7 +20,7 @@ import ( "github.com/Microsoft/hcsshim/internal/cmd" "github.com/Microsoft/hcsshim/internal/cow" - "github.com/Microsoft/hcsshim/internal/hcs" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/protocol/guestresource" diff --git a/cmd/containerd-shim-runhcs-v1/task.go b/cmd/containerd-shim-runhcs-v1/task.go index 1d8c386145..c16a051e97 100644 --- a/cmd/containerd-shim-runhcs-v1/task.go +++ b/cmd/containerd-shim-runhcs-v1/task.go @@ -9,7 +9,7 @@ import ( "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats" - "github.com/Microsoft/hcsshim/internal/hcs" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/internal/shimdiag" "github.com/Microsoft/hcsshim/pkg/ctrdtaskapi" task "github.com/containerd/containerd/api/runtime/task/v2" diff --git a/cmd/containerd-shim-runhcs-v1/task_hcs.go b/cmd/containerd-shim-runhcs-v1/task_hcs.go index 7adc6fc389..0e00292e70 100644 --- a/cmd/containerd-shim-runhcs-v1/task_hcs.go +++ b/cmd/containerd-shim-runhcs-v1/task_hcs.go @@ -29,10 +29,10 @@ import ( "github.com/Microsoft/hcsshim/internal/cmd" "github.com/Microsoft/hcsshim/internal/cow" "github.com/Microsoft/hcsshim/internal/guestpath" - "github.com/Microsoft/hcsshim/internal/hcs" "github.com/Microsoft/hcsshim/internal/hcs/resourcepaths" "github.com/Microsoft/hcsshim/internal/hcs/schema1" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/internal/hcsoci" "github.com/Microsoft/hcsshim/internal/jobcontainers" "github.com/Microsoft/hcsshim/internal/layers" diff --git a/internal/computecore/computecore.go b/internal/computecore/computecore.go index e00a287d15..577cbc3081 100644 --- a/internal/computecore/computecore.go +++ b/internal/computecore/computecore.go @@ -8,7 +8,6 @@ import ( "time" "unsafe" - "github.com/sirupsen/logrus" "go.opencensus.io/trace" "github.com/Microsoft/hcsshim/internal/interop" @@ -94,39 +93,40 @@ import ( // errVmcomputeOperationPending is an error encountered when the operation is being completed asynchronously const errVmcomputeOperationPending = syscall.Errno(0xC0370103) +// execute runs f synchronously to completion. ctx and timeout are +// watchdogs only: each emits a one-shot warning when exceeded and the +// wait continues until f returns. +// +// Callers of the wrapping HCS API typically release the handles they +// passed in via defer once the call returns. Abandoning f mid-flight +// would let those defers tear down handles the syscall is still using, +// which has been observed to crash the shim with EXCEPTION_ACCESS_VIOLATION +// inside computecore.dll. Long-running operations should be bounded via +// HcsCancelOperation rather than by returning early. func execute(ctx gcontext.Context, timeout time.Duration, f func() error) error { - now := time.Now() - if timeout > 0 { - var cancel gcontext.CancelFunc - ctx, cancel = gcontext.WithTimeout(ctx, timeout) - defer cancel() - } + done := make(chan error, 1) + go func() { done <- f() }() - deadline, ok := ctx.Deadline() - trueTimeout := timeout - if ok { - trueTimeout = deadline.Sub(now) - log.G(ctx).WithFields(logrus.Fields{ - logfields.Timeout: trueTimeout, - "desiredTimeout": timeout, - }).Trace("Executing syscall with deadline") + var watcher <-chan time.Time + if timeout > 0 { + t := time.NewTimer(timeout) + defer t.Stop() + watcher = t.C } - done := make(chan error, 1) - go func() { - done <- f() - }() - select { - case <-ctx.Done(): - if ctx.Err() == gcontext.DeadlineExceeded { - log.G(ctx).WithField(logfields.Timeout, trueTimeout). - Warning("Syscall did not complete within operation timeout. This may indicate a platform issue. " + - "If it appears to be making no forward progress, obtain the stacks and see if there is a syscall " + - "stuck in the platform API for a significant length of time.") + for { + select { + case err := <-done: + return err + case <-watcher: + log.G(ctx).WithField(logfields.Timeout, timeout). + Warning("HCS syscall exceeded timeout; still waiting to avoid use-after-free in computecore.dll") + watcher = nil + case <-ctx.Done(): + log.G(ctx).WithError(ctx.Err()). + Warning("HCS syscall context canceled; still waiting to avoid use-after-free in computecore.dll") + ctx = gcontext.WithoutCancel(ctx) } - return ctx.Err() - case err := <-done: - return err } } diff --git a/internal/controller/device/plan9/controller.go b/internal/controller/device/plan9/controller.go index d0a7c2f236..9bd4ce1700 100644 --- a/internal/controller/device/plan9/controller.go +++ b/internal/controller/device/plan9/controller.go @@ -11,7 +11,7 @@ import ( "github.com/Microsoft/go-winio/pkg/guid" "github.com/Microsoft/hcsshim/internal/controller/device/plan9/mount" "github.com/Microsoft/hcsshim/internal/controller/device/plan9/share" - "github.com/Microsoft/hcsshim/internal/hcs" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/logfields" "github.com/sirupsen/logrus" diff --git a/internal/controller/linuxcontainer/container_test.go b/internal/controller/linuxcontainer/container_test.go index 1eb20d2269..08169890e9 100644 --- a/internal/controller/linuxcontainer/container_test.go +++ b/internal/controller/linuxcontainer/container_test.go @@ -14,8 +14,8 @@ import ( "github.com/Microsoft/hcsshim/internal/controller/linuxcontainer/mocks" "github.com/Microsoft/hcsshim/internal/controller/process" "github.com/Microsoft/hcsshim/internal/gcs" - "github.com/Microsoft/hcsshim/internal/hcs" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/internal/protocol/guestresource" "github.com/Microsoft/hcsshim/internal/signals" diff --git a/internal/controller/network/network_lcow_test.go b/internal/controller/network/network_lcow_test.go index 0ae57eaf42..c52bae178a 100644 --- a/internal/controller/network/network_lcow_test.go +++ b/internal/controller/network/network_lcow_test.go @@ -14,8 +14,8 @@ import ( "github.com/Microsoft/hcsshim/internal/controller/network/mocks" "github.com/Microsoft/hcsshim/internal/gcs" "github.com/Microsoft/hcsshim/internal/guest/prot" - "github.com/Microsoft/hcsshim/internal/hcs" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/internal/protocol/guestresource" ) diff --git a/internal/controller/network/network_wcow_test.go b/internal/controller/network/network_wcow_test.go index 5eb77e31e7..acc07bfe9d 100644 --- a/internal/controller/network/network_wcow_test.go +++ b/internal/controller/network/network_wcow_test.go @@ -13,9 +13,9 @@ import ( "github.com/Microsoft/hcsshim/hcn" "github.com/Microsoft/hcsshim/internal/controller/network/mocks" "github.com/Microsoft/hcsshim/internal/gcs" - "github.com/Microsoft/hcsshim/internal/hcs" "github.com/Microsoft/hcsshim/internal/hcs/schema1" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/internal/protocol/guestrequest" ) diff --git a/internal/controller/process/process.go b/internal/controller/process/process.go index 43ff044d33..a3cc52e4b8 100644 --- a/internal/controller/process/process.go +++ b/internal/controller/process/process.go @@ -10,7 +10,7 @@ import ( "github.com/Microsoft/hcsshim/internal/cmd" "github.com/Microsoft/hcsshim/internal/cow" - "github.com/Microsoft/hcsshim/internal/hcs" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/logfields" eventstypes "github.com/containerd/containerd/api/events" diff --git a/internal/controller/process/process_test.go b/internal/controller/process/process_test.go index 96459a6eac..cff0dcbe8a 100644 --- a/internal/controller/process/process_test.go +++ b/internal/controller/process/process_test.go @@ -14,7 +14,7 @@ import ( "go.uber.org/mock/gomock" "github.com/Microsoft/hcsshim/internal/controller/process/mocks" - "github.com/Microsoft/hcsshim/internal/hcs" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" ) const ( diff --git a/internal/cpugroup/cpugroup.go b/internal/cpugroup/cpugroup.go index 3abaa9c439..02064b2912 100644 --- a/internal/cpugroup/cpugroup.go +++ b/internal/cpugroup/cpugroup.go @@ -8,8 +8,8 @@ import ( "fmt" "strings" - "github.com/Microsoft/hcsshim/internal/hcs" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/pkg/errors" ) diff --git a/internal/credentials/credentials.go b/internal/credentials/credentials.go index 18f29820cb..959e87b0d6 100644 --- a/internal/credentials/credentials.go +++ b/internal/credentials/credentials.go @@ -9,8 +9,8 @@ import ( "errors" "fmt" - "github.com/Microsoft/hcsshim/internal/hcs" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/internal/log" ) diff --git a/internal/gcs/process.go b/internal/gcs/process.go index 4c2428a657..c047d3c195 100644 --- a/internal/gcs/process.go +++ b/internal/gcs/process.go @@ -187,7 +187,10 @@ func (p *Process) ExitCode() (_ int, err error) { return -1, errors.New("process not exited") } if err := p.waitCall.Err(); err != nil { - return -1, err + var rerr *rpcError + if !errors.As(err, &rerr) || uint32(rerr.result) != hrNotFound { + return -1, err + } } return int(p.waitResp.ExitCode), nil } @@ -274,7 +277,8 @@ func (p *Process) Stdio() (stdin io.Writer, stdout, stderr io.Reader) { // Wait waits for the process (or guest connection) to terminate. func (p *Process) Wait() error { p.waitCall.Wait() - return p.waitCall.Err() + _, err := p.ExitCode() + return err } func (p *Process) waitBackground() { diff --git a/internal/hcs/migration.go b/internal/hcs/migration.go deleted file mode 100644 index 5328ba5cca..0000000000 --- a/internal/hcs/migration.go +++ /dev/null @@ -1,396 +0,0 @@ -//go:build windows - -package hcs - -import ( - "context" - "encoding/json" - "errors" - "syscall" - "unsafe" - - "github.com/Microsoft/hcsshim/internal/computecore" - "github.com/Microsoft/hcsshim/internal/hcs/resourcepaths" - hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" - "github.com/Microsoft/hcsshim/internal/oc" - - "github.com/sirupsen/logrus" - "go.opencensus.io/trace" - "golang.org/x/sys/windows" -) - -// migrationNotificationBufferSize is the capacity of the LM notification channel. -const migrationNotificationBufferSize = 16 - -// MigrationConfig holds parameters for starting a compute system as a live migration -// destination, or for initiating the source side of a live migration. -type MigrationConfig struct { - // Socket is the handle to the live migration transport socket. - Socket syscall.Handle - // SessionID identifies the migration session. - SessionID uint32 -} - -// migrationCallback is the syscall callback registered with HcsSetComputeSystemCallback -// for live migration events. It receives events and dispatches them to the channel -// stored in the System via the callbackContext pointer. -var migrationCallback = syscall.NewCallback(migrationCallbackHandler) - -// migrationCallbackHandler is invoked by computecore.dll for live migration events. -// ctx is &computeSystem.migrationNotifyCh, kept alive across the cgo boundary by -// computeSystem.migrationPinner (unpinned only after HcsCloseComputeSystem has -// drained any in-flight callbacks). The notification channel is never closed. -// Skipping the close keeps tear-down trivially safe and removes the only -// thing that could turn a channel send into a panic. -func migrationCallbackHandler(eventPtr uintptr, ctx uintptr) uintptr { - if eventPtr == 0 || ctx == 0 { - return 0 - } - - e := (*computecore.HcsEvent)(unsafe.Pointer(eventPtr)) - ch := *(*chan hcsschema.OperationSystemMigrationNotificationInfo)(unsafe.Pointer(ctx)) - - eventData := "" - if e.EventData != nil { - eventData = windows.UTF16PtrToString(e.EventData) - } - - logrus.WithFields(logrus.Fields{ - "event-type": e.Type.String(), - "event-data": eventData, - }).Debug("HCS migration notification") - - var info hcsschema.OperationSystemMigrationNotificationInfo - if eventData != "" { - if err := json.Unmarshal([]byte(eventData), &info); err != nil { - logrus.WithFields(logrus.Fields{ - "event-type": e.Type.String(), - "event-data": eventData, - logrus.ErrorKey: err, - }).Warn("failed to unmarshal migration notification payload, dropping event") - return 0 - } - } - - // Non-blocking send to avoid blocking the HCS callback thread. - select { - case ch <- info: - default: - logrus.WithField("event-type", e.Type.String()).Warn("migration notification channel full, dropping event") - } - - return 0 -} - -// openMigrationHandle opens a computecore handle to the same system and -// registers a callback for live migration events. It populates -// computeSystem.migrationHandle and computeSystem.migrationNotifyCh. -// -// The caller MUST hold computeSystem.handleLock. -func (computeSystem *System) openMigrationHandle(ctx context.Context) error { - if computeSystem.migrationHandle != 0 { - // Already open — idempotent. - return nil - } - - // Sanity check: the primary handle must be valid. - if computeSystem.handle == 0 { - return ErrAlreadyClosed - } - - // Open a second handle via computecore for LM operations and events. - handle, err := computecore.HcsOpenComputeSystem(ctx, computeSystem.id, syscall.GENERIC_ALL) - if err != nil { - return err - } - - // Create the notification channel and store it on the struct. - computeSystem.migrationHandle = handle - computeSystem.migrationNotifyCh = make(chan hcsschema.OperationSystemMigrationNotificationInfo, migrationNotificationBufferSize) - - // Pin the address of the notification channel field so it stays visible - // to the GC while HCS holds it as a uintptr callback context. Without - // pinning, this would violate cgo's pointer-passing rules. - computeSystem.migrationPinner.Pin(&computeSystem.migrationNotifyCh) - - // Register the callback. - if err := computecore.HcsSetComputeSystemCallback(ctx, handle, computecore.HcsEventOptionEnableLiveMigrationEvents, uintptr(unsafe.Pointer(&computeSystem.migrationNotifyCh)), migrationCallback); err != nil { - computeSystem.migrationPinner.Unpin() - computeSystem.migrationNotifyCh = nil - computeSystem.migrationHandle = 0 - computecore.HcsCloseComputeSystem(ctx, handle) - return err - } - return nil -} - -// closeMigrationHandle unregisters the LM callback and closes the migration -// handle. -// -// The caller MUST hold computeSystem.handleLock. -func (computeSystem *System) closeMigrationHandle(ctx context.Context) { - if computeSystem.migrationHandle == 0 { - return - } - - // Unregister callback by passing zeros, then close the compute system. - // HcsCloseComputeSystem waits for any in-flight callbacks to return, so - // after it completes no callback can still be reading the pinned - // channel pointer and it is safe to Unpin. - _ = computecore.HcsSetComputeSystemCallback(ctx, computeSystem.migrationHandle, computecore.HcsEventOptionNone, 0, 0) - computecore.HcsCloseComputeSystem(ctx, computeSystem.migrationHandle) - computeSystem.migrationHandle = 0 - - computeSystem.migrationPinner.Unpin() - - // Drop the channel reference. The channel is intentionally not closed: - // consumers signal end-of-stream via the System's context, so a close - // would add no information and would only complicate tear-down. - computeSystem.migrationNotifyCh = nil -} - -// StartWithMigrationOptions synchronously starts the compute system as a live -// migration destination using the provided configuration. -func (computeSystem *System) StartWithMigrationOptions(ctx context.Context, config *MigrationConfig) (err error) { - if config == nil { - return errors.New("live migration config must not be nil") - } - - operation := "hcs::System::Start" - - computeSystem.handleLock.Lock() - defer computeSystem.handleLock.Unlock() - - if computeSystem.handle == 0 { - return makeSystemError(computeSystem, operation, ErrAlreadyClosed, nil) - } - - // Open the migration handle for LM events and operations. - if err := computeSystem.openMigrationHandle(ctx); err != nil { - return makeSystemError(computeSystem, operation, err, nil) - } - defer func() { - if err != nil { - computeSystem.closeMigrationHandle(ctx) - } - }() - - // Create a computecore operation to track the start request. - op, err := computecore.HcsCreateOperation(ctx, 0, 0) - if err != nil { - return makeSystemError(computeSystem, operation, err, nil) - } - defer computecore.HcsCloseOperation(ctx, op) - - // Attach the live migration socket to the operation. - if err := computecore.HcsAddResourceToOperation(ctx, op, computecore.HcsResourceTypeSocket, resourcepaths.LiveMigrationSocketURI, config.Socket); err != nil { - return makeSystemError(computeSystem, operation, err, nil) - } - - // Build start options with destination migration settings. - options := hcsschema.StartOptions{ - DestinationMigrationOptions: &hcsschema.MigrationStartOptions{ - NetworkSettings: &hcsschema.MigrationNetworkSettings{SessionID: config.SessionID}, - }, - } - raw, err := json.Marshal(options) - if err != nil { - return makeSystemError(computeSystem, operation, err, nil) - } - - return computeSystem.startV2(ctx, op, string(raw)) -} - -// InitializeLiveMigrationOnSource initializes a live migration on the source side with the given options. -func (computeSystem *System) InitializeLiveMigrationOnSource(ctx context.Context, options *hcsschema.MigrationInitializeOptions) (err error) { - operation := "hcs::System::InitializeLiveMigrationOnSource" - - ctx, span := oc.StartSpan(ctx, operation) - defer span.End() - defer func() { oc.SetSpanStatus(span, err) }() - span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) - - computeSystem.handleLock.Lock() - defer computeSystem.handleLock.Unlock() - - // Open the migration handle for LM events and operations. - if err = computeSystem.openMigrationHandle(ctx); err != nil { - return makeSystemError(computeSystem, operation, err, nil) - } - defer func() { - if err != nil { - computeSystem.closeMigrationHandle(ctx) - } - }() - - if options == nil { - options = &hcsschema.MigrationInitializeOptions{} - } - optionsJSON, err := json.Marshal(options) - if err != nil { - return makeSystemError(computeSystem, operation, err, nil) - } - - op, err := computecore.HcsCreateOperation(ctx, 0, 0) - if err != nil { - return makeSystemError(computeSystem, operation, err, nil) - } - defer computecore.HcsCloseOperation(ctx, op) - - // Issue the initialize call and wait for completion. - if err = computecore.HcsInitializeLiveMigrationOnSource(ctx, computeSystem.migrationHandle, op, string(optionsJSON)); err != nil { - return makeSystemError(computeSystem, operation, err, nil) - } - if _, err = computecore.HcsWaitForOperationResult(ctx, op, 0xFFFFFFFF); err != nil { - return makeSystemError(computeSystem, operation, err, nil) - } - return nil -} - -// StartLiveMigrationOnSource starts the live migration on the source side using the provided -// transport socket and session ID. -func (computeSystem *System) StartLiveMigrationOnSource(ctx context.Context, config *MigrationConfig) (err error) { - if config == nil { - return errors.New("migration config must not be nil") - } - - operation := "hcs::System::StartLiveMigrationOnSource" - - ctx, span := oc.StartSpan(ctx, operation) - defer span.End() - defer func() { oc.SetSpanStatus(span, err) }() - span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) - - computeSystem.handleLock.Lock() - defer computeSystem.handleLock.Unlock() - - if computeSystem.migrationHandle == 0 { - return makeSystemError(computeSystem, operation, ErrAlreadyClosed, nil) - } - - op, err := computecore.HcsCreateOperation(ctx, 0, 0) - if err != nil { - return makeSystemError(computeSystem, operation, err, nil) - } - defer computecore.HcsCloseOperation(ctx, op) - - // Attach the migration socket to the operation before starting. - if err := computecore.HcsAddResourceToOperation(ctx, op, computecore.HcsResourceTypeSocket, resourcepaths.LiveMigrationSocketURI, config.Socket); err != nil { - return makeSystemError(computeSystem, operation, err, nil) - } - - options := hcsschema.MigrationStartOptions{ - NetworkSettings: &hcsschema.MigrationNetworkSettings{SessionID: config.SessionID}, - } - optionsJSON, err := json.Marshal(options) - if err != nil { - return makeSystemError(computeSystem, operation, err, nil) - } - - // Issue the start call and wait for completion. - if err := computecore.HcsStartLiveMigrationOnSource(ctx, computeSystem.migrationHandle, op, string(optionsJSON)); err != nil { - return makeSystemError(computeSystem, operation, err, nil) - } - if _, err := computecore.HcsWaitForOperationResult(ctx, op, 0xFFFFFFFF); err != nil { - return makeSystemError(computeSystem, operation, err, nil) - } - return nil -} - -// StartLiveMigrationTransfer starts the memory transfer phase of a live migration. -func (computeSystem *System) StartLiveMigrationTransfer(ctx context.Context, options *hcsschema.MigrationTransferOptions) (err error) { - operation := "hcs::System::StartLiveMigrationTransfer" - - ctx, span := oc.StartSpan(ctx, operation) - defer span.End() - defer func() { oc.SetSpanStatus(span, err) }() - span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) - - computeSystem.handleLock.Lock() - defer computeSystem.handleLock.Unlock() - - if computeSystem.migrationHandle == 0 { - return makeSystemError(computeSystem, operation, ErrAlreadyClosed, nil) - } - - if options == nil { - options = &hcsschema.MigrationTransferOptions{} - } - optionsJSON, err := json.Marshal(options) - if err != nil { - return makeSystemError(computeSystem, operation, err, nil) - } - - op, err := computecore.HcsCreateOperation(ctx, 0, 0) - if err != nil { - return makeSystemError(computeSystem, operation, err, nil) - } - defer computecore.HcsCloseOperation(ctx, op) - - // Begin the memory transfer and wait for completion. - if err := computecore.HcsStartLiveMigrationTransfer(ctx, computeSystem.migrationHandle, op, string(optionsJSON)); err != nil { - return makeSystemError(computeSystem, operation, err, nil) - } - if _, err := computecore.HcsWaitForOperationResult(ctx, op, 0xFFFFFFFF); err != nil { - return makeSystemError(computeSystem, operation, err, nil) - } - return nil -} - -// FinalizeLiveMigration completes the live migration workflow. If resume is true the VM -// is resumed on the destination; otherwise it is stopped. -func (computeSystem *System) FinalizeLiveMigration(ctx context.Context, resume bool) (err error) { - operation := "hcs::System::FinalizeLiveMigration" - - ctx, span := oc.StartSpan(ctx, operation) - defer span.End() - defer func() { oc.SetSpanStatus(span, err) }() - span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) - - computeSystem.handleLock.Lock() - defer computeSystem.handleLock.Unlock() - - if computeSystem.migrationHandle == 0 { - return makeSystemError(computeSystem, operation, ErrAlreadyClosed, nil) - } - - // Choose whether to resume or stop the VM after migration. - finalOp := hcsschema.MigrationFinalOperationStop - if resume { - finalOp = hcsschema.MigrationFinalOperationResume - } - optionsJSON, err := json.Marshal(hcsschema.MigrationFinalizedOptions{FinalizedOperation: finalOp}) - if err != nil { - return makeSystemError(computeSystem, operation, err, nil) - } - - op, err := computecore.HcsCreateOperation(ctx, 0, 0) - if err != nil { - return makeSystemError(computeSystem, operation, err, nil) - } - defer computecore.HcsCloseOperation(ctx, op) - - // Finalize the migration and wait for completion. - if err := computecore.HcsFinalizeLiveMigration(ctx, computeSystem.migrationHandle, op, string(optionsJSON)); err != nil { - return makeSystemError(computeSystem, operation, err, nil) - } - if _, err := computecore.HcsWaitForOperationResult(ctx, op, 0xFFFFFFFF); err != nil { - return makeSystemError(computeSystem, operation, err, nil) - } - - // Migration is complete — release the migration handle and callback. - computeSystem.closeMigrationHandle(ctx) - return nil -} - -// MigrationNotifications returns a read-only channel that receives live migration -// event payloads. Returns an error if no migration handle is open. -func (computeSystem *System) MigrationNotifications() (<-chan hcsschema.OperationSystemMigrationNotificationInfo, error) { - computeSystem.handleLock.RLock() - defer computeSystem.handleLock.RUnlock() - - if computeSystem.migrationHandle == 0 { - return nil, errors.New("migration handle not open; call StartWithMigrationOptions or InitializeLiveMigrationOnSource first") - } - return computeSystem.migrationNotifyCh, nil -} diff --git a/internal/hcs/system.go b/internal/hcs/system.go index 49e9d785fb..5a434a6a9c 100644 --- a/internal/hcs/system.go +++ b/internal/hcs/system.go @@ -7,13 +7,11 @@ import ( "encoding/json" "errors" "fmt" - "runtime" "strings" "sync" "syscall" "time" - "github.com/Microsoft/hcsshim/internal/computecore" "github.com/Microsoft/hcsshim/internal/cow" "github.com/Microsoft/hcsshim/internal/hcs/schema1" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" @@ -40,15 +38,6 @@ type System struct { os, typ, owner string startTime time.Time stopTime time.Time - - // Live Migration specific fields. - migrationHandle computecore.HcsSystem - migrationNotifyCh chan hcsschema.OperationSystemMigrationNotificationInfo - // migrationPinner pins &migrationNotifyCh while it is registered as the - // callback context with HCS, so the GC sees the cgo-held uintptr as a - // live reference. Unpinned in closeMigrationHandle after HCS guarantees - // no further callbacks will fire. - migrationPinner runtime.Pinner } var _ cow.Container = &System{} @@ -232,40 +221,6 @@ func (computeSystem *System) Start(ctx context.Context) (err error) { return nil } -// startV2 is the implementation used by StartWithMigrationOptions to start the compute system -// using HCS V2 APIs. -// The caller provides a pre-created computecore operation (with any resources already -// attached) and the JSON-encoded options string to pass to HcsStartComputeSystem. -// -// The caller MUST hold computeSystem.handleLock and verify the handle is valid -// before calling this method. -func (computeSystem *System) startV2(ctx context.Context, op computecore.HcsOperation, opts string) (err error) { - operation := "hcs::System::Start" - - // hcsStartComputeSystemContext is an async operation. Start the outer span - // here to measure the full start time. - ctx, span := oc.StartSpan(ctx, operation) - defer span.End() - defer func() { oc.SetSpanStatus(span, err) }() - span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) - - if err := computecore.HcsStartComputeSystem( - ctx, - computecore.HcsSystem(computeSystem.handle), - op, - opts, - ); err != nil { - return makeSystemError(computeSystem, operation, err, nil) - } - - if _, err := computecore.HcsWaitForOperationResult(ctx, op, 0xFFFFFFFF); err != nil { - return makeSystemError(computeSystem, operation, err, nil) - } - - computeSystem.startTime = time.Now() - return nil -} - // ID returns the compute system's identifier. func (computeSystem *System) ID() string { return computeSystem.id @@ -929,9 +884,6 @@ func (computeSystem *System) CloseCtx(ctx context.Context) (err error) { close(computeSystem.waitBlock) }) - // Clean up migration handle if it was opened. - computeSystem.closeMigrationHandle(ctx) - return nil } diff --git a/internal/hcs/v2/doc.go b/internal/hcs/v2/doc.go new file mode 100644 index 0000000000..dd988e19f4 --- /dev/null +++ b/internal/hcs/v2/doc.go @@ -0,0 +1 @@ +package hcsv2 diff --git a/internal/hcs/v2/errors.go b/internal/hcs/v2/errors.go new file mode 100644 index 0000000000..34ffc6fe32 --- /dev/null +++ b/internal/hcs/v2/errors.go @@ -0,0 +1,369 @@ +//go:build windows + +package hcsv2 + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "net" + "syscall" + + "github.com/Microsoft/hcsshim/internal/log" +) + +var ( + // ErrComputeSystemDoesNotExist is an error encountered when the container being operated on no longer exists + ErrComputeSystemDoesNotExist = syscall.Errno(0xc037010e) + + // ErrElementNotFound is an error encountered when the object being referenced does not exist + ErrElementNotFound = syscall.Errno(0x490) + + // ErrElementNotFound is an error encountered when the object being referenced does not exist + ErrNotSupported = syscall.Errno(0x32) + + // ErrInvalidData is an error encountered when the request being sent to hcs is invalid/unsupported + // decimal -2147024883 / hex 0x8007000d + ErrInvalidData = syscall.Errno(0xd) + + // ErrHandleClose is an error encountered when the handle generating the notification being waited on has been closed + ErrHandleClose = errors.New("hcsshim: the handle generating this notification has been closed") + + // ErrAlreadyClosed is an error encountered when using a handle that has been closed by the Close method + ErrAlreadyClosed = errors.New("hcsshim: the handle has already been closed") + + // ErrInvalidNotificationType is an error encountered when an invalid notification type is used + ErrInvalidNotificationType = errors.New("hcsshim: invalid notification type") + + // ErrInvalidProcessState is an error encountered when the process is not in a valid state for the requested operation + ErrInvalidProcessState = errors.New("the process is in an invalid state for the attempted operation") + + // ErrTimeout is an error encountered when waiting on a notification times out + ErrTimeout = errors.New("hcsshim: timeout waiting for notification") + + // ErrUnexpectedContainerExit is the error encountered when a container exits while waiting for + // a different expected notification + ErrUnexpectedContainerExit = errors.New("unexpected container exit") + + // ErrUnexpectedProcessAbort is the error encountered when communication with the compute service + // is lost while waiting for a notification + ErrUnexpectedProcessAbort = errors.New("lost communication with compute service") + + // ErrUnexpectedValue is an error encountered when hcs returns an invalid value + ErrUnexpectedValue = errors.New("unexpected value returned from hcs") + + // ErrOperationDenied is an error when hcs attempts an operation that is explicitly denied + ErrOperationDenied = errors.New("operation denied") + + // ErrVmcomputeAlreadyStopped is an error encountered when a shutdown or terminate request is made on a stopped container + ErrVmcomputeAlreadyStopped = syscall.Errno(0xc0370110) + + // ErrVmcomputeOperationPending is an error encountered when the operation is being completed asynchronously + ErrVmcomputeOperationPending = syscall.Errno(0xC0370103) + + // ErrVmcomputeOperationInvalidState is an error encountered when the compute system is not in a valid state for the requested operation + ErrVmcomputeOperationInvalidState = syscall.Errno(0xc0370105) + + // ErrProcNotFound is an error encountered when a procedure look up fails. + ErrProcNotFound = syscall.Errno(0x7f) + + // ErrVmcomputeOperationAccessIsDenied is an error which can be encountered when enumerating compute systems in RS1/RS2 + // builds when the underlying silo might be in the process of terminating. HCS was fixed in RS3. + ErrVmcomputeOperationAccessIsDenied = syscall.Errno(0x5) + + // ErrVmcomputeInvalidJSON is an error encountered when the compute system does not support/understand the messages sent by management + ErrVmcomputeInvalidJSON = syscall.Errno(0xc037010d) + + // ErrVmcomputeUnknownMessage is an error encountered guest compute system doesn't support the message + ErrVmcomputeUnknownMessage = syscall.Errno(0xc037010b) + + // ErrVmcomputeUnexpectedExit is an error encountered when the compute system terminates unexpectedly + ErrVmcomputeUnexpectedExit = syscall.Errno(0xC0370106) + + // ErrNotSupported is an error encountered when hcs doesn't support the request + ErrPlatformNotSupported = errors.New("unsupported platform request") + + // ErrProcessAlreadyStopped is returned by hcs if the process we're trying to kill has already been stopped. + ErrProcessAlreadyStopped = syscall.Errno(0x8037011f) + + // ErrInvalidHandle is an error that can be encountered when querying the properties of a compute system when the handle to that + // compute system has already been closed. + ErrInvalidHandle = syscall.Errno(0x6) +) + +type ErrorEvent struct { + Message string `json:"Message,omitempty"` // Fully formated error message + StackTrace string `json:"StackTrace,omitempty"` // Stack trace in string form + Provider string `json:"Provider,omitempty"` + EventID uint16 `json:"EventId,omitempty"` + Flags uint32 `json:"Flags,omitempty"` + Source string `json:"Source,omitempty"` + //Data []EventData `json:"Data,omitempty"` // Omit this as HCS doesn't encode this well. It's more confusing to include. +} + +// hcsResult mirrors the HCS [ResultError] document and implements [error] +// so callers can recover it via [errors.As]. The unexported cause keeps +// [errors.Is] working against the underlying syscall errno. +// +// [ResultError]: https://learn.microsoft.com/en-us/virtualization/api/hcs/schemareference#ResultError +type hcsResult struct { + // ErrorCode mirrors the "Error" JSON field; renamed to avoid colliding + // with the [hcsResult.Error] method. + ErrorCode int32 `json:"Error,omitempty"` + ErrorMessage string `json:"ErrorMessage,omitempty"` + ErrorEvents []ErrorEvent `json:"ErrorEvents,omitempty"` + + cause error +} + +func (r *hcsResult) Error() string { + if r.cause != nil { + return r.cause.Error() + } + if r.ErrorMessage != "" { + return r.ErrorMessage + } + return fmt.Sprintf("hcs result: 0x%08x", uint32(r.ErrorCode)) +} + +func (r *hcsResult) Unwrap() error { return r.cause } + +func (ev *ErrorEvent) String() string { + evs := "[Event Detail: " + ev.Message + if ev.StackTrace != "" { + evs += " Stack Trace: " + ev.StackTrace + } + if ev.Provider != "" { + evs += " Provider: " + ev.Provider + } + if ev.EventID != 0 { + evs = fmt.Sprintf("%s EventID: %d", evs, ev.EventID) + } + if ev.Flags != 0 { + evs = fmt.Sprintf("%s flags: %d", evs, ev.Flags) + } + if ev.Source != "" { + evs += " Source: " + ev.Source + } + evs += "]" + return evs +} + +// wrapHcsResult attaches the parsed HCS ResultError document to err so +// callers can recover it via `errors.As(err, new(*hcsResult))`. Returns +// err unchanged when the document is empty or unparseable. +func wrapHcsResult(ctx context.Context, err error, resultJSON string) error { + if err == nil || resultJSON == "" { + return err + } + r := &hcsResult{cause: err} + if jerr := json.Unmarshal([]byte(resultJSON), r); jerr != nil { + log.G(ctx).WithError(jerr).Warning("Could not unmarshal HCS result") + return err + } + return r +} + +// eventsFromError returns the ErrorEvents from any wrapped HCS result in err. +func eventsFromError(err error) []ErrorEvent { + var r *hcsResult + if errors.As(err, &r) { + return r.ErrorEvents + } + return nil +} + +type HcsError struct { + Op string + Err error + Events []ErrorEvent +} + +var _ net.Error = &HcsError{} + +func (e *HcsError) Error() string { + s := e.Op + ": " + e.Err.Error() + for _, ev := range e.Events { + s += "\n" + ev.String() + } + return s +} + +func (e *HcsError) Is(target error) bool { + return errors.Is(e.Err, target) +} + +// unwrap isnt really needed, but helpful convince function + +func (e *HcsError) Unwrap() error { + return e.Err +} + +// Deprecated: net.Error.Temporary is deprecated. +func (e *HcsError) Temporary() bool { + err := e.netError() + return (err != nil) && err.Temporary() +} + +func (e *HcsError) Timeout() bool { + err := e.netError() + return (err != nil) && err.Timeout() +} + +func (e *HcsError) netError() (err net.Error) { + if errors.As(e.Unwrap(), &err) { + return err + } + return nil +} + +// SystemError is an error encountered in HCS during an operation on a Container object +type SystemError struct { + HcsError + ID string +} + +var _ net.Error = &SystemError{} + +func (e *SystemError) Error() string { + s := e.Op + " " + e.ID + ": " + e.Err.Error() + for _, ev := range e.Events { + s += "\n" + ev.String() + } + return s +} + +func makeSystemError(system *System, op string, err error) error { + // Don't double wrap errors + var e *SystemError + if errors.As(err, &e) { + return err + } + + return &SystemError{ + ID: system.ID(), + HcsError: HcsError{ + Op: op, + Err: err, + Events: eventsFromError(err), + }, + } +} + +// ProcessError is an error encountered in HCS during an operation on a Process object +type ProcessError struct { + HcsError + SystemID string + Pid int +} + +var _ net.Error = &ProcessError{} + +func (e *ProcessError) Error() string { + s := fmt.Sprintf("%s %s:%d: %s", e.Op, e.SystemID, e.Pid, e.Err.Error()) + for _, ev := range e.Events { + s += "\n" + ev.String() + } + return s +} + +func makeProcessError(process *Process, op string, err error) error { + // Don't double wrap errors + var e *ProcessError + if errors.As(err, &e) { + return err + } + return &ProcessError{ + Pid: process.Pid(), + SystemID: process.SystemID(), + HcsError: HcsError{ + Op: op, + Err: err, + Events: eventsFromError(err), + }, + } +} + +// IsNotExist checks if an error is caused by the Container or Process not existing. +// Note: Currently, ErrElementNotFound can mean that a Process has either +// already exited, or does not exist. Both IsAlreadyStopped and IsNotExist +// will currently return true when the error is ErrElementNotFound. +func IsNotExist(err error) bool { + return IsAny(err, ErrComputeSystemDoesNotExist, ErrElementNotFound) +} + +// IsErrorInvalidHandle checks whether the error is the result of an operation carried +// out on a handle that is invalid/closed. This error popped up while trying to query +// stats on a container in the process of being stopped. +func IsErrorInvalidHandle(err error) bool { + return errors.Is(err, ErrInvalidHandle) +} + +// IsAlreadyClosed checks if an error is caused by the Container or Process having been +// already closed by a call to the Close() method. +func IsAlreadyClosed(err error) bool { + return errors.Is(err, ErrAlreadyClosed) +} + +// IsPending returns a boolean indicating whether the error is that +// the requested operation is being completed in the background. +func IsPending(err error) bool { + return errors.Is(err, ErrVmcomputeOperationPending) +} + +// IsTimeout returns a boolean indicating whether the error is caused by +// a timeout waiting for the operation to complete. +func IsTimeout(err error) bool { + // HcsError and co. implement Timeout regardless of whether the errors they wrap do, + // so `errors.As(err, net.Error)`` will always be true. + // Using `errors.As(err.Unwrap(), net.Err)` wont work for general errors. + // So first check if there an `ErrTimeout` in the chain, then convert to a net error. + if errors.Is(err, ErrTimeout) { + return true + } + + var nerr net.Error + return errors.As(err, &nerr) && nerr.Timeout() +} + +// IsAlreadyStopped returns a boolean indicating whether the error is caused by +// a Container or Process being already stopped. +// Note: Currently, ErrElementNotFound can mean that a Process has either +// already exited, or does not exist. Both IsAlreadyStopped and IsNotExist +// will currently return true when the error is ErrElementNotFound. +func IsAlreadyStopped(err error) bool { + return IsAny(err, ErrVmcomputeAlreadyStopped, ErrProcessAlreadyStopped, ErrElementNotFound) +} + +// IsNotSupported returns a boolean indicating whether the error is caused by +// unsupported platform requests +// Note: Currently Unsupported platform requests can be mean either +// ErrVmcomputeInvalidJSON, ErrInvalidData, ErrNotSupported or ErrVmcomputeUnknownMessage +// is thrown from the Platform +func IsNotSupported(err error) bool { + // If Platform doesn't recognize or support the request sent, below errors are seen + return IsAny(err, ErrVmcomputeInvalidJSON, ErrInvalidData, ErrNotSupported, ErrVmcomputeUnknownMessage) +} + +// IsOperationInvalidState returns true when err is caused by +// `ErrVmcomputeOperationInvalidState`. +func IsOperationInvalidState(err error) bool { + return errors.Is(err, ErrVmcomputeOperationInvalidState) +} + +// IsAccessIsDenied returns true when err is caused by +// `ErrVmcomputeOperationAccessIsDenied`. +func IsAccessIsDenied(err error) bool { + return errors.Is(err, ErrVmcomputeOperationAccessIsDenied) +} + +// IsAny is a vectorized version of [errors.Is], it returns true if err is one of targets. +func IsAny(err error, targets ...error) bool { + for _, e := range targets { + if errors.Is(err, e) { + return true + } + } + return false +} diff --git a/internal/hcs/v2/errors_test.go b/internal/hcs/v2/errors_test.go new file mode 100644 index 0000000000..7ed2c44d37 --- /dev/null +++ b/internal/hcs/v2/errors_test.go @@ -0,0 +1,152 @@ +//go:build windows + +package hcsv2 + +import ( + "errors" + "fmt" + "net" + "testing" +) + +type MyError struct { + S string +} + +func (e *MyError) Error() string { + return fmt.Sprintf("error happened: %s", e.S) +} + +func TestHcsErrorUnwrap(t *testing.T) { + err := &MyError{"test test"} + herr := HcsError{ + Op: t.Name(), + Err: err, + } + + for _, nerr := range []net.Error{ + &herr, + &SystemError{ + ID: t.Name(), + HcsError: herr, + }, + &ProcessError{ + SystemID: t.Name(), + HcsError: herr, + }, + } { + t.Run(fmt.Sprintf("%T", nerr), func(t *testing.T) { + if !errors.Is(nerr, err) { + t.Errorf("error '%v' did not unwrap to %v", nerr, err) + } + + var e *MyError + if !errors.As(nerr, &e) || e.S != err.S { + t.Errorf("error '%v' did not unwrap '%v' properly", errors.Unwrap(nerr), e) + } + + if nerr.Timeout() { + t.Errorf("expected .Timeout() on '%v' to be false", nerr) + } + + //nolint:staticcheck // Temporary() is deprecated + if nerr.Temporary() { + t.Errorf("expected .Temporary() on '%v' to be false", nerr) + } + }) + } +} + +func TestHcsErrorUnwrapTimeout(t *testing.T) { + err := fmt.Errorf("error: %w", ErrTimeout) + herr := HcsError{ + Op: "test", + Err: err, + } + + for _, nerr := range []net.Error{ + &herr, + &SystemError{ + ID: t.Name(), + HcsError: herr, + }, + &ProcessError{ + SystemID: t.Name(), + HcsError: herr, + }, + } { + t.Run(fmt.Sprintf("%T", nerr), func(t *testing.T) { + if !errors.Is(nerr, ErrTimeout) { + t.Errorf("error '%v' did not unwrap to %v", nerr, ErrTimeout) + } + + if !errors.Is(nerr, err) { + t.Errorf("error '%v' did not unwrap to %v", nerr, err) + } + + if !IsTimeout(nerr) { + t.Errorf("expected error '%v' to be timeout", nerr) + } + + if nerr.Timeout() { + t.Errorf("expected .Timeout() on '%v' to be false", nerr) + } + + //nolint:staticcheck // Temporary() is deprecated + if nerr.Temporary() { + t.Errorf("expected .Temporary() on '%v' to be false", nerr) + } + }) + } +} + +var errNet = netError{} + +type netError struct{} + +func (e netError) Error() string { return "temporary timeout" } +func (e netError) Timeout() bool { return true } +func (e netError) Temporary() bool { return true } + +func TestHcsErrorUnwrapNet(t *testing.T) { + err := fmt.Errorf("error: %w", errNet) + herr := HcsError{ + Op: "test", + Err: err, + } + + for _, nerr := range []net.Error{ + &herr, + &SystemError{ + ID: t.Name(), + HcsError: herr, + }, + &ProcessError{ + SystemID: t.Name(), + HcsError: herr, + }, + } { + t.Run(fmt.Sprintf("%T", nerr), func(t *testing.T) { + if !errors.Is(nerr, errNet) { + t.Errorf("error '%v' did not unwrap to %v", nerr, errNet) + } + + if !errors.Is(nerr, err) { + t.Errorf("error '%v' did not unwrap to %v", nerr, err) + } + + if !IsTimeout(nerr) { + t.Errorf("expected error '%v' to be timeout", nerr) + } + + if !nerr.Timeout() { + t.Errorf("expected .Timeout() on '%v' to be true", nerr) + } + + //nolint:staticcheck // Temporary() is deprecated + if !nerr.Temporary() { + t.Errorf("expected .Temporary() on '%v' to be true", nerr) + } + }) + } +} diff --git a/internal/hcs/v2/migration.go b/internal/hcs/v2/migration.go new file mode 100644 index 0000000000..712f074539 --- /dev/null +++ b/internal/hcs/v2/migration.go @@ -0,0 +1,254 @@ +//go:build windows + +package hcsv2 + +import ( + "context" + "encoding/json" + "errors" + "syscall" + "time" + + "github.com/Microsoft/hcsshim/internal/computecore" + "github.com/Microsoft/hcsshim/internal/hcs/resourcepaths" + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/oc" + + "go.opencensus.io/trace" +) + +// MigrationConfig holds parameters for starting a compute system as a live migration +// destination, or for initiating the source side of a live migration. +type MigrationConfig struct { + // Socket is the handle to the live migration transport socket. + Socket syscall.Handle + // SessionID identifies the migration session. + SessionID uint32 +} + +// StartWithMigrationOptions synchronously starts the compute system as a live +// migration destination using the provided configuration. +func (computeSystem *System) StartWithMigrationOptions(ctx context.Context, config *MigrationConfig) (err error) { + if config == nil { + return errors.New("live migration config must not be nil") + } + + operation := "hcs::System::Start" + + ctx, span := oc.StartSpan(ctx, operation) + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) + + computeSystem.handleLock.Lock() + defer computeSystem.handleLock.Unlock() + + if computeSystem.handle == 0 { + return makeSystemError(computeSystem, operation, ErrAlreadyClosed) + } + + opts, err := json.Marshal(hcsschema.StartOptions{ + DestinationMigrationOptions: &hcsschema.MigrationStartOptions{ + NetworkSettings: &hcsschema.MigrationNetworkSettings{SessionID: config.SessionID}, + }, + }) + if err != nil { + return makeSystemError(computeSystem, operation, err) + } + + _, callErr := runOperation(ctx, func(op computecore.HcsOperation) error { + if err := computecore.HcsAddResourceToOperation(ctx, op, computecore.HcsResourceTypeSocket, resourcepaths.LiveMigrationSocketURI, config.Socket); err != nil { + return err + } + return computecore.HcsStartComputeSystem(ctx, computeSystem.handle, op, string(opts)) + }) + if callErr != nil { + return makeSystemError(computeSystem, operation, callErr) + } + computeSystem.startTime = time.Now() + return nil +} + +// InitializeLiveMigrationOnSource prepares the source compute system for a +// live migration. Must be called on the source before StartLiveMigrationOnSource. +func (computeSystem *System) InitializeLiveMigrationOnSource(ctx context.Context, options *hcsschema.MigrationInitializeOptions) (err error) { + operation := "hcs::System::InitializeLiveMigrationOnSource" + + ctx, span := oc.StartSpan(ctx, operation) + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) + + computeSystem.handleLock.Lock() + defer computeSystem.handleLock.Unlock() + + if computeSystem.handle == 0 { + return makeSystemError(computeSystem, operation, ErrAlreadyClosed) + } + + if options == nil { + options = &hcsschema.MigrationInitializeOptions{} + } + optionsJSON, err := json.Marshal(options) + if err != nil { + return makeSystemError(computeSystem, operation, err) + } + + op, err := computecore.HcsCreateOperation(ctx, 0, 0) + if err != nil { + return makeSystemError(computeSystem, operation, err) + } + defer computecore.HcsCloseOperation(ctx, op) + + // Issue the initialize call and wait for completion. + if err = computecore.HcsInitializeLiveMigrationOnSource(ctx, computeSystem.handle, op, string(optionsJSON)); err != nil { + return makeSystemError(computeSystem, operation, err) + } + if _, err = computecore.HcsWaitForOperationResult(ctx, op, 0xFFFFFFFF); err != nil { + return makeSystemError(computeSystem, operation, err) + } + return nil +} + +// StartLiveMigrationOnSource begins the source-side migration using the given +// transport socket and session ID. Blocks until HCS accepts the start; +// transfer progress is observed via MigrationNotifications. +func (computeSystem *System) StartLiveMigrationOnSource(ctx context.Context, config *MigrationConfig) (err error) { + if config == nil { + return errors.New("migration config must not be nil") + } + + operation := "hcs::System::StartLiveMigrationOnSource" + + ctx, span := oc.StartSpan(ctx, operation) + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) + + computeSystem.handleLock.Lock() + defer computeSystem.handleLock.Unlock() + + if computeSystem.handle == 0 { + return makeSystemError(computeSystem, operation, ErrAlreadyClosed) + } + + op, err := computecore.HcsCreateOperation(ctx, 0, 0) + if err != nil { + return makeSystemError(computeSystem, operation, err) + } + defer computecore.HcsCloseOperation(ctx, op) + + // Attach the migration socket to the operation before starting. + if err := computecore.HcsAddResourceToOperation(ctx, op, computecore.HcsResourceTypeSocket, resourcepaths.LiveMigrationSocketURI, config.Socket); err != nil { + return makeSystemError(computeSystem, operation, err) + } + + options := hcsschema.MigrationStartOptions{ + NetworkSettings: &hcsschema.MigrationNetworkSettings{SessionID: config.SessionID}, + } + optionsJSON, err := json.Marshal(options) + if err != nil { + return makeSystemError(computeSystem, operation, err) + } + + // Issue the start call and wait for completion. + if err := computecore.HcsStartLiveMigrationOnSource(ctx, computeSystem.handle, op, string(optionsJSON)); err != nil { + return makeSystemError(computeSystem, operation, err) + } + if _, err := computecore.HcsWaitForOperationResult(ctx, op, 0xFFFFFFFF); err != nil { + return makeSystemError(computeSystem, operation, err) + } + return nil +} + +// StartLiveMigrationTransfer starts the memory transfer phase of a live migration. +func (computeSystem *System) StartLiveMigrationTransfer(ctx context.Context, options *hcsschema.MigrationTransferOptions) (err error) { + operation := "hcs::System::StartLiveMigrationTransfer" + + ctx, span := oc.StartSpan(ctx, operation) + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) + + computeSystem.handleLock.Lock() + defer computeSystem.handleLock.Unlock() + + if computeSystem.handle == 0 { + return makeSystemError(computeSystem, operation, ErrAlreadyClosed) + } + + if options == nil { + options = &hcsschema.MigrationTransferOptions{} + } + optionsJSON, err := json.Marshal(options) + if err != nil { + return makeSystemError(computeSystem, operation, err) + } + + op, err := computecore.HcsCreateOperation(ctx, 0, 0) + if err != nil { + return makeSystemError(computeSystem, operation, err) + } + defer computecore.HcsCloseOperation(ctx, op) + + // Begin the memory transfer and wait for completion. + if err := computecore.HcsStartLiveMigrationTransfer(ctx, computeSystem.handle, op, string(optionsJSON)); err != nil { + return makeSystemError(computeSystem, operation, err) + } + if _, err := computecore.HcsWaitForOperationResult(ctx, op, 0xFFFFFFFF); err != nil { + return makeSystemError(computeSystem, operation, err) + } + return nil +} + +// FinalizeLiveMigration completes the live migration workflow. +func (computeSystem *System) FinalizeLiveMigration(ctx context.Context, opts *hcsschema.MigrationFinalizedOptions) (err error) { + operation := "hcs::System::FinalizeLiveMigration" + + ctx, span := oc.StartSpan(ctx, operation) + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) + + computeSystem.handleLock.Lock() + defer computeSystem.handleLock.Unlock() + + if computeSystem.handle == 0 { + return makeSystemError(computeSystem, operation, ErrAlreadyClosed) + } + + if opts == nil { + opts = &hcsschema.MigrationFinalizedOptions{} + } + optionsJSON, err := json.Marshal(opts) + if err != nil { + return makeSystemError(computeSystem, operation, err) + } + + op, err := computecore.HcsCreateOperation(ctx, 0, 0) + if err != nil { + return makeSystemError(computeSystem, operation, err) + } + defer computecore.HcsCloseOperation(ctx, op) + + // Finalize the migration and wait for completion. + if err := computecore.HcsFinalizeLiveMigration(ctx, computeSystem.handle, op, string(optionsJSON)); err != nil { + return makeSystemError(computeSystem, operation, err) + } + if _, err := computecore.HcsWaitForOperationResult(ctx, op, 0xFFFFFFFF); err != nil { + return makeSystemError(computeSystem, operation, err) + } + return nil +} + +// MigrationNotifications returns a read-only channel of live migration events +// for this System. The channel exists for the System's lifetime and is safe +// to subscribe to before any migration call, so callers do not miss early +// events such as SetupDone. +// +// The channel is never closed; callers signal end-of-stream via their own +// context. Sends are non-blocking (buffer size migrationNotificationBufferSize) +// and events are dropped on overflow, so consumers must drain promptly. +func (computeSystem *System) MigrationNotifications() <-chan hcsschema.OperationSystemMigrationNotificationInfo { + return computeSystem.migrationNotifyCh +} diff --git a/internal/hcs/migration_test.go b/internal/hcs/v2/migration_test.go similarity index 62% rename from internal/hcs/migration_test.go rename to internal/hcs/v2/migration_test.go index b5061e04be..4dbb2c855b 100644 --- a/internal/hcs/migration_test.go +++ b/internal/hcs/v2/migration_test.go @@ -1,6 +1,6 @@ //go:build windows -package hcs +package hcsv2 import ( "encoding/json" @@ -17,19 +17,28 @@ import ( // ───────────────────────────────────────────────────────────────────────────── // Test helpers // -// The handler under test reads its arguments as raw uintptrs that originate -// outside the Go heap (HCS hands them to us via a syscall callback). To -// faithfully exercise that contract — and the cgo pointer-passing rules it -// implies — the helpers below allocate the HcsEvent, the UTF-16 EventData -// buffer, and the channel context out of process heap memory via LocalAlloc. -// All allocations are bound to the test's lifetime through t.Cleanup, so the -// individual tests stay free of teardown bookkeeping. +// notificationHandler has the signature (event, ctx uintptr), matching the +// raw HCS syscall callback. The two arguments are built very differently in +// tests: +// +// 1. event — a pointer to an HcsEvent struct that, in production, lives in +// memory HCS allocated outside the Go heap. To honor the cgo rule that +// such pointers must not refer to Go-managed memory, allocCEvent +// allocates the HcsEvent (and any UTF-16 EventData buffer) with +// LocalAlloc rather than using a Go &HcsEvent{}. +// +// 2. ctx — not a pointer at all, but an opaque integer key into the +// package-level notificationContexts map. Real Go pointers can't be +// handed to HCS across the callback boundary, so each registration +// stores its state (channel, etc.) in that map and gives HCS only the +// ID. Tests use registerSystemCtx to insert an entry pointing at their +// channel and pass the returned ID straight into notificationHandler. // ───────────────────────────────────────────────────────────────────────────── -// allocCEvent returns a uintptr to a LocalAlloc'd HcsEvent. If payload is -// non-empty it is encoded as UTF-16 into a second LocalAlloc'd buffer and -// wired up as EventData; otherwise EventData is left nil. -func allocCEvent(t *testing.T, payload string) uintptr { +// allocCEvent returns a uintptr to a LocalAlloc'd HcsEvent of the given type. +// If payload is non-empty it is encoded as UTF-16 into a second LocalAlloc'd +// buffer and wired up as EventData; otherwise EventData is left nil. +func allocCEvent(t *testing.T, eventType computecore.HcsEventType, payload string) uintptr { t.Helper() evtAddr, err := windows.LocalAlloc(windows.LPTR, uint32(unsafe.Sizeof(computecore.HcsEvent{}))) @@ -39,7 +48,7 @@ func allocCEvent(t *testing.T, payload string) uintptr { t.Cleanup(func() { _, _ = windows.LocalFree(windows.Handle(evtAddr)) }) e := (*computecore.HcsEvent)(unsafe.Pointer(evtAddr)) - e.Type = computecore.HcsEventTypeGroupLiveMigration + e.Type = eventType if payload == "" { return evtAddr @@ -63,19 +72,14 @@ func allocCEvent(t *testing.T, payload string) uintptr { return evtAddr } -// allocCChanCtx stores ch in a LocalAlloc'd buffer and returns its address, -// so the handler reads the chan header out of C memory rather than the Go heap -// (matching how HCS delivers the registered callback context). -func allocCChanCtx(t *testing.T, ch chan hcsschema.OperationSystemMigrationNotificationInfo) uintptr { +// registerSystemCtx registers a fresh system-style notificationContext that +// forwards GroupLiveMigration events to ch and returns its lookup ID as a +// uintptr ready to pass to notificationHandler. Cleanup is registered on t. +func registerSystemCtx(t *testing.T, ch chan hcsschema.OperationSystemMigrationNotificationInfo) uintptr { t.Helper() - addr, err := windows.LocalAlloc(windows.LPTR, uint32(unsafe.Sizeof(ch))) - if err != nil { - t.Fatalf("LocalAlloc(ctx): %v", err) - } - t.Cleanup(func() { _, _ = windows.LocalFree(windows.Handle(addr)) }) - - *(*chan hcsschema.OperationSystemMigrationNotificationInfo)(unsafe.Pointer(addr)) = ch - return addr + id := registerNotificationContext("test-system", 0, nil, ch) + t.Cleanup(func() { unregisterNotificationContext(id) }) + return uintptr(id) } // expectNotification fails the test unless want is the next queued value on ch. @@ -104,26 +108,30 @@ func expectNoNotification(t *testing.T, ch <-chan hcsschema.OperationSystemMigra } // ───────────────────────────────────────────────────────────────────────────── -// Nil-argument guards +// Nil / unknown context guards // ───────────────────────────────────────────────────────────────────────────── -// TestMigrationCallbackHandler_NilArgs verifies that the handler is a no-op -// (returns 0, sends nothing on the channel) when either argument is zero. -func TestMigrationCallbackHandler_NilArgs(t *testing.T) { +// TestNotificationHandler_LM_NilOrUnknownArgs verifies that the handler is a +// no-op (returns 0, sends nothing on the channel) when the event pointer is +// zero or the context ID does not resolve to a registered entry. +func TestNotificationHandler_LM_NilOrUnknownArgs(t *testing.T) { ch := make(chan hcsschema.OperationSystemMigrationNotificationInfo, 1) + ctx := registerSystemCtx(t, ch) cases := []struct { name string event, ctx uintptr }{ {"BothZero", 0, 0}, - {"EventZero", 0, allocCChanCtx(t, ch)}, - {"CtxZero", allocCEvent(t, ""), 0}, + {"EventZero", 0, ctx}, + // A non-zero but never-registered ID must miss the lookup + // silently rather than dispatch or panic. + {"UnknownCtx", allocCEvent(t, computecore.HcsEventTypeGroupLiveMigration, `{"Event":"SetupDone"}`), ^uintptr(0)}, } for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { - if ret := migrationCallbackHandler(tc.event, tc.ctx); ret != 0 { + if ret := notificationHandler(tc.event, tc.ctx); ret != 0 { t.Fatalf("expected 0, got %d", ret) } }) @@ -135,10 +143,10 @@ func TestMigrationCallbackHandler_NilArgs(t *testing.T) { // Payload decoding // ───────────────────────────────────────────────────────────────────────────── -// TestMigrationCallbackHandler_Payloads verifies that real-world HCS +// TestNotificationHandler_LM_Payloads verifies that real-world HCS // GroupLiveMigration JSON payloads — including a nil EventData pointer — are // decoded and forwarded on the notification channel. -func TestMigrationCallbackHandler_Payloads(t *testing.T) { +func TestNotificationHandler_LM_Payloads(t *testing.T) { cases := []struct { name string payload string @@ -201,10 +209,10 @@ func TestMigrationCallbackHandler_Payloads(t *testing.T) { for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { ch := make(chan hcsschema.OperationSystemMigrationNotificationInfo, 1) - evt := allocCEvent(t, tc.payload) - ctx := allocCChanCtx(t, ch) + ctx := registerSystemCtx(t, ch) + evt := allocCEvent(t, computecore.HcsEventTypeGroupLiveMigration, tc.payload) - if ret := migrationCallbackHandler(evt, ctx); ret != 0 { + if ret := notificationHandler(evt, ctx); ret != 0 { t.Fatalf("expected 0, got %d", ret) } expectNotification(t, ch, tc.want) @@ -212,30 +220,31 @@ func TestMigrationCallbackHandler_Payloads(t *testing.T) { } } -// TestMigrationCallbackHandler_InvalidJSONDropped verifies that an -// unparseable EventData payload is logged and dropped without sending. -func TestMigrationCallbackHandler_InvalidJSONDropped(t *testing.T) { +// TestNotificationHandler_LM_InvalidJSONDropped verifies that an unparseable +// EventData payload is logged and dropped without sending. +func TestNotificationHandler_LM_InvalidJSONDropped(t *testing.T) { ch := make(chan hcsschema.OperationSystemMigrationNotificationInfo, 1) - evt := allocCEvent(t, "not-json") - ctx := allocCChanCtx(t, ch) + ctx := registerSystemCtx(t, ch) + evt := allocCEvent(t, computecore.HcsEventTypeGroupLiveMigration, "not-json") - if ret := migrationCallbackHandler(evt, ctx); ret != 0 { + if ret := notificationHandler(evt, ctx); ret != 0 { t.Fatalf("expected 0, got %d", ret) } expectNoNotification(t, ch) } -// TestMigrationCallbackHandler_AdditionalDetailsDecodes verifies that the -// raw JSON captured in AdditionalDetails for a BlackoutExited event can be -// decoded by the consumer into the concrete BlackoutExitedEventDetails struct. -// This is the contract that motivates modeling AdditionalDetails as +// TestNotificationHandler_LM_AdditionalDetailsDecodes verifies that the raw +// JSON captured in AdditionalDetails for a BlackoutExited event can be +// decoded by the consumer into the concrete BlackoutExitedEventDetails +// struct. This is the contract that motivates modeling AdditionalDetails as // json.RawMessage rather than a typed *interface{}. -func TestMigrationCallbackHandler_AdditionalDetailsDecodes(t *testing.T) { +func TestNotificationHandler_LM_AdditionalDetailsDecodes(t *testing.T) { ch := make(chan hcsschema.OperationSystemMigrationNotificationInfo, 1) - evt := allocCEvent(t, `{"Event":"BlackoutExited","Result":"Success","AdditionalDetails":{"BlackoutDurationMilliseconds":1234,"BlackoutStopTimestamp":"2026-04-23T12:34:56Z"}}`) - ctx := allocCChanCtx(t, ch) + ctx := registerSystemCtx(t, ch) + evt := allocCEvent(t, computecore.HcsEventTypeGroupLiveMigration, + `{"Event":"BlackoutExited","Result":"Success","AdditionalDetails":{"BlackoutDurationMilliseconds":1234,"BlackoutStopTimestamp":"2026-04-23T12:34:56Z"}}`) - if ret := migrationCallbackHandler(evt, ctx); ret != 0 { + if ret := notificationHandler(evt, ctx); ret != 0 { t.Fatalf("expected 0, got %d", ret) } @@ -272,15 +281,15 @@ func TestMigrationCallbackHandler_AdditionalDetailsDecodes(t *testing.T) { } } -// TestMigrationCallbackHandler_AdditionalDetailsAbsent verifies that a +// TestNotificationHandler_LM_AdditionalDetailsAbsent verifies that a // payload without an AdditionalDetails field results in a nil // json.RawMessage on the forwarded notification. -func TestMigrationCallbackHandler_AdditionalDetailsAbsent(t *testing.T) { +func TestNotificationHandler_LM_AdditionalDetailsAbsent(t *testing.T) { ch := make(chan hcsschema.OperationSystemMigrationNotificationInfo, 1) - evt := allocCEvent(t, `{"Event":"SetupDone"}`) - ctx := allocCChanCtx(t, ch) + ctx := registerSystemCtx(t, ch) + evt := allocCEvent(t, computecore.HcsEventTypeGroupLiveMigration, `{"Event":"SetupDone"}`) - if ret := migrationCallbackHandler(evt, ctx); ret != 0 { + if ret := notificationHandler(evt, ctx); ret != 0 { t.Fatalf("expected 0, got %d", ret) } @@ -294,20 +303,20 @@ func TestMigrationCallbackHandler_AdditionalDetailsAbsent(t *testing.T) { // Backpressure // ───────────────────────────────────────────────────────────────────────────── -// TestMigrationCallbackHandler_FullChannelDropsEvent verifies that when the +// TestNotificationHandler_LM_FullChannelDropsEvent verifies that when the // notification channel is full the handler drops the new event rather than // blocking the HCS callback thread. -func TestMigrationCallbackHandler_FullChannelDropsEvent(t *testing.T) { +func TestNotificationHandler_LM_FullChannelDropsEvent(t *testing.T) { ch := make(chan hcsschema.OperationSystemMigrationNotificationInfo, 1) + ctx := registerSystemCtx(t, ch) // Pre-fill the channel so the next send would block. prefill := hcsschema.OperationSystemMigrationNotificationInfo{Event: hcsschema.MigrationEventSetupDone} ch <- prefill - evt := allocCEvent(t, `{"Event":"MigrationDone"}`) - ctx := allocCChanCtx(t, ch) + evt := allocCEvent(t, computecore.HcsEventTypeGroupLiveMigration, `{"Event":"MigrationDone"}`) - if ret := migrationCallbackHandler(evt, ctx); ret != 0 { + if ret := notificationHandler(evt, ctx); ret != 0 { t.Fatalf("expected 0, got %d", ret) } @@ -317,3 +326,26 @@ func TestMigrationCallbackHandler_FullChannelDropsEvent(t *testing.T) { } expectNoNotification(t, ch) } + +// ───────────────────────────────────────────────────────────────────────────── +// Event-type routing +// ───────────────────────────────────────────────────────────────────────────── + +// TestNotificationHandler_NonLMEvent_NotDispatched verifies that a +// non-GroupLiveMigration event does not land on the migration channel even +// when a channel is registered. This guards the dispatch switch in +// notificationHandler. +func TestNotificationHandler_NonLMEvent_NotDispatched(t *testing.T) { + ch := make(chan hcsschema.OperationSystemMigrationNotificationInfo, 1) + ctx := registerSystemCtx(t, ch) + + // SystemExited is a terminal exit event; without a notificationState + // registered (nil above) it must not panic and must not send anything + // onto the migration channel. + evt := allocCEvent(t, computecore.HcsEventTypeSystemExited, `{"Status":0}`) + + if ret := notificationHandler(evt, ctx); ret != 0 { + t.Fatalf("expected 0, got %d", ret) + } + expectNoNotification(t, ch) +} diff --git a/internal/hcs/v2/notification.go b/internal/hcs/v2/notification.go new file mode 100644 index 0000000000..22a687cf28 --- /dev/null +++ b/internal/hcs/v2/notification.go @@ -0,0 +1,176 @@ +//go:build windows + +package hcsv2 + +import ( + "encoding/json" + "sync" + "sync/atomic" + "syscall" + "unsafe" + + "github.com/sirupsen/logrus" + "golang.org/x/sys/windows" + + "github.com/Microsoft/hcsshim/internal/computecore" + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/logfields" +) + +// migrationNotificationBufferSize is the capacity of a System's live migration +// notification channel. It only needs enough headroom to absorb a short burst +// of HCS-side events between consumer reads; the dispatch in +// notificationHandler is non-blocking and drops on overflow. +const migrationNotificationBufferSize = 16 + +// HCS V2 callbacks take an opaque void* context. Rather than handing HCS a +// live Go pointer, we register a numeric ID that maps to the real context in +// notificationContexts. +// +// Registrations use HcsEventOptionEnableLiveMigrationEvents. +// The package never attaches a per-operation callback. +var ( + notificationNextID atomic.Uint64 + notificationContexts sync.Map // uint64 -> *notificationContext + + notificationCallback = syscall.NewCallback(notificationHandler) +) + +// notificationState rendezvous a terminal HCS event with waitBackground. +// Exactly one of exit (normal exit + payload) or abort (abnormal termination) +// is signaled. Both channels are buffered(1) and closed after send. +type notificationState struct { + signalOnce sync.Once + exit chan json.RawMessage + abort chan error +} + +func newNotificationState() *notificationState { + return ¬ificationState{ + exit: make(chan json.RawMessage, 1), + abort: make(chan error, 1), + } +} + +// signalExit delivers a normal exit payload. First signal wins. +func (s *notificationState) signalExit(raw json.RawMessage) { + s.signalOnce.Do(func() { + s.exit <- raw + close(s.exit) + }) +} + +// signalAbort delivers an abnormal-termination error. First signal wins. +func (s *notificationState) signalAbort(err error) { + s.signalOnce.Do(func() { + s.abort <- err + close(s.abort) + }) +} + +// notificationContext is the per-handle data resolved from the callback's +// opaque ctx. processID == 0 means the callback belongs to a system handle. +type notificationContext struct { + systemID string + processID int // 0 for system handle callbacks + state *notificationState + migrationCh chan<- hcsschema.OperationSystemMigrationNotificationInfo +} + +// registerNotificationContext returns the ID to pass as the void* context to +// HcsSet{ComputeSystem,Process}Callback. The caller must invoke +// unregisterNotificationContext after the HCS handle is closed (HCS guarantees +// no further callbacks fire past close). +// +// migrationCh may be nil; pass non-nil only for system handles that should +// receive live migration notifications. +func registerNotificationContext(systemID string, processID int, state *notificationState, migrationCh chan<- hcsschema.OperationSystemMigrationNotificationInfo) uint64 { + id := notificationNextID.Add(1) + notificationContexts.Store(id, ¬ificationContext{ + systemID: systemID, + processID: processID, + state: state, + migrationCh: migrationCh, + }) + return id +} + +// unregisterNotificationContext drops the mapping for id. No-op for id == 0. +func unregisterNotificationContext(id uint64) { + if id != 0 { + notificationContexts.Delete(id) + } +} + +// notificationHandler is the single syscall callback shared by all HCS system +// and process registrations. It logs the event, signals the owning +// notificationState on terminal exit events, and dispatches live migration +// events to the registered migration channel. The return value is ignored by +// HCS. +func notificationHandler(eventPtr uintptr, ctx uintptr) uintptr { + if eventPtr == 0 { + return 0 + } + e := (*computecore.HcsEvent)(unsafe.Pointer(eventPtr)) + + fields := logrus.Fields{"event-type": e.Type.String()} + var eventData string + if e.EventData != nil { + eventData = windows.UTF16PtrToString(e.EventData) + fields["event-data"] = eventData + } + + source := "system" + v, ok := notificationContexts.Load(uint64(ctx)) + if ok { + nc := v.(*notificationContext) + if nc.systemID != "" { + fields[logfields.ContainerID] = nc.systemID + } + if nc.processID != 0 { + fields[logfields.ProcessID] = nc.processID + source = "process" + } + switch e.Type { + case computecore.HcsEventTypeSystemExited, computecore.HcsEventTypeProcessExited: + if nc.state != nil { + nc.state.signalExit(json.RawMessage(eventData)) + } + case computecore.HcsEventTypeServiceDisconnect: + if nc.state != nil { + nc.state.signalAbort(ErrUnexpectedProcessAbort) + } + case computecore.HcsEventTypeGroupLiveMigration: + // Forward to the system's migration channel, if one was + // registered. + if nc.migrationCh != nil { + dispatchMigrationEvent(nc.migrationCh, e.Type, json.RawMessage(eventData)) + } + } + } + + logrus.WithFields(fields).Debugf("HCS %s notification", source) + return 0 +} + +// dispatchMigrationEvent decodes a GroupLiveMigration EventData payload and +// non-blocking-sends it on ch. An empty payload yields the zero value (HCS +// occasionally delivers LM events with a nil EventData pointer). +func dispatchMigrationEvent(ch chan<- hcsschema.OperationSystemMigrationNotificationInfo, eventType computecore.HcsEventType, eventData json.RawMessage) { + var info hcsschema.OperationSystemMigrationNotificationInfo + if len(eventData) > 0 { + if err := json.Unmarshal(eventData, &info); err != nil { + logrus.WithFields(logrus.Fields{ + "event-type": eventType.String(), + "event-data": string(eventData), + logrus.ErrorKey: err, + }).Warn("failed to unmarshal migration notification payload, dropping event") + return + } + } + select { + case ch <- info: + default: + logrus.WithField("event-type", eventType.String()).Warn("migration notification channel full, dropping event") + } +} diff --git a/internal/hcs/v2/operation.go b/internal/hcs/v2/operation.go new file mode 100644 index 0000000000..65e336dd55 --- /dev/null +++ b/internal/hcs/v2/operation.go @@ -0,0 +1,120 @@ +//go:build windows + +package hcsv2 + +import ( + "context" + "errors" + "syscall" + "time" + + "github.com/Microsoft/hcsshim/internal/computecore" +) + +// infiniteTimeout is the milliseconds value passed to +// HcsWaitForOperationResult to wait forever (Win32 INFINITE, 0xFFFFFFFF). +const infiniteTimeout = ^uint32(0) + +// hcsErrOperationTimeout is HCS_E_OPERATION_TIMEOUT, returned by +// HcsWaitForOperationResult when the wait elapses while HCS is still +// tracking the operation. +const hcsErrOperationTimeout = syscall.Errno(0x80370118) + +// waitTimeoutMs derives the TimeoutMs argument for HcsWaitForOperationResult +// from ctx's deadline. With no deadline (or one already past) it falls back +// to INFINITE / a minimal poll respectively. +func waitTimeoutMs(ctx context.Context) uint32 { + dl, ok := ctx.Deadline() + if !ok { + return infiniteTimeout + } + d := time.Until(dl) + if d <= 0 { + return 0 + } + ms := d.Milliseconds() + if ms >= int64(infiniteTimeout) { + return infiniteTimeout - 1 + } + return uint32(ms) +} + +// runOperation creates an HCS operation, invokes fn(op), then synchronously +// waits for the operation result. The returned resultDoc is the JSON document +// produced by the tracked HCS API (which on failure may contain a ResultError +// describing the error events). The operation is always closed before return. +// +// The wait is bounded by ctx's deadline (if any); otherwise it waits forever. +// ctx cancellation is stripped via context.WithoutCancel; see computecore.execute +// for why HCS syscalls cannot be abandoned mid-flight. +func runOperation(ctx context.Context, fn func(op computecore.HcsOperation) error) (resultDoc string, err error) { + timeoutMs := waitTimeoutMs(ctx) + syscallCtx := context.WithoutCancel(ctx) + + // We do not use any operation level callback. + op, err := computecore.HcsCreateOperation(syscallCtx, 0, 0) + if err != nil { + return "", err + } + defer computecore.HcsCloseOperation(syscallCtx, op) + + if fnErr := fn(op); fnErr != nil { + // Attach any result doc HCS already produced for additional context. + doc, _ := computecore.HcsGetOperationResult(syscallCtx, op) + return doc, wrapHcsResult(ctx, fnErr, doc) + } + doc, waitErr := computecore.HcsWaitForOperationResult(syscallCtx, op, timeoutMs) + if errors.Is(waitErr, hcsErrOperationTimeout) { + // Wait deadline elapsed but HCS is still tracking the request; + // ask it to abort so the operation does not outlive this call. + _ = computecore.HcsCancelOperation(syscallCtx, op) + } + return doc, wrapHcsResult(ctx, waitErr, doc) +} + +// runProcessOperation is the equivalent of runOperation for HCS APIs that are +// associated with an HCS_PROCESS handle (HcsCreateProcess, HcsGetProcessInfo, +// etc.) and whose operation result includes the HcsProcessInformation struct. +func runProcessOperation(ctx context.Context, fn func(op computecore.HcsOperation) error) (info computecore.HcsProcessInformation, resultDoc string, err error) { + timeoutMs := waitTimeoutMs(ctx) + syscallCtx := context.WithoutCancel(ctx) + + op, err := computecore.HcsCreateOperation(syscallCtx, 0, 0) + if err != nil { + return info, "", err + } + defer computecore.HcsCloseOperation(syscallCtx, op) + + if fnErr := fn(op); fnErr != nil { + _, doc, _ := computecore.HcsGetOperationResultAndProcessInfo(syscallCtx, op) + return info, doc, wrapHcsResult(ctx, fnErr, doc) + } + info, doc, waitErr := computecore.HcsWaitForOperationResultAndProcessInfo(syscallCtx, op, timeoutMs) + if errors.Is(waitErr, hcsErrOperationTimeout) { + _ = computecore.HcsCancelOperation(syscallCtx, op) + } + return info, doc, wrapHcsResult(ctx, waitErr, doc) +} + +// submitOperation creates an operation, invokes fn(op) to hand the request +// to HCS, and returns without waiting for completion. The operation handle +// is closed immediately; per the HCS V2 contract this is safe while the +// request is in flight (HCS continues running it and discards the result). +// +// This preserves the V1 fire-and-forget semantics of HcsShutdownComputeSystem +// and HcsTerminateComputeSystem, where the shim only needs to know the +// request was accepted (callers observe completion via the system exit +// notification, not the operation result). +func submitOperation(ctx context.Context, fn func(op computecore.HcsOperation) error) error { + syscallCtx := context.WithoutCancel(ctx) + op, err := computecore.HcsCreateOperation(syscallCtx, 0, 0) + if err != nil { + return err + } + defer computecore.HcsCloseOperation(syscallCtx, op) + if fnErr := fn(op); fnErr != nil { + doc, _ := computecore.HcsGetOperationResult(syscallCtx, op) + return wrapHcsResult(ctx, fnErr, doc) + } + return nil +} diff --git a/internal/hcs/v2/process.go b/internal/hcs/v2/process.go new file mode 100644 index 0000000000..497a8bb5a1 --- /dev/null +++ b/internal/hcs/v2/process.go @@ -0,0 +1,550 @@ +//go:build windows + +package hcsv2 + +import ( + "context" + "encoding/json" + "errors" + "io" + "os" + "sync" + "syscall" + "time" + + "go.opencensus.io/trace" + + "github.com/Microsoft/hcsshim/internal/computecore" + "github.com/Microsoft/hcsshim/internal/cow" + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/oc" + "github.com/Microsoft/hcsshim/internal/protocol/guestrequest" +) + +type Process struct { + handleLock sync.RWMutex + handle computecore.HcsProcess + processID int + system *System + hasCachedStdio bool + stdioLock sync.Mutex + stdin io.WriteCloser + stdout io.ReadCloser + stderr io.ReadCloser + killSignalDelivered bool + + // notificationID is the lookup key passed as the void* context to + // HcsSetProcessCallback. Zero when no callback is registered. + notificationID uint64 + // notify rendezvous between the HCS notification callback + // (HcsEventProcessExited) and waitBackground. Close also signals it to + // release waitBackground without publishing an exit. + notify *notificationState + + closedWaitOnce sync.Once + waitBlock chan struct{} + exitCode int + waitError error +} + +var _ cow.Process = &Process{} + +func newProcess(process computecore.HcsProcess, processID int, computeSystem *System) *Process { + return &Process{ + handle: process, + processID: processID, + system: computeSystem, + waitBlock: make(chan struct{}), + notify: newNotificationState(), + } +} + +// Pid returns the process ID of the process within the container. +func (process *Process) Pid() int { + return process.processID +} + +// SystemID returns the ID of the process's compute system. +func (process *Process) SystemID() string { + return process.system.ID() +} + +func (process *Process) processSignalResult(ctx context.Context, err error) (bool, error) { + if err == nil { + return true, nil + } + if errors.Is(err, ErrVmcomputeOperationInvalidState) || errors.Is(err, ErrComputeSystemDoesNotExist) || errors.Is(err, ErrElementNotFound) { + if !process.stopped() { + // The process should be gone, but we have not received the notification. + // After a second, force unblock the process wait to work around a possible + // deadlock in the HCS. + go func() { + time.Sleep(time.Second) + process.closedWaitOnce.Do(func() { + log.G(ctx).WithError(err).Warn("force unblocking process waits") + process.exitCode = -1 + process.waitError = err + close(process.waitBlock) + }) + }() + } + return false, nil + } + return false, nil +} + +// Signal signals the process with `options`. +// +// For LCOW `guestresource.SignalProcessOptionsLCOW`. +// +// For WCOW `guestresource.SignalProcessOptionsWCOW`. +func (process *Process) Signal(ctx context.Context, options interface{}) (bool, error) { + process.handleLock.RLock() + defer process.handleLock.RUnlock() + + operation := "hcs::Process::Signal" + + if process.handle == 0 { + return false, makeProcessError(process, operation, ErrAlreadyClosed) + } + + optionsb, err := json.Marshal(options) + if err != nil { + return false, err + } + optionsStr := string(optionsb) + + _, sigErr := runOperation(ctx, func(op computecore.HcsOperation) error { + return computecore.HcsSignalProcess(ctx, process.handle, op, optionsStr) + }) + delivered, sigErr := process.processSignalResult(ctx, sigErr) + if sigErr != nil { + sigErr = makeProcessError(process, operation, sigErr) + } + return delivered, sigErr +} + +// Kill signals the process to terminate but does not wait for it to finish terminating. +func (process *Process) Kill(ctx context.Context) (bool, error) { + process.handleLock.RLock() + defer process.handleLock.RUnlock() + + operation := "hcs::Process::Kill" + + if process.handle == 0 { + return false, makeProcessError(process, operation, ErrAlreadyClosed) + } + + if process.stopped() { + return false, makeProcessError(process, operation, ErrProcessAlreadyStopped) + } + + if process.killSignalDelivered { + // A kill signal has already been sent to this process. Sending a second + // one offers no real benefit, as processes cannot stop themselves from + // being terminated, once a TerminateProcess has been issued. Sending a + // second kill may result in a number of errors (two of which detailed bellow) + // and which we can avoid handling. + return true, nil + } + + // HCS serializes the signals sent to a target pid per compute system handle. + // To avoid SIGKILL being serialized behind other signals, we open a new compute + // system handle to deliver the kill signal. + // If the calls to opening a new compute system handle fail, we forcefully + // terminate the container itself so that no container is left behind + hcsSystem, err := OpenComputeSystem(ctx, process.system.id) + if err != nil { + // log error and force termination of container + log.G(ctx).WithField("err", err).Error("OpenComputeSystem() call failed") + err = process.system.Terminate(ctx) + // if the Terminate() call itself ever failed, log and return error + if err != nil { + log.G(ctx).WithField("err", err).Error("Terminate() call failed") + return false, err + } + process.system.Close() + return true, nil + } + defer hcsSystem.Close() + + newProcessHandle, err := hcsSystem.OpenProcess(ctx, process.Pid()) + if err != nil { + // Return true only if the target process has either already + // exited, or does not exist. + if IsAlreadyStopped(err) { + return true, nil + } else { + return false, err + } + } + defer newProcessHandle.Close() + + _, killErr := runOperation(ctx, func(op computecore.HcsOperation) error { + return computecore.HcsTerminateProcess(ctx, newProcessHandle.handle, op, "") + }) + if killErr != nil { + // We still need to check these two cases, as processes may still be killed by an + // external actor (human operator, OOM, random script etc). + if errors.Is(killErr, os.ErrPermission) || IsAlreadyStopped(killErr) { + // There are two cases where it should be safe to ignore an error returned + // by HcsTerminateProcess. The first one is cause by the fact that + // HcsTerminateProcess ends up calling TerminateProcess in the context + // of a container. According to the TerminateProcess documentation: + // https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-terminateprocess#remarks + // After a process has terminated, call to TerminateProcess with open + // handles to the process fails with ERROR_ACCESS_DENIED (5) error code. + // It's safe to ignore this error here. HCS should always have permissions + // to kill processes inside any container. So an ERROR_ACCESS_DENIED + // is unlikely to be anything else than what the ending remarks in the + // documentation states. + // + // The second case is generated by hcs itself, if for any reason HcsTerminateProcess + // is called twice in a very short amount of time. In such cases, hcs may return + // HCS_E_PROCESS_ALREADY_STOPPED. + return true, nil + } + } + delivered, killErr := newProcessHandle.processSignalResult(ctx, killErr) + if killErr != nil { + killErr = makeProcessError(newProcessHandle, operation, killErr) + } + + process.killSignalDelivered = delivered + return delivered, killErr +} + +// waitBackground blocks until either the HCS callback delivers +// HcsEventProcessExited (real exit, publish exit code) or Close fires +// (release without publishing). It then sets `process.waitError` (if any) +// and unblocks all `Wait` calls. +// +// HCS does not deliver a final notification on HcsCloseProcess — it just +// unregisters the callback — so Close needs its own signal. Publishing a +// synthetic exit on Close would report exit_code=255 to containerd for +// processes that are still running. +// +// This MUST be called exactly once per `process.handle` but `Wait` is safe +// to call multiple times. +func (process *Process) waitBackground() { + operation := "hcs::Process::waitBackground" + ctx, span := oc.StartSpan(context.Background(), operation) + defer span.End() + span.AddAttributes( + trace.StringAttribute("cid", process.SystemID()), + trace.Int64Attribute("pid", int64(process.processID))) + + var raw json.RawMessage + exitCode := -1 + var err error + select { + case <-process.waitBlock: + log.G(ctx).Debug("process waitBackground returning without exit notification (handle closed)") + return + case raw = <-process.notify.exit: + case abortErr := <-process.notify.abort: + err = makeProcessError(process, operation, abortErr) + } + + if err == nil && len(raw) > 0 { + properties := &hcsschema.ProcessStatus{} + if uErr := json.Unmarshal(raw, properties); uErr != nil { + err = makeProcessError(process, operation, uErr) + } else if properties.LastWaitResult != 0 { + log.G(ctx).WithField("wait-result", properties.LastWaitResult).Warning("non-zero last wait result") + } else { + exitCode = int(properties.ExitCode) + } + } + log.G(ctx).WithField("exitCode", exitCode).Debug("process exited") + + process.closedWaitOnce.Do(func() { + process.exitCode = exitCode + process.waitError = err + close(process.waitBlock) + }) + oc.SetSpanStatus(span, err) +} + +// Wait waits for the process to exit. If the process has already exited returns +// the previous error (if any). +func (process *Process) Wait() error { + <-process.waitBlock + return process.waitError +} + +// Exited returns if the process has stopped +func (process *Process) stopped() bool { + select { + case <-process.waitBlock: + return true + default: + return false + } +} + +// ResizeConsole resizes the console of the process. +func (process *Process) ResizeConsole(ctx context.Context, width, height uint16) error { + process.handleLock.RLock() + defer process.handleLock.RUnlock() + + operation := "hcs::Process::ResizeConsole" + + if process.handle == 0 { + return makeProcessError(process, operation, ErrAlreadyClosed) + } + modifyRequest := hcsschema.ProcessModifyRequest{ + Operation: guestrequest.ModifyProcessConsoleSize, + ConsoleSize: &hcsschema.ConsoleSize{ + Height: height, + Width: width, + }, + } + + modifyRequestb, err := json.Marshal(modifyRequest) + if err != nil { + return err + } + modifyRequestStr := string(modifyRequestb) + + _, modErr := runOperation(ctx, func(op computecore.HcsOperation) error { + return computecore.HcsModifyProcess(ctx, process.handle, op, modifyRequestStr) + }) + if modErr != nil { + return makeProcessError(process, operation, modErr) + } + + return nil +} + +// ExitCode returns the exit code of the process. The process must have +// already terminated. +func (process *Process) ExitCode() (int, error) { + if !process.stopped() { + return -1, makeProcessError(process, "hcs::Process::ExitCode", ErrInvalidProcessState) + } + if process.waitError != nil { + return -1, process.waitError + } + return process.exitCode, nil +} + +// StdioLegacy returns the stdin, stdout, and stderr pipes, respectively. Closing +// these pipes does not close the underlying pipes. Once returned, these pipes +// are the responsibility of the caller to close. +func (process *Process) StdioLegacy() (_ io.WriteCloser, _ io.ReadCloser, _ io.ReadCloser, err error) { + operation := "hcs::Process::StdioLegacy" + ctx, span := oc.StartSpan(context.Background(), operation) + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes( + trace.StringAttribute("cid", process.SystemID()), + trace.Int64Attribute("pid", int64(process.processID))) + + process.handleLock.RLock() + defer process.handleLock.RUnlock() + + if process.handle == 0 { + return nil, nil, nil, makeProcessError(process, operation, ErrAlreadyClosed) + } + + process.stdioLock.Lock() + defer process.stdioLock.Unlock() + if process.hasCachedStdio { + stdin, stdout, stderr := process.stdin, process.stdout, process.stderr + process.stdin, process.stdout, process.stderr = nil, nil, nil + process.hasCachedStdio = false + return stdin, stdout, stderr, nil + } + + processInfo, _, err := runProcessOperation(ctx, func(op computecore.HcsOperation) error { + return computecore.HcsGetProcessInfo(ctx, process.handle, op) + }) + if err != nil { + return nil, nil, nil, makeProcessError(process, operation, err) + } + + pipes, err := makeOpenFiles([]syscall.Handle{processInfo.StdInput, processInfo.StdOutput, processInfo.StdError}) + if err != nil { + return nil, nil, nil, makeProcessError(process, operation, err) + } + + return pipes[0], pipes[1], pipes[2], nil +} + +// Stdio returns the stdin, stdout, and stderr pipes, respectively. +// To close them, close the process handle, or use the `CloseStd*` functions. +func (process *Process) Stdio() (stdin io.Writer, stdout, stderr io.Reader) { + process.stdioLock.Lock() + defer process.stdioLock.Unlock() + return process.stdin, process.stdout, process.stderr +} + +// CloseStdin closes the write side of the stdin pipe so that the process is +// notified on the read side that there is no more data in stdin. +func (process *Process) CloseStdin(ctx context.Context) (err error) { + operation := "hcs::Process::CloseStdin" + ctx, span := trace.StartSpan(ctx, operation) + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes( + trace.StringAttribute("cid", process.SystemID()), + trace.Int64Attribute("pid", int64(process.processID))) + + process.handleLock.RLock() + defer process.handleLock.RUnlock() + + if process.handle == 0 { + return makeProcessError(process, operation, ErrAlreadyClosed) + } + + // HcsModifyProcess request to close stdin will fail if the process has already exited + if !process.stopped() { + modifyRequest := hcsschema.ProcessModifyRequest{ + Operation: guestrequest.CloseProcessHandle, + CloseHandle: &hcsschema.CloseHandle{ + Handle: guestrequest.STDInHandle, + }, + } + + modifyRequestb, err := json.Marshal(modifyRequest) + if err != nil { + return err + } + modifyRequestStr := string(modifyRequestb) + + _, modErr := runOperation(ctx, func(op computecore.HcsOperation) error { + return computecore.HcsModifyProcess(ctx, process.handle, op, modifyRequestStr) + }) + if modErr != nil { + return makeProcessError(process, operation, modErr) + } + } + + process.stdioLock.Lock() + defer process.stdioLock.Unlock() + if process.stdin != nil { + process.stdin.Close() + process.stdin = nil + } + + return nil +} + +func (process *Process) CloseStdout(ctx context.Context) (err error) { + ctx, span := oc.StartSpan(ctx, "hcs::Process::CloseStdout") //nolint:ineffassign,staticcheck + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes( + trace.StringAttribute("cid", process.SystemID()), + trace.Int64Attribute("pid", int64(process.processID))) + + process.handleLock.Lock() + defer process.handleLock.Unlock() + + if process.handle == 0 { + return nil + } + + process.stdioLock.Lock() + defer process.stdioLock.Unlock() + if process.stdout != nil { + process.stdout.Close() + process.stdout = nil + } + return nil +} + +func (process *Process) CloseStderr(ctx context.Context) (err error) { + ctx, span := oc.StartSpan(ctx, "hcs::Process::CloseStderr") //nolint:ineffassign,staticcheck + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes( + trace.StringAttribute("cid", process.SystemID()), + trace.Int64Attribute("pid", int64(process.processID))) + + process.handleLock.Lock() + defer process.handleLock.Unlock() + + if process.handle == 0 { + return nil + } + + process.stdioLock.Lock() + defer process.stdioLock.Unlock() + if process.stderr != nil { + process.stderr.Close() + process.stderr = nil + } + return nil +} + +// Close cleans up any state associated with the process but does not kill +// or wait on it. +func (process *Process) Close() (err error) { + operation := "hcs::Process::Close" + ctx, span := oc.StartSpan(context.Background(), operation) + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes( + trace.StringAttribute("cid", process.SystemID()), + trace.Int64Attribute("pid", int64(process.processID))) + + process.handleLock.Lock() + defer process.handleLock.Unlock() + + // Don't double free this + if process.handle == 0 { + return nil + } + + process.stdioLock.Lock() + if process.stdin != nil { + process.stdin.Close() + process.stdin = nil + } + if process.stdout != nil { + process.stdout.Close() + process.stdout = nil + } + if process.stderr != nil { + process.stderr.Close() + process.stderr = nil + } + process.stdioLock.Unlock() + + // HcsCloseProcess internally unregisters our notification callback + // and drains in-flight invocations before tearing the handle down. + computecore.HcsCloseProcess(ctx, process.handle) + unregisterNotificationContext(process.notificationID) + process.notificationID = 0 + + // Release Wait/ExitCode callers with ErrAlreadyClosed + // and unblock waitBackground. + process.handle = 0 + process.closedWaitOnce.Do(func() { + process.exitCode = -1 + process.waitError = ErrAlreadyClosed + close(process.waitBlock) + }) + + return nil +} + +// registerNotification registers the package-wide HCS notification callback +// on this process handle. Must be called BEFORE waitBackground starts so +// notifications are not missed. +func (process *Process) registerNotification(ctx context.Context) error { + id := registerNotificationContext(process.SystemID(), process.processID, process.notify, nil) + if err := computecore.HcsSetProcessCallback( + ctx, process.handle, + computecore.HcsEventOptionNone, + uintptr(id), notificationCallback, + ); err != nil { + unregisterNotificationContext(id) + return err + } + process.notificationID = id + return nil +} diff --git a/internal/hcs/v2/service.go b/internal/hcs/v2/service.go new file mode 100644 index 0000000000..a0133b7c76 --- /dev/null +++ b/internal/hcs/v2/service.go @@ -0,0 +1,51 @@ +//go:build windows + +package hcsv2 + +import ( + "context" + "encoding/json" + + "github.com/Microsoft/hcsshim/internal/computecore" + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" +) + +// GetServiceProperties returns properties of the host compute service. +func GetServiceProperties(ctx context.Context, q hcsschema.PropertyQuery) (*hcsschema.ServiceProperties, error) { + operation := "hcs::GetServiceProperties" + + queryb, err := json.Marshal(q) + if err != nil { + return nil, err + } + propertiesJSON, err := computecore.HcsGetServiceProperties(ctx, string(queryb)) + if err != nil { + err = wrapHcsResult(ctx, err, propertiesJSON) + return nil, &HcsError{Op: operation, Err: err, Events: eventsFromError(err)} + } + + if propertiesJSON == "" { + return nil, ErrUnexpectedValue + } + properties := &hcsschema.ServiceProperties{} + if err := json.Unmarshal([]byte(propertiesJSON), properties); err != nil { + return nil, err + } + return properties, nil +} + +// ModifyServiceSettings modifies settings of the host compute service. +func ModifyServiceSettings(ctx context.Context, settings hcsschema.ModificationRequest) error { + operation := "hcs::ModifyServiceSettings" + + settingsJSON, err := json.Marshal(settings) + if err != nil { + return err + } + resultJSON, err := computecore.HcsModifyServiceSettings(ctx, string(settingsJSON)) + if err != nil { + err = wrapHcsResult(ctx, err, resultJSON) + return &HcsError{Op: operation, Err: err, Events: eventsFromError(err)} + } + return nil +} diff --git a/internal/hcs/v2/system.go b/internal/hcs/v2/system.go new file mode 100644 index 0000000000..aeb03d7fcc --- /dev/null +++ b/internal/hcs/v2/system.go @@ -0,0 +1,1015 @@ +//go:build windows + +package hcsv2 + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "strings" + "sync" + "syscall" + "time" + + "github.com/Microsoft/hcsshim/internal/computecore" + "github.com/Microsoft/hcsshim/internal/cow" + "github.com/Microsoft/hcsshim/internal/hcs/schema1" + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/jobobject" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" + "github.com/Microsoft/hcsshim/internal/oc" + "github.com/Microsoft/hcsshim/internal/timeout" + "github.com/sirupsen/logrus" + "go.opencensus.io/trace" +) + +type System struct { + handleLock sync.RWMutex + handle computecore.HcsSystem + id string + + // notificationID is the lookup key passed as the void* context to + // HcsSetComputeSystemCallback. Zero when no callback is registered. + notificationID uint64 + // notify rendezvous between the HCS notification callback + // (HcsEventSystemExited) and waitBackground. Close also signals it to + // release waitBackground without publishing an exit. + notify *notificationState + + closedWaitOnce sync.Once + waitBlock chan struct{} + waitError error + exitError error + os, typ, owner string + startTime time.Time + stopTime time.Time + + // migrationNotifyCh delivers live migration events from + // notificationHandler. Never closed (callers signal end-of-stream + // via their own context); sends are non-blocking and drop on overflow. + migrationNotifyCh chan hcsschema.OperationSystemMigrationNotificationInfo +} + +var _ cow.Container = &System{} +var _ cow.ProcessHost = &System{} + +func newSystem(id string) *System { + return &System{ + id: id, + waitBlock: make(chan struct{}), + notify: newNotificationState(), + migrationNotifyCh: make(chan hcsschema.OperationSystemMigrationNotificationInfo, migrationNotificationBufferSize), + } +} + +// Implementation detail for silo naming, this should NOT be relied upon very heavily. +func siloNameFmt(containerID string) string { + return fmt.Sprintf(`\Container_%s`, containerID) +} + +// CreateComputeSystem creates a new compute system with the given configuration but does not start it. +func CreateComputeSystem(ctx context.Context, id string, hcsDocumentInterface interface{}) (_ *System, err error) { + operation := "hcs::CreateComputeSystem" + + ctx, span := oc.StartSpan(ctx, operation) + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("cid", id)) + + computeSystem := newSystem(id) + + hcsDocumentB, err := json.Marshal(hcsDocumentInterface) + if err != nil { + return nil, err + } + + hcsDocument := string(hcsDocumentB) + + // On any error after this point, tear down the compute system and + // release the handle. Terminate is guarded (no-op when handle == 0 + // or already stopped) so this is safe on every failure path, + // including a synchronous create failure. + defer func() { + if err != nil { + _ = computeSystem.Terminate(ctx) + computeSystem.Close() + } + }() + + createCtx, cancel := context.WithTimeout(ctx, timeout.SystemCreate) + defer cancel() + _, createErr := runOperation(createCtx, func(op computecore.HcsOperation) error { + var hErr error + computeSystem.handle, hErr = computecore.HcsCreateComputeSystem(ctx, id, hcsDocument, op, nil) + return hErr + }) + if createErr != nil { + return nil, makeSystemError(computeSystem, operation, createErr) + } + + if err = computeSystem.registerNotification(ctx); err != nil { + return nil, makeSystemError(computeSystem, operation, err) + } + go computeSystem.waitBackground() + if err = computeSystem.getCachedProperties(ctx); err != nil { + return nil, err + } + return computeSystem, nil +} + +// OpenComputeSystem opens an existing compute system by ID. +func OpenComputeSystem(ctx context.Context, id string) (*System, error) { + operation := "hcs::OpenComputeSystem" + + computeSystem := newSystem(id) + handle, err := computecore.HcsOpenComputeSystem(ctx, id, syscall.GENERIC_ALL) + if err != nil { + return nil, makeSystemError(computeSystem, operation, err) + } + + computeSystem.handle = handle + defer func() { + if err != nil { + computeSystem.Close() + } + }() + if err = computeSystem.registerNotification(ctx); err != nil { + return nil, makeSystemError(computeSystem, operation, err) + } + + go computeSystem.waitBackground() + if err = computeSystem.getCachedProperties(ctx); err != nil { + return nil, err + } + return computeSystem, nil +} + +// registerNotification registers the package-wide HCS notification callback +// on this system's primary handle. Must be called BEFORE waitBackground +// starts so notifications are not missed. +func (computeSystem *System) registerNotification(ctx context.Context) error { + id := registerNotificationContext(computeSystem.id, 0, computeSystem.notify, computeSystem.migrationNotifyCh) + if err := computecore.HcsSetComputeSystemCallback( + ctx, computeSystem.handle, + computecore.HcsEventOptionEnableLiveMigrationEvents, + uintptr(id), notificationCallback, + ); err != nil { + unregisterNotificationContext(id) + return err + } + computeSystem.notificationID = id + return nil +} + +func (computeSystem *System) getCachedProperties(ctx context.Context) error { + props, err := computeSystem.Properties(ctx) + if err != nil { + return err + } + computeSystem.typ = strings.ToLower(props.SystemType) + computeSystem.os = strings.ToLower(props.RuntimeOSType) + computeSystem.owner = strings.ToLower(props.Owner) + if computeSystem.os == "" && computeSystem.typ == "container" { + // Pre-RS5 HCS did not return the OS, but it only supported containers + // that ran Windows. + computeSystem.os = "windows" + } + return nil +} + +// OS returns the operating system of the compute system, "linux" or "windows". +func (computeSystem *System) OS() string { + return computeSystem.os +} + +// IsOCI returns whether processes in the compute system should be created via +// OCI. +func (computeSystem *System) IsOCI() bool { + return computeSystem.os == "linux" && computeSystem.typ == "container" +} + +// GetComputeSystems gets a list of the compute systems on the system that match the query +func GetComputeSystems(ctx context.Context, q schema1.ComputeSystemQuery) ([]schema1.ContainerProperties, error) { + operation := "hcs::GetComputeSystems" + + queryb, err := json.Marshal(q) + if err != nil { + return nil, err + } + query := string(queryb) + + resultJSON, err := runOperation(ctx, func(op computecore.HcsOperation) error { + return computecore.HcsEnumerateComputeSystems(ctx, query, op) + }) + if err != nil { + return nil, &HcsError{Op: operation, Err: err, Events: eventsFromError(err)} + } + if resultJSON == "" { + return nil, ErrUnexpectedValue + } + computeSystems := []schema1.ContainerProperties{} + if err = json.Unmarshal([]byte(resultJSON), &computeSystems); err != nil { + return nil, err + } + + return computeSystems, nil +} + +// Start synchronously starts the computeSystem. +func (computeSystem *System) Start(ctx context.Context) (err error) { + operation := "hcs::System::Start" + + ctx, span := oc.StartSpan(ctx, operation) + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) + + computeSystem.handleLock.RLock() + defer computeSystem.handleLock.RUnlock() + + // Prevent starting an exited system: we do not recreate waitBlock or + // rerun waitBackground, so we have no way to be notified of it closing again. + if computeSystem.handle == 0 { + return makeSystemError(computeSystem, operation, ErrAlreadyClosed) + } + + startCtx, cancel := context.WithTimeout(ctx, timeout.SystemStart) + defer cancel() + _, callErr := runOperation(startCtx, func(op computecore.HcsOperation) error { + return computecore.HcsStartComputeSystem(ctx, computeSystem.handle, op, "") + }) + if callErr != nil { + return makeSystemError(computeSystem, operation, callErr) + } + computeSystem.startTime = time.Now() + return nil +} + +// ID returns the compute system's identifier. +func (computeSystem *System) ID() string { + return computeSystem.id +} + +// Shutdown requests a compute system shutdown. +func (computeSystem *System) Shutdown(ctx context.Context) error { + computeSystem.handleLock.RLock() + defer computeSystem.handleLock.RUnlock() + + operation := "hcs::System::Shutdown" + + if computeSystem.handle == 0 || computeSystem.stopped() { + return nil + } + + err := submitOperation(ctx, func(op computecore.HcsOperation) error { + return computecore.HcsShutDownComputeSystem(ctx, computeSystem.handle, op, "") + }) + if err != nil && + !errors.Is(err, ErrVmcomputeAlreadyStopped) && + !errors.Is(err, ErrComputeSystemDoesNotExist) && + !errors.Is(err, ErrVmcomputeOperationPending) { + return makeSystemError(computeSystem, operation, err) + } + return nil +} + +// Terminate requests a compute system terminate. +func (computeSystem *System) Terminate(ctx context.Context) error { + computeSystem.handleLock.RLock() + defer computeSystem.handleLock.RUnlock() + + operation := "hcs::System::Terminate" + + if computeSystem.handle == 0 || computeSystem.stopped() { + return nil + } + + err := submitOperation(ctx, func(op computecore.HcsOperation) error { + return computecore.HcsTerminateComputeSystem(ctx, computeSystem.handle, op, "") + }) + if err != nil && + !errors.Is(err, ErrVmcomputeAlreadyStopped) && + !errors.Is(err, ErrComputeSystemDoesNotExist) && + !errors.Is(err, ErrVmcomputeOperationPending) { + return makeSystemError(computeSystem, operation, err) + } + return nil +} + +// waitBackground blocks until either the HCS callback delivers +// HcsEventSystemExited (real exit, publish status) or Close fires (release +// without publishing). It then sets `computeSystem.waitError` (if any) and +// unblocks all `Wait` calls. +// +// HCS does not deliver a final notification on HcsCloseComputeSystem — it +// just unregisters the callback — so Close needs its own signal. +// Prematurely closing WaitChannel() causes `hcsExec.waitForContainerExit` +// to Kill the container's running process and report exitCode=-1 +// (rendered as 255). +// +// This MUST be called exactly once per `computeSystem.handle` but `Wait` is +// safe to call multiple times. +func (computeSystem *System) waitBackground() { + operation := "hcs::System::waitBackground" + ctx, span := oc.StartSpan(context.Background(), operation) + defer span.End() + span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) + + var raw json.RawMessage + var err error + select { + case <-computeSystem.waitBlock: + log.G(ctx).Debug("system waitBackground returning without exit notification (handle closed)") + return + case raw = <-computeSystem.notify.exit: + case abortErr := <-computeSystem.notify.abort: + err = makeSystemError(computeSystem, operation, abortErr) + } + + if err == nil && len(raw) > 0 { + var status struct { + Status int32 `json:"Status"` + ExitType string `json:"ExitType"` + } + if uErr := json.Unmarshal(raw, &status); uErr != nil { + log.G(ctx).WithError(uErr).WithField("exit-data", string(raw)).Warning("failed to parse SystemExitStatus") + } else if status.ExitType == "UnexpectedExit" { + log.G(ctx).Debug("unexpected system exit") + computeSystem.exitError = makeSystemError(computeSystem, operation, ErrVmcomputeUnexpectedExit) + } + } + computeSystem.closedWaitOnce.Do(func() { + computeSystem.waitError = err + computeSystem.stopTime = time.Now() + close(computeSystem.waitBlock) + }) + oc.SetSpanStatus(span, err) +} + +func (computeSystem *System) WaitChannel() <-chan struct{} { + return computeSystem.waitBlock +} + +func (computeSystem *System) WaitError() error { + return computeSystem.waitError +} + +// Wait synchronously waits for the compute system to shutdown or terminate. +// If the compute system has already exited returns the previous error (if any). +func (computeSystem *System) Wait() error { + return computeSystem.WaitCtx(context.Background()) +} + +// WaitCtx synchronously waits for the compute system to shutdown or terminate, or the context to be cancelled. +// +// See [System.Wait] for more information. +func (computeSystem *System) WaitCtx(ctx context.Context) error { + select { + case <-computeSystem.WaitChannel(): + return computeSystem.WaitError() + case <-ctx.Done(): + return ctx.Err() + } +} + +// stopped returns true if the compute system stopped. +func (computeSystem *System) stopped() bool { + select { + case <-computeSystem.waitBlock: + return true + default: + } + return false +} + +// ExitError returns an error describing the reason the compute system terminated. +func (computeSystem *System) ExitError() error { + if !computeSystem.stopped() { + return errors.New("container not exited") + } + if computeSystem.waitError != nil { + return computeSystem.waitError + } + return computeSystem.exitError +} + +// Properties returns the requested container properties targeting a V1 schema container. +func (computeSystem *System) Properties(ctx context.Context, types ...schema1.PropertyType) (*schema1.ContainerProperties, error) { + computeSystem.handleLock.RLock() + defer computeSystem.handleLock.RUnlock() + + operation := "hcs::System::Properties" + + if computeSystem.handle == 0 { + return nil, makeSystemError(computeSystem, operation, ErrAlreadyClosed) + } + + queryBytes, err := json.Marshal(schema1.PropertyQuery{PropertyTypes: types}) + if err != nil { + return nil, makeSystemError(computeSystem, operation, err) + } + query := string(queryBytes) + + propertiesJSON, err := runOperation(ctx, func(op computecore.HcsOperation) error { + return computecore.HcsGetComputeSystemProperties(ctx, computeSystem.handle, op, query) + }) + if err != nil { + return nil, makeSystemError(computeSystem, operation, err) + } + + if propertiesJSON == "" { + return nil, ErrUnexpectedValue + } + properties := &schema1.ContainerProperties{} + if err := json.Unmarshal([]byte(propertiesJSON), properties); err != nil { + return nil, makeSystemError(computeSystem, operation, err) + } + + return properties, nil +} + +// openSilo opens the container's server silo job object by its well-known name +// (`\Container_`). HCS owns the silo; the only way to open it from the shim is +// by name, and only while running as SYSTEM. The caller owns the returned handle and +// must Close it. +// +// In the future we can make use of some new functionality in HCS that allows you to +// pass a job object for HCS to use for the container. +func (computeSystem *System) openSilo(ctx context.Context) (*jobobject.JobObject, error) { + return jobobject.Open(ctx, &jobobject.Options{ + UseNTVariant: true, + Name: siloNameFmt(computeSystem.id), + }) +} + +// queryInProc handles querying for container properties without reaching out to HCS. `props` +// will be updated to contain any data returned from the queries present in `types`. If any properties +// failed to be queried they will be tallied up and returned in as the first return value. Failures on +// query are NOT considered errors; the only failure case for this method is if the containers job object +// cannot be opened. +func (computeSystem *System) queryInProc( + ctx context.Context, + props *hcsschema.Properties, + types []hcsschema.PropertyType, +) ([]hcsschema.PropertyType, error) { + job, err := computeSystem.openSilo(ctx) + if err != nil { + return nil, err + } + defer job.Close() + + var fallbackQueryTypes []hcsschema.PropertyType + for _, propType := range types { + switch propType { + case hcsschema.PTStatistics: + // Handle a bad caller asking for the same type twice. No use in re-querying if this is + // filled in already. + if props.Statistics == nil { + props.Statistics, err = computeSystem.statisticsInProc(job) + if err != nil { + log.G(ctx).WithError(err).Warn("failed to get statistics in-proc") + + fallbackQueryTypes = append(fallbackQueryTypes, propType) + } + } + default: + fallbackQueryTypes = append(fallbackQueryTypes, propType) + } + } + + return fallbackQueryTypes, nil +} + +// statisticsInProc emulates what HCS does to grab statistics for a given container with a small +// change to make grabbing the private working set total much more efficient. +func (computeSystem *System) statisticsInProc(job *jobobject.JobObject) (*hcsschema.Statistics, error) { + // Start timestamp for these stats before we grab them to match HCS + timestamp := time.Now() + + memInfo, err := job.QueryMemoryStats() + if err != nil { + return nil, err + } + + processorInfo, err := job.QueryProcessorStats() + if err != nil { + return nil, err + } + + storageInfo, err := job.QueryStorageStats() + if err != nil { + return nil, err + } + + // This calculates the private working set more efficiently than HCS does. HCS calls NtQuerySystemInformation + // with the class SystemProcessInformation which returns an array containing system information for *every* + // process running on the machine. They then grab the pids that are running in the container and filter down + // the entries in the array to only what's running in that silo and start tallying up the total. This doesn't + // work well as performance should get worse if more processess are running on the machine in general and not + // just in the container. All of the additional information besides the WorkingSetPrivateSize field is ignored + // as well which isn't great and is wasted work to fetch. + // + // HCS only let's you grab statistics in an all or nothing fashion, so we can't just grab the private + // working set ourselves and ask for everything else separately. The optimization we can make here is + // to open the silo ourselves and do the same queries for the rest of the info, as well as calculating + // the private working set in a more efficient manner by: + // + // 1. Find the pids running in the silo + // 2. Get a process handle for every process (only need PROCESS_QUERY_LIMITED_INFORMATION access) + // 3. Call NtQueryInformationProcess on each process with the class ProcessVmCounters + // 4. Tally up the total using the field PrivateWorkingSetSize in VM_COUNTERS_EX2. + privateWorkingSet, err := job.QueryPrivateWorkingSet() + if err != nil { + return nil, err + } + + return &hcsschema.Statistics{ + Timestamp: timestamp, + ContainerStartTime: computeSystem.startTime, + Uptime100ns: uint64(time.Since(computeSystem.startTime).Nanoseconds()) / 100, + Memory: &hcsschema.MemoryStats{ + MemoryUsageCommitBytes: memInfo.JobMemory, + MemoryUsageCommitPeakBytes: memInfo.PeakJobMemoryUsed, + MemoryUsagePrivateWorkingSetBytes: privateWorkingSet, + }, + Processor: &hcsschema.ProcessorStats{ + RuntimeKernel100ns: uint64(processorInfo.TotalKernelTime), + RuntimeUser100ns: uint64(processorInfo.TotalUserTime), + TotalRuntime100ns: uint64(processorInfo.TotalKernelTime + processorInfo.TotalUserTime), + }, + Storage: &hcsschema.StorageStats{ + ReadCountNormalized: uint64(storageInfo.ReadStats.IoCount), + ReadSizeBytes: storageInfo.ReadStats.TotalSize, + WriteCountNormalized: uint64(storageInfo.WriteStats.IoCount), + WriteSizeBytes: storageInfo.WriteStats.TotalSize, + }, + }, nil +} + +// hcsPropertiesV2Query is a helper to make a HcsGetComputeSystemProperties call using the V2 schema property types. +func (computeSystem *System) hcsPropertiesV2Query(ctx context.Context, types []hcsschema.PropertyType) (*hcsschema.Properties, error) { + operation := "hcs::System::PropertiesV2" + + if computeSystem.handle == 0 { + return nil, makeSystemError(computeSystem, operation, ErrAlreadyClosed) + } + + queryBytes, err := json.Marshal(hcsschema.PropertyQuery{PropertyTypes: types}) + if err != nil { + return nil, makeSystemError(computeSystem, operation, err) + } + query := string(queryBytes) + + propertiesJSON, err := runOperation(ctx, func(op computecore.HcsOperation) error { + return computecore.HcsGetComputeSystemProperties(ctx, computeSystem.handle, op, query) + }) + if err != nil { + return nil, makeSystemError(computeSystem, operation, err) + } + + if propertiesJSON == "" { + return nil, ErrUnexpectedValue + } + props := &hcsschema.Properties{} + if err := json.Unmarshal([]byte(propertiesJSON), props); err != nil { + return nil, makeSystemError(computeSystem, operation, err) + } + + return props, nil +} + +// PropertiesV2 returns the requested compute systems properties targeting a V2 schema compute system. +func (computeSystem *System) PropertiesV2(ctx context.Context, types ...hcsschema.PropertyType) (_ *hcsschema.Properties, err error) { + computeSystem.handleLock.RLock() + defer computeSystem.handleLock.RUnlock() + + // Let HCS tally up the total for VM based queries instead of querying ourselves. + if computeSystem.typ != "container" { + return computeSystem.hcsPropertiesV2Query(ctx, types) + } + + // Define a starter Properties struct with the default fields returned from every + // query. Owner is only returned from Statistics but it's harmless to include. + properties := &hcsschema.Properties{ + Id: computeSystem.id, + SystemType: computeSystem.typ, + RuntimeOsType: computeSystem.os, + Owner: computeSystem.owner, + } + + logEntry := log.G(ctx) + // First lets try and query ourselves without reaching to HCS. If any of the queries fail + // we'll take note and fallback to querying HCS for any of the failed types. + fallbackTypes, err := computeSystem.queryInProc(ctx, properties, types) + if err == nil && len(fallbackTypes) == 0 { + return properties, nil + } else if err != nil { + logEntry = logEntry.WithError(fmt.Errorf("failed to query compute system properties in-proc: %w", err)) + fallbackTypes = types + } + + logEntry.WithFields(logrus.Fields{ + logfields.ContainerID: computeSystem.id, + "propertyTypes": fallbackTypes, + }).Info("falling back to HCS for property type queries") + + hcsProperties, err := computeSystem.hcsPropertiesV2Query(ctx, fallbackTypes) + if err != nil { + return nil, err + } + + // Now add in anything that we might have successfully queried in process. + if properties.Statistics != nil { + hcsProperties.Statistics = properties.Statistics + hcsProperties.Owner = properties.Owner + } + + // For future support for querying processlist in-proc as well. + if properties.ProcessList != nil { + hcsProperties.ProcessList = properties.ProcessList + } + + return hcsProperties, nil +} + +// PropertiesV3 returns the requested compute system properties using a V2 schema property query. +// Unlike [System.PropertiesV2], this method accepts a full [hcsschema.PropertyQuery] directly, +// giving the caller more control over the query structure. The query is forwarded to HCS as-is +// without any in-proc optimisations such as that is V2. +func (computeSystem *System) PropertiesV3(ctx context.Context, query *hcsschema.PropertyQuery) (_ *hcsschema.Properties, err error) { + operation := "hcs::System::PropertiesV3" + + ctx, span := oc.StartSpan(ctx, operation) + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) + + computeSystem.handleLock.RLock() + defer computeSystem.handleLock.RUnlock() + + if computeSystem.handle == 0 { + return nil, makeSystemError(computeSystem, operation, ErrAlreadyClosed) + } + + log.G(ctx).WithFields(logrus.Fields{ + logfields.ContainerID: computeSystem.id, + "propertyTypes": query.PropertyTypes, + "propertyQueries": query.Queries, + }).Debug("querying compute system properties via PropertiesV3") + + queryBytes, err := json.Marshal(query) + if err != nil { + return nil, makeSystemError(computeSystem, operation, err) + } + queryStr := string(queryBytes) + + propertiesJSON, err := runOperation(ctx, func(op computecore.HcsOperation) error { + return computecore.HcsGetComputeSystemProperties(ctx, computeSystem.handle, op, queryStr) + }) + if err != nil { + return nil, makeSystemError(computeSystem, operation, err) + } + + if propertiesJSON == "" { + return nil, ErrUnexpectedValue + } + + props := &hcsschema.Properties{} + if err := json.Unmarshal([]byte(propertiesJSON), props); err != nil { + return nil, makeSystemError(computeSystem, operation, err) + } + + return props, nil +} + +// Pause pauses the execution of the computeSystem. This feature is not enabled in TP5. +func (computeSystem *System) Pause(ctx context.Context) (err error) { + operation := "hcs::System::Pause" + + ctx, span := oc.StartSpan(ctx, operation) + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) + + computeSystem.handleLock.RLock() + defer computeSystem.handleLock.RUnlock() + + if computeSystem.handle == 0 { + return makeSystemError(computeSystem, operation, ErrAlreadyClosed) + } + + pauseCtx, cancel := context.WithTimeout(ctx, timeout.SystemPause) + defer cancel() + _, callErr := runOperation(pauseCtx, func(op computecore.HcsOperation) error { + return computecore.HcsPauseComputeSystem(ctx, computeSystem.handle, op, "") + }) + if callErr != nil { + return makeSystemError(computeSystem, operation, callErr) + } + return nil +} + +// Resume resumes the execution of the computeSystem. This feature is not enabled in TP5. +func (computeSystem *System) Resume(ctx context.Context) (err error) { + operation := "hcs::System::Resume" + + ctx, span := oc.StartSpan(ctx, operation) + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) + + computeSystem.handleLock.RLock() + defer computeSystem.handleLock.RUnlock() + + if computeSystem.handle == 0 { + return makeSystemError(computeSystem, operation, ErrAlreadyClosed) + } + + resumeCtx, cancel := context.WithTimeout(ctx, timeout.SystemResume) + defer cancel() + _, callErr := runOperation(resumeCtx, func(op computecore.HcsOperation) error { + return computecore.HcsResumeComputeSystem(ctx, computeSystem.handle, op, "") + }) + if callErr != nil { + return makeSystemError(computeSystem, operation, callErr) + } + return nil +} + +// Save the compute system +func (computeSystem *System) Save(ctx context.Context, options interface{}) (err error) { + operation := "hcs::System::Save" + + ctx, span := oc.StartSpan(ctx, operation) + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) + + saveOptions, err := json.Marshal(options) + if err != nil { + return err + } + saveOptionsStr := string(saveOptions) + + computeSystem.handleLock.RLock() + defer computeSystem.handleLock.RUnlock() + + if computeSystem.handle == 0 { + return makeSystemError(computeSystem, operation, ErrAlreadyClosed) + } + + saveCtx, cancel := context.WithTimeout(ctx, timeout.SystemSave) + defer cancel() + _, callErr := runOperation(saveCtx, func(op computecore.HcsOperation) error { + return computecore.HcsSaveComputeSystem(ctx, computeSystem.handle, op, saveOptionsStr) + }) + if callErr != nil { + return makeSystemError(computeSystem, operation, callErr) + } + return nil +} + +// createProcess launches a process in the compute system and returns its +// handle plus the stdio info HCS produced. +// +// On process-isolated containers (observed on WS2022) HcsCreateProcess can +// complete synchronously and leave the tracking operation in a state where +// the wait spuriously fails (e.g. E_INVALIDARG) even though HCS handed back +// a valid process handle. Recover via HcsGetProcessInfo, but only on wait +// failure — re-fetching after a successful wait races a short-lived process +// exit and surfaces as HCS_E_PROCESS_ALREADY_STOPPED, which would wrongly +// fail the create. +func (computeSystem *System) createProcess(ctx context.Context, operation string, c interface{}) (*Process, *computecore.HcsProcessInformation, error) { + computeSystem.handleLock.RLock() + defer computeSystem.handleLock.RUnlock() + + if computeSystem.handle == 0 { + return nil, nil, makeSystemError(computeSystem, operation, ErrAlreadyClosed) + } + + configurationb, err := json.Marshal(c) + if err != nil { + return nil, nil, makeSystemError(computeSystem, operation, err) + } + configuration := string(configurationb) + + // Tag the operation label with the offending command line so error logs + // identify what HCS rejected. + switch v := c.(type) { + case *hcsschema.ProcessParameters: + operation += ": " + v.CommandLine + case *schema1.ProcessConfig: + operation += ": " + v.CommandLine + } + + var processHandle computecore.HcsProcess + processInfo, _, createErr := runProcessOperation(ctx, func(op computecore.HcsOperation) error { + var hErr error + processHandle, hErr = computecore.HcsCreateProcess(ctx, computeSystem.handle, configuration, op, nil) + return hErr + }) + if createErr != nil { + // No handle means the create itself failed; only a failed wait with + // a live handle is the recoverable sync-completion case. + if processHandle == 0 { + return nil, nil, makeSystemError(computeSystem, operation, createErr) + } + + log.G(ctx).WithError(createErr).Debug("HcsCreateProcess wait failed; falling back to HcsGetProcessInfo") + var recoverErr error + processInfo, _, recoverErr = runProcessOperation(ctx, func(op computecore.HcsOperation) error { + return computecore.HcsGetProcessInfo(ctx, processHandle, op) + }) + if recoverErr != nil { + // Recovery error (e.g. RPC_E_NULL_CONTEXT_HANDLE on a stale handle) + // hides the real cause; surface the original create-wait error. + computecore.HcsCloseProcess(ctx, processHandle) + return nil, nil, makeSystemError(computeSystem, operation, createErr) + } + } + + log.G(ctx).WithField("pid", processInfo.ProcessID).Debug("created process pid") + return newProcess(processHandle, int(processInfo.ProcessID), computeSystem), &processInfo, nil +} + +// CreateProcess launches a new process within the computeSystem. +func (computeSystem *System) CreateProcess(ctx context.Context, c interface{}) (cow.Process, error) { + operation := "hcs::System::CreateProcess" + process, processInfo, err := computeSystem.createProcess(ctx, operation, c) + if err != nil { + return nil, err + } + defer func() { + if err != nil { + process.Close() + } + }() + + pipes, err := makeOpenFiles([]syscall.Handle{processInfo.StdInput, processInfo.StdOutput, processInfo.StdError}) + if err != nil { + return nil, makeSystemError(computeSystem, operation, err) + } + process.stdin = pipes[0] + process.stdout = pipes[1] + process.stderr = pipes[2] + process.hasCachedStdio = true + + if err = process.registerNotification(ctx); err != nil { + return nil, makeSystemError(computeSystem, operation, err) + } + go process.waitBackground() + + return process, nil +} + +// OpenProcess gets an interface to an existing process within the computeSystem. +func (computeSystem *System) OpenProcess(ctx context.Context, pid int) (*Process, error) { + computeSystem.handleLock.RLock() + defer computeSystem.handleLock.RUnlock() + + operation := "hcs::System::OpenProcess" + + if computeSystem.handle == 0 { + return nil, makeSystemError(computeSystem, operation, ErrAlreadyClosed) + } + + processHandle, err := computecore.HcsOpenProcess(ctx, computeSystem.handle, uint32(pid), syscall.GENERIC_ALL) + if err != nil { + return nil, makeSystemError(computeSystem, operation, err) + } + + process := newProcess(processHandle, pid, computeSystem) + defer func() { + if err != nil { + _ = process.Close() + } + }() + if err = process.registerNotification(ctx); err != nil { + return nil, makeSystemError(computeSystem, operation, err) + } + go process.waitBackground() + + return process, nil +} + +// Close cleans up any state associated with the compute system but does not terminate or wait for it. +func (computeSystem *System) Close() error { + return computeSystem.CloseCtx(context.Background()) +} + +// CloseCtx is similar to [System.Close], but accepts a context. +// +// The context is used for all operations, including waits, so timeouts/cancellations may prevent +// proper system cleanup. +func (computeSystem *System) CloseCtx(ctx context.Context) (err error) { + operation := "hcs::System::Close" + ctx, span := oc.StartSpan(ctx, operation) + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) + + computeSystem.handleLock.Lock() + defer computeSystem.handleLock.Unlock() + + // Don't double free this + if computeSystem.handle == 0 { + return nil + } + + // HcsCloseComputeSystem internally unregisters our notification + // callback and drains in-flight invocations before tearing the handle + // down. + computecore.HcsCloseComputeSystem(ctx, computeSystem.handle) + unregisterNotificationContext(computeSystem.notificationID) + computeSystem.notificationID = 0 + + // Release Wait/WaitChannel callers with ErrAlreadyClosed + // and unblock waitBackground. + computeSystem.handle = 0 + computeSystem.closedWaitOnce.Do(func() { + computeSystem.waitError = ErrAlreadyClosed + computeSystem.stopTime = time.Now() + close(computeSystem.waitBlock) + }) + + return nil +} + +// Modify the System by sending a request to HCS +func (computeSystem *System) Modify(ctx context.Context, config interface{}) error { + computeSystem.handleLock.RLock() + defer computeSystem.handleLock.RUnlock() + + operation := "hcs::System::Modify" + + if computeSystem.handle == 0 { + return makeSystemError(computeSystem, operation, ErrAlreadyClosed) + } + + requestBytes, err := json.Marshal(config) + if err != nil { + return err + } + + requestJSON := string(requestBytes) + + _, callErr := runOperation(ctx, func(op computecore.HcsOperation) error { + return computecore.HcsModifyComputeSystem(ctx, computeSystem.handle, op, requestJSON, 0) + }) + if callErr != nil { + return makeSystemError(computeSystem, operation, callErr) + } + return nil +} + +// SetCPUGroupAffinities pins the container's server silo to the given processor +// group affinities. HCS does not expose a CPU-affinity field on the container Processor +// schema, so for process-isolated (Argon) containers we set the affinity directly on the +// silo's job object via SetInformationJobObject(JobObjectGroupInformationEx). +// +// HCS owns the silo; we only open a transient handle (by the silo's well-known job name, +// the same handle queryInProc opens) to record the affinity property. The kernel enforces +// it on every process that joins the silo via AssignProcessToJobObject — including the init +// process at Start and any descendants it spawns. +// +// This must be called after the compute system is created but before it is started, so the +// affinity is already recorded on the job when HCS assigns the init process. Applying it to +// an already-running silo is also safe: the kernel re-applies the mask to current members and +// migrates threads at the next scheduling dispatch. +// +// It implements the cow.Container interface. +func (computeSystem *System) SetCPUGroupAffinities(ctx context.Context, affinities []jobobject.GroupAffinity) error { + computeSystem.handleLock.RLock() + defer computeSystem.handleLock.RUnlock() + + // Guard the compute system's lifecycle while we touch its silo: the RLock blocks + // a concurrent Close(), and handle == 0 means it is already torn down. + if computeSystem.handle == 0 { + return fmt.Errorf("set cpu group affinities on %s silo: %w", computeSystem.ID(), ErrAlreadyClosed) + } + // The silo job object only exists for containers, not VM-based compute systems. + if computeSystem.typ != "container" { + return fmt.Errorf("cpu group affinities are only supported on container compute systems, got %q", computeSystem.typ) + } + + job, err := computeSystem.openSilo(ctx) + if err != nil { + return fmt.Errorf("open %s silo: %w", computeSystem.ID(), err) + } + defer job.Close() + + if err := job.SetCPUGroupAffinities(affinities); err != nil { + return fmt.Errorf("set cpu group affinities on %s silo: %w", computeSystem.ID(), err) + } + return nil +} + +func (computeSystem *System) StoppedTime() time.Time { + return computeSystem.stopTime +} + +func (computeSystem *System) StartedTime() time.Time { + return computeSystem.startTime +} diff --git a/internal/hcs/v2/utils.go b/internal/hcs/v2/utils.go new file mode 100644 index 0000000000..7b4240a994 --- /dev/null +++ b/internal/hcs/v2/utils.go @@ -0,0 +1,72 @@ +//go:build windows + +package hcsv2 + +import ( + "context" + "io" + "syscall" + + "github.com/Microsoft/go-winio" + diskutil "github.com/Microsoft/go-winio/vhd" + "github.com/Microsoft/hcsshim/computestorage" + "github.com/pkg/errors" + "golang.org/x/sys/windows" +) + +// makeOpenFiles wraps each handle in input as an overlapped I/O file, returning +// a parallel slice (typically the stdin/stdout/stderr of a compute-system +// process). On any wrap failure, all opened files are closed and the +// remaining unwrapped handles are closed before returning the error. +// +// Handles equal to 0 (NULL) or INVALID_HANDLE_VALUE are treated as "not +// present" and map to a nil entry in the result. HCS APIs such as +// HcsGetProcessInfo use INVALID_HANDLE_VALUE to indicate std streams the +// caller did not request; passing that value to NewOpenFile would +// incorrectly bind the sentinel as if it were a valid pipe handle. +func makeOpenFiles(hs []syscall.Handle) (_ []io.ReadWriteCloser, err error) { + fs := make([]io.ReadWriteCloser, len(hs)) + for i, h := range hs { + if h != syscall.Handle(0) && h != syscall.Handle(windows.InvalidHandle) { + if err == nil { + fs[i], err = winio.NewOpenFile(windows.Handle(h)) + } + if err != nil { + syscall.Close(h) + } + } + } + if err != nil { + for _, f := range fs { + if f != nil { + f.Close() + } + } + return nil, err + } + return fs, nil +} + +// CreateNTFSVHD creates a VHD formatted with NTFS of size `sizeGB` at the given `vhdPath`. +func CreateNTFSVHD(ctx context.Context, vhdPath string, sizeGB uint32) (err error) { + if err := diskutil.CreateVhdx(vhdPath, sizeGB, 1); err != nil { + return errors.Wrap(err, "failed to create VHD") + } + + vhd, err := diskutil.OpenVirtualDisk(vhdPath, diskutil.VirtualDiskAccessNone, diskutil.OpenVirtualDiskFlagNone) + if err != nil { + return errors.Wrap(err, "failed to open VHD") + } + defer func() { + err2 := windows.CloseHandle(windows.Handle(vhd)) + if err == nil { + err = errors.Wrap(err2, "failed to close VHD") + } + }() + + if err := computestorage.FormatWritableLayerVhd(ctx, windows.Handle(vhd)); err != nil { + return errors.Wrap(err, "failed to format VHD") + } + + return nil +} diff --git a/internal/hcsoci/cpuaffinity.go b/internal/hcsoci/cpuaffinity.go index f794259103..15b95dcec9 100644 --- a/internal/hcsoci/cpuaffinity.go +++ b/internal/hcsoci/cpuaffinity.go @@ -10,7 +10,7 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" - "github.com/Microsoft/hcsshim/internal/hcs" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/internal/jobobject" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/osversion" diff --git a/internal/hcsoci/create.go b/internal/hcsoci/create.go index d59007e29b..9358c5c85d 100644 --- a/internal/hcsoci/create.go +++ b/internal/hcsoci/create.go @@ -17,8 +17,8 @@ import ( "github.com/Microsoft/hcsshim/internal/cow" "github.com/Microsoft/hcsshim/internal/guestpath" - "github.com/Microsoft/hcsshim/internal/hcs" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/internal/hvsocket" "github.com/Microsoft/hcsshim/internal/layers" "github.com/Microsoft/hcsshim/internal/log" diff --git a/internal/jobcontainers/jobcontainer.go b/internal/jobcontainers/jobcontainer.go index d0b560f9d2..fc670e5827 100644 --- a/internal/jobcontainers/jobcontainer.go +++ b/internal/jobcontainers/jobcontainer.go @@ -17,9 +17,9 @@ import ( "github.com/Microsoft/hcsshim/internal/conpty" "github.com/Microsoft/hcsshim/internal/cow" "github.com/Microsoft/hcsshim/internal/exec" - "github.com/Microsoft/hcsshim/internal/hcs" "github.com/Microsoft/hcsshim/internal/hcs/schema1" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/internal/jobobject" "github.com/Microsoft/hcsshim/internal/layers" "github.com/Microsoft/hcsshim/internal/log" diff --git a/internal/jobcontainers/process.go b/internal/jobcontainers/process.go index cf3318f3b7..91510e2a51 100644 --- a/internal/jobcontainers/process.go +++ b/internal/jobcontainers/process.go @@ -15,7 +15,7 @@ import ( "github.com/Microsoft/hcsshim/internal/conpty" "github.com/Microsoft/hcsshim/internal/cow" "github.com/Microsoft/hcsshim/internal/exec" - "github.com/Microsoft/hcsshim/internal/hcs" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/protocol/guestresource" "github.com/Microsoft/hcsshim/internal/winapi" diff --git a/internal/processorinfo/host_information.go b/internal/processorinfo/host_information.go index ebeb2ec3df..d82e0667d5 100644 --- a/internal/processorinfo/host_information.go +++ b/internal/processorinfo/host_information.go @@ -8,8 +8,8 @@ import ( "errors" "fmt" - "github.com/Microsoft/hcsshim/internal/hcs" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" ) // HostProcessorInfo queries HCS for the host's processor information, including topology diff --git a/internal/uvm/create.go b/internal/uvm/create.go index 65ce81901b..d2723243d4 100644 --- a/internal/uvm/create.go +++ b/internal/uvm/create.go @@ -16,8 +16,8 @@ import ( "golang.org/x/sys/windows" "github.com/Microsoft/hcsshim/internal/cow" - "github.com/Microsoft/hcsshim/internal/hcs" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/oc" diff --git a/internal/uvm/plan9.go b/internal/uvm/plan9.go index 555d12cecf..5e3cf9c3ec 100644 --- a/internal/uvm/plan9.go +++ b/internal/uvm/plan9.go @@ -8,9 +8,9 @@ import ( "fmt" "strconv" - "github.com/Microsoft/hcsshim/internal/hcs" "github.com/Microsoft/hcsshim/internal/hcs/resourcepaths" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/internal/protocol/guestrequest" "github.com/Microsoft/hcsshim/internal/protocol/guestresource" "github.com/Microsoft/hcsshim/internal/vm/vmutils" diff --git a/internal/uvm/scsi/backend.go b/internal/uvm/scsi/backend.go index 6ec4208d8a..2d6e6594dc 100644 --- a/internal/uvm/scsi/backend.go +++ b/internal/uvm/scsi/backend.go @@ -8,9 +8,9 @@ import ( "fmt" "github.com/Microsoft/hcsshim/internal/gcs" - "github.com/Microsoft/hcsshim/internal/hcs" "github.com/Microsoft/hcsshim/internal/hcs/resourcepaths" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/internal/protocol/guestrequest" "github.com/Microsoft/hcsshim/internal/protocol/guestresource" ) diff --git a/internal/uvm/types.go b/internal/uvm/types.go index 2edc099701..45755b8643 100644 --- a/internal/uvm/types.go +++ b/internal/uvm/types.go @@ -13,7 +13,7 @@ import ( "github.com/Microsoft/hcsshim/hcn" "github.com/Microsoft/hcsshim/internal/gcs" - "github.com/Microsoft/hcsshim/internal/hcs" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/internal/uvm/scsi" ) diff --git a/internal/uvm/vsmb.go b/internal/uvm/vsmb.go index 314425702a..dff2eb3121 100644 --- a/internal/uvm/vsmb.go +++ b/internal/uvm/vsmb.go @@ -14,9 +14,9 @@ import ( "github.com/sirupsen/logrus" "golang.org/x/sys/windows" - "github.com/Microsoft/hcsshim/internal/hcs" "github.com/Microsoft/hcsshim/internal/hcs/resourcepaths" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/protocol/guestrequest" "github.com/Microsoft/hcsshim/internal/winapi" diff --git a/internal/vm/vmmanager/uvm.go b/internal/vm/vmmanager/uvm.go index ccff4747fb..8a2b9105a8 100644 --- a/internal/vm/vmmanager/uvm.go +++ b/internal/vm/vmmanager/uvm.go @@ -6,8 +6,8 @@ import ( "context" "fmt" - "github.com/Microsoft/hcsshim/internal/hcs" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/logfields" diff --git a/internal/vm/vmutils/gcs_logs.go b/internal/vm/vmutils/gcs_logs.go index 62ce3c8cb3..117fdcf6b2 100644 --- a/internal/vm/vmutils/gcs_logs.go +++ b/internal/vm/vmutils/gcs_logs.go @@ -8,7 +8,7 @@ import ( "io" "time" - "github.com/Microsoft/hcsshim/internal/hcs" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/logfields" diff --git a/internal/vm/vmutils/utils.go b/internal/vm/vmutils/utils.go index e5b1a51748..dff7ad371a 100644 --- a/internal/vm/vmutils/utils.go +++ b/internal/vm/vmutils/utils.go @@ -10,7 +10,7 @@ import ( "path/filepath" runhcsoptions "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" - "github.com/Microsoft/hcsshim/internal/hcs" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/internal/log" "github.com/containerd/typeurl/v2" diff --git a/test/functional/uvm_plannine_test.go b/test/functional/uvm_plannine_test.go index 3b0b3d582d..4b0665287b 100644 --- a/test/functional/uvm_plannine_test.go +++ b/test/functional/uvm_plannine_test.go @@ -12,7 +12,7 @@ import ( "path/filepath" "testing" - "github.com/Microsoft/hcsshim/internal/hcs" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/internal/uvm" "github.com/Microsoft/hcsshim/osversion" diff --git a/test/functional/uvm_vsmb_test.go b/test/functional/uvm_vsmb_test.go index 7914d666cd..27b9b97c8d 100644 --- a/test/functional/uvm_vsmb_test.go +++ b/test/functional/uvm_vsmb_test.go @@ -11,7 +11,7 @@ import ( "path/filepath" "testing" - "github.com/Microsoft/hcsshim/internal/hcs" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/osversion" "github.com/Microsoft/hcsshim/test/internal/util" diff --git a/test/internal/scratch.go b/test/internal/scratch.go index 8afef0533d..a712a670c3 100644 --- a/test/internal/scratch.go +++ b/test/internal/scratch.go @@ -7,7 +7,7 @@ import ( "path/filepath" "testing" - "github.com/Microsoft/hcsshim/internal/hcs" + hcs "github.com/Microsoft/hcsshim/internal/hcs/v2" "github.com/Microsoft/hcsshim/internal/lcow" "github.com/Microsoft/hcsshim/internal/uvm" "github.com/Microsoft/hcsshim/internal/wclayer" diff --git a/test/pkg/definitions/hcs/hcs.go b/test/pkg/definitions/hcs/hcs.go index bb66064e51..e6e36cc1d4 100644 --- a/test/pkg/definitions/hcs/hcs.go +++ b/test/pkg/definitions/hcs/hcs.go @@ -3,7 +3,7 @@ package hcs import ( - internalhcs "github.com/Microsoft/hcsshim/internal/hcs" + internalhcs "github.com/Microsoft/hcsshim/internal/hcs/v2" ) var (