Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions acceptance/experimental/air/status/out.test.toml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

36 changes: 36 additions & 0 deletions acceptance/experimental/air/status/output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@

=== status (text)
>>> [CLI] experimental air status 123
Run ID: 123
Status: SUCCESS
Submitted: [TIMESTAMP]
Duration: 12s
Retries: 0
Experiment: my-exp
User: user@example.com
Accelerators: 8x H100
MLflow: [DATABRICKS_URL]/ml/experiments/exp1/runs/run1/artifacts/logs/node_0
Dashboard: https://my-workspace.cloud.databricks.test/jobs/runs/123

=== status (json)
>>> [CLI] experimental air status 123 -o json
{
"v": 1,
"ts": "[TIMESTAMP]",
"data": {
"run_id": "123",
"status": "SUCCESS",
"started_at": "[TIMESTAMP]",
"duration_seconds": 12,
"attempt_number": 0,
"experiment_name": "my-exp",
"dashboard_url": "https://my-workspace.cloud.databricks.test/jobs/runs/123",
"mlflow_url": "[DATABRICKS_URL]/ml/experiments/exp1/runs/run1/artifacts/logs/node_0"
}
}

=== invalid run id
>>> [CLI] experimental air status notanumber
Error: invalid RUN_ID "notanumber": must be a positive integer

Exit code: 1
8 changes: 8 additions & 0 deletions acceptance/experimental/air/status/script
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Acceptance script for `experimental air status`.
# `title`, `trace` and `errcode` are helpers supplied by the test harness (not
# defined in this file); the recorded output shows a "=== <title>" header and a
# ">>> <command>" line for each step.

# Default (text) rendering of run 123.
title "status (text)"
trace $CLI experimental air status 123

# Same run rendered as JSON via `-o json`.
title "status (json)"
trace $CLI experimental air status 123 -o json

# A non-numeric RUN_ID is expected to fail; `errcode` lets the step fail and
# the recorded output then includes the exit code.
title "invalid run id"
errcode trace $CLI experimental air status notanumber
40 changes: 40 additions & 0 deletions acceptance/experimental/air/status/test.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# This command does not deploy a bundle, so no engine matrix is needed.
[EnvMatrix]
DATABRICKS_BUNDLE_ENGINE = []

# The SDK occasionally probes host reachability with a HEAD request; stub it so
# the test is deterministic.
[[Server]]
Pattern = "HEAD /"
Response.Body = ''

# A single GenAI-compute run with an experiment, GPUs, and a creator.
[[Server]]
Pattern = "GET /api/2.2/jobs/runs/get"
Response.Body = '''
{
"run_id": 123,
"run_page_url": "https://my-workspace.cloud.databricks.test/jobs/runs/123",
"creator_user_name": "user@example.com",
"start_time": 1700000000000,
"end_time": 1700000012000,
"state": {"life_cycle_state": "TERMINATED", "result_state": "SUCCESS"},
"tasks": [
{
"task_key": "train",
"attempt_number": 0,
"gen_ai_compute_task": {
"mlflow_experiment_name": "/Users/user@example.com/my-exp",
"compute": {"gpu_type": "GPU_8xH100", "num_gpus": 8}
}
}
]
}
'''

# MLflow identifiers for the deep-link (runs/get-output is not modeled by the typed SDK).
[[Server]]
Pattern = "GET /api/2.2/jobs/runs/get-output"
Response.Body = '''
{"gen_ai_compute_output": {"run_info": {"mlflow_experiment_id": "exp1", "mlflow_run_id": "run1"}}}
'''
6 changes: 0 additions & 6 deletions acceptance/experimental/air/unimplemented/output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,6 @@ Error: `air run` is not implemented yet

Exit code: 1

=== status
>>> [CLI] experimental air status 123
Error: `air status` is not implemented yet

Exit code: 1

=== list
>>> [CLI] experimental air list
Error: `air list` is not implemented yet
Expand Down
3 changes: 0 additions & 3 deletions acceptance/experimental/air/unimplemented/script
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
title "run"
errcode trace $CLI experimental air run

title "status"
errcode trace $CLI experimental air status 123

title "list"
errcode trace $CLI experimental air list

Expand Down
154 changes: 154 additions & 0 deletions experimental/air/cmd/format.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
package aircmd

import (
"fmt"
"strings"
"time"

"github.com/databricks/databricks-sdk-go/service/jobs"
)

// gpuDisplayNames translates backend GPU identifiers into the short names shown
// to users. Identifiers missing from this table are displayed verbatim by
// gpuDisplayName.
var gpuDisplayNames = map[string]string{
	// A10 family.
	"a10":       "A10",
	"GPU_1xA10": "A10",

	// H100 family.
	"h100_80gb":  "H100",
	"GPU_1xH100": "H100",
	"GPU_8xH100": "H100",
}

// runStatus picks the one status word displayed for a run.
//
// A run state carries a lifecycle state (e.g. PENDING, RUNNING) and, after the
// run finishes, a result state (e.g. SUCCESS, FAILED). The result state wins
// when present; otherwise the lifecycle state is used. A nil state, or one with
// neither field set, yields "UNKNOWN".
func runStatus(state *jobs.RunState) string {
	const unknown = "UNKNOWN"

	// Cases are evaluated top to bottom, so the nil check guards the field
	// accesses below it.
	switch {
	case state == nil:
		return unknown
	case state.ResultState != "":
		return string(state.ResultState)
	case state.LifeCycleState != "":
		return string(state.LifeCycleState)
	default:
		return unknown
	}
}

// startedAt renders the run's StartTime (epoch milliseconds) as an RFC 3339
// timestamp in UTC. It returns nil when StartTime is zero, i.e. the run has not
// started yet.
func startedAt(run *jobs.Run) *string {
	startMillis := run.StartTime
	if startMillis == 0 {
		return nil
	}

	started := time.UnixMilli(startMillis).UTC()
	formatted := started.Format(time.RFC3339)
	return &formatted
}

// durationSeconds reports the run's duration in whole seconds, or nil when the
// run has not started (StartTime is zero).
//
// The elapsed time is taken from the first available source:
//  1. RunDuration, when the backend has already computed it;
//  2. EndTime - StartTime, for a finished run;
//  3. now - StartTime, for a run that is still in progress.
//
// All inputs are milliseconds; the result is truncated to seconds.
func durationSeconds(run *jobs.Run) *int64 {
	if run.StartTime == 0 {
		return nil
	}

	var elapsedMillis int64
	if run.RunDuration > 0 {
		elapsedMillis = run.RunDuration
	} else if run.EndTime > 0 {
		elapsedMillis = run.EndTime - run.StartTime
	} else {
		// No end time yet: measure against the wall clock.
		elapsedMillis = time.Now().UnixMilli() - run.StartTime
	}

	secs := elapsedMillis / 1000
	return &secs
}

// formatDuration turns a number of seconds into a compact human string such as
// "1h 2m 3s".
//
// Units whose value is zero are omitted wherever they occur (3601 becomes
// "1h 1s"), except that a zero duration is rendered as "0s" so the result is
// never empty.
//
// Negative inputs are clamped to zero and rendered as "0s". A negative value
// can only arise from clock skew between this machine and the backend, and
// formatting it unit by unit would otherwise print just the negative seconds
// remainder (e.g. -3700 as "-40s"), which is misleading.
func formatDuration(totalSeconds int64) string {
	if totalSeconds < 0 {
		totalSeconds = 0
	}

	hours := totalSeconds / 3600
	minutes := (totalSeconds % 3600) / 60
	seconds := totalSeconds % 60

	// At most three parts: hours, minutes, seconds.
	parts := make([]string, 0, 3)
	if hours > 0 {
		parts = append(parts, fmt.Sprintf("%dh", hours))
	}
	if minutes > 0 {
		parts = append(parts, fmt.Sprintf("%dm", minutes))
	}
	// Always emit seconds when nothing else was emitted, so "0s" is possible.
	if seconds > 0 || len(parts) == 0 {
		parts = append(parts, fmt.Sprintf("%ds", seconds))
	}
	return strings.Join(parts, " ")
}

// latestAttemptNumber reports the attempt number of the last entry in
// run.Tasks, which is used as the run's retry count. Attempts are numbered from
// 0, so 0 means no retry has happened. A run with no tasks also yields 0.
func latestAttemptNumber(run *jobs.Run) int {
	if n := len(run.Tasks); n > 0 {
		return run.Tasks[n-1].AttemptNumber
	}
	return 0
}

// experimentName returns the MLflow experiment name configured on the run's
// first task, with any leading "/Users/<user>/" home-folder prefix removed so
// that only the name the user chose is shown (e.g.
// "/Users/me@example.com/my-experiment" becomes "my-experiment").
//
// It returns nil when the run has no tasks, the first task is not a GenAI
// compute task, or that task has no experiment name.
func experimentName(run *jobs.Run) *string {
	if len(run.Tasks) == 0 {
		return nil
	}

	genAI := run.Tasks[0].GenAiComputeTask
	if genAI == nil {
		return nil
	}

	fullName := genAI.MlflowExperimentName
	if fullName == "" {
		return nil
	}

	short := stripExperimentUserPrefix(fullName)
	return &short
}

// stripExperimentUserPrefix drops a leading "/Users/<user>/" from an experiment
// name and returns what follows it. The name is returned unchanged when it does
// not start with "/Users/" or when nothing separates the user segment from a
// remainder (e.g. "/Users/me").
func stripExperimentUserPrefix(name string) string {
	const homeRoot = "/Users/"
	if !strings.HasPrefix(name, homeRoot) {
		return name
	}

	// afterRoot is "<user>/<rest>"; the first slash in it ends the user segment.
	afterRoot := name[len(homeRoot):]
	slash := strings.IndexByte(afterRoot, '/')
	if slash < 0 {
		return name
	}
	return afterRoot[slash+1:]
}

// accelerators summarizes the GPUs requested by the run's first task as
// "<count>x <name>", for example "8x H100". It returns an empty string when the
// run has no tasks, the first task is not a GenAI compute task, the task has no
// compute section, or the GPU count is zero.
func accelerators(run *jobs.Run) string {
	if len(run.Tasks) == 0 {
		return ""
	}

	genAI := run.Tasks[0].GenAiComputeTask
	if genAI == nil {
		return ""
	}

	compute := genAI.Compute
	if compute == nil || compute.NumGpus == 0 {
		return ""
	}

	return fmt.Sprintf("%dx %s", compute.NumGpus, gpuDisplayName(compute.GpuType))
}

// gpuDisplayName looks up the user-facing name for a backend GPU identifier in
// gpuDisplayNames. Identifiers that are not in the table are returned as-is.
func gpuDisplayName(gpuType string) string {
	friendly, known := gpuDisplayNames[gpuType]
	if !known {
		return gpuType
	}
	return friendly
}
Loading
Loading