Get GPU cluster by cluster ID

Together AI SDK (v2)

from together import Together
# Build an SDK client using its default configuration.
# NOTE(review): presumably credentials are picked up from the environment — confirm in the SDK docs.
together_client = Together()

# Look up a single cluster; "cluster_id" is a placeholder for the ID of the cluster to retrieve.
result = together_client.beta.clusters.retrieve("cluster_id")
print(result)

import Together from "together-ai";
// Build an SDK client using its default configuration.
// NOTE(review): presumably credentials are picked up from the environment — confirm in the SDK docs.
const together = new Together();

// Look up a single cluster; "cluster_id" is a placeholder for the ID of the cluster to retrieve.
const result = await together.beta.clusters.retrieve("cluster_id");
console.log(result);

import Together from "together-ai";
// Build an SDK client using its default configuration.
// NOTE(review): presumably credentials are picked up from the environment — confirm in the SDK docs.
const together = new Together();

// Look up a single cluster; "cluster_id" is a placeholder for the ID of the cluster to retrieve.
const result = await together.beta.clusters.retrieve("cluster_id");
console.log(result);

# Fetch one GPU cluster with the tg CLI; replace <cluster_id> with the ID of the cluster to retrieve.
tg beta clusters get <cluster_id>

<?php

// Fetch a single GPU cluster over HTTPS with cURL and print the raw response.
// Replace {cluster_id} in the URL and <token> in the header before running.
$endpoint = "https://api.together.ai/v1/compute/clusters/{cluster_id}";

$options = [
  CURLOPT_URL => $endpoint,
  CURLOPT_RETURNTRANSFER => true, // hand the body back from curl_exec() instead of printing it
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "GET",
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>"
  ],
];

$handle = curl_init();
curl_setopt_array($handle, $options);

$body = curl_exec($handle);
$failure = curl_error($handle);

curl_close($handle);

// Print the response body on success, otherwise the cURL error text.
if (!$failure) {
  echo $body;
} else {
  echo "cURL Error #:" . $failure;
}

package main

import (
	"fmt"
	"net/http"
	"io"
)

// main sends a GET request for a single GPU cluster and prints the raw
// response body. Replace {cluster_id} in the URL and <token> in the
// Authorization header before running.
//
// Each step's error is reported and ends the program early instead of being
// discarded.
func main() {

	url := "https://api.together.ai/v1/compute/clusters/{cluster_id}"

	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		fmt.Println("building request failed:", err)
		return
	}

	req.Header.Add("Authorization", "Bearer <token>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so stop before touching res.Body.
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body failed:", err)
		return
	}

	fmt.Println(string(body))

}

// Send a GET request for a single GPU cluster with Unirest and read the body as a String.
// Replace {cluster_id} in the URL and <token> in the Authorization header before running.
HttpResponse<String> response = Unirest.get("https://api.together.ai/v1/compute/clusters/{cluster_id}")
  .header("Authorization", "Bearer <token>")
  .asString();

require 'uri'
require 'net/http'

# Endpoint for a single GPU cluster; replace {cluster_id} with the ID of the cluster to retrieve.
endpoint = URI("https://api.together.ai/v1/compute/clusters/{cluster_id}")

# GET request carrying the bearer token; replace <token> with your auth token.
get_request = Net::HTTP::Get.new(endpoint)
get_request["Authorization"] = 'Bearer <token>'

# TLS-enabled connection to the endpoint's host and port.
connection = Net::HTTP.new(endpoint.host, endpoint.port)
connection.use_ssl = true

# Send the request and print the response body.
puts connection.request(get_request).read_body

{
  "cluster_id": "<string>",
  "cluster_type": "KUBERNETES",
  "region": "<string>",
  "gpu_type": "H100_SXM",
  "cluster_name": "<string>",
  "volumes": [
    {
      "volume_id": "<string>",
      "volume_name": "<string>",
      "size_tib": 123,
      "status": "<string>"
    }
  ],
  "status": "Ready",
  "control_plane_nodes": [
    {
      "node_id": "<string>",
      "status": "<string>",
      "host_name": "<string>",
      "num_cpu_cores": 123,
      "memory_gib": 123,
      "network": "<string>",
      "phase_transitions": [
        {
          "transition_time": "2023-11-07T05:31:56Z"
        }
      ],
      "public_ipv4": "<string>"
    }
  ],
  "gpu_worker_nodes": [
    {
      "node_id": "<string>",
      "status": "<string>",
      "host_name": "<string>",
      "num_cpu_cores": 123,
      "num_gpus": 123,
      "memory_gib": 123,
      "networks": [
        "<string>"
      ],
      "phase_transitions": [
        {
          "transition_time": "2023-11-07T05:31:56Z"
        }
      ],
      "instance_id": "<string>",
      "latest_remediation": {
        "id": "<string>",
        "cluster_id": "<string>",
        "instance_id": "<string>",
        "reason": "<string>",
        "active_health_check_run_id": "<string>",
        "passive_health_check_event_id": "<string>",
        "requested_by": "<string>",
        "create_time": "2023-11-07T05:31:56Z",
        "reviewed_by": "<string>",
        "review_time": "2023-11-07T05:31:56Z",
        "review_comment": "<string>",
        "start_time": "2023-11-07T05:31:56Z",
        "end_time": "2023-11-07T05:31:56Z",
        "error_message": "<string>",
        "update_time": "2023-11-07T05:31:56Z",
        "instance_name": "<string>",
        "linked_alerts": [
          {
            "passive_health_check_alert_id": "<string>",
            "cluster_id": "<string>",
            "target_vm": "<string>",
            "alert_name": "<string>",
            "annotations": {},
            "started_at": "2023-11-07T05:31:56Z",
            "instance_id": "<string>",
            "resolved_at": "2023-11-07T05:31:56Z",
            "node_remediation_intent_id": "<string>"
          }
        ]
      },
      "slurm_worker_hostname": "<string>",
      "marked_for_deletion": true,
      "public_ipv4": "<string>",
      "ib_hca_type": "<string>",
      "ib_hca_count": 123,
      "nvswitch_count": 123,
      "nvswitch_type": "<string>",
      "ephemeral_storage": "<string>",
      "auto_remediation_enabled": true,
      "deleted_at": "2023-11-07T05:31:56Z"
    }
  ],
  "kube_config": "<string>",
  "num_gpus": 123,
  "cuda_version": "<string>",
  "nvidia_driver_version": "<string>",
  "project_id": "<string>",
  "num_cpu_workers": 123,
  "phase_transitions": [
    {
      "transition_time": "2023-11-07T05:31:56Z"
    }
  ],
  "desired_preemptible_gpus": 123,
  "allocated_preemptible_gpus": 123,
  "billing_type": "RESERVED",
  "add_ons": [
    {
      "name": "<string>",
      "add_on_type": "<string>",
      "config": {
        "dashboard": {
          "enabled": true
        },
        "ingress": {
          "enabled": true
        },
        "torchpass": {
          "enabled": true
        }
      },
      "state": {
        "dashboard": {},
        "ingress": {},
        "torchpass": {}
      }
    }
  ],
  "num_capacity_pool_gpus": 123,
  "num_reserved_gpus": 123,
  "duration_hours": 123,
  "slurm_shm_size_gib": 123,
  "capacity_pool_id": "<string>",
  "reservation_start_time": "2023-11-07T05:31:56Z",
  "reservation_end_time": "2023-11-07T05:31:56Z",
  "install_traefik": true,
  "created_at": "2023-11-07T05:31:56Z",
  "oidc_config": {
    "issuer_url": "<string>",
    "client_id": "<string>",
    "username_claim": "<string>",
    "username_prefix": "<string>",
    "group_claim": "<string>",
    "group_prefix": "<string>",
    "ca_cert": "<string>"
  },
  "cluster_config": {
    "kubernetes_dashboard_enabled": true,
    "jumphost_enabled": true,
    "slurm_startup_scripts": {
      "worker_prolog": "<string>",
      "worker_epilog": "<string>",
      "controller_prolog": "<string>",
      "controller_epilog": "<string>",
      "login_init_script": "<string>",
      "nodeset_init_script": "<string>",
      "extra_slurm_conf": "<string>"
    },
    "ingress": {
      "enabled": true
    },
    "observability": {
      "enabled": true
    },
    "gpu_operator_version": "<string>",
    "network_operator_version": "<string>",
    "ssh_ca_enabled": true
  },
  "machine_cluster_id": "<string>",
  "first_ready_at": "2023-11-07T05:31:56Z",
  "is_in_substrate": true,
  "control_plane_ready": true,
  "ums_project_id": "<string>",
  "ums_org_id": "<string>",
  "os_image": "<string>",
  "nvidia_driver_version_id": "<string>",
  "deleted_gpu_worker_nodes": [
    {
      "node_id": "<string>",
      "status": "<string>",
      "host_name": "<string>",
      "num_cpu_cores": 123,
      "num_gpus": 123,
      "memory_gib": 123,
      "networks": [
        "<string>"
      ],
      "phase_transitions": [
        {
          "transition_time": "2023-11-07T05:31:56Z"
        }
      ],
      "instance_id": "<string>",
      "latest_remediation": {
        "id": "<string>",
        "cluster_id": "<string>",
        "instance_id": "<string>",
        "reason": "<string>",
        "active_health_check_run_id": "<string>",
        "passive_health_check_event_id": "<string>",
        "requested_by": "<string>",
        "create_time": "2023-11-07T05:31:56Z",
        "reviewed_by": "<string>",
        "review_time": "2023-11-07T05:31:56Z",
        "review_comment": "<string>",
        "start_time": "2023-11-07T05:31:56Z",
        "end_time": "2023-11-07T05:31:56Z",
        "error_message": "<string>",
        "update_time": "2023-11-07T05:31:56Z",
        "instance_name": "<string>",
        "linked_alerts": [
          {
            "passive_health_check_alert_id": "<string>",
            "cluster_id": "<string>",
            "target_vm": "<string>",
            "alert_name": "<string>",
            "annotations": {},
            "started_at": "2023-11-07T05:31:56Z",
            "instance_id": "<string>",
            "resolved_at": "2023-11-07T05:31:56Z",
            "node_remediation_intent_id": "<string>"
          }
        ]
      },
      "slurm_worker_hostname": "<string>",
      "marked_for_deletion": true,
      "public_ipv4": "<string>",
      "ib_hca_type": "<string>",
      "ib_hca_count": 123,
      "nvswitch_count": 123,
      "nvswitch_type": "<string>",
      "ephemeral_storage": "<string>",
      "auto_remediation_enabled": true,
      "deleted_at": "2023-11-07T05:31:56Z"
    }
  ],
  "node_lifecycle_events": [
    {
      "node_id": "<string>",
      "reason": "<string>",
      "message": "<string>",
      "timestamp": "2023-11-07T05:31:56Z"
    }
  ]
}

GET /compute/clusters/{cluster_id}

Together AI SDK (v2)

from together import Together
# Build an SDK client using its default configuration.
# NOTE(review): presumably credentials are picked up from the environment — confirm in the SDK docs.
together_client = Together()

# Look up a single cluster; "cluster_id" is a placeholder for the ID of the cluster to retrieve.
result = together_client.beta.clusters.retrieve("cluster_id")
print(result)

import Together from "together-ai";
// Build an SDK client using its default configuration.
// NOTE(review): presumably credentials are picked up from the environment — confirm in the SDK docs.
const together = new Together();

// Look up a single cluster; "cluster_id" is a placeholder for the ID of the cluster to retrieve.
const result = await together.beta.clusters.retrieve("cluster_id");
console.log(result);

import Together from "together-ai";
// Build an SDK client using its default configuration.
// NOTE(review): presumably credentials are picked up from the environment — confirm in the SDK docs.
const together = new Together();

// Look up a single cluster; "cluster_id" is a placeholder for the ID of the cluster to retrieve.
const result = await together.beta.clusters.retrieve("cluster_id");
console.log(result);

# Fetch one GPU cluster with the tg CLI; replace <cluster_id> with the ID of the cluster to retrieve.
tg beta clusters get <cluster_id>

<?php

// Fetch a single GPU cluster over HTTPS with cURL and print the raw response.
// Replace {cluster_id} in the URL and <token> in the header before running.
$endpoint = "https://api.together.ai/v1/compute/clusters/{cluster_id}";

$options = [
  CURLOPT_URL => $endpoint,
  CURLOPT_RETURNTRANSFER => true, // hand the body back from curl_exec() instead of printing it
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "GET",
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>"
  ],
];

$handle = curl_init();
curl_setopt_array($handle, $options);

$body = curl_exec($handle);
$failure = curl_error($handle);

curl_close($handle);

// Print the response body on success, otherwise the cURL error text.
if (!$failure) {
  echo $body;
} else {
  echo "cURL Error #:" . $failure;
}

package main

import (
	"fmt"
	"net/http"
	"io"
)

// main sends a GET request for a single GPU cluster and prints the raw
// response body. Replace {cluster_id} in the URL and <token> in the
// Authorization header before running.
//
// Each step's error is reported and ends the program early instead of being
// discarded.
func main() {

	url := "https://api.together.ai/v1/compute/clusters/{cluster_id}"

	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		fmt.Println("building request failed:", err)
		return
	}

	req.Header.Add("Authorization", "Bearer <token>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so stop before touching res.Body.
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body failed:", err)
		return
	}

	fmt.Println(string(body))

}

// Send a GET request for a single GPU cluster with Unirest and read the body as a String.
// Replace {cluster_id} in the URL and <token> in the Authorization header before running.
HttpResponse<String> response = Unirest.get("https://api.together.ai/v1/compute/clusters/{cluster_id}")
  .header("Authorization", "Bearer <token>")
  .asString();

require 'uri'
require 'net/http'

# Endpoint for a single GPU cluster; replace {cluster_id} with the ID of the cluster to retrieve.
endpoint = URI("https://api.together.ai/v1/compute/clusters/{cluster_id}")

# GET request carrying the bearer token; replace <token> with your auth token.
get_request = Net::HTTP::Get.new(endpoint)
get_request["Authorization"] = 'Bearer <token>'

# TLS-enabled connection to the endpoint's host and port.
connection = Net::HTTP.new(endpoint.host, endpoint.port)
connection.use_ssl = true

# Send the request and print the response body.
puts connection.request(get_request).read_body

{
  "cluster_id": "<string>",
  "cluster_type": "KUBERNETES",
  "region": "<string>",
  "gpu_type": "H100_SXM",
  "cluster_name": "<string>",
  "volumes": [
    {
      "volume_id": "<string>",
      "volume_name": "<string>",
      "size_tib": 123,
      "status": "<string>"
    }
  ],
  "status": "Ready",
  "control_plane_nodes": [
    {
      "node_id": "<string>",
      "status": "<string>",
      "host_name": "<string>",
      "num_cpu_cores": 123,
      "memory_gib": 123,
      "network": "<string>",
      "phase_transitions": [
        {
          "transition_time": "2023-11-07T05:31:56Z"
        }
      ],
      "public_ipv4": "<string>"
    }
  ],
  "gpu_worker_nodes": [
    {
      "node_id": "<string>",
      "status": "<string>",
      "host_name": "<string>",
      "num_cpu_cores": 123,
      "num_gpus": 123,
      "memory_gib": 123,
      "networks": [
        "<string>"
      ],
      "phase_transitions": [
        {
          "transition_time": "2023-11-07T05:31:56Z"
        }
      ],
      "instance_id": "<string>",
      "latest_remediation": {
        "id": "<string>",
        "cluster_id": "<string>",
        "instance_id": "<string>",
        "reason": "<string>",
        "active_health_check_run_id": "<string>",
        "passive_health_check_event_id": "<string>",
        "requested_by": "<string>",
        "create_time": "2023-11-07T05:31:56Z",
        "reviewed_by": "<string>",
        "review_time": "2023-11-07T05:31:56Z",
        "review_comment": "<string>",
        "start_time": "2023-11-07T05:31:56Z",
        "end_time": "2023-11-07T05:31:56Z",
        "error_message": "<string>",
        "update_time": "2023-11-07T05:31:56Z",
        "instance_name": "<string>",
        "linked_alerts": [
          {
            "passive_health_check_alert_id": "<string>",
            "cluster_id": "<string>",
            "target_vm": "<string>",
            "alert_name": "<string>",
            "annotations": {},
            "started_at": "2023-11-07T05:31:56Z",
            "instance_id": "<string>",
            "resolved_at": "2023-11-07T05:31:56Z",
            "node_remediation_intent_id": "<string>"
          }
        ]
      },
      "slurm_worker_hostname": "<string>",
      "marked_for_deletion": true,
      "public_ipv4": "<string>",
      "ib_hca_type": "<string>",
      "ib_hca_count": 123,
      "nvswitch_count": 123,
      "nvswitch_type": "<string>",
      "ephemeral_storage": "<string>",
      "auto_remediation_enabled": true,
      "deleted_at": "2023-11-07T05:31:56Z"
    }
  ],
  "kube_config": "<string>",
  "num_gpus": 123,
  "cuda_version": "<string>",
  "nvidia_driver_version": "<string>",
  "project_id": "<string>",
  "num_cpu_workers": 123,
  "phase_transitions": [
    {
      "transition_time": "2023-11-07T05:31:56Z"
    }
  ],
  "desired_preemptible_gpus": 123,
  "allocated_preemptible_gpus": 123,
  "billing_type": "RESERVED",
  "add_ons": [
    {
      "name": "<string>",
      "add_on_type": "<string>",
      "config": {
        "dashboard": {
          "enabled": true
        },
        "ingress": {
          "enabled": true
        },
        "torchpass": {
          "enabled": true
        }
      },
      "state": {
        "dashboard": {},
        "ingress": {},
        "torchpass": {}
      }
    }
  ],
  "num_capacity_pool_gpus": 123,
  "num_reserved_gpus": 123,
  "duration_hours": 123,
  "slurm_shm_size_gib": 123,
  "capacity_pool_id": "<string>",
  "reservation_start_time": "2023-11-07T05:31:56Z",
  "reservation_end_time": "2023-11-07T05:31:56Z",
  "install_traefik": true,
  "created_at": "2023-11-07T05:31:56Z",
  "oidc_config": {
    "issuer_url": "<string>",
    "client_id": "<string>",
    "username_claim": "<string>",
    "username_prefix": "<string>",
    "group_claim": "<string>",
    "group_prefix": "<string>",
    "ca_cert": "<string>"
  },
  "cluster_config": {
    "kubernetes_dashboard_enabled": true,
    "jumphost_enabled": true,
    "slurm_startup_scripts": {
      "worker_prolog": "<string>",
      "worker_epilog": "<string>",
      "controller_prolog": "<string>",
      "controller_epilog": "<string>",
      "login_init_script": "<string>",
      "nodeset_init_script": "<string>",
      "extra_slurm_conf": "<string>"
    },
    "ingress": {
      "enabled": true
    },
    "observability": {
      "enabled": true
    },
    "gpu_operator_version": "<string>",
    "network_operator_version": "<string>",
    "ssh_ca_enabled": true
  },
  "machine_cluster_id": "<string>",
  "first_ready_at": "2023-11-07T05:31:56Z",
  "is_in_substrate": true,
  "control_plane_ready": true,
  "ums_project_id": "<string>",
  "ums_org_id": "<string>",
  "os_image": "<string>",
  "nvidia_driver_version_id": "<string>",
  "deleted_gpu_worker_nodes": [
    {
      "node_id": "<string>",
      "status": "<string>",
      "host_name": "<string>",
      "num_cpu_cores": 123,
      "num_gpus": 123,
      "memory_gib": 123,
      "networks": [
        "<string>"
      ],
      "phase_transitions": [
        {
          "transition_time": "2023-11-07T05:31:56Z"
        }
      ],
      "instance_id": "<string>",
      "latest_remediation": {
        "id": "<string>",
        "cluster_id": "<string>",
        "instance_id": "<string>",
        "reason": "<string>",
        "active_health_check_run_id": "<string>",
        "passive_health_check_event_id": "<string>",
        "requested_by": "<string>",
        "create_time": "2023-11-07T05:31:56Z",
        "reviewed_by": "<string>",
        "review_time": "2023-11-07T05:31:56Z",
        "review_comment": "<string>",
        "start_time": "2023-11-07T05:31:56Z",
        "end_time": "2023-11-07T05:31:56Z",
        "error_message": "<string>",
        "update_time": "2023-11-07T05:31:56Z",
        "instance_name": "<string>",
        "linked_alerts": [
          {
            "passive_health_check_alert_id": "<string>",
            "cluster_id": "<string>",
            "target_vm": "<string>",
            "alert_name": "<string>",
            "annotations": {},
            "started_at": "2023-11-07T05:31:56Z",
            "instance_id": "<string>",
            "resolved_at": "2023-11-07T05:31:56Z",
            "node_remediation_intent_id": "<string>"
          }
        ]
      },
      "slurm_worker_hostname": "<string>",
      "marked_for_deletion": true,
      "public_ipv4": "<string>",
      "ib_hca_type": "<string>",
      "ib_hca_count": 123,
      "nvswitch_count": 123,
      "nvswitch_type": "<string>",
      "ephemeral_storage": "<string>",
      "auto_remediation_enabled": true,
      "deleted_at": "2023-11-07T05:31:56Z"
    }
  ],
  "node_lifecycle_events": [
    {
      "node_id": "<string>",
      "reason": "<string>",
      "message": "<string>",
      "timestamp": "2023-11-07T05:31:56Z"
    }
  ]
}

Authorizations

Authorization

string

header

default:default

required

Bearer authentication header of the form Bearer <token>, where <token> is your auth token.

Path Parameters

cluster_id

string

required

The ID of the cluster to retrieve

Response

200 - application/json

cluster_id

string

required

cluster_type

enum<string>

required

Type of cluster.

Available options:

KUBERNETES,

SLURM

region

string

required

gpu_type

enum<string>

required

Available options:

H100_SXM,

H200_SXM,

RTX_6000_PCI,

L40_PCIE,

B200_SXM,

H100_SXM_INF

cluster_name

string

required

volumes

object[]

required

Show child attributes

status

enum<string>

required

Current status of the GPU cluster.

Available options:

WaitingForControlPlaneNodes,

WaitingForDataPlaneNodes,

WaitingForSubnet,

WaitingForSharedVolume,

InstallingDrivers,

RunningAcceptanceTests,

Paused,

OnDemandComputePaused,

Ready,

Degraded,

Deleting

control_plane_nodes

object[]

required

Show child attributes

gpu_worker_nodes

object[]

required

Show child attributes

kube_config

string

required

num_gpus

integer<int32>

required

cuda_version

string

required

nvidia_driver_version

string

required

project_id

string

required

num_cpu_workers

integer<int32>

required

Number of CPU-only worker nodes in the cluster.

phase_transitions

object[]

required

Cluster-level phase transition history.

Show child attributes

desired_preemptible_gpus

integer<int32>

required

Customer's requested number of preemptible GPUs. Set on cluster create or update; persists until changed.

allocated_preemptible_gpus

integer<int32>

required

Actual number of preemptible GPUs currently allocated to the cluster. Updated asynchronously by the fulfillment and reclamation workers; may be less than desired_preemptible_gpus when capacity is constrained.

billing_type

enum<string>

required

Billing type for the cluster (RESERVED, ON_DEMAND, or SCHEDULED_CAPACITY).

Available options:

RESERVED,

ON_DEMAND,

SCHEDULED_CAPACITY

add_ons

object[]

required

Enabled add-ons on this cluster. Only add-ons with enabled=true in their config are returned.

Show child attributes

num_capacity_pool_gpus

integer<int32>

required

Number of GPUs to draw from a capacity pool. A component of the overall num_gpus, alongside num_reserved_gpus.

num_reserved_gpus

integer<int32>

required

Number of prepaid reserved GPUs for this cluster. A component of the overall num_gpus, alongside num_capacity_pool_gpus.

duration_hours

integer

slurm_shm_size_gib

integer

capacity_pool_id

string

reservation_start_time

string<date-time>

reservation_end_time

string<date-time>

install_traefik

boolean

created_at

string<date-time>

oidc_config

object

Show child attributes

cluster_config

object

Show child attributes

machine_cluster_id

string

ID of the machine cluster backing this GPU cluster.

first_ready_at

string<date-time>

Timestamp when the cluster first reached the Ready phase.

is_in_substrate

boolean

Whether the cluster is managed inside a substrate environment.

control_plane_ready

boolean

Whether the control plane is currently ready.

ums_project_id

string

UMS project ID associated with this cluster.

ums_org_id

string

UMS organization ID associated with this cluster.

os_image

string

Data-volume image name for GPU worker nodes.

nvidia_driver_version_id

string

Internal NVIDIA version ID for this cluster's driver and CUDA combination.

deleted_gpu_worker_nodes

object[]

GPU worker nodes retained after they left the live data plane. These are separate from gpu_worker_nodes and must not be counted as live capacity.

Show child attributes

node_lifecycle_events

object[]

Recent node lifecycle events such as scale-up, scale-down, and preemption. Combine these with live and deleted node lists to render the cluster timeline.

Show child attributes

List all GPU clusters

Update a GPU cluster

⌘I