Cluster

class Cluster : KotlinCustomResource

Manages a Cloud Dataproc cluster resource within GCP.

  • API documentation

  • How-to Guides

    • Official Documentation

Warning: Due to limitations of the API, all arguments except `labels`, `cluster_config.worker_config.num_instances` and `cluster_config.preemptible_worker_config.num_instances` are non-updatable. Changing `cluster_config.worker_config.min_num_instances` will be ignored. Changing any other argument will force recreation of the whole cluster!

Example Usage

Basic

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const simplecluster = new gcp.dataproc.Cluster("simplecluster", {
name: "simplecluster",
region: "us-central1",
});
import pulumi
import pulumi_gcp as gcp
simplecluster = gcp.dataproc.Cluster("simplecluster",
name="simplecluster",
region="us-central1")
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var simplecluster = new Gcp.Dataproc.Cluster("simplecluster", new()
{
Name = "simplecluster",
Region = "us-central1",
});
});
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v7/go/gcp/dataproc"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := dataproc.NewCluster(ctx, "simplecluster", &dataproc.ClusterArgs{
Name: pulumi.String("simplecluster"),
Region: pulumi.String("us-central1"),
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataproc.Cluster;
import com.pulumi.gcp.dataproc.ClusterArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var simplecluster = new Cluster("simplecluster", ClusterArgs.builder()
.name("simplecluster")
.region("us-central1")
.build());
}
}
resources:
simplecluster:
type: gcp:dataproc:Cluster
properties:
name: simplecluster
region: us-central1

Advanced

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const _default = new gcp.serviceaccount.Account("default", {
accountId: "service-account-id",
displayName: "Service Account",
});
const mycluster = new gcp.dataproc.Cluster("mycluster", {
name: "mycluster",
region: "us-central1",
gracefulDecommissionTimeout: "120s",
labels: {
foo: "bar",
},
clusterConfig: {
stagingBucket: "dataproc-staging-bucket",
masterConfig: {
numInstances: 1,
machineType: "e2-medium",
diskConfig: {
bootDiskType: "pd-ssd",
bootDiskSizeGb: 30,
},
},
workerConfig: {
numInstances: 2,
machineType: "e2-medium",
minCpuPlatform: "Intel Skylake",
diskConfig: {
bootDiskSizeGb: 30,
numLocalSsds: 1,
},
},
preemptibleWorkerConfig: {
numInstances: 0,
},
softwareConfig: {
imageVersion: "2.0.35-debian10",
overrideProperties: {
"dataproc:dataproc.allow.zero.workers": "true",
},
},
gceClusterConfig: {
tags: [
"foo",
"bar",
],
serviceAccount: _default.email,
serviceAccountScopes: ["cloud-platform"],
},
initializationActions: [{
script: "gs://dataproc-initialization-actions/stackdriver/stackdriver.sh",
timeoutSec: 500,
}],
},
});
import pulumi
import pulumi_gcp as gcp

default = gcp.serviceaccount.Account("default",
    account_id="service-account-id",
    display_name="Service Account")
mycluster = gcp.dataproc.Cluster("mycluster",
    name="mycluster",
    region="us-central1",
    graceful_decommission_timeout="120s",
    labels={
        "foo": "bar",
    },
    cluster_config={
        "staging_bucket": "dataproc-staging-bucket",
        "master_config": {
            "num_instances": 1,
            "machine_type": "e2-medium",
            "disk_config": {
                "boot_disk_type": "pd-ssd",
                "boot_disk_size_gb": 30,
            },
        },
        "worker_config": {
            "num_instances": 2,
            "machine_type": "e2-medium",
            "min_cpu_platform": "Intel Skylake",
            "disk_config": {
                "boot_disk_size_gb": 30,
                "num_local_ssds": 1,
            },
        },
        "preemptible_worker_config": {
            "num_instances": 0,
        },
        "software_config": {
            "image_version": "2.0.35-debian10",
            # Dataproc property keys are passed verbatim to the API; the key must be
            # "dataproc:dataproc.allow.zero.workers" (colon/dot form), matching the
            # other language examples — an underscore-mangled key would be rejected.
            "override_properties": {
                "dataproc:dataproc.allow.zero.workers": "true",
            },
        },
        "gce_cluster_config": {
            "tags": [
                "foo",
                "bar",
            ],
            "service_account": default.email,
            "service_account_scopes": ["cloud-platform"],
        },
        "initialization_actions": [{
            "script": "gs://dataproc-initialization-actions/stackdriver/stackdriver.sh",
            "timeout_sec": 500,
        }],
    })
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var @default = new Gcp.ServiceAccount.Account("default", new()
{
AccountId = "service-account-id",
DisplayName = "Service Account",
});
var mycluster = new Gcp.Dataproc.Cluster("mycluster", new()
{
Name = "mycluster",
Region = "us-central1",
GracefulDecommissionTimeout = "120s",
Labels =
{
{ "foo", "bar" },
},
ClusterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigArgs
{
StagingBucket = "dataproc-staging-bucket",
MasterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigMasterConfigArgs
{
NumInstances = 1,
MachineType = "e2-medium",
DiskConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigMasterConfigDiskConfigArgs
{
BootDiskType = "pd-ssd",
BootDiskSizeGb = 30,
},
},
WorkerConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigWorkerConfigArgs
{
NumInstances = 2,
MachineType = "e2-medium",
MinCpuPlatform = "Intel Skylake",
DiskConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigWorkerConfigDiskConfigArgs
{
BootDiskSizeGb = 30,
NumLocalSsds = 1,
},
},
PreemptibleWorkerConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigPreemptibleWorkerConfigArgs
{
NumInstances = 0,
},
SoftwareConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigSoftwareConfigArgs
{
ImageVersion = "2.0.35-debian10",
OverrideProperties =
{
{ "dataproc:dataproc.allow.zero.workers", "true" },
},
},
GceClusterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigGceClusterConfigArgs
{
Tags = new[]
{
"foo",
"bar",
},
ServiceAccount = @default.Email,
ServiceAccountScopes = new[]
{
"cloud-platform",
},
},
InitializationActions = new[]
{
new Gcp.Dataproc.Inputs.ClusterClusterConfigInitializationActionArgs
{
Script = "gs://dataproc-initialization-actions/stackdriver/stackdriver.sh",
TimeoutSec = 500,
},
},
},
});
});
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v7/go/gcp/dataproc"
"github.com/pulumi/pulumi-gcp/sdk/v7/go/gcp/serviceaccount"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := serviceaccount.NewAccount(ctx, "default", &serviceaccount.AccountArgs{
AccountId: pulumi.String("service-account-id"),
DisplayName: pulumi.String("Service Account"),
})
if err != nil {
return err
}
_, err = dataproc.NewCluster(ctx, "mycluster", &dataproc.ClusterArgs{
Name: pulumi.String("mycluster"),
Region: pulumi.String("us-central1"),
GracefulDecommissionTimeout: pulumi.String("120s"),
Labels: pulumi.StringMap{
"foo": pulumi.String("bar"),
},
ClusterConfig: &dataproc.ClusterClusterConfigArgs{
StagingBucket: pulumi.String("dataproc-staging-bucket"),
MasterConfig: &dataproc.ClusterClusterConfigMasterConfigArgs{
NumInstances: pulumi.Int(1),
MachineType: pulumi.String("e2-medium"),
DiskConfig: &dataproc.ClusterClusterConfigMasterConfigDiskConfigArgs{
BootDiskType: pulumi.String("pd-ssd"),
BootDiskSizeGb: pulumi.Int(30),
},
},
WorkerConfig: &dataproc.ClusterClusterConfigWorkerConfigArgs{
NumInstances: pulumi.Int(2),
MachineType: pulumi.String("e2-medium"),
MinCpuPlatform: pulumi.String("Intel Skylake"),
DiskConfig: &dataproc.ClusterClusterConfigWorkerConfigDiskConfigArgs{
BootDiskSizeGb: pulumi.Int(30),
NumLocalSsds: pulumi.Int(1),
},
},
PreemptibleWorkerConfig: &dataproc.ClusterClusterConfigPreemptibleWorkerConfigArgs{
NumInstances: pulumi.Int(0),
},
SoftwareConfig: &dataproc.ClusterClusterConfigSoftwareConfigArgs{
ImageVersion: pulumi.String("2.0.35-debian10"),
OverrideProperties: pulumi.StringMap{
"dataproc:dataproc.allow.zero.workers": pulumi.String("true"),
},
},
GceClusterConfig: &dataproc.ClusterClusterConfigGceClusterConfigArgs{
Tags: pulumi.StringArray{
pulumi.String("foo"),
pulumi.String("bar"),
},
ServiceAccount: _default.Email,
ServiceAccountScopes: pulumi.StringArray{
pulumi.String("cloud-platform"),
},
},
InitializationActions: dataproc.ClusterClusterConfigInitializationActionArray{
&dataproc.ClusterClusterConfigInitializationActionArgs{
Script: pulumi.String("gs://dataproc-initialization-actions/stackdriver/stackdriver.sh"),
TimeoutSec: pulumi.Int(500),
},
},
},
})
if err != nil {
return err
}
return nil
})
}
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.serviceaccount.Account;
import com.pulumi.gcp.serviceaccount.AccountArgs;
import com.pulumi.gcp.dataproc.Cluster;
import com.pulumi.gcp.dataproc.ClusterArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigMasterConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigMasterConfigDiskConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigWorkerConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigWorkerConfigDiskConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigPreemptibleWorkerConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigSoftwareConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigGceClusterConfigArgs;
// Required for the initializationActions builder below; was missing from the
// original import list, so the example did not compile.
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigInitializationActionArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        var default_ = new Account("default", AccountArgs.builder()
            .accountId("service-account-id")
            .displayName("Service Account")
            .build());
        var mycluster = new Cluster("mycluster", ClusterArgs.builder()
            .name("mycluster")
            .region("us-central1")
            .gracefulDecommissionTimeout("120s")
            .labels(Map.of("foo", "bar"))
            .clusterConfig(ClusterClusterConfigArgs.builder()
                .stagingBucket("dataproc-staging-bucket")
                .masterConfig(ClusterClusterConfigMasterConfigArgs.builder()
                    .numInstances(1)
                    .machineType("e2-medium")
                    .diskConfig(ClusterClusterConfigMasterConfigDiskConfigArgs.builder()
                        .bootDiskType("pd-ssd")
                        .bootDiskSizeGb(30)
                        .build())
                    .build())
                .workerConfig(ClusterClusterConfigWorkerConfigArgs.builder()
                    .numInstances(2)
                    .machineType("e2-medium")
                    .minCpuPlatform("Intel Skylake")
                    .diskConfig(ClusterClusterConfigWorkerConfigDiskConfigArgs.builder()
                        .bootDiskSizeGb(30)
                        .numLocalSsds(1)
                        .build())
                    .build())
                .preemptibleWorkerConfig(ClusterClusterConfigPreemptibleWorkerConfigArgs.builder()
                    .numInstances(0)
                    .build())
                .softwareConfig(ClusterClusterConfigSoftwareConfigArgs.builder()
                    .imageVersion("2.0.35-debian10")
                    .overrideProperties(Map.of("dataproc:dataproc.allow.zero.workers", "true"))
                    .build())
                .gceClusterConfig(ClusterClusterConfigGceClusterConfigArgs.builder()
                    .tags(
                        "foo",
                        "bar")
                    .serviceAccount(default_.email())
                    .serviceAccountScopes("cloud-platform")
                    .build())
                .initializationActions(ClusterClusterConfigInitializationActionArgs.builder()
                    .script("gs://dataproc-initialization-actions/stackdriver/stackdriver.sh")
                    .timeoutSec(500)
                    .build())
                .build())
            .build());
    }
}
resources:
default:
type: gcp:serviceaccount:Account
properties:
accountId: service-account-id
displayName: Service Account
mycluster:
type: gcp:dataproc:Cluster
properties:
name: mycluster
region: us-central1
gracefulDecommissionTimeout: 120s
labels:
foo: bar
clusterConfig:
stagingBucket: dataproc-staging-bucket
masterConfig:
numInstances: 1
machineType: e2-medium
diskConfig:
bootDiskType: pd-ssd
bootDiskSizeGb: 30
workerConfig:
numInstances: 2
machineType: e2-medium
minCpuPlatform: Intel Skylake
diskConfig:
bootDiskSizeGb: 30
numLocalSsds: 1
preemptibleWorkerConfig:
numInstances: 0
softwareConfig:
imageVersion: 2.0.35-debian10
overrideProperties:
dataproc:dataproc.allow.zero.workers: 'true'
gceClusterConfig:
tags:
- foo
- bar
serviceAccount: ${default.email}
serviceAccountScopes:
- cloud-platform
initializationActions:
- script: gs://dataproc-initialization-actions/stackdriver/stackdriver.sh
timeoutSec: 500

Using A GPU Accelerator

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const acceleratedCluster = new gcp.dataproc.Cluster("accelerated_cluster", {
name: "my-cluster-with-gpu",
region: "us-central1",
clusterConfig: {
gceClusterConfig: {
zone: "us-central1-a",
},
masterConfig: {
accelerators: [{
acceleratorType: "nvidia-tesla-k80",
acceleratorCount: 1,
}],
},
},
});
import pulumi
import pulumi_gcp as gcp
accelerated_cluster = gcp.dataproc.Cluster("accelerated_cluster",
name="my-cluster-with-gpu",
region="us-central1",
cluster_config={
"gce_cluster_config": {
"zone": "us-central1-a",
},
"master_config": {
"accelerators": [{
"accelerator_type": "nvidia-tesla-k80",
"accelerator_count": 1,
}],
},
})
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var acceleratedCluster = new Gcp.Dataproc.Cluster("accelerated_cluster", new()
{
Name = "my-cluster-with-gpu",
Region = "us-central1",
ClusterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigArgs
{
GceClusterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigGceClusterConfigArgs
{
Zone = "us-central1-a",
},
MasterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigMasterConfigArgs
{
Accelerators = new[]
{
new Gcp.Dataproc.Inputs.ClusterClusterConfigMasterConfigAcceleratorArgs
{
AcceleratorType = "nvidia-tesla-k80",
AcceleratorCount = 1,
},
},
},
},
});
});
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v7/go/gcp/dataproc"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := dataproc.NewCluster(ctx, "accelerated_cluster", &dataproc.ClusterArgs{
Name: pulumi.String("my-cluster-with-gpu"),
Region: pulumi.String("us-central1"),
ClusterConfig: &dataproc.ClusterClusterConfigArgs{
GceClusterConfig: &dataproc.ClusterClusterConfigGceClusterConfigArgs{
Zone: pulumi.String("us-central1-a"),
},
MasterConfig: &dataproc.ClusterClusterConfigMasterConfigArgs{
Accelerators: dataproc.ClusterClusterConfigMasterConfigAcceleratorArray{
&dataproc.ClusterClusterConfigMasterConfigAcceleratorArgs{
AcceleratorType: pulumi.String("nvidia-tesla-k80"),
AcceleratorCount: pulumi.Int(1),
},
},
},
},
})
if err != nil {
return err
}
return nil
})
}
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataproc.Cluster;
import com.pulumi.gcp.dataproc.ClusterArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigGceClusterConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigMasterConfigArgs;
// Required for the accelerators builder below; was missing from the
// original import list, so the example did not compile.
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigMasterConfigAcceleratorArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        var acceleratedCluster = new Cluster("acceleratedCluster", ClusterArgs.builder()
            .name("my-cluster-with-gpu")
            .region("us-central1")
            .clusterConfig(ClusterClusterConfigArgs.builder()
                .gceClusterConfig(ClusterClusterConfigGceClusterConfigArgs.builder()
                    .zone("us-central1-a")
                    .build())
                .masterConfig(ClusterClusterConfigMasterConfigArgs.builder()
                    .accelerators(ClusterClusterConfigMasterConfigAcceleratorArgs.builder()
                        .acceleratorType("nvidia-tesla-k80")
                        // acceleratorCount is an integer property — pass 1, not the
                        // string "1", matching the other language examples.
                        .acceleratorCount(1)
                        .build())
                    .build())
                .build())
            .build());
    }
}
resources:
  acceleratedCluster:
    type: gcp:dataproc:Cluster
    name: accelerated_cluster
    properties:
      name: my-cluster-with-gpu
      region: us-central1
      clusterConfig:
        gceClusterConfig:
          zone: us-central1-a
        masterConfig:
          accelerators:
            # acceleratorCount is an integer property; use an unquoted number
            # (the quoted '1' was a string), matching the other examples.
            - acceleratorType: nvidia-tesla-k80
              acceleratorCount: 1

Import

This resource does not support import.

Properties

Link copied to clipboard

Allows you to configure various aspects of the cluster. Structure defined below.

Link copied to clipboard

The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some itself including goog-dataproc-cluster-name which is the name of the cluster.

Link copied to clipboard
Link copied to clipboard
val id: Output<String>
Link copied to clipboard
val labels: Output<Map<String, String>>?

The list of the labels (key/value pairs) configured on the resource and to be applied to instances in the cluster. Note: This field is non-authoritative, and will only manage the labels present in your configuration. Please refer to the field 'effective_labels' for all of the labels present on the resource.

Link copied to clipboard
val name: Output<String>

The name of the cluster, unique within the project and zone.

Link copied to clipboard
val project: Output<String>

The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.

Link copied to clipboard
val pulumiChildResources: Set<KotlinResource>
Link copied to clipboard
val pulumiLabels: Output<Map<String, String>>

The combination of labels configured directly on the resource and default labels configured on the provider.

Link copied to clipboard
Link copied to clipboard
Link copied to clipboard
val region: Output<String>?

The region in which the cluster and associated nodes will be created in. Defaults to global.

Link copied to clipboard
val urn: Output<String>
Link copied to clipboard

Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.