Batch Args
Dataproc Serverless Batches lets you run Spark workloads without requiring you to provision and manage your own Dataproc cluster. To get more information about Batch, see the How-to Guides.
Example Usage
Dataproc Batch Spark
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const exampleBatchSpark = new gcp.dataproc.Batch("example_batch_spark", {
batchId: "tf-test-batch_88722",
location: "us-central1",
labels: {
batch_test: "terraform",
},
runtimeConfig: {
properties: {
"spark.dynamicAllocation.enabled": "false",
"spark.executor.instances": "2",
},
},
environmentConfig: {
executionConfig: {
subnetworkUri: "default",
ttl: "3600s",
networkTags: ["tag1"],
},
},
sparkBatch: {
mainClass: "org.apache.spark.examples.SparkPi",
args: ["10"],
jarFileUris: ["file:///usr/lib/spark/examples/jars/spark-examples.jar"],
},
});
import pulumi
import pulumi_gcp as gcp
example_batch_spark = gcp.dataproc.Batch("example_batch_spark",
batch_id="tf-test-batch_88722",
location="us-central1",
labels={
"batch_test": "terraform",
},
runtime_config={
"properties": {
"spark.dynamicAllocation.enabled": "false",
"spark.executor.instances": "2",
},
},
environment_config={
"execution_config": {
"subnetwork_uri": "default",
"ttl": "3600s",
"network_tags": ["tag1"],
},
},
spark_batch={
"main_class": "org.apache.spark.examples.SparkPi",
"args": ["10"],
"jar_file_uris": ["file:///usr/lib/spark/examples/jars/spark-examples.jar"],
})
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var exampleBatchSpark = new Gcp.Dataproc.Batch("example_batch_spark", new()
{
BatchId = "tf-test-batch_88722",
Location = "us-central1",
Labels =
{
{ "batch_test", "terraform" },
},
RuntimeConfig = new Gcp.Dataproc.Inputs.BatchRuntimeConfigArgs
{
Properties =
{
{ "spark.dynamicAllocation.enabled", "false" },
{ "spark.executor.instances", "2" },
},
},
EnvironmentConfig = new Gcp.Dataproc.Inputs.BatchEnvironmentConfigArgs
{
ExecutionConfig = new Gcp.Dataproc.Inputs.BatchEnvironmentConfigExecutionConfigArgs
{
SubnetworkUri = "default",
Ttl = "3600s",
NetworkTags = new[]
{
"tag1",
},
},
},
SparkBatch = new Gcp.Dataproc.Inputs.BatchSparkBatchArgs
{
MainClass = "org.apache.spark.examples.SparkPi",
Args = new[]
{
"10",
},
JarFileUris = new[]
{
"file:///usr/lib/spark/examples/jars/spark-examples.jar",
},
},
});
});
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/dataproc"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := dataproc.NewBatch(ctx, "example_batch_spark", &dataproc.BatchArgs{
BatchId: pulumi.String("tf-test-batch_88722"),
Location: pulumi.String("us-central1"),
Labels: pulumi.StringMap{
"batch_test": pulumi.String("terraform"),
},
RuntimeConfig: &dataproc.BatchRuntimeConfigArgs{
Properties: pulumi.StringMap{
"spark.dynamicAllocation.enabled": pulumi.String("false"),
"spark.executor.instances": pulumi.String("2"),
},
},
EnvironmentConfig: &dataproc.BatchEnvironmentConfigArgs{
ExecutionConfig: &dataproc.BatchEnvironmentConfigExecutionConfigArgs{
SubnetworkUri: pulumi.String("default"),
Ttl: pulumi.String("3600s"),
NetworkTags: pulumi.StringArray{
pulumi.String("tag1"),
},
},
},
SparkBatch: &dataproc.BatchSparkBatchArgs{
MainClass: pulumi.String("org.apache.spark.examples.SparkPi"),
Args: pulumi.StringArray{
pulumi.String("10"),
},
JarFileUris: pulumi.StringArray{
pulumi.String("file:///usr/lib/spark/examples/jars/spark-examples.jar"),
},
},
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataproc.Batch;
import com.pulumi.gcp.dataproc.BatchArgs;
import com.pulumi.gcp.dataproc.inputs.BatchRuntimeConfigArgs;
import com.pulumi.gcp.dataproc.inputs.BatchEnvironmentConfigArgs;
import com.pulumi.gcp.dataproc.inputs.BatchEnvironmentConfigExecutionConfigArgs;
import com.pulumi.gcp.dataproc.inputs.BatchSparkBatchArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var exampleBatchSpark = new Batch("exampleBatchSpark", BatchArgs.builder()
.batchId("tf-test-batch_88722")
.location("us-central1")
.labels(Map.of("batch_test", "terraform"))
.runtimeConfig(BatchRuntimeConfigArgs.builder()
.properties(Map.ofEntries(
Map.entry("spark.dynamicAllocation.enabled", "false"),
Map.entry("spark.executor.instances", "2")
))
.build())
.environmentConfig(BatchEnvironmentConfigArgs.builder()
.executionConfig(BatchEnvironmentConfigExecutionConfigArgs.builder()
.subnetworkUri("default")
.ttl("3600s")
.networkTags("tag1")
.build())
.build())
.sparkBatch(BatchSparkBatchArgs.builder()
.mainClass("org.apache.spark.examples.SparkPi")
.args("10")
.jarFileUris("file:///usr/lib/spark/examples/jars/spark-examples.jar")
.build())
.build());
}
}
resources:
exampleBatchSpark:
type: gcp:dataproc:Batch
name: example_batch_spark
properties:
batchId: tf-test-batch_88722
location: us-central1
labels:
batch_test: terraform
runtimeConfig:
properties:
spark.dynamicAllocation.enabled: 'false'
spark.executor.instances: '2'
environmentConfig:
executionConfig:
subnetworkUri: default
ttl: 3600s
networkTags:
- tag1
sparkBatch:
mainClass: org.apache.spark.examples.SparkPi
args:
- '10'
jarFileUris:
- file:///usr/lib/spark/examples/jars/spark-examples.jar
Dataproc Batch Spark Full
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const project = gcp.organizations.getProject({});
const gcsAccount = gcp.storage.getProjectServiceAccount({});
const bucket = new gcp.storage.Bucket("bucket", {
uniformBucketLevelAccess: true,
name: "dataproc-bucket",
location: "US",
forceDestroy: true,
});
const cryptoKeyMember1 = new gcp.kms.CryptoKeyIAMMember("crypto_key_member_1", {
cryptoKeyId: "example-key",
role: "roles/cloudkms.cryptoKeyEncrypterDecrypter",
member: project.then(project => `serviceAccount:service-${project.number}@dataproc-accounts.iam.gserviceaccount.com`),
});
const ms = new gcp.dataproc.MetastoreService("ms", {
serviceId: "dataproc-batch",
location: "us-central1",
port: 9080,
tier: "DEVELOPER",
maintenanceWindow: {
hourOfDay: 2,
dayOfWeek: "SUNDAY",
},
hiveMetastoreConfig: {
version: "3.1.2",
},
});
const basic = new gcp.dataproc.Cluster("basic", {
name: "dataproc-batch",
region: "us-central1",
clusterConfig: {
softwareConfig: {
overrideProperties: {
"dataproc:dataproc.allow.zero.workers": "true",
"spark:spark.history.fs.logDirectory": pulumi.interpolate`gs://${bucket.name}/*/spark-job-history`,
},
},
endpointConfig: {
enableHttpPortAccess: true,
},
masterConfig: {
numInstances: 1,
machineType: "e2-standard-2",
diskConfig: {
bootDiskSizeGb: 35,
},
},
metastoreConfig: {
dataprocMetastoreService: ms.name,
},
},
});
const exampleBatchSpark = new gcp.dataproc.Batch("example_batch_spark", {
batchId: "dataproc-batch",
location: "us-central1",
labels: {
batch_test: "terraform",
},
runtimeConfig: {
properties: {
"spark.dynamicAllocation.enabled": "false",
"spark.executor.instances": "2",
},
version: "2.2",
},
environmentConfig: {
executionConfig: {
ttl: "3600s",
networkTags: ["tag1"],
kmsKey: "example-key",
networkUri: "default",
serviceAccount: project.then(project => `${project.number}-compute@developer.gserviceaccount.com`),
stagingBucket: bucket.name,
},
peripheralsConfig: {
metastoreService: ms.name,
sparkHistoryServerConfig: {
dataprocCluster: basic.id,
},
},
},
sparkBatch: {
mainClass: "org.apache.spark.examples.SparkPi",
args: ["10"],
jarFileUris: ["file:///usr/lib/spark/examples/jars/spark-examples.jar"],
},
}, {
dependsOn: [cryptoKeyMember1],
});
import pulumi
import pulumi_gcp as gcp
project = gcp.organizations.get_project()
gcs_account = gcp.storage.get_project_service_account()
bucket = gcp.storage.Bucket("bucket",
uniform_bucket_level_access=True,
name="dataproc-bucket",
location="US",
force_destroy=True)
crypto_key_member1 = gcp.kms.CryptoKeyIAMMember("crypto_key_member_1",
crypto_key_id="example-key",
role="roles/cloudkms.cryptoKeyEncrypterDecrypter",
member=f"serviceAccount:service-{project.number}@dataproc-accounts.iam.gserviceaccount.com")
ms = gcp.dataproc.MetastoreService("ms",
service_id="dataproc-batch",
location="us-central1",
port=9080,
tier="DEVELOPER",
maintenance_window={
"hour_of_day": 2,
"day_of_week": "SUNDAY",
},
hive_metastore_config={
"version": "3.1.2",
})
basic = gcp.dataproc.Cluster("basic",
name="dataproc-batch",
region="us-central1",
cluster_config={
"software_config": {
"override_properties": {
"dataproc:dataproc.allow.zero.workers": "true",
"spark:spark.history.fs.logDirectory": bucket.name.apply(lambda name: f"gs://{name}/*/spark-job-history"),
},
},
"endpoint_config": {
"enable_http_port_access": True,
},
"master_config": {
"num_instances": 1,
"machine_type": "e2-standard-2",
"disk_config": {
"boot_disk_size_gb": 35,
},
},
"metastore_config": {
"dataproc_metastore_service": ms.name,
},
})
example_batch_spark = gcp.dataproc.Batch("example_batch_spark",
batch_id="dataproc-batch",
location="us-central1",
labels={
"batch_test": "terraform",
},
runtime_config={
"properties": {
"spark.dynamicAllocation.enabled": "false",
"spark.executor.instances": "2",
},
"version": "2.2",
},
environment_config={
"execution_config": {
"ttl": "3600s",
"network_tags": ["tag1"],
"kms_key": "example-key",
"network_uri": "default",
"service_account": f"{project.number}-compute@developer.gserviceaccount.com",
"staging_bucket": bucket.name,
},
"peripherals_config": {
"metastore_service": ms.name,
"spark_history_server_config": {
"dataproc_cluster": basic.id,
},
},
},
spark_batch={
"main_class": "org.apache.spark.examples.SparkPi",
"args": ["10"],
"jar_file_uris": ["file:///usr/lib/spark/examples/jars/spark-examples.jar"],
},
opts = pulumi.ResourceOptions(depends_on=[crypto_key_member1]))
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var project = Gcp.Organizations.GetProject.Invoke();
var gcsAccount = Gcp.Storage.GetProjectServiceAccount.Invoke();
var bucket = new Gcp.Storage.Bucket("bucket", new()
{
UniformBucketLevelAccess = true,
Name = "dataproc-bucket",
Location = "US",
ForceDestroy = true,
});
var cryptoKeyMember1 = new Gcp.Kms.CryptoKeyIAMMember("crypto_key_member_1", new()
{
CryptoKeyId = "example-key",
Role = "roles/cloudkms.cryptoKeyEncrypterDecrypter",
Member = $"serviceAccount:service-{project.Apply(getProjectResult => getProjectResult.Number)}@dataproc-accounts.iam.gserviceaccount.com",
});
var ms = new Gcp.Dataproc.MetastoreService("ms", new()
{
ServiceId = "dataproc-batch",
Location = "us-central1",
Port = 9080,
Tier = "DEVELOPER",
MaintenanceWindow = new Gcp.Dataproc.Inputs.MetastoreServiceMaintenanceWindowArgs
{
HourOfDay = 2,
DayOfWeek = "SUNDAY",
},
HiveMetastoreConfig = new Gcp.Dataproc.Inputs.MetastoreServiceHiveMetastoreConfigArgs
{
Version = "3.1.2",
},
});
var basic = new Gcp.Dataproc.Cluster("basic", new()
{
Name = "dataproc-batch",
Region = "us-central1",
ClusterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigArgs
{
SoftwareConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigSoftwareConfigArgs
{
OverrideProperties =
{
{ "dataproc:dataproc.allow.zero.workers", "true" },
{ "spark:spark.history.fs.logDirectory", bucket.Name.Apply(name => $"gs://{name}/*/spark-job-history") },
},
},
EndpointConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigEndpointConfigArgs
{
EnableHttpPortAccess = true,
},
MasterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigMasterConfigArgs
{
NumInstances = 1,
MachineType = "e2-standard-2",
DiskConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigMasterConfigDiskConfigArgs
{
BootDiskSizeGb = 35,
},
},
MetastoreConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigMetastoreConfigArgs
{
DataprocMetastoreService = ms.Name,
},
},
});
var exampleBatchSpark = new Gcp.Dataproc.Batch("example_batch_spark", new()
{
BatchId = "dataproc-batch",
Location = "us-central1",
Labels =
{
{ "batch_test", "terraform" },
},
RuntimeConfig = new Gcp.Dataproc.Inputs.BatchRuntimeConfigArgs
{
Properties =
{
{ "spark.dynamicAllocation.enabled", "false" },
{ "spark.executor.instances", "2" },
},
Version = "2.2",
},
EnvironmentConfig = new Gcp.Dataproc.Inputs.BatchEnvironmentConfigArgs
{
ExecutionConfig = new Gcp.Dataproc.Inputs.BatchEnvironmentConfigExecutionConfigArgs
{
Ttl = "3600s",
NetworkTags = new[]
{
"tag1",
},
KmsKey = "example-key",
NetworkUri = "default",
ServiceAccount = $"{project.Apply(getProjectResult => getProjectResult.Number)}-compute@developer.gserviceaccount.com",
StagingBucket = bucket.Name,
},
PeripheralsConfig = new Gcp.Dataproc.Inputs.BatchEnvironmentConfigPeripheralsConfigArgs
{
MetastoreService = ms.Name,
SparkHistoryServerConfig = new Gcp.Dataproc.Inputs.BatchEnvironmentConfigPeripheralsConfigSparkHistoryServerConfigArgs
{
DataprocCluster = basic.Id,
},
},
},
SparkBatch = new Gcp.Dataproc.Inputs.BatchSparkBatchArgs
{
MainClass = "org.apache.spark.examples.SparkPi",
Args = new[]
{
"10",
},
JarFileUris = new[]
{
"file:///usr/lib/spark/examples/jars/spark-examples.jar",
},
},
}, new CustomResourceOptions
{
DependsOn =
{
cryptoKeyMember1,
},
});
});
package main
import (
"fmt"
"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/dataproc"
"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/kms"
"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/organizations"
"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/storage"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
project, err := organizations.LookupProject(ctx, &organizations.LookupProjectArgs{}, nil)
if err != nil {
return err
}
_, err = storage.GetProjectServiceAccount(ctx, &storage.GetProjectServiceAccountArgs{}, nil)
if err != nil {
return err
}
bucket, err := storage.NewBucket(ctx, "bucket", &storage.BucketArgs{
UniformBucketLevelAccess: pulumi.Bool(true),
Name: pulumi.String("dataproc-bucket"),
Location: pulumi.String("US"),
ForceDestroy: pulumi.Bool(true),
})
if err != nil {
return err
}
cryptoKeyMember1, err := kms.NewCryptoKeyIAMMember(ctx, "crypto_key_member_1", &kms.CryptoKeyIAMMemberArgs{
CryptoKeyId: pulumi.String("example-key"),
Role: pulumi.String("roles/cloudkms.cryptoKeyEncrypterDecrypter"),
Member: pulumi.Sprintf("serviceAccount:service-%v@dataproc-accounts.iam.gserviceaccount.com", project.Number),
})
if err != nil {
return err
}
ms, err := dataproc.NewMetastoreService(ctx, "ms", &dataproc.MetastoreServiceArgs{
ServiceId: pulumi.String("dataproc-batch"),
Location: pulumi.String("us-central1"),
Port: pulumi.Int(9080),
Tier: pulumi.String("DEVELOPER"),
MaintenanceWindow: &dataproc.MetastoreServiceMaintenanceWindowArgs{
HourOfDay: pulumi.Int(2),
DayOfWeek: pulumi.String("SUNDAY"),
},
HiveMetastoreConfig: &dataproc.MetastoreServiceHiveMetastoreConfigArgs{
Version: pulumi.String("3.1.2"),
},
})
if err != nil {
return err
}
basic, err := dataproc.NewCluster(ctx, "basic", &dataproc.ClusterArgs{
Name: pulumi.String("dataproc-batch"),
Region: pulumi.String("us-central1"),
ClusterConfig: &dataproc.ClusterClusterConfigArgs{
SoftwareConfig: &dataproc.ClusterClusterConfigSoftwareConfigArgs{
OverrideProperties: pulumi.StringMap{
"dataproc:dataproc.allow.zero.workers": pulumi.String("true"),
"spark:spark.history.fs.logDirectory": bucket.Name.ApplyT(func(name string) (string, error) {
return fmt.Sprintf("gs://%v/*/spark-job-history", name), nil
}).(pulumi.StringOutput),
},
},
EndpointConfig: &dataproc.ClusterClusterConfigEndpointConfigArgs{
EnableHttpPortAccess: pulumi.Bool(true),
},
MasterConfig: &dataproc.ClusterClusterConfigMasterConfigArgs{
NumInstances: pulumi.Int(1),
MachineType: pulumi.String("e2-standard-2"),
DiskConfig: &dataproc.ClusterClusterConfigMasterConfigDiskConfigArgs{
BootDiskSizeGb: pulumi.Int(35),
},
},
MetastoreConfig: &dataproc.ClusterClusterConfigMetastoreConfigArgs{
DataprocMetastoreService: ms.Name,
},
},
})
if err != nil {
return err
}
_, err = dataproc.NewBatch(ctx, "example_batch_spark", &dataproc.BatchArgs{
BatchId: pulumi.String("dataproc-batch"),
Location: pulumi.String("us-central1"),
Labels: pulumi.StringMap{
"batch_test": pulumi.String("terraform"),
},
RuntimeConfig: &dataproc.BatchRuntimeConfigArgs{
Properties: pulumi.StringMap{
"spark.dynamicAllocation.enabled": pulumi.String("false"),
"spark.executor.instances": pulumi.String("2"),
},
Version: pulumi.String("2.2"),
},
EnvironmentConfig: &dataproc.BatchEnvironmentConfigArgs{
ExecutionConfig: &dataproc.BatchEnvironmentConfigExecutionConfigArgs{
Ttl: pulumi.String("3600s"),
NetworkTags: pulumi.StringArray{
pulumi.String("tag1"),
},
KmsKey: pulumi.String("example-key"),
NetworkUri: pulumi.String("default"),
ServiceAccount: pulumi.Sprintf("%v-compute@developer.gserviceaccount.com", project.Number),
StagingBucket: bucket.Name,
},
PeripheralsConfig: &dataproc.BatchEnvironmentConfigPeripheralsConfigArgs{
MetastoreService: ms.Name,
SparkHistoryServerConfig: &dataproc.BatchEnvironmentConfigPeripheralsConfigSparkHistoryServerConfigArgs{
DataprocCluster: basic.ID(),
},
},
},
SparkBatch: &dataproc.BatchSparkBatchArgs{
MainClass: pulumi.String("org.apache.spark.examples.SparkPi"),
Args: pulumi.StringArray{
pulumi.String("10"),
},
JarFileUris: pulumi.StringArray{
pulumi.String("file:///usr/lib/spark/examples/jars/spark-examples.jar"),
},
},
}, pulumi.DependsOn([]pulumi.Resource{
cryptoKeyMember1,
}))
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.organizations.OrganizationsFunctions;
import com.pulumi.gcp.organizations.inputs.GetProjectArgs;
import com.pulumi.gcp.storage.StorageFunctions;
import com.pulumi.gcp.storage.inputs.GetProjectServiceAccountArgs;
import com.pulumi.gcp.storage.Bucket;
import com.pulumi.gcp.storage.BucketArgs;
import com.pulumi.gcp.kms.CryptoKeyIAMMember;
import com.pulumi.gcp.kms.CryptoKeyIAMMemberArgs;
import com.pulumi.gcp.dataproc.MetastoreService;
import com.pulumi.gcp.dataproc.MetastoreServiceArgs;
import com.pulumi.gcp.dataproc.inputs.MetastoreServiceMaintenanceWindowArgs;
import com.pulumi.gcp.dataproc.inputs.MetastoreServiceHiveMetastoreConfigArgs;
import com.pulumi.gcp.dataproc.Cluster;
import com.pulumi.gcp.dataproc.ClusterArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigSoftwareConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigEndpointConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigMasterConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigMasterConfigDiskConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigMetastoreConfigArgs;
import com.pulumi.gcp.dataproc.Batch;
import com.pulumi.gcp.dataproc.BatchArgs;
import com.pulumi.gcp.dataproc.inputs.BatchRuntimeConfigArgs;
import com.pulumi.gcp.dataproc.inputs.BatchEnvironmentConfigArgs;
import com.pulumi.gcp.dataproc.inputs.BatchEnvironmentConfigExecutionConfigArgs;
import com.pulumi.gcp.dataproc.inputs.BatchEnvironmentConfigPeripheralsConfigArgs;
import com.pulumi.gcp.dataproc.inputs.BatchEnvironmentConfigPeripheralsConfigSparkHistoryServerConfigArgs;
import com.pulumi.gcp.dataproc.inputs.BatchSparkBatchArgs;
import com.pulumi.resources.CustomResourceOptions;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
final var project = OrganizationsFunctions.getProject(GetProjectArgs.builder()
.build());
final var gcsAccount = StorageFunctions.getProjectServiceAccount(GetProjectServiceAccountArgs.builder()
.build());
var bucket = new Bucket("bucket", BucketArgs.builder()
.uniformBucketLevelAccess(true)
.name("dataproc-bucket")
.location("US")
.forceDestroy(true)
.build());
var cryptoKeyMember1 = new CryptoKeyIAMMember("cryptoKeyMember1", CryptoKeyIAMMemberArgs.builder()
.cryptoKeyId("example-key")
.role("roles/cloudkms.cryptoKeyEncrypterDecrypter")
.member(String.format("serviceAccount:service-%s@dataproc-accounts.iam.gserviceaccount.com", project.number()))
.build());
var ms = new MetastoreService("ms", MetastoreServiceArgs.builder()
.serviceId("dataproc-batch")
.location("us-central1")
.port(9080)
.tier("DEVELOPER")
.maintenanceWindow(MetastoreServiceMaintenanceWindowArgs.builder()
.hourOfDay(2)
.dayOfWeek("SUNDAY")
.build())
.hiveMetastoreConfig(MetastoreServiceHiveMetastoreConfigArgs.builder()
.version("3.1.2")
.build())
.build());
var basic = new Cluster("basic", ClusterArgs.builder()
.name("dataproc-batch")
.region("us-central1")
.clusterConfig(ClusterClusterConfigArgs.builder()
.softwareConfig(ClusterClusterConfigSoftwareConfigArgs.builder()
.overrideProperties(Map.ofEntries(
Map.entry("dataproc:dataproc.allow.zero.workers", "true"),
Map.entry("spark:spark.history.fs.logDirectory", bucket.name().applyValue(_name -> String.format("gs://%s/*/spark-job-history", _name)))
))
.build())
.endpointConfig(ClusterClusterConfigEndpointConfigArgs.builder()
.enableHttpPortAccess(true)
.build())
.masterConfig(ClusterClusterConfigMasterConfigArgs.builder()
.numInstances(1)
.machineType("e2-standard-2")
.diskConfig(ClusterClusterConfigMasterConfigDiskConfigArgs.builder()
.bootDiskSizeGb(35)
.build())
.build())
.metastoreConfig(ClusterClusterConfigMetastoreConfigArgs.builder()
.dataprocMetastoreService(ms.name())
.build())
.build())
.build());
var exampleBatchSpark = new Batch("exampleBatchSpark", BatchArgs.builder()
.batchId("dataproc-batch")
.location("us-central1")
.labels(Map.of("batch_test", "terraform"))
.runtimeConfig(BatchRuntimeConfigArgs.builder()
.properties(Map.ofEntries(
Map.entry("spark.dynamicAllocation.enabled", "false"),
Map.entry("spark.executor.instances", "2")
))
.version("2.2")
.build())
.environmentConfig(BatchEnvironmentConfigArgs.builder()
.executionConfig(BatchEnvironmentConfigExecutionConfigArgs.builder()
.ttl("3600s")
.networkTags("tag1")
.kmsKey("example-key")
.networkUri("default")
.serviceAccount(String.format("%s-compute@developer.gserviceaccount.com", project.number()))
.stagingBucket(bucket.name())
.build())
.peripheralsConfig(BatchEnvironmentConfigPeripheralsConfigArgs.builder()
.metastoreService(ms.name())
.sparkHistoryServerConfig(BatchEnvironmentConfigPeripheralsConfigSparkHistoryServerConfigArgs.builder()
.dataprocCluster(basic.id())
.build())
.build())
.build())
.sparkBatch(BatchSparkBatchArgs.builder()
.mainClass("org.apache.spark.examples.SparkPi")
.args("10")
.jarFileUris("file:///usr/lib/spark/examples/jars/spark-examples.jar")
.build())
.build(), CustomResourceOptions.builder()
.dependsOn(cryptoKeyMember1)
.build());
}
}
resources:
exampleBatchSpark:
type: gcp:dataproc:Batch
name: example_batch_spark
properties:
batchId: dataproc-batch
location: us-central1
labels:
batch_test: terraform
runtimeConfig:
properties:
spark.dynamicAllocation.enabled: 'false'
spark.executor.instances: '2'
version: '2.2'
environmentConfig:
executionConfig:
ttl: 3600s
networkTags:
- tag1
kmsKey: example-key
networkUri: default
serviceAccount: ${project.number}-compute@developer.gserviceaccount.com
stagingBucket: ${bucket.name}
peripheralsConfig:
metastoreService: ${ms.name}
sparkHistoryServerConfig:
dataprocCluster: ${basic.id}
sparkBatch:
mainClass: org.apache.spark.examples.SparkPi
args:
- '10'
jarFileUris:
- file:///usr/lib/spark/examples/jars/spark-examples.jar
options:
dependsOn:
- ${cryptoKeyMember1}
bucket:
type: gcp:storage:Bucket
properties:
uniformBucketLevelAccess: true
name: dataproc-bucket
location: US
forceDestroy: true
cryptoKeyMember1:
type: gcp:kms:CryptoKeyIAMMember
name: crypto_key_member_1
properties:
cryptoKeyId: example-key
role: roles/cloudkms.cryptoKeyEncrypterDecrypter
member: serviceAccount:service-${project.number}@dataproc-accounts.iam.gserviceaccount.com
basic:
type: gcp:dataproc:Cluster
properties:
name: dataproc-batch
region: us-central1
clusterConfig:
softwareConfig:
overrideProperties:
dataproc:dataproc.allow.zero.workers: 'true'
spark:spark.history.fs.logDirectory: gs://${bucket.name}/*/spark-job-history
endpointConfig:
enableHttpPortAccess: true
masterConfig:
numInstances: 1
machineType: e2-standard-2
diskConfig:
bootDiskSizeGb: 35
metastoreConfig:
dataprocMetastoreService: ${ms.name}
ms:
type: gcp:dataproc:MetastoreService
properties:
serviceId: dataproc-batch
location: us-central1
port: 9080
tier: DEVELOPER
maintenanceWindow:
hourOfDay: 2
dayOfWeek: SUNDAY
hiveMetastoreConfig:
version: 3.1.2
variables:
project:
fn::invoke:
function: gcp:organizations:getProject
arguments: {}
gcsAccount:
fn::invoke:
function: gcp:storage:getProjectServiceAccount
arguments: {}
Dataproc Batch Sparksql
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const exampleBatchSparsql = new gcp.dataproc.Batch("example_batch_sparsql", {
batchId: "tf-test-batch_39249",
location: "us-central1",
runtimeConfig: {
properties: {
"spark.dynamicAllocation.enabled": "false",
"spark.executor.instances": "2",
},
},
environmentConfig: {
executionConfig: {
subnetworkUri: "default",
},
},
sparkSqlBatch: {
queryFileUri: "gs://dataproc-examples/spark-sql/natality/cigarette_correlations.sql",
jarFileUris: ["file:///usr/lib/spark/examples/jars/spark-examples.jar"],
queryVariables: {
name: "value",
},
},
});
import pulumi
import pulumi_gcp as gcp
example_batch_sparsql = gcp.dataproc.Batch("example_batch_sparsql",
batch_id="tf-test-batch_39249",
location="us-central1",
runtime_config={
"properties": {
"spark.dynamicAllocation.enabled": "false",
"spark.executor.instances": "2",
},
},
environment_config={
"execution_config": {
"subnetwork_uri": "default",
},
},
spark_sql_batch={
"query_file_uri": "gs://dataproc-examples/spark-sql/natality/cigarette_correlations.sql",
"jar_file_uris": ["file:///usr/lib/spark/examples/jars/spark-examples.jar"],
"query_variables": {
"name": "value",
},
})
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var exampleBatchSparsql = new Gcp.Dataproc.Batch("example_batch_sparsql", new()
{
BatchId = "tf-test-batch_39249",
Location = "us-central1",
RuntimeConfig = new Gcp.Dataproc.Inputs.BatchRuntimeConfigArgs
{
Properties =
{
{ "spark.dynamicAllocation.enabled", "false" },
{ "spark.executor.instances", "2" },
},
},
EnvironmentConfig = new Gcp.Dataproc.Inputs.BatchEnvironmentConfigArgs
{
ExecutionConfig = new Gcp.Dataproc.Inputs.BatchEnvironmentConfigExecutionConfigArgs
{
SubnetworkUri = "default",
},
},
SparkSqlBatch = new Gcp.Dataproc.Inputs.BatchSparkSqlBatchArgs
{
QueryFileUri = "gs://dataproc-examples/spark-sql/natality/cigarette_correlations.sql",
JarFileUris = new[]
{
"file:///usr/lib/spark/examples/jars/spark-examples.jar",
},
QueryVariables =
{
{ "name", "value" },
},
},
});
});
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/dataproc"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := dataproc.NewBatch(ctx, "example_batch_sparsql", &dataproc.BatchArgs{
BatchId: pulumi.String("tf-test-batch_39249"),
Location: pulumi.String("us-central1"),
RuntimeConfig: &dataproc.BatchRuntimeConfigArgs{
Properties: pulumi.StringMap{
"spark.dynamicAllocation.enabled": pulumi.String("false"),
"spark.executor.instances": pulumi.String("2"),
},
},
EnvironmentConfig: &dataproc.BatchEnvironmentConfigArgs{
ExecutionConfig: &dataproc.BatchEnvironmentConfigExecutionConfigArgs{
SubnetworkUri: pulumi.String("default"),
},
},
SparkSqlBatch: &dataproc.BatchSparkSqlBatchArgs{
QueryFileUri: pulumi.String("gs://dataproc-examples/spark-sql/natality/cigarette_correlations.sql"),
JarFileUris: pulumi.StringArray{
pulumi.String("file:///usr/lib/spark/examples/jars/spark-examples.jar"),
},
QueryVariables: pulumi.StringMap{
"name": pulumi.String("value"),
},
},
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataproc.Batch;
import com.pulumi.gcp.dataproc.BatchArgs;
import com.pulumi.gcp.dataproc.inputs.BatchRuntimeConfigArgs;
import com.pulumi.gcp.dataproc.inputs.BatchEnvironmentConfigArgs;
import com.pulumi.gcp.dataproc.inputs.BatchEnvironmentConfigExecutionConfigArgs;
import com.pulumi.gcp.dataproc.inputs.BatchSparkSqlBatchArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var exampleBatchSparsql = new Batch("exampleBatchSparsql", BatchArgs.builder()
.batchId("tf-test-batch_39249")
.location("us-central1")
.runtimeConfig(BatchRuntimeConfigArgs.builder()
.properties(Map.ofEntries(
Map.entry("spark.dynamicAllocation.enabled", "false"),
Map.entry("spark.executor.instances", "2")
))
.build())
.environmentConfig(BatchEnvironmentConfigArgs.builder()
.executionConfig(BatchEnvironmentConfigExecutionConfigArgs.builder()
.subnetworkUri("default")
.build())
.build())
.sparkSqlBatch(BatchSparkSqlBatchArgs.builder()
.queryFileUri("gs://dataproc-examples/spark-sql/natality/cigarette_correlations.sql")
.jarFileUris("file:///usr/lib/spark/examples/jars/spark-examples.jar")
.queryVariables(Map.of("name", "value"))
.build())
.build());
}
}
resources:
exampleBatchSparsql:
type: gcp:dataproc:Batch
name: example_batch_sparsql
properties:
batchId: tf-test-batch_39249
location: us-central1
runtimeConfig:
properties:
spark.dynamicAllocation.enabled: 'false'
spark.executor.instances: '2'
environmentConfig:
executionConfig:
subnetworkUri: default
sparkSqlBatch:
queryFileUri: gs://dataproc-examples/spark-sql/natality/cigarette_correlations.sql
jarFileUris:
- file:///usr/lib/spark/examples/jars/spark-examples.jar
queryVariables:
name: value
Dataproc Batch Pyspark
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const exampleBatchPyspark = new gcp.dataproc.Batch("example_batch_pyspark", {
batchId: "tf-test-batch_74391",
location: "us-central1",
runtimeConfig: {
properties: {
"spark.dynamicAllocation.enabled": "false",
"spark.executor.instances": "2",
},
},
environmentConfig: {
executionConfig: {
subnetworkUri: "default",
},
},
pysparkBatch: {
mainPythonFileUri: "https://storage.googleapis.com/terraform-batches/test_util.py",
args: ["10"],
jarFileUris: ["file:///usr/lib/spark/examples/jars/spark-examples.jar"],
pythonFileUris: ["gs://dataproc-examples/pyspark/hello-world/hello-world.py"],
archiveUris: [
"https://storage.googleapis.com/terraform-batches/animals.txt.tar.gz#unpacked",
"https://storage.googleapis.com/terraform-batches/animals.txt.jar",
"https://storage.googleapis.com/terraform-batches/animals.txt",
],
fileUris: ["https://storage.googleapis.com/terraform-batches/people.txt"],
},
});
import pulumi
import pulumi_gcp as gcp
example_batch_pyspark = gcp.dataproc.Batch("example_batch_pyspark",
batch_id="tf-test-batch_74391",
location="us-central1",
runtime_config={
"properties": {
"spark.dynamicAllocation.enabled": "false",
"spark.executor.instances": "2",
},
},
environment_config={
"execution_config": {
"subnetwork_uri": "default",
},
},
pyspark_batch={
"main_python_file_uri": "https://storage.googleapis.com/terraform-batches/test_util.py",
"args": ["10"],
"jar_file_uris": ["file:///usr/lib/spark/examples/jars/spark-examples.jar"],
"python_file_uris": ["gs://dataproc-examples/pyspark/hello-world/hello-world.py"],
"archive_uris": [
"https://storage.googleapis.com/terraform-batches/animals.txt.tar.gz#unpacked",
"https://storage.googleapis.com/terraform-batches/animals.txt.jar",
"https://storage.googleapis.com/terraform-batches/animals.txt",
],
"file_uris": ["https://storage.googleapis.com/terraform-batches/people.txt"],
})
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var exampleBatchPyspark = new Gcp.Dataproc.Batch("example_batch_pyspark", new()
{
BatchId = "tf-test-batch_74391",
Location = "us-central1",
RuntimeConfig = new Gcp.Dataproc.Inputs.BatchRuntimeConfigArgs
{
Properties =
{
{ "spark.dynamicAllocation.enabled", "false" },
{ "spark.executor.instances", "2" },
},
},
EnvironmentConfig = new Gcp.Dataproc.Inputs.BatchEnvironmentConfigArgs
{
ExecutionConfig = new Gcp.Dataproc.Inputs.BatchEnvironmentConfigExecutionConfigArgs
{
SubnetworkUri = "default",
},
},
PysparkBatch = new Gcp.Dataproc.Inputs.BatchPysparkBatchArgs
{
MainPythonFileUri = "https://storage.googleapis.com/terraform-batches/test_util.py",
Args = new[]
{
"10",
},
JarFileUris = new[]
{
"file:///usr/lib/spark/examples/jars/spark-examples.jar",
},
PythonFileUris = new[]
{
"gs://dataproc-examples/pyspark/hello-world/hello-world.py",
},
ArchiveUris = new[]
{
"https://storage.googleapis.com/terraform-batches/animals.txt.tar.gz#unpacked",
"https://storage.googleapis.com/terraform-batches/animals.txt.jar",
"https://storage.googleapis.com/terraform-batches/animals.txt",
},
FileUris = new[]
{
"https://storage.googleapis.com/terraform-batches/people.txt",
},
},
});
});
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/dataproc"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := dataproc.NewBatch(ctx, "example_batch_pyspark", &dataproc.BatchArgs{
BatchId: pulumi.String("tf-test-batch_74391"),
Location: pulumi.String("us-central1"),
RuntimeConfig: &dataproc.BatchRuntimeConfigArgs{
Properties: pulumi.StringMap{
"spark.dynamicAllocation.enabled": pulumi.String("false"),
"spark.executor.instances": pulumi.String("2"),
},
},
EnvironmentConfig: &dataproc.BatchEnvironmentConfigArgs{
ExecutionConfig: &dataproc.BatchEnvironmentConfigExecutionConfigArgs{
SubnetworkUri: pulumi.String("default"),
},
},
PysparkBatch: &dataproc.BatchPysparkBatchArgs{
MainPythonFileUri: pulumi.String("https://storage.googleapis.com/terraform-batches/test_util.py"),
Args: pulumi.StringArray{
pulumi.String("10"),
},
JarFileUris: pulumi.StringArray{
pulumi.String("file:///usr/lib/spark/examples/jars/spark-examples.jar"),
},
PythonFileUris: pulumi.StringArray{
pulumi.String("gs://dataproc-examples/pyspark/hello-world/hello-world.py"),
},
ArchiveUris: pulumi.StringArray{
pulumi.String("https://storage.googleapis.com/terraform-batches/animals.txt.tar.gz#unpacked"),
pulumi.String("https://storage.googleapis.com/terraform-batches/animals.txt.jar"),
pulumi.String("https://storage.googleapis.com/terraform-batches/animals.txt"),
},
FileUris: pulumi.StringArray{
pulumi.String("https://storage.googleapis.com/terraform-batches/people.txt"),
},
},
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataproc.Batch;
import com.pulumi.gcp.dataproc.BatchArgs;
import com.pulumi.gcp.dataproc.inputs.BatchRuntimeConfigArgs;
import com.pulumi.gcp.dataproc.inputs.BatchEnvironmentConfigArgs;
import com.pulumi.gcp.dataproc.inputs.BatchEnvironmentConfigExecutionConfigArgs;
import com.pulumi.gcp.dataproc.inputs.BatchPysparkBatchArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var exampleBatchPyspark = new Batch("exampleBatchPyspark", BatchArgs.builder()
.batchId("tf-test-batch_74391")
.location("us-central1")
.runtimeConfig(BatchRuntimeConfigArgs.builder()
.properties(Map.ofEntries(
Map.entry("spark.dynamicAllocation.enabled", "false"),
Map.entry("spark.executor.instances", "2")
))
.build())
.environmentConfig(BatchEnvironmentConfigArgs.builder()
.executionConfig(BatchEnvironmentConfigExecutionConfigArgs.builder()
.subnetworkUri("default")
.build())
.build())
.pysparkBatch(BatchPysparkBatchArgs.builder()
.mainPythonFileUri("https://storage.googleapis.com/terraform-batches/test_util.py")
.args("10")
.jarFileUris("file:///usr/lib/spark/examples/jars/spark-examples.jar")
.pythonFileUris("gs://dataproc-examples/pyspark/hello-world/hello-world.py")
.archiveUris(
"https://storage.googleapis.com/terraform-batches/animals.txt.tar.gz#unpacked",
"https://storage.googleapis.com/terraform-batches/animals.txt.jar",
"https://storage.googleapis.com/terraform-batches/animals.txt")
.fileUris("https://storage.googleapis.com/terraform-batches/people.txt")
.build())
.build());
}
}
resources:
exampleBatchPyspark:
type: gcp:dataproc:Batch
name: example_batch_pyspark
properties:
batchId: tf-test-batch_74391
location: us-central1
runtimeConfig:
properties:
spark.dynamicAllocation.enabled: 'false'
spark.executor.instances: '2'
environmentConfig:
executionConfig:
subnetworkUri: default
pysparkBatch:
mainPythonFileUri: https://storage.googleapis.com/terraform-batches/test_util.py
args:
- '10'
jarFileUris:
- file:///usr/lib/spark/examples/jars/spark-examples.jar
pythonFileUris:
- gs://dataproc-examples/pyspark/hello-world/hello-world.py
archiveUris:
- https://storage.googleapis.com/terraform-batches/animals.txt.tar.gz#unpacked
- https://storage.googleapis.com/terraform-batches/animals.txt.jar
- https://storage.googleapis.com/terraform-batches/animals.txt
fileUris:
- https://storage.googleapis.com/terraform-batches/people.txt
Dataproc Batch Sparkr
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const exampleBatchSparkr = new gcp.dataproc.Batch("example_batch_sparkr", {
batchId: "tf-test-batch_16511",
location: "us-central1",
labels: {
batch_test: "terraform",
},
runtimeConfig: {
properties: {
"spark.dynamicAllocation.enabled": "false",
"spark.executor.instances": "2",
},
},
environmentConfig: {
executionConfig: {
subnetworkUri: "default",
ttl: "3600s",
networkTags: ["tag1"],
},
},
sparkRBatch: {
mainRFileUri: "https://storage.googleapis.com/terraform-batches/spark-r-flights.r",
args: ["https://storage.googleapis.com/terraform-batches/flights.csv"],
},
});
import pulumi
import pulumi_gcp as gcp
example_batch_sparkr = gcp.dataproc.Batch("example_batch_sparkr",
batch_id="tf-test-batch_16511",
location="us-central1",
labels={
"batch_test": "terraform",
},
runtime_config={
"properties": {
"spark.dynamicAllocation.enabled": "false",
"spark.executor.instances": "2",
},
},
environment_config={
"execution_config": {
"subnetwork_uri": "default",
"ttl": "3600s",
"network_tags": ["tag1"],
},
},
spark_r_batch={
"main_r_file_uri": "https://storage.googleapis.com/terraform-batches/spark-r-flights.r",
"args": ["https://storage.googleapis.com/terraform-batches/flights.csv"],
})
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var exampleBatchSparkr = new Gcp.Dataproc.Batch("example_batch_sparkr", new()
{
BatchId = "tf-test-batch_16511",
Location = "us-central1",
Labels =
{
{ "batch_test", "terraform" },
},
RuntimeConfig = new Gcp.Dataproc.Inputs.BatchRuntimeConfigArgs
{
Properties =
{
{ "spark.dynamicAllocation.enabled", "false" },
{ "spark.executor.instances", "2" },
},
},
EnvironmentConfig = new Gcp.Dataproc.Inputs.BatchEnvironmentConfigArgs
{
ExecutionConfig = new Gcp.Dataproc.Inputs.BatchEnvironmentConfigExecutionConfigArgs
{
SubnetworkUri = "default",
Ttl = "3600s",
NetworkTags = new[]
{
"tag1",
},
},
},
SparkRBatch = new Gcp.Dataproc.Inputs.BatchSparkRBatchArgs
{
MainRFileUri = "https://storage.googleapis.com/terraform-batches/spark-r-flights.r",
Args = new[]
{
"https://storage.googleapis.com/terraform-batches/flights.csv",
},
},
});
});
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/dataproc"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := dataproc.NewBatch(ctx, "example_batch_sparkr", &dataproc.BatchArgs{
BatchId: pulumi.String("tf-test-batch_16511"),
Location: pulumi.String("us-central1"),
Labels: pulumi.StringMap{
"batch_test": pulumi.String("terraform"),
},
RuntimeConfig: &dataproc.BatchRuntimeConfigArgs{
Properties: pulumi.StringMap{
"spark.dynamicAllocation.enabled": pulumi.String("false"),
"spark.executor.instances": pulumi.String("2"),
},
},
EnvironmentConfig: &dataproc.BatchEnvironmentConfigArgs{
ExecutionConfig: &dataproc.BatchEnvironmentConfigExecutionConfigArgs{
SubnetworkUri: pulumi.String("default"),
Ttl: pulumi.String("3600s"),
NetworkTags: pulumi.StringArray{
pulumi.String("tag1"),
},
},
},
SparkRBatch: &dataproc.BatchSparkRBatchArgs{
MainRFileUri: pulumi.String("https://storage.googleapis.com/terraform-batches/spark-r-flights.r"),
Args: pulumi.StringArray{
pulumi.String("https://storage.googleapis.com/terraform-batches/flights.csv"),
},
},
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataproc.Batch;
import com.pulumi.gcp.dataproc.BatchArgs;
import com.pulumi.gcp.dataproc.inputs.BatchRuntimeConfigArgs;
import com.pulumi.gcp.dataproc.inputs.BatchEnvironmentConfigArgs;
import com.pulumi.gcp.dataproc.inputs.BatchEnvironmentConfigExecutionConfigArgs;
import com.pulumi.gcp.dataproc.inputs.BatchSparkRBatchArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var exampleBatchSparkr = new Batch("exampleBatchSparkr", BatchArgs.builder()
.batchId("tf-test-batch_16511")
.location("us-central1")
.labels(Map.of("batch_test", "terraform"))
.runtimeConfig(BatchRuntimeConfigArgs.builder()
.properties(Map.ofEntries(
Map.entry("spark.dynamicAllocation.enabled", "false"),
Map.entry("spark.executor.instances", "2")
))
.build())
.environmentConfig(BatchEnvironmentConfigArgs.builder()
.executionConfig(BatchEnvironmentConfigExecutionConfigArgs.builder()
.subnetworkUri("default")
.ttl("3600s")
.networkTags("tag1")
.build())
.build())
.sparkRBatch(BatchSparkRBatchArgs.builder()
.mainRFileUri("https://storage.googleapis.com/terraform-batches/spark-r-flights.r")
.args("https://storage.googleapis.com/terraform-batches/flights.csv")
.build())
.build());
}
}
resources:
exampleBatchSparkr:
type: gcp:dataproc:Batch
name: example_batch_sparkr
properties:
batchId: tf-test-batch_16511
location: us-central1
labels:
batch_test: terraform
runtimeConfig:
properties:
spark.dynamicAllocation.enabled: 'false'
spark.executor.instances: '2'
environmentConfig:
executionConfig:
subnetworkUri: default
ttl: 3600s
networkTags:
- tag1
sparkRBatch:
mainRFileUri: https://storage.googleapis.com/terraform-batches/spark-r-flights.r
args:
- https://storage.googleapis.com/terraform-batches/flights.csv
Dataproc Batch Autotuning
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const exampleBatchAutotuning = new gcp.dataproc.Batch("example_batch_autotuning", {
batchId: "tf-test-batch_8493",
location: "us-central1",
labels: {
batch_test: "terraform",
},
runtimeConfig: {
version: "2.2",
properties: {
"spark.dynamicAllocation.enabled": "false",
"spark.executor.instances": "2",
},
cohort: "tf-dataproc-batch-example",
autotuningConfig: {
scenarios: [
"SCALING",
"MEMORY",
],
},
},
environmentConfig: {
executionConfig: {
subnetworkUri: "default",
ttl: "3600s",
},
},
sparkBatch: {
mainClass: "org.apache.spark.examples.SparkPi",
args: ["10"],
jarFileUris: ["file:///usr/lib/spark/examples/jars/spark-examples.jar"],
},
});
import pulumi
import pulumi_gcp as gcp
example_batch_autotuning = gcp.dataproc.Batch("example_batch_autotuning",
batch_id="tf-test-batch_8493",
location="us-central1",
labels={
"batch_test": "terraform",
},
runtime_config={
"version": "2.2",
"properties": {
"spark.dynamicAllocation.enabled": "false",
"spark.executor.instances": "2",
},
"cohort": "tf-dataproc-batch-example",
"autotuning_config": {
"scenarios": [
"SCALING",
"MEMORY",
],
},
},
environment_config={
"execution_config": {
"subnetwork_uri": "default",
"ttl": "3600s",
},
},
spark_batch={
"main_class": "org.apache.spark.examples.SparkPi",
"args": ["10"],
"jar_file_uris": ["file:///usr/lib/spark/examples/jars/spark-examples.jar"],
})
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var exampleBatchAutotuning = new Gcp.Dataproc.Batch("example_batch_autotuning", new()
{
BatchId = "tf-test-batch_8493",
Location = "us-central1",
Labels =
{
{ "batch_test", "terraform" },
},
RuntimeConfig = new Gcp.Dataproc.Inputs.BatchRuntimeConfigArgs
{
Version = "2.2",
Properties =
{
{ "spark.dynamicAllocation.enabled", "false" },
{ "spark.executor.instances", "2" },
},
Cohort = "tf-dataproc-batch-example",
AutotuningConfig = new Gcp.Dataproc.Inputs.BatchRuntimeConfigAutotuningConfigArgs
{
Scenarios = new[]
{
"SCALING",
"MEMORY",
},
},
},
EnvironmentConfig = new Gcp.Dataproc.Inputs.BatchEnvironmentConfigArgs
{
ExecutionConfig = new Gcp.Dataproc.Inputs.BatchEnvironmentConfigExecutionConfigArgs
{
SubnetworkUri = "default",
Ttl = "3600s",
},
},
SparkBatch = new Gcp.Dataproc.Inputs.BatchSparkBatchArgs
{
MainClass = "org.apache.spark.examples.SparkPi",
Args = new[]
{
"10",
},
JarFileUris = new[]
{
"file:///usr/lib/spark/examples/jars/spark-examples.jar",
},
},
});
});
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/dataproc"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := dataproc.NewBatch(ctx, "example_batch_autotuning", &dataproc.BatchArgs{
BatchId: pulumi.String("tf-test-batch_8493"),
Location: pulumi.String("us-central1"),
Labels: pulumi.StringMap{
"batch_test": pulumi.String("terraform"),
},
RuntimeConfig: &dataproc.BatchRuntimeConfigArgs{
Version: pulumi.String("2.2"),
Properties: pulumi.StringMap{
"spark.dynamicAllocation.enabled": pulumi.String("false"),
"spark.executor.instances": pulumi.String("2"),
},
Cohort: pulumi.String("tf-dataproc-batch-example"),
AutotuningConfig: &dataproc.BatchRuntimeConfigAutotuningConfigArgs{
Scenarios: pulumi.StringArray{
pulumi.String("SCALING"),
pulumi.String("MEMORY"),
},
},
},
EnvironmentConfig: &dataproc.BatchEnvironmentConfigArgs{
ExecutionConfig: &dataproc.BatchEnvironmentConfigExecutionConfigArgs{
SubnetworkUri: pulumi.String("default"),
Ttl: pulumi.String("3600s"),
},
},
SparkBatch: &dataproc.BatchSparkBatchArgs{
MainClass: pulumi.String("org.apache.spark.examples.SparkPi"),
Args: pulumi.StringArray{
pulumi.String("10"),
},
JarFileUris: pulumi.StringArray{
pulumi.String("file:///usr/lib/spark/examples/jars/spark-examples.jar"),
},
},
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataproc.Batch;
import com.pulumi.gcp.dataproc.BatchArgs;
import com.pulumi.gcp.dataproc.inputs.BatchRuntimeConfigArgs;
import com.pulumi.gcp.dataproc.inputs.BatchRuntimeConfigAutotuningConfigArgs;
import com.pulumi.gcp.dataproc.inputs.BatchEnvironmentConfigArgs;
import com.pulumi.gcp.dataproc.inputs.BatchEnvironmentConfigExecutionConfigArgs;
import com.pulumi.gcp.dataproc.inputs.BatchSparkBatchArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var exampleBatchAutotuning = new Batch("exampleBatchAutotuning", BatchArgs.builder()
.batchId("tf-test-batch_8493")
.location("us-central1")
.labels(Map.of("batch_test", "terraform"))
.runtimeConfig(BatchRuntimeConfigArgs.builder()
.version("2.2")
.properties(Map.ofEntries(
Map.entry("spark.dynamicAllocation.enabled", "false"),
Map.entry("spark.executor.instances", "2")
))
.cohort("tf-dataproc-batch-example")
.autotuningConfig(BatchRuntimeConfigAutotuningConfigArgs.builder()
.scenarios(
"SCALING",
"MEMORY")
.build())
.build())
.environmentConfig(BatchEnvironmentConfigArgs.builder()
.executionConfig(BatchEnvironmentConfigExecutionConfigArgs.builder()
.subnetworkUri("default")
.ttl("3600s")
.build())
.build())
.sparkBatch(BatchSparkBatchArgs.builder()
.mainClass("org.apache.spark.examples.SparkPi")
.args("10")
.jarFileUris("file:///usr/lib/spark/examples/jars/spark-examples.jar")
.build())
.build());
}
}
resources:
exampleBatchAutotuning:
type: gcp:dataproc:Batch
name: example_batch_autotuning
properties:
batchId: tf-test-batch_8493
location: us-central1
labels:
batch_test: terraform
runtimeConfig:
version: '2.2'
properties:
spark.dynamicAllocation.enabled: 'false'
spark.executor.instances: '2'
cohort: tf-dataproc-batch-example
autotuningConfig:
scenarios:
- SCALING
- MEMORY
environmentConfig:
executionConfig:
subnetworkUri: default
ttl: 3600s
sparkBatch:
mainClass: org.apache.spark.examples.SparkPi
args:
- '10'
jarFileUris:
- file:///usr/lib/spark/examples/jars/spark-examples.jar
Import
Batch can be imported using any of these accepted formats:
projects/{{project}}/locations/{{location}}/batches/{{batch_id}}
{{project}}/{{location}}/{{batch_id}}
{{location}}/{{batch_id}}
When using the pulumi import command, Batch can be imported using one of the formats above. For example:
$ pulumi import gcp:dataproc/batch:Batch default projects/{{project}}/locations/{{location}}/batches/{{batch_id}}
$ pulumi import gcp:dataproc/batch:Batch default {{project}}/{{location}}/{{batch_id}}
$ pulumi import gcp:dataproc/batch:Batch default {{location}}/{{batch_id}}
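For example, to adopt the first example's batch (batch ID tf-test-batch_88722 in us-central1) into a stack under the resource name example_batch_spark, substituting a hypothetical project ID my-gcp-project for {{project}}:
$ pulumi import gcp:dataproc/batch:Batch example_batch_spark projects/my-gcp-project/locations/us-central1/batches/tf-test-batch_88722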
Properties
environmentConfig: Environment configuration for the batch execution. Structure is documented below.
pysparkBatch: PySpark batch config. Structure is documented below.
runtimeConfig: Runtime configuration for the batch execution. Structure is documented below.
sparkBatch: Spark batch config. Structure is documented below.
sparkRBatch: SparkR batch config. Structure is documented below.
sparkSqlBatch: Spark SQL batch config. Structure is documented below.