MLTransformArgs
Provides a Glue ML Transform resource.
Example Usage
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.aws.glue.CatalogDatabase;
import com.pulumi.aws.glue.CatalogDatabaseArgs;
import com.pulumi.aws.glue.CatalogTable;
import com.pulumi.aws.glue.CatalogTableArgs;
import com.pulumi.aws.glue.MLTransform;
import com.pulumi.aws.glue.MLTransformArgs;
import com.pulumi.aws.glue.inputs.CatalogTablePartitionKeyArgs;
import com.pulumi.aws.glue.inputs.CatalogTableStorageDescriptorArgs;
import com.pulumi.aws.glue.inputs.CatalogTableStorageDescriptorColumnArgs;
import com.pulumi.aws.glue.inputs.CatalogTableStorageDescriptorSerDeInfoArgs;
import com.pulumi.aws.glue.inputs.CatalogTableStorageDescriptorSkewedInfoArgs;
import com.pulumi.aws.glue.inputs.CatalogTableStorageDescriptorSortColumnArgs;
import com.pulumi.aws.glue.inputs.MLTransformInputRecordTableArgs;
import com.pulumi.aws.glue.inputs.MLTransformParametersArgs;
import com.pulumi.aws.glue.inputs.MLTransformParametersFindMatchesParametersArgs;
import com.pulumi.core.Output;
import com.pulumi.resources.CustomResourceOptions;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * Example Pulumi program that declares a Glue catalog database, a catalog
 * table inside it, and a FIND_MATCHES ML Transform that reads from that table.
 */
public class App {

    /** Hands the stack definition to the Pulumi runtime. */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the three Glue resources of this example, in dependency order:
     * database, then table, then ML Transform.
     *
     * @param ctx the Pulumi context supplied by {@code Pulumi.run}; not read here
     */
    public static void stack(Context ctx) {
        var database = new CatalogDatabase(
            "testCatalogDatabase",
            CatalogDatabaseArgs.builder()
                .name("example")
                .build());

        var tableArgs = CatalogTableArgs.builder()
            .name("example")
            .databaseName(database.name())
            .owner("my_owner")
            .retention(1)
            .tableType("VIRTUAL_VIEW")
            .viewExpandedText("view_expanded_text_1")
            .viewOriginalText("view_original_text_1")
            .storageDescriptor(storageDescriptor())
            .partitionKeys(
                partitionKey("my_column_1", "int", "my_column_1_comment"),
                partitionKey("my_column_2", "string", "my_column_2_comment"))
            .parameters(Map.of("param1", "param1_val"))
            .build();
        var table = new CatalogTable("testCatalogTable", tableArgs);

        // The transform reads its input records from the table declared above.
        var inputTable = MLTransformInputRecordTableArgs.builder()
            .databaseName(table.databaseName())
            .tableName(table.name())
            .build();

        var findMatches = MLTransformParametersFindMatchesParametersArgs.builder()
            .primaryKeyColumnName("my_column_1")
            .build();

        var transformParameters = MLTransformParametersArgs.builder()
            .transformType("FIND_MATCHES")
            .findMatchesParameters(findMatches)
            .build();

        // NOTE(review): `aws_iam_role` and `aws_iam_role_policy_attachment` are not
        // declared anywhere in this snippet; presumably they stand for an IAM role and
        // its policy attachment defined elsewhere in the program - confirm before use.
        var transformArgs = MLTransformArgs.builder()
            .roleArn(aws_iam_role.test().arn())
            .inputRecordTables(inputTable)
            .parameters(transformParameters)
            .build();

        var transformOptions = CustomResourceOptions.builder()
            .dependsOn(aws_iam_role_policy_attachment.test())
            .build();

        var testMLTransform = new MLTransform("testMLTransform", transformArgs, transformOptions);
    }

    /** Builds the storage descriptor (columns, SerDe, sort and skew settings) of the example table. */
    private static CatalogTableStorageDescriptorArgs storageDescriptor() {
        var serDe = CatalogTableStorageDescriptorSerDeInfoArgs.builder()
            .name("ser_de_name")
            .parameters(Map.of("param1", "param_val_1"))
            .serializationLibrary("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe")
            .build();

        var sortByFirstColumn = CatalogTableStorageDescriptorSortColumnArgs.builder()
            .column("my_column_1")
            .sortOrder(1)
            .build();

        var skew = CatalogTableStorageDescriptorSkewedInfoArgs.builder()
            .skewedColumnNames("my_column_1")
            .skewedColumnValueLocationMaps(Map.of("my_column_1", "my_column_1_val_loc_map"))
            .skewedColumnValues("skewed_val_1")
            .build();

        return CatalogTableStorageDescriptorArgs.builder()
            .bucketColumns("bucket_column_1")
            .compressed(false)
            .inputFormat("SequenceFileInputFormat")
            .location("my_location")
            .numberOfBuckets(1)
            .outputFormat("SequenceFileInputFormat")
            .storedAsSubDirectories(false)
            .parameters(Map.of("param1", "param1_val"))
            .columns(
                column("my_column_1", "int", "my_column1_comment"),
                column("my_column_2", "string", "my_column2_comment"))
            .serDeInfo(serDe)
            .sortColumns(sortByFirstColumn)
            .skewedInfo(skew)
            .build();
    }

    /** Builds one storage-descriptor column from its name, type and comment. */
    private static CatalogTableStorageDescriptorColumnArgs column(String name, String type, String comment) {
        return CatalogTableStorageDescriptorColumnArgs.builder()
            .name(name)
            .type(type)
            .comment(comment)
            .build();
    }

    /** Builds one partition key from its name, type and comment. */
    private static CatalogTablePartitionKeyArgs partitionKey(String name, String type, String comment) {
        return CatalogTablePartitionKeyArgs.builder()
            .name(name)
            .type(type)
            .comment(comment)
            .build();
    }
}
Import
Glue ML Transforms can be imported using their `id`, e.g.:
$ pulumi import aws:glue/mLTransform:MLTransform example tfm-c2cafbe83b1c575f49eaca9939220e2fcd58e2d5
Constructors
Properties
Description of the ML Transform.
The version of glue to use, for example "1.0". For information about available versions, see the AWS Glue Release Notes.
A list of AWS Glue table definitions used by the transform. See Input Record Tables.
The number of AWS Glue data processing units (DPUs) that are allocated to task runs for this transform. You can allocate from `2` to `100` DPUs; the default is `10`. `max_capacity` is a mutually exclusive option with `number_of_workers` and `worker_type`.
The maximum number of times to retry this ML Transform if it fails.
The number of workers of a defined `worker_type` that are allocated when an ML Transform runs. Required with `worker_type`.
The algorithmic parameters that are specific to the transform type used. Conditionally dependent on the transform type. See Parameters.
The type of predefined worker that is allocated when an ML Transform runs. Accepts a value of `Standard`, `G.1X`, or `G.2X`. Required with `number_of_workers`.