Crawler Args
data class CrawlerArgs(val catalogTargets: Output<List<CrawlerCatalogTargetArgs>>? = null, val classifiers: Output<List<String>>? = null, val configuration: Output<String>? = null, val databaseName: Output<String>? = null, val deltaTargets: Output<List<CrawlerDeltaTargetArgs>>? = null, val description: Output<String>? = null, val dynamodbTargets: Output<List<CrawlerDynamodbTargetArgs>>? = null, val hudiTargets: Output<List<CrawlerHudiTargetArgs>>? = null, val icebergTargets: Output<List<CrawlerIcebergTargetArgs>>? = null, val jdbcTargets: Output<List<CrawlerJdbcTargetArgs>>? = null, val lakeFormationConfiguration: Output<CrawlerLakeFormationConfigurationArgs>? = null, val lineageConfiguration: Output<CrawlerLineageConfigurationArgs>? = null, val mongodbTargets: Output<List<CrawlerMongodbTargetArgs>>? = null, val name: Output<String>? = null, val recrawlPolicy: Output<CrawlerRecrawlPolicyArgs>? = null, val role: Output<String>? = null, val s3Targets: Output<List<CrawlerS3TargetArgs>>? = null, val schedule: Output<String>? = null, val schemaChangePolicy: Output<CrawlerSchemaChangePolicyArgs>? = null, val securityConfiguration: Output<String>? = null, val tablePrefix: Output<String>? = null, val tags: Output<Map<String, String>>? = null) : ConvertibleToJava<CrawlerArgs>
Manages a Glue Crawler. More information can be found in the AWS Glue Developer Guide.
Example Usage
DynamoDB Target Example
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerDynamodbTargetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var example = new Crawler("example", CrawlerArgs.builder()
.databaseName(aws_glue_catalog_database.example().name())
.role(aws_iam_role.example().arn())
.dynamodbTargets(CrawlerDynamodbTargetArgs.builder()
.path("table-name")
.build())
.build());
}
}
Content copied to clipboard
JDBC Target Example
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerJdbcTargetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var example = new Crawler("example", CrawlerArgs.builder()
.databaseName(aws_glue_catalog_database.example().name())
.role(aws_iam_role.example().arn())
.jdbcTargets(CrawlerJdbcTargetArgs.builder()
.connectionName(aws_glue_connection.example().name())
.path("database-name/%")
.build())
.build());
}
}
Content copied to clipboard
S3 Target Example
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerS3TargetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var example = new Crawler("example", CrawlerArgs.builder()
.databaseName(aws_glue_catalog_database.example().name())
.role(aws_iam_role.example().arn())
.s3Targets(CrawlerS3TargetArgs.builder()
.path(String.format("s3://%s", aws_s3_bucket.example().bucket()))
.build())
.build());
}
}
Content copied to clipboard
Catalog Target Example
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerCatalogTargetArgs;
import com.pulumi.aws.glue.inputs.CrawlerSchemaChangePolicyArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var example = new Crawler("example", CrawlerArgs.builder()
.databaseName(aws_glue_catalog_database.example().name())
.role(aws_iam_role.example().arn())
.catalogTargets(CrawlerCatalogTargetArgs.builder()
.databaseName(aws_glue_catalog_database.example().name())
.tables(aws_glue_catalog_table.example().name())
.build())
.schemaChangePolicy(CrawlerSchemaChangePolicyArgs.builder()
.deleteBehavior("LOG")
.build())
.configuration("""
{
"Version":1.0,
"Grouping": {
"TableGroupingPolicy": "CombineCompatibleSchemas"
}
}
""")
.build());
}
}
Content copied to clipboard
MongoDB Target Example
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerMongodbTargetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var example = new Crawler("example", CrawlerArgs.builder()
.databaseName(aws_glue_catalog_database.example().name())
.role(aws_iam_role.example().arn())
.mongodbTargets(CrawlerMongodbTargetArgs.builder()
.connectionName(aws_glue_connection.example().name())
.path("database-name/%")
.build())
.build());
}
}
Content copied to clipboard
Configuration Settings Example
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerS3TargetArgs;
import static com.pulumi.codegen.internal.Serialization.*;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var eventsCrawler = new Crawler("eventsCrawler", CrawlerArgs.builder()
.databaseName(aws_glue_catalog_database.glue_database().name())
.schedule("cron(0 1 * * ? *)")
.role(aws_iam_role.glue_role().arn())
.tags(var_.tags())
.configuration(serializeJson(
jsonObject(
jsonProperty("Grouping", jsonObject(
jsonProperty("TableGroupingPolicy", "CombineCompatibleSchemas")
)),
jsonProperty("CrawlerOutput", jsonObject(
jsonProperty("Partitions", jsonObject(
jsonProperty("AddOrUpdateBehavior", "InheritFromTable")
))
)),
jsonProperty("Version", 1)
)))
.s3Targets(CrawlerS3TargetArgs.builder()
.path(String.format("s3://%s", aws_s3_bucket.data_lake_bucket().bucket()))
.build())
.build());
}
}
Content copied to clipboard
Import
Using `pulumi import`, import Glue Crawlers using `name`. For example:
$ pulumi import aws:glue/crawler:Crawler MyJob MyJob
Content copied to clipboard
Constructors
Link copied to clipboard
fun CrawlerArgs(catalogTargets: Output<List<CrawlerCatalogTargetArgs>>? = null, classifiers: Output<List<String>>? = null, configuration: Output<String>? = null, databaseName: Output<String>? = null, deltaTargets: Output<List<CrawlerDeltaTargetArgs>>? = null, description: Output<String>? = null, dynamodbTargets: Output<List<CrawlerDynamodbTargetArgs>>? = null, hudiTargets: Output<List<CrawlerHudiTargetArgs>>? = null, icebergTargets: Output<List<CrawlerIcebergTargetArgs>>? = null, jdbcTargets: Output<List<CrawlerJdbcTargetArgs>>? = null, lakeFormationConfiguration: Output<CrawlerLakeFormationConfigurationArgs>? = null, lineageConfiguration: Output<CrawlerLineageConfigurationArgs>? = null, mongodbTargets: Output<List<CrawlerMongodbTargetArgs>>? = null, name: Output<String>? = null, recrawlPolicy: Output<CrawlerRecrawlPolicyArgs>? = null, role: Output<String>? = null, s3Targets: Output<List<CrawlerS3TargetArgs>>? = null, schedule: Output<String>? = null, schemaChangePolicy: Output<CrawlerSchemaChangePolicyArgs>? = null, securityConfiguration: Output<String>? = null, tablePrefix: Output<String>? = null, tags: Output<Map<String, String>>? = null)
Functions
Properties
Link copied to clipboard
JSON string of configuration information. For more details see Setting Crawler Configuration Options.
Link copied to clipboard
Link copied to clipboard
Link copied to clipboard
A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: `cron(15 12 * * ? *)`.
Link copied to clipboard