Crawler Args
data class CrawlerArgs(val catalogTargets: Output<List<CrawlerCatalogTargetArgs>>? = null, val classifiers: Output<List<String>>? = null, val configuration: Output<String>? = null, val databaseName: Output<String>? = null, val deltaTargets: Output<List<CrawlerDeltaTargetArgs>>? = null, val description: Output<String>? = null, val dynamodbTargets: Output<List<CrawlerDynamodbTargetArgs>>? = null, val hudiTargets: Output<List<CrawlerHudiTargetArgs>>? = null, val icebergTargets: Output<List<CrawlerIcebergTargetArgs>>? = null, val jdbcTargets: Output<List<CrawlerJdbcTargetArgs>>? = null, val lakeFormationConfiguration: Output<CrawlerLakeFormationConfigurationArgs>? = null, val lineageConfiguration: Output<CrawlerLineageConfigurationArgs>? = null, val mongodbTargets: Output<List<CrawlerMongodbTargetArgs>>? = null, val name: Output<String>? = null, val recrawlPolicy: Output<CrawlerRecrawlPolicyArgs>? = null, val role: Output<String>? = null, val s3Targets: Output<List<CrawlerS3TargetArgs>>? = null, val schedule: Output<String>? = null, val schemaChangePolicy: Output<CrawlerSchemaChangePolicyArgs>? = null, val securityConfiguration: Output<String>? = null, val tablePrefix: Output<String>? = null, val tags: Output<Map<String, String>>? = null) : ConvertibleToJava<CrawlerArgs>
Manages a Glue Crawler. More information can be found in the AWS Glue Developer Guide.
Example Usage
DynamoDB Target Example
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerDynamodbTargetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var example = new Crawler("example", CrawlerArgs.builder()
.databaseName(aws_glue_catalog_database.example().name())
.role(aws_iam_role.example().arn())
.dynamodbTargets(CrawlerDynamodbTargetArgs.builder()
.path("table-name")
.build())
.build());
}
}
Content copied to clipboard
JDBC Target Example
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerJdbcTargetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var example = new Crawler("example", CrawlerArgs.builder()
.databaseName(aws_glue_catalog_database.example().name())
.role(aws_iam_role.example().arn())
.jdbcTargets(CrawlerJdbcTargetArgs.builder()
.connectionName(aws_glue_connection.example().name())
.path("database-name/%")
.build())
.build());
}
}
Content copied to clipboard
S3 Target Example
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerS3TargetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var example = new Crawler("example", CrawlerArgs.builder()
.databaseName(aws_glue_catalog_database.example().name())
.role(aws_iam_role.example().arn())
.s3Targets(CrawlerS3TargetArgs.builder()
.path(String.format("s3://%s", aws_s3_bucket.example().bucket()))
.build())
.build());
}
}
Content copied to clipboard
Catalog Target Example
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerCatalogTargetArgs;
import com.pulumi.aws.glue.inputs.CrawlerSchemaChangePolicyArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var example = new Crawler("example", CrawlerArgs.builder()
.databaseName(aws_glue_catalog_database.example().name())
.role(aws_iam_role.example().arn())
.catalogTargets(CrawlerCatalogTargetArgs.builder()
.databaseName(aws_glue_catalog_database.example().name())
.tables(aws_glue_catalog_table.example().name())
.build())
.schemaChangePolicy(CrawlerSchemaChangePolicyArgs.builder()
.deleteBehavior("LOG")
.build())
.configuration("""
{
"Version":1.0,
"Grouping": {
"TableGroupingPolicy": "CombineCompatibleSchemas"
}
}
""")
.build());
}
}
Content copied to clipboard
MongoDB Target Example
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerMongodbTargetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var example = new Crawler("example", CrawlerArgs.builder()
.databaseName(aws_glue_catalog_database.example().name())
.role(aws_iam_role.example().arn())
.mongodbTargets(CrawlerMongodbTargetArgs.builder()
.connectionName(aws_glue_connection.example().name())
.path("database-name/%")
.build())
.build());
}
}
Content copied to clipboard
Configuration Settings Example
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerS3TargetArgs;
import static com.pulumi.codegen.internal.Serialization.*;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var eventsCrawler = new Crawler("eventsCrawler", CrawlerArgs.builder()
.databaseName(aws_glue_catalog_database.glue_database().name())
.schedule("cron(0 1 * * ? *)")
.role(aws_iam_role.glue_role().arn())
.tags(var_.tags())
.configuration(serializeJson(
jsonObject(
jsonProperty("Grouping", jsonObject(
jsonProperty("TableGroupingPolicy", "CombineCompatibleSchemas")
)),
jsonProperty("CrawlerOutput", jsonObject(
jsonProperty("Partitions", jsonObject(
jsonProperty("AddOrUpdateBehavior", "InheritFromTable")
))
)),
jsonProperty("Version", 1)
)))
.s3Targets(CrawlerS3TargetArgs.builder()
.path(String.format("s3://%s", aws_s3_bucket.data_lake_bucket().bucket()))
.build())
.build());
}
}
Content copied to clipboard
Import
Using `pulumi import`, import Glue Crawlers using `name`. For example:
$ pulumi import aws:glue/crawler:Crawler MyJob MyJob
Content copied to clipboard
Constructors
Link copied to clipboard
fun CrawlerArgs(catalogTargets: Output<List<CrawlerCatalogTargetArgs>>? = null, classifiers: Output<List<String>>? = null, configuration: Output<String>? = null, databaseName: Output<String>? = null, deltaTargets: Output<List<CrawlerDeltaTargetArgs>>? = null, description: Output<String>? = null, dynamodbTargets: Output<List<CrawlerDynamodbTargetArgs>>? = null, hudiTargets: Output<List<CrawlerHudiTargetArgs>>? = null, icebergTargets: Output<List<CrawlerIcebergTargetArgs>>? = null, jdbcTargets: Output<List<CrawlerJdbcTargetArgs>>? = null, lakeFormationConfiguration: Output<CrawlerLakeFormationConfigurationArgs>? = null, lineageConfiguration: Output<CrawlerLineageConfigurationArgs>? = null, mongodbTargets: Output<List<CrawlerMongodbTargetArgs>>? = null, name: Output<String>? = null, recrawlPolicy: Output<CrawlerRecrawlPolicyArgs>? = null, role: Output<String>? = null, s3Targets: Output<List<CrawlerS3TargetArgs>>? = null, schedule: Output<String>? = null, schemaChangePolicy: Output<CrawlerSchemaChangePolicyArgs>? = null, securityConfiguration: Output<String>? = null, tablePrefix: Output<String>? = null, tags: Output<Map<String, String>>? = null)
Functions
Properties
Link copied to clipboard
JSON string of configuration information. For more details see Setting Crawler Configuration Options.
Link copied to clipboard
Link copied to clipboard
Link copied to clipboard
A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: `cron(15 12 * * ? *)`.
Link copied to clipboard