CrawlerArgs
Manages a Glue Crawler. More information can be found in the AWS Glue Developer Guide.
Example Usage
DynamoDB Target Example
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerDynamodbTargetArgs;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // `exampleDatabase` (a glue.CatalogDatabase) and `exampleRole` (an iam.Role)
        // are assumed to be defined elsewhere in the program.
        var example = new Crawler("example", CrawlerArgs.builder()
            .databaseName(exampleDatabase.name())
            .role(exampleRole.arn())
            .dynamodbTargets(CrawlerDynamodbTargetArgs.builder()
                .path("table-name")
                .build())
            .build());
    }
}
JDBC Target Example
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerJdbcTargetArgs;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // `exampleDatabase` (a glue.CatalogDatabase), `exampleRole` (an iam.Role),
        // and `exampleConnection` (a glue.Connection) are assumed to be defined
        // elsewhere in the program.
        var example = new Crawler("example", CrawlerArgs.builder()
            .databaseName(exampleDatabase.name())
            .role(exampleRole.arn())
            .jdbcTargets(CrawlerJdbcTargetArgs.builder()
                .connectionName(exampleConnection.name())
                .path("database-name/%")
                .build())
            .build());
    }
}
S3 Target Example
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerS3TargetArgs;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // `exampleDatabase`, `exampleRole`, and `exampleBucket` (an s3.Bucket) are
        // assumed to be defined elsewhere. `bucket()` returns an Output<String>,
        // so the target path is built with applyValue.
        var example = new Crawler("example", CrawlerArgs.builder()
            .databaseName(exampleDatabase.name())
            .role(exampleRole.arn())
            .s3Targets(CrawlerS3TargetArgs.builder()
                .path(exampleBucket.bucket().applyValue(b -> String.format("s3://%s", b)))
                .build())
            .build());
    }
}
Catalog Target Example
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerCatalogTargetArgs;
import com.pulumi.aws.glue.inputs.CrawlerSchemaChangePolicyArgs;
import java.util.List;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // `exampleDatabase` (a glue.CatalogDatabase), `exampleRole` (an iam.Role),
        // and `exampleTable` (a glue.CatalogTable) are assumed to be defined
        // elsewhere in the program.
        var example = new Crawler("example", CrawlerArgs.builder()
            .databaseName(exampleDatabase.name())
            .role(exampleRole.arn())
            .catalogTargets(CrawlerCatalogTargetArgs.builder()
                .databaseName(exampleDatabase.name())
                // `tables` takes a list, so wrap the single table name via applyValue.
                .tables(exampleTable.name().applyValue(List::of))
                .build())
            .schemaChangePolicy(CrawlerSchemaChangePolicyArgs.builder()
                .deleteBehavior("LOG")
                .build())
            .configuration("""
                {
                  "Version": 1.0,
                  "Grouping": {
                    "TableGroupingPolicy": "CombineCompatibleSchemas"
                  }
                }
                """)
            .build());
    }
}
MongoDB Target Example
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerMongodbTargetArgs;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // `exampleDatabase` (a glue.CatalogDatabase), `exampleRole` (an iam.Role),
        // and `exampleConnection` (a glue.Connection) are assumed to be defined
        // elsewhere in the program.
        var example = new Crawler("example", CrawlerArgs.builder()
            .databaseName(exampleDatabase.name())
            .role(exampleRole.arn())
            .mongodbTargets(CrawlerMongodbTargetArgs.builder()
                .connectionName(exampleConnection.name())
                .path("database-name/%")
                .build())
            .build());
    }
}
Configuration Settings Example
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerS3TargetArgs;

import static com.pulumi.codegen.internal.Serialization.*;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // `glueDatabase` (a glue.CatalogDatabase), `glueRole` (an iam.Role),
        // `dataLakeBucket` (an s3.Bucket), and the `tags` map (Map<String, String>)
        // are assumed to be defined elsewhere in the program.
        var eventsCrawler = new Crawler("eventsCrawler", CrawlerArgs.builder()
            .databaseName(glueDatabase.name())
            .schedule("cron(0 1 * * ? *)")
            .role(glueRole.arn())
            .tags(tags)
            .configuration(serializeJson(
                jsonObject(
                    jsonProperty("Grouping", jsonObject(
                        jsonProperty("TableGroupingPolicy", "CombineCompatibleSchemas")
                    )),
                    jsonProperty("CrawlerOutput", jsonObject(
                        jsonProperty("Partitions", jsonObject(
                            jsonProperty("AddOrUpdateBehavior", "InheritFromTable")
                        ))
                    )),
                    jsonProperty("Version", 1)
                )))
            .s3Targets(CrawlerS3TargetArgs.builder()
                .path(dataLakeBucket.bucket().applyValue(b -> String.format("s3://%s", b)))
                .build())
            .build());
    }
}
Import
Glue Crawlers can be imported using the crawler name, e.g.,
$ pulumi import aws:glue/crawler:Crawler MyJob MyJob
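The import command brings the existing crawler into the Pulumi state; the program then also needs a matching resource declaration so later updates can manage it. A minimal sketch of such a declaration follows; the database name and role ARN are hypothetical placeholders for the imported crawler's actual settings:

public static void stack(Context ctx) {
    // Declaration matching an imported crawler named "MyJob". The databaseName
    // and role values below are placeholders; pulumi preview will report any
    // drift from the imported resource's real configuration.
    var myJob = new Crawler("MyJob", CrawlerArgs.builder()
        .name("MyJob")
        .databaseName("my-database")
        .role("arn:aws:iam::123456789012:role/example-glue-role")
        .build());
}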
Constructors
Properties
classifiers
List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.

configuration
JSON string of configuration information. For more details see Setting Crawler Configuration Options.

databaseName
Glue database where results are written.

description
Description of the crawler.

dynamodbTargets
List of nested DynamoDB target arguments. See Dynamodb Target below.

jdbcTargets
List of nested JDBC target arguments. See JDBC Target below.

lakeFormationConfiguration
Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.

lineageConfiguration
Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.

mongodbTargets
List of nested MongoDB target arguments. See MongoDB Target below.

recrawlPolicy
A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.

s3Targets
List of nested Amazon S3 target arguments. See S3 Target below.

schedule
A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *). (A combined sketch of this and the following options appears after this list.)

schemaChangePolicy
Policy for the crawler's update and deletion behavior. See Schema Change Policy below.

securityConfiguration
The name of the Security Configuration to be used by the crawler.

tablePrefix
The table prefix used for catalog tables that are created.
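As a combined illustration of the schedule, policy, and naming options above, here is a non-authoritative sketch. The resource references `exampleDatabase` and `exampleRole` are assumptions, as in the earlier examples, and the security configuration name and bucket path are placeholders; the builder types come from com.pulumi.aws.glue.inputs.

// Sketch combining several of the options described above; requires
// CrawlerSchemaChangePolicyArgs, CrawlerRecrawlPolicyArgs, and
// CrawlerS3TargetArgs from com.pulumi.aws.glue.inputs.
var scheduled = new Crawler("scheduled", CrawlerArgs.builder()
    .databaseName(exampleDatabase.name())
    .role(exampleRole.arn())
    .schedule("cron(15 12 * * ? *)")            // every day at 12:15 UTC
    .tablePrefix("crawled_")                    // prefix for the catalog tables it creates
    .securityConfiguration("example-security-configuration") // hypothetical name
    .schemaChangePolicy(CrawlerSchemaChangePolicyArgs.builder()
        // AWS requires LOG behaviors when recrawling new folders only.
        .deleteBehavior("LOG")
        .updateBehavior("LOG")
        .build())
    .recrawlPolicy(CrawlerRecrawlPolicyArgs.builder()
        .recrawlBehavior("CRAWL_NEW_FOLDERS_ONLY") // only folders added since the last run
        .build())
    .s3Targets(CrawlerS3TargetArgs.builder()
        .path("s3://example-bucket/data/")      // hypothetical bucket path
        .build())
    .build());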