Crawler Args
data class CrawlerArgs(val catalogTargets: Output<List<CrawlerCatalogTargetArgs>>? = null, val classifiers: Output<List<String>>? = null, val configuration: Output<String>? = null, val databaseName: Output<String>? = null, val deltaTargets: Output<List<CrawlerDeltaTargetArgs>>? = null, val description: Output<String>? = null, val dynamodbTargets: Output<List<CrawlerDynamodbTargetArgs>>? = null, val hudiTargets: Output<List<CrawlerHudiTargetArgs>>? = null, val icebergTargets: Output<List<CrawlerIcebergTargetArgs>>? = null, val jdbcTargets: Output<List<CrawlerJdbcTargetArgs>>? = null, val lakeFormationConfiguration: Output<CrawlerLakeFormationConfigurationArgs>? = null, val lineageConfiguration: Output<CrawlerLineageConfigurationArgs>? = null, val mongodbTargets: Output<List<CrawlerMongodbTargetArgs>>? = null, val name: Output<String>? = null, val recrawlPolicy: Output<CrawlerRecrawlPolicyArgs>? = null, val role: Output<String>? = null, val s3Targets: Output<List<CrawlerS3TargetArgs>>? = null, val schedule: Output<String>? = null, val schemaChangePolicy: Output<CrawlerSchemaChangePolicyArgs>? = null, val securityConfiguration: Output<String>? = null, val tablePrefix: Output<String>? = null, val tags: Output<Map<String, String>>? = null) : ConvertibleToJava<CrawlerArgs>
Manages a Glue Crawler. More information can be found in the AWS Glue Developer Guide.
Example Usage
DynamoDB Target Example
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const example = new aws.glue.Crawler("example", {
databaseName: exampleAwsGlueCatalogDatabase.name,
name: "example",
role: exampleAwsIamRole.arn,
dynamodbTargets: [{
path: "table-name",
}],
});
import pulumi
import pulumi_aws as aws
example = aws.glue.Crawler("example",
database_name=example_aws_glue_catalog_database["name"],
name="example",
role=example_aws_iam_role["arn"],
dynamodb_targets=[aws.glue.CrawlerDynamodbTargetArgs(
path="table-name",
)])
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;
return await Deployment.RunAsync(() =>
{
var example = new Aws.Glue.Crawler("example", new()
{
DatabaseName = exampleAwsGlueCatalogDatabase.Name,
Name = "example",
Role = exampleAwsIamRole.Arn,
DynamodbTargets = new[]
{
new Aws.Glue.Inputs.CrawlerDynamodbTargetArgs
{
Path = "table-name",
},
},
});
});
package main
import (
"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
DatabaseName: pulumi.Any(exampleAwsGlueCatalogDatabase.Name),
Name: pulumi.String("example"),
Role: pulumi.Any(exampleAwsIamRole.Arn),
DynamodbTargets: glue.CrawlerDynamodbTargetArray{
&glue.CrawlerDynamodbTargetArgs{
Path: pulumi.String("table-name"),
},
},
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerDynamodbTargetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var example = new Crawler("example", CrawlerArgs.builder()
.databaseName(exampleAwsGlueCatalogDatabase.name())
.name("example")
.role(exampleAwsIamRole.arn())
.dynamodbTargets(CrawlerDynamodbTargetArgs.builder()
.path("table-name")
.build())
.build());
}
}
resources:
example:
type: aws:glue:Crawler
properties:
databaseName: ${exampleAwsGlueCatalogDatabase.name}
name: example
role: ${exampleAwsIamRole.arn}
dynamodbTargets:
- path: table-name
JDBC Target Example
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const example = new aws.glue.Crawler("example", {
databaseName: exampleAwsGlueCatalogDatabase.name,
name: "example",
role: exampleAwsIamRole.arn,
jdbcTargets: [{
connectionName: exampleAwsGlueConnection.name,
path: "database-name/%",
}],
});
import pulumi
import pulumi_aws as aws
example = aws.glue.Crawler("example",
database_name=example_aws_glue_catalog_database["name"],
name="example",
role=example_aws_iam_role["arn"],
jdbc_targets=[aws.glue.CrawlerJdbcTargetArgs(
connection_name=example_aws_glue_connection["name"],
path="database-name/%",
)])
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;
return await Deployment.RunAsync(() =>
{
var example = new Aws.Glue.Crawler("example", new()
{
DatabaseName = exampleAwsGlueCatalogDatabase.Name,
Name = "example",
Role = exampleAwsIamRole.Arn,
JdbcTargets = new[]
{
new Aws.Glue.Inputs.CrawlerJdbcTargetArgs
{
ConnectionName = exampleAwsGlueConnection.Name,
Path = "database-name/%",
},
},
});
});
package main
import (
"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
DatabaseName: pulumi.Any(exampleAwsGlueCatalogDatabase.Name),
Name: pulumi.String("example"),
Role: pulumi.Any(exampleAwsIamRole.Arn),
JdbcTargets: glue.CrawlerJdbcTargetArray{
&glue.CrawlerJdbcTargetArgs{
ConnectionName: pulumi.Any(exampleAwsGlueConnection.Name),
Path: pulumi.String("database-name/%"),
},
},
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerJdbcTargetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var example = new Crawler("example", CrawlerArgs.builder()
.databaseName(exampleAwsGlueCatalogDatabase.name())
.name("example")
.role(exampleAwsIamRole.arn())
.jdbcTargets(CrawlerJdbcTargetArgs.builder()
.connectionName(exampleAwsGlueConnection.name())
.path("database-name/%")
.build())
.build());
}
}
resources:
example:
type: aws:glue:Crawler
properties:
databaseName: ${exampleAwsGlueCatalogDatabase.name}
name: example
role: ${exampleAwsIamRole.arn}
jdbcTargets:
- connectionName: ${exampleAwsGlueConnection.name}
path: database-name/%
S3 Target Example
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const example = new aws.glue.Crawler("example", {
databaseName: exampleAwsGlueCatalogDatabase.name,
name: "example",
role: exampleAwsIamRole.arn,
s3Targets: [{
path: `s3://${exampleAwsS3Bucket.bucket}`,
}],
});
import pulumi
import pulumi_aws as aws
example = aws.glue.Crawler("example",
database_name=example_aws_glue_catalog_database["name"],
name="example",
role=example_aws_iam_role["arn"],
s3_targets=[aws.glue.CrawlerS3TargetArgs(
path=f"s3://{example_aws_s3_bucket['bucket']}",
)])
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;
return await Deployment.RunAsync(() =>
{
var example = new Aws.Glue.Crawler("example", new()
{
DatabaseName = exampleAwsGlueCatalogDatabase.Name,
Name = "example",
Role = exampleAwsIamRole.Arn,
S3Targets = new[]
{
new Aws.Glue.Inputs.CrawlerS3TargetArgs
{
Path = $"s3://{exampleAwsS3Bucket.Bucket}",
},
},
});
});
package main
import (
"fmt"
"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
DatabaseName: pulumi.Any(exampleAwsGlueCatalogDatabase.Name),
Name: pulumi.String("example"),
Role: pulumi.Any(exampleAwsIamRole.Arn),
S3Targets: glue.CrawlerS3TargetArray{
&glue.CrawlerS3TargetArgs{
Path: pulumi.String(fmt.Sprintf("s3://%v", exampleAwsS3Bucket.Bucket)),
},
},
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerS3TargetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var example = new Crawler("example", CrawlerArgs.builder()
.databaseName(exampleAwsGlueCatalogDatabase.name())
.name("example")
.role(exampleAwsIamRole.arn())
.s3Targets(CrawlerS3TargetArgs.builder()
.path(String.format("s3://%s", exampleAwsS3Bucket.bucket()))
.build())
.build());
}
}
resources:
example:
type: aws:glue:Crawler
properties:
databaseName: ${exampleAwsGlueCatalogDatabase.name}
name: example
role: ${exampleAwsIamRole.arn}
s3Targets:
- path: s3://${exampleAwsS3Bucket.bucket}
Catalog Target Example
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const example = new aws.glue.Crawler("example", {
databaseName: exampleAwsGlueCatalogDatabase.name,
name: "example",
role: exampleAwsIamRole.arn,
catalogTargets: [{
databaseName: exampleAwsGlueCatalogDatabase.name,
tables: [exampleAwsGlueCatalogTable.name],
}],
schemaChangePolicy: {
deleteBehavior: "LOG",
},
configuration: `{
"Version":1.0,
"Grouping": {
"TableGroupingPolicy": "CombineCompatibleSchemas"
}
}
`,
});
import pulumi
import pulumi_aws as aws
example = aws.glue.Crawler("example",
database_name=example_aws_glue_catalog_database["name"],
name="example",
role=example_aws_iam_role["arn"],
catalog_targets=[aws.glue.CrawlerCatalogTargetArgs(
database_name=example_aws_glue_catalog_database["name"],
tables=[example_aws_glue_catalog_table["name"]],
)],
schema_change_policy=aws.glue.CrawlerSchemaChangePolicyArgs(
delete_behavior="LOG",
),
configuration="""{
"Version":1.0,
"Grouping": {
"TableGroupingPolicy": "CombineCompatibleSchemas"
}
}
""")
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;
return await Deployment.RunAsync(() =>
{
var example = new Aws.Glue.Crawler("example", new()
{
DatabaseName = exampleAwsGlueCatalogDatabase.Name,
Name = "example",
Role = exampleAwsIamRole.Arn,
CatalogTargets = new[]
{
new Aws.Glue.Inputs.CrawlerCatalogTargetArgs
{
DatabaseName = exampleAwsGlueCatalogDatabase.Name,
Tables = new[]
{
exampleAwsGlueCatalogTable.Name,
},
},
},
SchemaChangePolicy = new Aws.Glue.Inputs.CrawlerSchemaChangePolicyArgs
{
DeleteBehavior = "LOG",
},
Configuration = @"{
""Version"":1.0,
""Grouping"": {
""TableGroupingPolicy"": ""CombineCompatibleSchemas""
}
}
",
});
});
package main
import (
"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
DatabaseName: pulumi.Any(exampleAwsGlueCatalogDatabase.Name),
Name: pulumi.String("example"),
Role: pulumi.Any(exampleAwsIamRole.Arn),
CatalogTargets: glue.CrawlerCatalogTargetArray{
&glue.CrawlerCatalogTargetArgs{
DatabaseName: pulumi.Any(exampleAwsGlueCatalogDatabase.Name),
Tables: pulumi.StringArray{
exampleAwsGlueCatalogTable.Name,
},
},
},
SchemaChangePolicy: &glue.CrawlerSchemaChangePolicyArgs{
DeleteBehavior: pulumi.String("LOG"),
},
Configuration: pulumi.String(`{
"Version":1.0,
"Grouping": {
"TableGroupingPolicy": "CombineCompatibleSchemas"
}
}
`),
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerCatalogTargetArgs;
import com.pulumi.aws.glue.inputs.CrawlerSchemaChangePolicyArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var example = new Crawler("example", CrawlerArgs.builder()
.databaseName(exampleAwsGlueCatalogDatabase.name())
.name("example")
.role(exampleAwsIamRole.arn())
.catalogTargets(CrawlerCatalogTargetArgs.builder()
.databaseName(exampleAwsGlueCatalogDatabase.name())
.tables(exampleAwsGlueCatalogTable.name())
.build())
.schemaChangePolicy(CrawlerSchemaChangePolicyArgs.builder()
.deleteBehavior("LOG")
.build())
.configuration("""
{
"Version":1.0,
"Grouping": {
"TableGroupingPolicy": "CombineCompatibleSchemas"
}
}
""")
.build());
}
}
resources:
example:
type: aws:glue:Crawler
properties:
databaseName: ${exampleAwsGlueCatalogDatabase.name}
name: example
role: ${exampleAwsIamRole.arn}
catalogTargets:
- databaseName: ${exampleAwsGlueCatalogDatabase.name}
tables:
- ${exampleAwsGlueCatalogTable.name}
schemaChangePolicy:
deleteBehavior: LOG
configuration: |
{
"Version":1.0,
"Grouping": {
"TableGroupingPolicy": "CombineCompatibleSchemas"
}
}
MongoDB Target Example
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const example = new aws.glue.Crawler("example", {
databaseName: exampleAwsGlueCatalogDatabase.name,
name: "example",
role: exampleAwsIamRole.arn,
mongodbTargets: [{
connectionName: exampleAwsGlueConnection.name,
path: "database-name/%",
}],
});
import pulumi
import pulumi_aws as aws
example = aws.glue.Crawler("example",
database_name=example_aws_glue_catalog_database["name"],
name="example",
role=example_aws_iam_role["arn"],
mongodb_targets=[aws.glue.CrawlerMongodbTargetArgs(
connection_name=example_aws_glue_connection["name"],
path="database-name/%",
)])
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;
return await Deployment.RunAsync(() =>
{
var example = new Aws.Glue.Crawler("example", new()
{
DatabaseName = exampleAwsGlueCatalogDatabase.Name,
Name = "example",
Role = exampleAwsIamRole.Arn,
MongodbTargets = new[]
{
new Aws.Glue.Inputs.CrawlerMongodbTargetArgs
{
ConnectionName = exampleAwsGlueConnection.Name,
Path = "database-name/%",
},
},
});
});
package main
import (
"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
DatabaseName: pulumi.Any(exampleAwsGlueCatalogDatabase.Name),
Name: pulumi.String("example"),
Role: pulumi.Any(exampleAwsIamRole.Arn),
MongodbTargets: glue.CrawlerMongodbTargetArray{
&glue.CrawlerMongodbTargetArgs{
ConnectionName: pulumi.Any(exampleAwsGlueConnection.Name),
Path: pulumi.String("database-name/%"),
},
},
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerMongodbTargetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var example = new Crawler("example", CrawlerArgs.builder()
.databaseName(exampleAwsGlueCatalogDatabase.name())
.name("example")
.role(exampleAwsIamRole.arn())
.mongodbTargets(CrawlerMongodbTargetArgs.builder()
.connectionName(exampleAwsGlueConnection.name())
.path("database-name/%")
.build())
.build());
}
}
resources:
example:
type: aws:glue:Crawler
properties:
databaseName: ${exampleAwsGlueCatalogDatabase.name}
name: example
role: ${exampleAwsIamRole.arn}
mongodbTargets:
- connectionName: ${exampleAwsGlueConnection.name}
path: database-name/%
Configuration Settings Example
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const eventsCrawler = new aws.glue.Crawler("events_crawler", {
databaseName: glueDatabase.name,
schedule: "cron(0 1 * * ? *)",
name: `events_crawler_${environmentName}`,
role: glueRole.arn,
tags: tags,
configuration: JSON.stringify({
Grouping: {
TableGroupingPolicy: "CombineCompatibleSchemas",
},
CrawlerOutput: {
Partitions: {
AddOrUpdateBehavior: "InheritFromTable",
},
},
Version: 1,
}),
s3Targets: [{
path: `s3://${dataLakeBucket.bucket}`,
}],
});
import pulumi
import json
import pulumi_aws as aws
events_crawler = aws.glue.Crawler("events_crawler",
database_name=glue_database["name"],
schedule="cron(0 1 * * ? *)",
name=f"events_crawler_{environment_name}",
role=glue_role["arn"],
tags=tags,
configuration=json.dumps({
"Grouping": {
"TableGroupingPolicy": "CombineCompatibleSchemas",
},
"CrawlerOutput": {
"Partitions": {
"AddOrUpdateBehavior": "InheritFromTable",
},
},
"Version": 1,
}),
s3_targets=[aws.glue.CrawlerS3TargetArgs(
path=f"s3://{data_lake_bucket['bucket']}",
)])
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using Pulumi;
using Aws = Pulumi.Aws;
return await Deployment.RunAsync(() =>
{
var eventsCrawler = new Aws.Glue.Crawler("events_crawler", new()
{
DatabaseName = glueDatabase.Name,
Schedule = "cron(0 1 * * ? *)",
Name = $"events_crawler_{environmentName}",
Role = glueRole.Arn,
Tags = tags,
Configuration = JsonSerializer.Serialize(new Dictionary<string, object?>
{
["Grouping"] = new Dictionary<string, object?>
{
["TableGroupingPolicy"] = "CombineCompatibleSchemas",
},
["CrawlerOutput"] = new Dictionary<string, object?>
{
["Partitions"] = new Dictionary<string, object?>
{
["AddOrUpdateBehavior"] = "InheritFromTable",
},
},
["Version"] = 1,
}),
S3Targets = new[]
{
new Aws.Glue.Inputs.CrawlerS3TargetArgs
{
Path = $"s3://{dataLakeBucket.Bucket}",
},
},
});
});
package main
import (
"encoding/json"
"fmt"
"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
tmpJSON0, err := json.Marshal(map[string]interface{}{
"Grouping": map[string]interface{}{
"TableGroupingPolicy": "CombineCompatibleSchemas",
},
"CrawlerOutput": map[string]interface{}{
"Partitions": map[string]interface{}{
"AddOrUpdateBehavior": "InheritFromTable",
},
},
"Version": 1,
})
if err != nil {
return err
}
json0 := string(tmpJSON0)
_, err = glue.NewCrawler(ctx, "events_crawler", &glue.CrawlerArgs{
DatabaseName: pulumi.Any(glueDatabase.Name),
Schedule: pulumi.String("cron(0 1 * * ? *)"),
Name: pulumi.String(fmt.Sprintf("events_crawler_%v", environmentName)),
Role: pulumi.Any(glueRole.Arn),
Tags: pulumi.Any(tags),
Configuration: pulumi.String(json0),
S3Targets: glue.CrawlerS3TargetArray{
&glue.CrawlerS3TargetArgs{
Path: pulumi.String(fmt.Sprintf("s3://%v", dataLakeBucket.Bucket)),
},
},
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerS3TargetArgs;
import static com.pulumi.codegen.internal.Serialization.*;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var eventsCrawler = new Crawler("eventsCrawler", CrawlerArgs.builder()
.databaseName(glueDatabase.name())
.schedule("cron(0 1 * * ? *)")
.name(String.format("events_crawler_%s", environmentName))
.role(glueRole.arn())
.tags(tags)
.configuration(serializeJson(
jsonObject(
jsonProperty("Grouping", jsonObject(
jsonProperty("TableGroupingPolicy", "CombineCompatibleSchemas")
)),
jsonProperty("CrawlerOutput", jsonObject(
jsonProperty("Partitions", jsonObject(
jsonProperty("AddOrUpdateBehavior", "InheritFromTable")
))
)),
jsonProperty("Version", 1)
)))
.s3Targets(CrawlerS3TargetArgs.builder()
.path(String.format("s3://%s", dataLakeBucket.bucket()))
.build())
.build());
}
}
resources:
eventsCrawler:
type: aws:glue:Crawler
name: events_crawler
properties:
databaseName: ${glueDatabase.name}
schedule: cron(0 1 * * ? *)
name: events_crawler_${environmentName}
role: ${glueRole.arn}
tags: ${tags}
configuration:
fn::toJSON:
Grouping:
TableGroupingPolicy: CombineCompatibleSchemas
CrawlerOutput:
Partitions:
AddOrUpdateBehavior: InheritFromTable
Version: 1
s3Targets:
- path: s3://${dataLakeBucket.bucket}
Import
Use pulumi import to import Glue Crawlers by name. For example:
$ pulumi import aws:glue/crawler:Crawler MyJob MyJob
Constructors
Link copied to clipboard
fun CrawlerArgs(catalogTargets: Output<List<CrawlerCatalogTargetArgs>>? = null, classifiers: Output<List<String>>? = null, configuration: Output<String>? = null, databaseName: Output<String>? = null, deltaTargets: Output<List<CrawlerDeltaTargetArgs>>? = null, description: Output<String>? = null, dynamodbTargets: Output<List<CrawlerDynamodbTargetArgs>>? = null, hudiTargets: Output<List<CrawlerHudiTargetArgs>>? = null, icebergTargets: Output<List<CrawlerIcebergTargetArgs>>? = null, jdbcTargets: Output<List<CrawlerJdbcTargetArgs>>? = null, lakeFormationConfiguration: Output<CrawlerLakeFormationConfigurationArgs>? = null, lineageConfiguration: Output<CrawlerLineageConfigurationArgs>? = null, mongodbTargets: Output<List<CrawlerMongodbTargetArgs>>? = null, name: Output<String>? = null, recrawlPolicy: Output<CrawlerRecrawlPolicyArgs>? = null, role: Output<String>? = null, s3Targets: Output<List<CrawlerS3TargetArgs>>? = null, schedule: Output<String>? = null, schemaChangePolicy: Output<CrawlerSchemaChangePolicyArgs>? = null, securityConfiguration: Output<String>? = null, tablePrefix: Output<String>? = null, tags: Output<Map<String, String>>? = null)
Functions
Properties
Link copied to clipboard
JSON string of configuration information. For more details see Setting Crawler Configuration Options.
Link copied to clipboard
Link copied to clipboard
Link copied to clipboard
A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
Link copied to clipboard