pulumi-aws-kotlin/com.pulumi.aws.glue.kotlin/CrawlerArgs

CrawlerArgs

data class CrawlerArgs(val catalogTargets: Output<List<CrawlerCatalogTargetArgs>>? = null, val classifiers: Output<List<String>>? = null, val configuration: Output<String>? = null, val databaseName: Output<String>? = null, val deltaTargets: Output<List<CrawlerDeltaTargetArgs>>? = null, val description: Output<String>? = null, val dynamodbTargets: Output<List<CrawlerDynamodbTargetArgs>>? = null, val hudiTargets: Output<List<CrawlerHudiTargetArgs>>? = null, val icebergTargets: Output<List<CrawlerIcebergTargetArgs>>? = null, val jdbcTargets: Output<List<CrawlerJdbcTargetArgs>>? = null, val lakeFormationConfiguration: Output<CrawlerLakeFormationConfigurationArgs>? = null, val lineageConfiguration: Output<CrawlerLineageConfigurationArgs>? = null, val mongodbTargets: Output<List<CrawlerMongodbTargetArgs>>? = null, val name: Output<String>? = null, val recrawlPolicy: Output<CrawlerRecrawlPolicyArgs>? = null, val role: Output<String>? = null, val s3Targets: Output<List<CrawlerS3TargetArgs>>? = null, val schedule: Output<String>? = null, val schemaChangePolicy: Output<CrawlerSchemaChangePolicyArgs>? = null, val securityConfiguration: Output<String>? = null, val tablePrefix: Output<String>? = null, val tags: Output<Map<String, String>>? = null) : ConvertibleToJava<CrawlerArgs>

Manages a Glue Crawler. More information can be found in the AWS Glue Developer Guide.

Example Usage

DynamoDB Target Example

import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const example = new aws.glue.Crawler("example", {
    databaseName: exampleAwsGlueCatalogDatabase.name,
    name: "example",
    role: exampleAwsIamRole.arn,
    dynamodbTargets: [{
        path: "table-name",
    }],
});

import pulumi
import pulumi_aws as aws
example = aws.glue.Crawler("example",
    database_name=example_aws_glue_catalog_database["name"],
    name="example",
    role=example_aws_iam_role["arn"],
    dynamodb_targets=[{
        "path": "table-name",
    }])

using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;
return await Deployment.RunAsync(() =>
{
    var example = new Aws.Glue.Crawler("example", new()
    {
        DatabaseName = exampleAwsGlueCatalogDatabase.Name,
        Name = "example",
        Role = exampleAwsIamRole.Arn,
        DynamodbTargets = new[]
        {
            new Aws.Glue.Inputs.CrawlerDynamodbTargetArgs
            {
                Path = "table-name",
            },
        },
    });
});

package main
import (
	"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
			DatabaseName: pulumi.Any(exampleAwsGlueCatalogDatabase.Name),
			Name:         pulumi.String("example"),
			Role:         pulumi.Any(exampleAwsIamRole.Arn),
			DynamodbTargets: glue.CrawlerDynamodbTargetArray{
				&glue.CrawlerDynamodbTargetArgs{
					Path: pulumi.String("table-name"),
				},
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}

package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerDynamodbTargetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }
    public static void stack(Context ctx) {
        var example = new Crawler("example", CrawlerArgs.builder()
            .databaseName(exampleAwsGlueCatalogDatabase.name())
            .name("example")
            .role(exampleAwsIamRole.arn())
            .dynamodbTargets(CrawlerDynamodbTargetArgs.builder()
                .path("table-name")
                .build())
            .build());
    }
}

resources:
  example:
    type: aws:glue:Crawler
    properties:
      databaseName: ${exampleAwsGlueCatalogDatabase.name}
      name: example
      role: ${exampleAwsIamRole.arn}
      dynamodbTargets:
        - path: table-name

JDBC Target Example

import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const example = new aws.glue.Crawler("example", {
    databaseName: exampleAwsGlueCatalogDatabase.name,
    name: "example",
    role: exampleAwsIamRole.arn,
    jdbcTargets: [{
        connectionName: exampleAwsGlueConnection.name,
        path: "database-name/%",
    }],
});

import pulumi
import pulumi_aws as aws
example = aws.glue.Crawler("example",
    database_name=example_aws_glue_catalog_database["name"],
    name="example",
    role=example_aws_iam_role["arn"],
    jdbc_targets=[{
        "connection_name": example_aws_glue_connection["name"],
        "path": "database-name/%",
    }])

using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;
return await Deployment.RunAsync(() =>
{
    var example = new Aws.Glue.Crawler("example", new()
    {
        DatabaseName = exampleAwsGlueCatalogDatabase.Name,
        Name = "example",
        Role = exampleAwsIamRole.Arn,
        JdbcTargets = new[]
        {
            new Aws.Glue.Inputs.CrawlerJdbcTargetArgs
            {
                ConnectionName = exampleAwsGlueConnection.Name,
                Path = "database-name/%",
            },
        },
    });
});

package main
import (
	"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
			DatabaseName: pulumi.Any(exampleAwsGlueCatalogDatabase.Name),
			Name:         pulumi.String("example"),
			Role:         pulumi.Any(exampleAwsIamRole.Arn),
			JdbcTargets: glue.CrawlerJdbcTargetArray{
				&glue.CrawlerJdbcTargetArgs{
					ConnectionName: pulumi.Any(exampleAwsGlueConnection.Name),
					Path:           pulumi.String("database-name/%"),
				},
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}

package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerJdbcTargetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }
    public static void stack(Context ctx) {
        var example = new Crawler("example", CrawlerArgs.builder()
            .databaseName(exampleAwsGlueCatalogDatabase.name())
            .name("example")
            .role(exampleAwsIamRole.arn())
            .jdbcTargets(CrawlerJdbcTargetArgs.builder()
                .connectionName(exampleAwsGlueConnection.name())
                .path("database-name/%")
                .build())
            .build());
    }
}

resources:
  example:
    type: aws:glue:Crawler
    properties:
      databaseName: ${exampleAwsGlueCatalogDatabase.name}
      name: example
      role: ${exampleAwsIamRole.arn}
      jdbcTargets:
        - connectionName: ${exampleAwsGlueConnection.name}
          path: database-name/%

S3 Target Example

import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const example = new aws.glue.Crawler("example", {
    databaseName: exampleAwsGlueCatalogDatabase.name,
    name: "example",
    role: exampleAwsIamRole.arn,
    s3Targets: [{
        path: `s3://${exampleAwsS3Bucket.bucket}`,
    }],
});

import pulumi
import pulumi_aws as aws
example = aws.glue.Crawler("example",
    database_name=example_aws_glue_catalog_database["name"],
    name="example",
    role=example_aws_iam_role["arn"],
    s3_targets=[{
        "path": f"s3://{example_aws_s3_bucket['bucket']}",
    }])

using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;
return await Deployment.RunAsync(() =>
{
    var example = new Aws.Glue.Crawler("example", new()
    {
        DatabaseName = exampleAwsGlueCatalogDatabase.Name,
        Name = "example",
        Role = exampleAwsIamRole.Arn,
        S3Targets = new[]
        {
            new Aws.Glue.Inputs.CrawlerS3TargetArgs
            {
                Path = $"s3://{exampleAwsS3Bucket.Bucket}",
            },
        },
    });
});

package main
import (
	"fmt"
	"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
			DatabaseName: pulumi.Any(exampleAwsGlueCatalogDatabase.Name),
			Name:         pulumi.String("example"),
			Role:         pulumi.Any(exampleAwsIamRole.Arn),
			S3Targets: glue.CrawlerS3TargetArray{
				&glue.CrawlerS3TargetArgs{
					Path: pulumi.Sprintf("s3://%v", exampleAwsS3Bucket.Bucket),
				},
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}

package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerS3TargetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }
    public static void stack(Context ctx) {
        var example = new Crawler("example", CrawlerArgs.builder()
            .databaseName(exampleAwsGlueCatalogDatabase.name())
            .name("example")
            .role(exampleAwsIamRole.arn())
            .s3Targets(CrawlerS3TargetArgs.builder()
                .path(String.format("s3://%s", exampleAwsS3Bucket.bucket()))
                .build())
            .build());
    }
}

resources:
  example:
    type: aws:glue:Crawler
    properties:
      databaseName: ${exampleAwsGlueCatalogDatabase.name}
      name: example
      role: ${exampleAwsIamRole.arn}
      s3Targets:
        - path: s3://${exampleAwsS3Bucket.bucket}

Catalog Target Example

import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const example = new aws.glue.Crawler("example", {
    databaseName: exampleAwsGlueCatalogDatabase.name,
    name: "example",
    role: exampleAwsIamRole.arn,
    catalogTargets: [{
        databaseName: exampleAwsGlueCatalogDatabase.name,
        tables: [exampleAwsGlueCatalogTable.name],
    }],
    schemaChangePolicy: {
        deleteBehavior: "LOG",
    },
    configuration: `{
  "Version":1.0,
  "Grouping": {
    "TableGroupingPolicy": "CombineCompatibleSchemas"
  }
}
`,
});

import pulumi
import pulumi_aws as aws
example = aws.glue.Crawler("example",
    database_name=example_aws_glue_catalog_database["name"],
    name="example",
    role=example_aws_iam_role["arn"],
    catalog_targets=[{
        "database_name": example_aws_glue_catalog_database["name"],
        "tables": [example_aws_glue_catalog_table["name"]],
    }],
    schema_change_policy={
        "delete_behavior": "LOG",
    },
    configuration="""{
  "Version":1.0,
  "Grouping": {
    "TableGroupingPolicy": "CombineCompatibleSchemas"
  }
}
""")

using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;
return await Deployment.RunAsync(() =>
{
    var example = new Aws.Glue.Crawler("example", new()
    {
        DatabaseName = exampleAwsGlueCatalogDatabase.Name,
        Name = "example",
        Role = exampleAwsIamRole.Arn,
        CatalogTargets = new[]
        {
            new Aws.Glue.Inputs.CrawlerCatalogTargetArgs
            {
                DatabaseName = exampleAwsGlueCatalogDatabase.Name,
                Tables = new[]
                {
                    exampleAwsGlueCatalogTable.Name,
                },
            },
        },
        SchemaChangePolicy = new Aws.Glue.Inputs.CrawlerSchemaChangePolicyArgs
        {
            DeleteBehavior = "LOG",
        },
        Configuration = @"{
  ""Version"":1.0,
  ""Grouping"": {
    ""TableGroupingPolicy"": ""CombineCompatibleSchemas""
  }
}
",
    });
});

package main
import (
	"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
			DatabaseName: pulumi.Any(exampleAwsGlueCatalogDatabase.Name),
			Name:         pulumi.String("example"),
			Role:         pulumi.Any(exampleAwsIamRole.Arn),
			CatalogTargets: glue.CrawlerCatalogTargetArray{
				&glue.CrawlerCatalogTargetArgs{
					DatabaseName: pulumi.Any(exampleAwsGlueCatalogDatabase.Name),
					Tables: pulumi.StringArray{
						exampleAwsGlueCatalogTable.Name,
					},
				},
			},
			SchemaChangePolicy: &glue.CrawlerSchemaChangePolicyArgs{
				DeleteBehavior: pulumi.String("LOG"),
			},
			Configuration: pulumi.String(`{
  "Version":1.0,
  "Grouping": {
    "TableGroupingPolicy": "CombineCompatibleSchemas"
  }
}
`),
		})
		if err != nil {
			return err
		}
		return nil
	})
}

package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerCatalogTargetArgs;
import com.pulumi.aws.glue.inputs.CrawlerSchemaChangePolicyArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }
    public static void stack(Context ctx) {
        var example = new Crawler("example", CrawlerArgs.builder()
            .databaseName(exampleAwsGlueCatalogDatabase.name())
            .name("example")
            .role(exampleAwsIamRole.arn())
            .catalogTargets(CrawlerCatalogTargetArgs.builder()
                .databaseName(exampleAwsGlueCatalogDatabase.name())
                .tables(exampleAwsGlueCatalogTable.name())
                .build())
            .schemaChangePolicy(CrawlerSchemaChangePolicyArgs.builder()
                .deleteBehavior("LOG")
                .build())
            .configuration("""
{
  "Version":1.0,
  "Grouping": {
    "TableGroupingPolicy": "CombineCompatibleSchemas"
  }
}
            """)
            .build());
    }
}

resources:
  example:
    type: aws:glue:Crawler
    properties:
      databaseName: ${exampleAwsGlueCatalogDatabase.name}
      name: example
      role: ${exampleAwsIamRole.arn}
      catalogTargets:
        - databaseName: ${exampleAwsGlueCatalogDatabase.name}
          tables:
            - ${exampleAwsGlueCatalogTable.name}
      schemaChangePolicy:
        deleteBehavior: LOG
      configuration: |
        {
          "Version":1.0,
          "Grouping": {
            "TableGroupingPolicy": "CombineCompatibleSchemas"
          }
        }

MongoDB Target Example

import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const example = new aws.glue.Crawler("example", {
    databaseName: exampleAwsGlueCatalogDatabase.name,
    name: "example",
    role: exampleAwsIamRole.arn,
    mongodbTargets: [{
        connectionName: exampleAwsGlueConnection.name,
        path: "database-name/%",
    }],
});

import pulumi
import pulumi_aws as aws
example = aws.glue.Crawler("example",
    database_name=example_aws_glue_catalog_database["name"],
    name="example",
    role=example_aws_iam_role["arn"],
    mongodb_targets=[{
        "connection_name": example_aws_glue_connection["name"],
        "path": "database-name/%",
    }])

using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;
return await Deployment.RunAsync(() =>
{
    var example = new Aws.Glue.Crawler("example", new()
    {
        DatabaseName = exampleAwsGlueCatalogDatabase.Name,
        Name = "example",
        Role = exampleAwsIamRole.Arn,
        MongodbTargets = new[]
        {
            new Aws.Glue.Inputs.CrawlerMongodbTargetArgs
            {
                ConnectionName = exampleAwsGlueConnection.Name,
                Path = "database-name/%",
            },
        },
    });
});

package main
import (
	"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
			DatabaseName: pulumi.Any(exampleAwsGlueCatalogDatabase.Name),
			Name:         pulumi.String("example"),
			Role:         pulumi.Any(exampleAwsIamRole.Arn),
			MongodbTargets: glue.CrawlerMongodbTargetArray{
				&glue.CrawlerMongodbTargetArgs{
					ConnectionName: pulumi.Any(exampleAwsGlueConnection.Name),
					Path:           pulumi.String("database-name/%"),
				},
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}

package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerMongodbTargetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }
    public static void stack(Context ctx) {
        var example = new Crawler("example", CrawlerArgs.builder()
            .databaseName(exampleAwsGlueCatalogDatabase.name())
            .name("example")
            .role(exampleAwsIamRole.arn())
            .mongodbTargets(CrawlerMongodbTargetArgs.builder()
                .connectionName(exampleAwsGlueConnection.name())
                .path("database-name/%")
                .build())
            .build());
    }
}

resources:
  example:
    type: aws:glue:Crawler
    properties:
      databaseName: ${exampleAwsGlueCatalogDatabase.name}
      name: example
      role: ${exampleAwsIamRole.arn}
      mongodbTargets:
        - connectionName: ${exampleAwsGlueConnection.name}
          path: database-name/%

Configuration Settings Example

import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const eventsCrawler = new aws.glue.Crawler("events_crawler", {
    databaseName: glueDatabase.name,
    schedule: "cron(0 1 * * ? *)",
    name: `events_crawler_${environmentName}`,
    role: glueRole.arn,
    tags: tags,
    configuration: JSON.stringify({
        Grouping: {
            TableGroupingPolicy: "CombineCompatibleSchemas",
        },
        CrawlerOutput: {
            Partitions: {
                AddOrUpdateBehavior: "InheritFromTable",
            },
        },
        Version: 1,
    }),
    s3Targets: [{
        path: `s3://${dataLakeBucket.bucket}`,
    }],
});

import pulumi
import json
import pulumi_aws as aws
events_crawler = aws.glue.Crawler("events_crawler",
    database_name=glue_database["name"],
    schedule="cron(0 1 * * ? *)",
    name=f"events_crawler_{environment_name}",
    role=glue_role["arn"],
    tags=tags,
    configuration=json.dumps({
        "Grouping": {
            "TableGroupingPolicy": "CombineCompatibleSchemas",
        },
        "CrawlerOutput": {
            "Partitions": {
                "AddOrUpdateBehavior": "InheritFromTable",
            },
        },
        "Version": 1,
    }),
    s3_targets=[{
        "path": f"s3://{data_lake_bucket['bucket']}",
    }])

using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using Pulumi;
using Aws = Pulumi.Aws;
return await Deployment.RunAsync(() =>
{
    var eventsCrawler = new Aws.Glue.Crawler("events_crawler", new()
    {
        DatabaseName = glueDatabase.Name,
        Schedule = "cron(0 1 * * ? *)",
        Name = $"events_crawler_{environmentName}",
        Role = glueRole.Arn,
        Tags = tags,
        Configuration = JsonSerializer.Serialize(new Dictionary<string, object?>
        {
            ["Grouping"] = new Dictionary<string, object?>
            {
                ["TableGroupingPolicy"] = "CombineCompatibleSchemas",
            },
            ["CrawlerOutput"] = new Dictionary<string, object?>
            {
                ["Partitions"] = new Dictionary<string, object?>
                {
                    ["AddOrUpdateBehavior"] = "InheritFromTable",
                },
            },
            ["Version"] = 1,
        }),
        S3Targets = new[]
        {
            new Aws.Glue.Inputs.CrawlerS3TargetArgs
            {
                Path = $"s3://{dataLakeBucket.Bucket}",
            },
        },
    });
});

package main
import (
	"encoding/json"
	"fmt"
	"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		tmpJSON0, err := json.Marshal(map[string]interface{}{
			"Grouping": map[string]interface{}{
				"TableGroupingPolicy": "CombineCompatibleSchemas",
			},
			"CrawlerOutput": map[string]interface{}{
				"Partitions": map[string]interface{}{
					"AddOrUpdateBehavior": "InheritFromTable",
				},
			},
			"Version": 1,
		})
		if err != nil {
			return err
		}
		json0 := string(tmpJSON0)
		_, err = glue.NewCrawler(ctx, "events_crawler", &glue.CrawlerArgs{
			DatabaseName:  pulumi.Any(glueDatabase.Name),
			Schedule:      pulumi.String("cron(0 1 * * ? *)"),
			Name:          pulumi.Sprintf("events_crawler_%v", environmentName),
			Role:          pulumi.Any(glueRole.Arn),
			Tags:          pulumi.Any(tags),
			Configuration: pulumi.String(json0),
			S3Targets: glue.CrawlerS3TargetArray{
				&glue.CrawlerS3TargetArgs{
					Path: pulumi.Sprintf("s3://%v", dataLakeBucket.Bucket),
				},
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}

package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerS3TargetArgs;
import static com.pulumi.codegen.internal.Serialization.*;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }
    public static void stack(Context ctx) {
        var eventsCrawler = new Crawler("eventsCrawler", CrawlerArgs.builder()
            .databaseName(glueDatabase.name())
            .schedule("cron(0 1 * * ? *)")
            .name(String.format("events_crawler_%s", environmentName))
            .role(glueRole.arn())
            .tags(tags)
            .configuration(serializeJson(
                jsonObject(
                    jsonProperty("Grouping", jsonObject(
                        jsonProperty("TableGroupingPolicy", "CombineCompatibleSchemas")
                    )),
                    jsonProperty("CrawlerOutput", jsonObject(
                        jsonProperty("Partitions", jsonObject(
                            jsonProperty("AddOrUpdateBehavior", "InheritFromTable")
                        ))
                    )),
                    jsonProperty("Version", 1)
                )))
            .s3Targets(CrawlerS3TargetArgs.builder()
                .path(String.format("s3://%s", dataLakeBucket.bucket()))
                .build())
            .build());
    }
}

resources:
  eventsCrawler:
    type: aws:glue:Crawler
    name: events_crawler
    properties:
      databaseName: ${glueDatabase.name}
      schedule: cron(0 1 * * ? *)
      name: events_crawler_${environmentName}
      role: ${glueRole.arn}
      tags: ${tags}
      configuration:
        fn::toJSON:
          Grouping:
            TableGroupingPolicy: CombineCompatibleSchemas
          CrawlerOutput:
            Partitions:
              AddOrUpdateBehavior: InheritFromTable
          Version: 1
      s3Targets:
        - path: s3://${dataLakeBucket.bucket}

Import

Using pulumi import, import Glue Crawlers using name. For example:

$ pulumi import aws:glue/crawler:Crawler MyJob MyJob

Constructors

CrawlerArgs

constructor(catalogTargets: Output<List<CrawlerCatalogTargetArgs>>? = null, classifiers: Output<List<String>>? = null, configuration: Output<String>? = null, databaseName: Output<String>? = null, deltaTargets: Output<List<CrawlerDeltaTargetArgs>>? = null, description: Output<String>? = null, dynamodbTargets: Output<List<CrawlerDynamodbTargetArgs>>? = null, hudiTargets: Output<List<CrawlerHudiTargetArgs>>? = null, icebergTargets: Output<List<CrawlerIcebergTargetArgs>>? = null, jdbcTargets: Output<List<CrawlerJdbcTargetArgs>>? = null, lakeFormationConfiguration: Output<CrawlerLakeFormationConfigurationArgs>? = null, lineageConfiguration: Output<CrawlerLineageConfigurationArgs>? = null, mongodbTargets: Output<List<CrawlerMongodbTargetArgs>>? = null, name: Output<String>? = null, recrawlPolicy: Output<CrawlerRecrawlPolicyArgs>? = null, role: Output<String>? = null, s3Targets: Output<List<CrawlerS3TargetArgs>>? = null, schedule: Output<String>? = null, schemaChangePolicy: Output<CrawlerSchemaChangePolicyArgs>? = null, securityConfiguration: Output<String>? = null, tablePrefix: Output<String>? = null, tags: Output<Map<String, String>>? = null)

Properties

catalogTargets

val catalogTargets: Output<List<CrawlerCatalogTargetArgs>>? = null

List of nested AWS Glue Data Catalog target arguments. See Catalog Target below.

classifiers

val classifiers: Output<List<String>>? = null

List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.

configuration

val configuration: Output<String>? = null

JSON string of configuration information. For more details see Setting Crawler Configuration Options.

databaseName

val databaseName: Output<String>? = null

Glue database where results are written.

deltaTargets

val deltaTargets: Output<List<CrawlerDeltaTargetArgs>>? = null

List of nested Delta Lake target arguments. See Delta Target below.

description

val description: Output<String>? = null

Description of the crawler.

dynamodbTargets

val dynamodbTargets: Output<List<CrawlerDynamodbTargetArgs>>? = null

List of nested DynamoDB target arguments. See DynamoDB Target below.

hudiTargets

val hudiTargets: Output<List<CrawlerHudiTargetArgs>>? = null

List of nested Hudi target arguments. See Hudi Target below.

icebergTargets

val icebergTargets: Output<List<CrawlerIcebergTargetArgs>>? = null

List of nested Iceberg target arguments. See Iceberg Target below.

jdbcTargets

val jdbcTargets: Output<List<CrawlerJdbcTargetArgs>>? = null

List of nested JDBC target arguments. See JDBC Target below.

lakeFormationConfiguration

val lakeFormationConfiguration: Output<CrawlerLakeFormationConfigurationArgs>? = null

Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.

lineageConfiguration

val lineageConfiguration: Output<CrawlerLineageConfigurationArgs>? = null

Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.

mongodbTargets

val mongodbTargets: Output<List<CrawlerMongodbTargetArgs>>? = null

List of nested MongoDB target arguments. See MongoDB Target below.

name

val name: Output<String>? = null

Name of the crawler.

recrawlPolicy

val recrawlPolicy: Output<CrawlerRecrawlPolicyArgs>? = null

A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.

role

val role: Output<String>? = null

The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.

s3Targets

val s3Targets: Output<List<CrawlerS3TargetArgs>>? = null

List of nested Amazon S3 target arguments. See S3 Target below.

schedule

val schedule: Output<String>? = null

A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).

schemaChangePolicy

val schemaChangePolicy: Output<CrawlerSchemaChangePolicyArgs>? = null

Policy for the crawler's update and deletion behavior. See Schema Change Policy below.

securityConfiguration

val securityConfiguration: Output<String>? = null

The name of the Security Configuration to be used by the crawler.

tablePrefix

val tablePrefix: Output<String>? = null

The table prefix used for catalog tables that are created.

Functions

toJava

open override fun toJava(): CrawlerArgs