Datascan
Represents a user-visible job that provides insights into the related data source. For more information about Datascan, see:
How-to Guides
Example Usage
Dataplex Datascan Basic Profile
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
// Minimal profile scan: one BigQuery table, on-demand trigger, empty profile spec.
const shakespeareTable = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare";
const basicProfileArgs: gcp.dataplex.DatascanArgs = {
    project: "my-project-name",
    location: "us-central1",
    dataScanId: "dataprofile-basic",
    // The table to scan, given as a full BigQuery resource path.
    data: { resource: shakespeareTable },
    // onDemand is passed as an empty object (no further trigger options).
    executionSpec: { trigger: { onDemand: {} } },
    // Empty spec: no sampling, filter, or field selection is configured here.
    dataProfileSpec: {},
};
const basicProfile = new gcp.dataplex.Datascan("basic_profile", basicProfileArgs);
import pulumi
import pulumi_gcp as gcp
# Minimal profile scan: one BigQuery table, on-demand trigger, empty profile spec.
shakespeare_table = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare"
basic_profile_args = dict(
    project="my-project-name",
    location="us-central1",
    data_scan_id="dataprofile-basic",
    # The table to scan, given as a full BigQuery resource path.
    data={"resource": shakespeare_table},
    # on_demand is passed as an empty dict (no further trigger options).
    execution_spec={"trigger": {"on_demand": {}}},
    # Empty spec: no sampling, filter, or field selection is configured here.
    data_profile_spec={},
)
basic_profile = gcp.dataplex.Datascan("basic_profile", **basic_profile_args)
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
// Minimal profile scan: one BigQuery table, on-demand trigger, empty profile spec.
return await Deployment.RunAsync(() =>
{
    var basicProfile = new Gcp.DataPlex.Datascan("basic_profile", new()
    {
        Location = "us-central1",
        DataScanId = "dataprofile-basic",
        Data = new Gcp.DataPlex.Inputs.DatascanDataArgs
        {
            Resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare",
        },
        ExecutionSpec = new Gcp.DataPlex.Inputs.DatascanExecutionSpecArgs
        {
            Trigger = new Gcp.DataPlex.Inputs.DatascanExecutionSpecTriggerArgs
            {
                // Must be an empty args object, not null: null leaves the trigger unset,
                // whereas the other language examples pass an empty on-demand block.
                OnDemand = new Gcp.DataPlex.Inputs.DatascanExecutionSpecTriggerOnDemandArgs(),
            },
        },
        // Same reasoning: an empty spec object mirrors `dataProfileSpec: {}`; null would omit it.
        DataProfileSpec = new Gcp.DataPlex.Inputs.DatascanDataProfileSpecArgs(),
        Project = "my-project-name",
    });
});
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/dataplex"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main declares a minimal profile scan: one BigQuery table, on-demand trigger,
// empty profile spec.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// The table to scan, given as a full BigQuery resource path.
		table := &dataplex.DatascanDataArgs{
			Resource: pulumi.String("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare"),
		}
		// On-demand trigger with no further options.
		execution := &dataplex.DatascanExecutionSpecArgs{
			Trigger: &dataplex.DatascanExecutionSpecTriggerArgs{
				OnDemand: &dataplex.DatascanExecutionSpecTriggerOnDemandArgs{},
			},
		}
		scanArgs := &dataplex.DatascanArgs{
			Project:         pulumi.String("my-project-name"),
			Location:        pulumi.String("us-central1"),
			DataScanId:      pulumi.String("dataprofile-basic"),
			Data:            table,
			ExecutionSpec:   execution,
			DataProfileSpec: &dataplex.DatascanDataProfileSpecArgs{},
		}
		if _, err := dataplex.NewDatascan(ctx, "basic_profile", scanArgs); err != nil {
			return err
		}
		return nil
	})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataplex.Datascan;
import com.pulumi.gcp.dataplex.DatascanArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerOnDemandArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
/** Pulumi program declaring a minimal on-demand Dataplex profile scan. */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Registers the "basicProfile" Datascan resource on the given stack context. */
    public static void stack(Context ctx) {
        // The table to scan, given as a full BigQuery resource path.
        final var table = DatascanDataArgs.builder()
            .resource("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare")
            .build();

        // On-demand trigger with no further options.
        final var onDemand = DatascanExecutionSpecTriggerOnDemandArgs.builder().build();
        final var trigger = DatascanExecutionSpecTriggerArgs.builder().onDemand(onDemand).build();
        final var execution = DatascanExecutionSpecArgs.builder().trigger(trigger).build();

        // Empty spec: no sampling, filter, or field selection is configured here.
        final var profileSpec = DatascanDataProfileSpecArgs.builder().build();

        final var scanArgs = DatascanArgs.builder()
            .project("my-project-name")
            .location("us-central1")
            .dataScanId("dataprofile-basic")
            .data(table)
            .executionSpec(execution)
            .dataProfileSpec(profileSpec)
            .build();

        var basicProfile = new Datascan("basicProfile", scanArgs);
    }
}
# Minimal profile scan: one BigQuery table, on-demand trigger, empty profile spec.
# Nesting is significant in YAML; every key below must keep its indentation level.
resources:
  basicProfile:
    type: gcp:dataplex:Datascan
    name: basic_profile
    properties:
      location: us-central1
      dataScanId: dataprofile-basic
      data:
        # The table to scan, given as a full BigQuery resource path.
        resource: //bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare
      executionSpec:
        trigger:
          onDemand: {}
      dataProfileSpec: {}
      project: my-project-name
Dataplex Datascan Full Profile
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
// BigQuery dataset that receives the exported profile results.
const source = new gcp.bigquery.Dataset("source", {
    datasetId: "dataplex_dataset",
    friendlyName: "test",
    description: "This is a test description",
    location: "US",
    deleteContentsOnDestroy: true,
});
// Scheduled profile scan with sampling, a row filter, field selection and a BigQuery export.
const fullProfile = new gcp.dataplex.Datascan("full_profile", {
    location: "us-central1",
    displayName: "Full Datascan Profile",
    dataScanId: "dataprofile-full",
    description: "Example resource - Full Datascan Profile",
    labels: {
        author: "billing",
    },
    data: {
        resource: "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare",
    },
    executionSpec: {
        trigger: {
            schedule: {
                cron: "TZ=America/New_York 1 1 * * *",
            },
        },
    },
    dataProfileSpec: {
        samplingPercent: 80,
        // The row filter is a SQL predicate, so it needs a comparison operator.
        rowFilter: "word_count > 10",
        includeFields: {
            fieldNames: ["word_count"],
        },
        excludeFields: {
            fieldNames: ["property_type"],
        },
        postScanActions: {
            bigqueryExport: {
                resultsTable: "//bigquery.googleapis.com/projects/my-project-name/datasets/dataplex_dataset/tables/profile_export",
            },
        },
    },
    project: "my-project-name",
}, {
    // The export targets a table in the dataset above, so create the dataset first.
    dependsOn: [source],
});
import pulumi
import pulumi_gcp as gcp
# BigQuery dataset that receives the exported profile results.
source = gcp.bigquery.Dataset("source",
    dataset_id="dataplex_dataset",
    friendly_name="test",
    description="This is a test description",
    location="US",
    delete_contents_on_destroy=True)
# Scheduled profile scan with sampling, a row filter, field selection and a BigQuery export.
full_profile = gcp.dataplex.Datascan("full_profile",
    location="us-central1",
    display_name="Full Datascan Profile",
    data_scan_id="dataprofile-full",
    description="Example resource - Full Datascan Profile",
    labels={
        "author": "billing",
    },
    data={
        "resource": "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare",
    },
    execution_spec={
        "trigger": {
            "schedule": {
                "cron": "TZ=America/New_York 1 1 * * *",
            },
        },
    },
    data_profile_spec={
        "sampling_percent": 80,
        # The row filter is a SQL predicate, so it needs a comparison operator.
        "row_filter": "word_count > 10",
        "include_fields": {
            "field_names": ["word_count"],
        },
        "exclude_fields": {
            "field_names": ["property_type"],
        },
        "post_scan_actions": {
            "bigquery_export": {
                "results_table": "//bigquery.googleapis.com/projects/my-project-name/datasets/dataplex_dataset/tables/profile_export",
            },
        },
    },
    project="my-project-name",
    # The export targets a table in the dataset above, so create the dataset first.
    opts=pulumi.ResourceOptions(depends_on=[source]))
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
// Scheduled profile scan with sampling, a row filter, field selection and a BigQuery export.
return await Deployment.RunAsync(() =>
{
    // BigQuery dataset that receives the exported profile results.
    var source = new Gcp.BigQuery.Dataset("source", new()
    {
        DatasetId = "dataplex_dataset",
        FriendlyName = "test",
        Description = "This is a test description",
        Location = "US",
        DeleteContentsOnDestroy = true,
    });

    var fullProfile = new Gcp.DataPlex.Datascan("full_profile", new()
    {
        Location = "us-central1",
        DisplayName = "Full Datascan Profile",
        DataScanId = "dataprofile-full",
        Description = "Example resource - Full Datascan Profile",
        Labels =
        {
            { "author", "billing" },
        },
        Data = new Gcp.DataPlex.Inputs.DatascanDataArgs
        {
            Resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare",
        },
        ExecutionSpec = new Gcp.DataPlex.Inputs.DatascanExecutionSpecArgs
        {
            Trigger = new Gcp.DataPlex.Inputs.DatascanExecutionSpecTriggerArgs
            {
                Schedule = new Gcp.DataPlex.Inputs.DatascanExecutionSpecTriggerScheduleArgs
                {
                    Cron = "TZ=America/New_York 1 1 * * *",
                },
            },
        },
        DataProfileSpec = new Gcp.DataPlex.Inputs.DatascanDataProfileSpecArgs
        {
            SamplingPercent = 80,
            // The row filter is a SQL predicate, so it needs a comparison operator.
            RowFilter = "word_count > 10",
            IncludeFields = new Gcp.DataPlex.Inputs.DatascanDataProfileSpecIncludeFieldsArgs
            {
                FieldNames = new[]
                {
                    "word_count",
                },
            },
            ExcludeFields = new Gcp.DataPlex.Inputs.DatascanDataProfileSpecExcludeFieldsArgs
            {
                FieldNames = new[]
                {
                    "property_type",
                },
            },
            PostScanActions = new Gcp.DataPlex.Inputs.DatascanDataProfileSpecPostScanActionsArgs
            {
                BigqueryExport = new Gcp.DataPlex.Inputs.DatascanDataProfileSpecPostScanActionsBigqueryExportArgs
                {
                    ResultsTable = "//bigquery.googleapis.com/projects/my-project-name/datasets/dataplex_dataset/tables/profile_export",
                },
            },
        },
        Project = "my-project-name",
    }, new CustomResourceOptions
    {
        // The export targets a table in the dataset above, so create the dataset first.
        DependsOn =
        {
            source,
        },
    });
});
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/bigquery"
"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/dataplex"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main declares a BigQuery dataset and a scheduled profile scan that exports
// its results into that dataset.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// BigQuery dataset that receives the exported profile results.
		source, err := bigquery.NewDataset(ctx, "source", &bigquery.DatasetArgs{
			DatasetId:               pulumi.String("dataplex_dataset"),
			FriendlyName:            pulumi.String("test"),
			Description:             pulumi.String("This is a test description"),
			Location:                pulumi.String("US"),
			DeleteContentsOnDestroy: pulumi.Bool(true),
		})
		if err != nil {
			return err
		}
		_, err = dataplex.NewDatascan(ctx, "full_profile", &dataplex.DatascanArgs{
			Location:    pulumi.String("us-central1"),
			DisplayName: pulumi.String("Full Datascan Profile"),
			DataScanId:  pulumi.String("dataprofile-full"),
			Description: pulumi.String("Example resource - Full Datascan Profile"),
			Labels: pulumi.StringMap{
				"author": pulumi.String("billing"),
			},
			Data: &dataplex.DatascanDataArgs{
				Resource: pulumi.String("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare"),
			},
			ExecutionSpec: &dataplex.DatascanExecutionSpecArgs{
				Trigger: &dataplex.DatascanExecutionSpecTriggerArgs{
					Schedule: &dataplex.DatascanExecutionSpecTriggerScheduleArgs{
						Cron: pulumi.String("TZ=America/New_York 1 1 * * *"),
					},
				},
			},
			DataProfileSpec: &dataplex.DatascanDataProfileSpecArgs{
				SamplingPercent: pulumi.Float64(80),
				// The row filter is a SQL predicate, so it needs a comparison operator.
				RowFilter: pulumi.String("word_count > 10"),
				IncludeFields: &dataplex.DatascanDataProfileSpecIncludeFieldsArgs{
					FieldNames: pulumi.StringArray{
						pulumi.String("word_count"),
					},
				},
				ExcludeFields: &dataplex.DatascanDataProfileSpecExcludeFieldsArgs{
					FieldNames: pulumi.StringArray{
						pulumi.String("property_type"),
					},
				},
				PostScanActions: &dataplex.DatascanDataProfileSpecPostScanActionsArgs{
					BigqueryExport: &dataplex.DatascanDataProfileSpecPostScanActionsBigqueryExportArgs{
						ResultsTable: pulumi.String("//bigquery.googleapis.com/projects/my-project-name/datasets/dataplex_dataset/tables/profile_export"),
					},
				},
			},
			Project: pulumi.String("my-project-name"),
		}, pulumi.DependsOn([]pulumi.Resource{
			// The export targets a table in the dataset above, so create the dataset first.
			source,
		}))
		if err != nil {
			return err
		}
		return nil
	})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.dataplex.Datascan;
import com.pulumi.gcp.dataplex.DatascanArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerScheduleArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecIncludeFieldsArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecExcludeFieldsArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecPostScanActionsArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecPostScanActionsBigqueryExportArgs;
import com.pulumi.resources.CustomResourceOptions;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
/** Pulumi program declaring a BigQuery dataset and a scheduled Dataplex profile scan that exports into it. */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Registers the "source" dataset and the "fullProfile" Datascan on the given stack context. */
    public static void stack(Context ctx) {
        // BigQuery dataset that receives the exported profile results.
        var source = new Dataset("source", DatasetArgs.builder()
            .datasetId("dataplex_dataset")
            .friendlyName("test")
            .description("This is a test description")
            .location("US")
            .deleteContentsOnDestroy(true)
            .build());

        var fullProfile = new Datascan("fullProfile", DatascanArgs.builder()
            .location("us-central1")
            .displayName("Full Datascan Profile")
            .dataScanId("dataprofile-full")
            .description("Example resource - Full Datascan Profile")
            .labels(Map.of("author", "billing"))
            .data(DatascanDataArgs.builder()
                .resource("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare")
                .build())
            .executionSpec(DatascanExecutionSpecArgs.builder()
                .trigger(DatascanExecutionSpecTriggerArgs.builder()
                    .schedule(DatascanExecutionSpecTriggerScheduleArgs.builder()
                        .cron("TZ=America/New_York 1 1 * * *")
                        .build())
                    .build())
                .build())
            .dataProfileSpec(DatascanDataProfileSpecArgs.builder()
                .samplingPercent(80.0)
                // The row filter is a SQL predicate, so it needs a comparison operator.
                .rowFilter("word_count > 10")
                .includeFields(DatascanDataProfileSpecIncludeFieldsArgs.builder()
                    .fieldNames("word_count")
                    .build())
                .excludeFields(DatascanDataProfileSpecExcludeFieldsArgs.builder()
                    .fieldNames("property_type")
                    .build())
                .postScanActions(DatascanDataProfileSpecPostScanActionsArgs.builder()
                    .bigqueryExport(DatascanDataProfileSpecPostScanActionsBigqueryExportArgs.builder()
                        .resultsTable("//bigquery.googleapis.com/projects/my-project-name/datasets/dataplex_dataset/tables/profile_export")
                        .build())
                    .build())
                .build())
            .project("my-project-name")
            .build(), CustomResourceOptions.builder()
                // The export targets a table in the dataset above, so create the dataset first.
                .dependsOn(source)
                .build());
    }
}
# Scheduled profile scan with sampling, a row filter, field selection and a BigQuery export,
# plus the dataset that receives the export.
# Nesting is significant in YAML; every key below must keep its indentation level.
resources:
  fullProfile:
    type: gcp:dataplex:Datascan
    name: full_profile
    properties:
      location: us-central1
      displayName: Full Datascan Profile
      dataScanId: dataprofile-full
      description: Example resource - Full Datascan Profile
      labels:
        author: billing
      data:
        resource: //bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare
      executionSpec:
        trigger:
          schedule:
            cron: TZ=America/New_York 1 1 * * *
      dataProfileSpec:
        samplingPercent: 80
        # The row filter is a SQL predicate, so it needs a comparison operator.
        rowFilter: word_count > 10
        includeFields:
          fieldNames:
            - word_count
        excludeFields:
          fieldNames:
            - property_type
        postScanActions:
          bigqueryExport:
            resultsTable: //bigquery.googleapis.com/projects/my-project-name/datasets/dataplex_dataset/tables/profile_export
      project: my-project-name
    options:
      # The export targets a table in the dataset below, so create the dataset first.
      dependsOn:
        - ${source}
  source:
    type: gcp:bigquery:Dataset
    properties:
      datasetId: dataplex_dataset
      friendlyName: test
      description: This is a test description
      location: US
      deleteContentsOnDestroy: true
Dataplex Datascan Basic Quality
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
// Minimal on-demand data quality scan with a single table-level rule.
const basicQuality = new gcp.dataplex.Datascan("basic_quality", {
    location: "us-central1",
    dataScanId: "dataquality-basic",
    data: {
        resource: "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare",
    },
    executionSpec: {
        trigger: {
            onDemand: {},
        },
    },
    dataQualitySpec: {
        rules: [{
            dimension: "VALIDITY",
            name: "rule1",
            description: "rule 1 for validity dimension",
            tableConditionExpectation: {
                // The expression is a SQL condition, so it needs a comparison operator.
                sqlExpression: "COUNT(*) > 0",
            },
        }],
    },
    project: "my-project-name",
});
import pulumi
import pulumi_gcp as gcp
# Minimal on-demand data quality scan with a single table-level rule.
basic_quality = gcp.dataplex.Datascan("basic_quality",
    location="us-central1",
    data_scan_id="dataquality-basic",
    data={
        "resource": "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare",
    },
    execution_spec={
        "trigger": {
            "on_demand": {},
        },
    },
    data_quality_spec={
        "rules": [{
            "dimension": "VALIDITY",
            "name": "rule1",
            "description": "rule 1 for validity dimension",
            "table_condition_expectation": {
                # The expression is a SQL condition, so it needs a comparison operator.
                "sql_expression": "COUNT(*) > 0",
            },
        }],
    },
    project="my-project-name")
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
// Minimal on-demand data quality scan with a single table-level rule.
return await Deployment.RunAsync(() =>
{
    var basicQuality = new Gcp.DataPlex.Datascan("basic_quality", new()
    {
        Location = "us-central1",
        DataScanId = "dataquality-basic",
        Data = new Gcp.DataPlex.Inputs.DatascanDataArgs
        {
            Resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare",
        },
        ExecutionSpec = new Gcp.DataPlex.Inputs.DatascanExecutionSpecArgs
        {
            Trigger = new Gcp.DataPlex.Inputs.DatascanExecutionSpecTriggerArgs
            {
                // Must be an empty args object, not null: null leaves the trigger unset,
                // whereas the other language examples pass an empty on-demand block.
                OnDemand = new Gcp.DataPlex.Inputs.DatascanExecutionSpecTriggerOnDemandArgs(),
            },
        },
        DataQualitySpec = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecArgs
        {
            Rules = new[]
            {
                new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
                {
                    Dimension = "VALIDITY",
                    Name = "rule1",
                    Description = "rule 1 for validity dimension",
                    TableConditionExpectation = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleTableConditionExpectationArgs
                    {
                        // The expression is a SQL condition, so it needs a comparison operator.
                        SqlExpression = "COUNT(*) > 0",
                    },
                },
            },
        },
        Project = "my-project-name",
    });
});
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/dataplex"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main declares a minimal on-demand data quality scan with a single
// table-level rule.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := dataplex.NewDatascan(ctx, "basic_quality", &dataplex.DatascanArgs{
			Location:   pulumi.String("us-central1"),
			DataScanId: pulumi.String("dataquality-basic"),
			Data: &dataplex.DatascanDataArgs{
				Resource: pulumi.String("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare"),
			},
			ExecutionSpec: &dataplex.DatascanExecutionSpecArgs{
				Trigger: &dataplex.DatascanExecutionSpecTriggerArgs{
					OnDemand: &dataplex.DatascanExecutionSpecTriggerOnDemandArgs{},
				},
			},
			DataQualitySpec: &dataplex.DatascanDataQualitySpecArgs{
				Rules: dataplex.DatascanDataQualitySpecRuleArray{
					&dataplex.DatascanDataQualitySpecRuleArgs{
						Dimension:   pulumi.String("VALIDITY"),
						Name:        pulumi.String("rule1"),
						Description: pulumi.String("rule 1 for validity dimension"),
						TableConditionExpectation: &dataplex.DatascanDataQualitySpecRuleTableConditionExpectationArgs{
							// The expression is a SQL condition, so it needs a comparison operator.
							SqlExpression: pulumi.String("COUNT(*) > 0"),
						},
					},
				},
			},
			Project: pulumi.String("my-project-name"),
		})
		if err != nil {
			return err
		}
		return nil
	})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataplex.Datascan;
import com.pulumi.gcp.dataplex.DatascanArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerOnDemandArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var basicQuality = new Datascan("basicQuality", DatascanArgs.builder()
.location("us-central1")
.dataScanId("dataquality-basic")
.data(DatascanDataArgs.builder()
.resource("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare")
.build())
.executionSpec(DatascanExecutionSpecArgs.builder()
.trigger(DatascanExecutionSpecTriggerArgs.builder()
.onDemand(DatascanExecutionSpecTriggerOnDemandArgs.builder()
.build())
.build())
.build())
.dataQualitySpec(DatascanDataQualitySpecArgs.builder()
.rules(DatascanDataQualitySpecRuleArgs.builder()
.dimension("VALIDITY")
.name("rule1")
.description("rule 1 for validity dimension")
.tableConditionExpectation(DatascanDataQualitySpecRuleTableConditionExpectationArgs.builder()
.sqlExpression("COUNT(*) 0")
.build())
.build())
.build())
.project("my-project-name")
.build());
}
}
# Minimal on-demand data quality scan with a single table-level rule.
# Nesting is significant in YAML; every key below must keep its indentation level.
resources:
  basicQuality:
    type: gcp:dataplex:Datascan
    name: basic_quality
    properties:
      location: us-central1
      dataScanId: dataquality-basic
      data:
        resource: //bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare
      executionSpec:
        trigger:
          onDemand: {}
      dataQualitySpec:
        rules:
          - dimension: VALIDITY
            name: rule1
            description: rule 1 for validity dimension
            tableConditionExpectation:
              # The expression is a SQL condition, so it needs a comparison operator.
              sqlExpression: COUNT(*) > 0
      project: my-project-name
Dataplex Datascan Full Quality
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
// Scheduled data quality scan showing one rule of each expectation type,
// plus an e-mail notification report.
const fullQuality = new gcp.dataplex.Datascan("full_quality", {
    location: "us-central1",
    displayName: "Full Datascan Quality",
    dataScanId: "dataquality-full",
    description: "Example resource - Full Datascan Quality",
    labels: {
        author: "billing",
    },
    data: {
        resource: "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations",
    },
    executionSpec: {
        trigger: {
            schedule: {
                cron: "TZ=America/New_York 1 1 * * *",
            },
        },
        field: "modified_date",
    },
    dataQualitySpec: {
        samplingPercent: 5,
        // The row filter is a SQL predicate, so it needs a comparison operator.
        rowFilter: "station_id > 1000",
        postScanActions: {
            notificationReport: {
                recipients: {
                    emails: ["jane.doe@example.com"],
                },
                scoreThresholdTrigger: {
                    scoreThreshold: 86,
                },
            },
        },
        rules: [
            // Non-null expectation on a column.
            {
                column: "address",
                dimension: "VALIDITY",
                threshold: 0.99,
                nonNullExpectation: {},
            },
            // Range expectation; bounds are passed as strings.
            {
                column: "council_district",
                dimension: "VALIDITY",
                ignoreNull: true,
                threshold: 0.9,
                rangeExpectation: {
                    minValue: "1",
                    maxValue: "10",
                    strictMinEnabled: true,
                    strictMaxEnabled: false,
                },
            },
            // Regex expectation.
            {
                column: "power_type",
                dimension: "VALIDITY",
                ignoreNull: false,
                regexExpectation: {
                    regex: ".*solar.*",
                },
            },
            // Set-membership expectation.
            {
                column: "property_type",
                dimension: "VALIDITY",
                ignoreNull: false,
                setExpectation: {
                    values: [
                        "sidewalk",
                        "parkland",
                    ],
                },
            },
            // Uniqueness expectation.
            {
                column: "address",
                dimension: "UNIQUENESS",
                uniquenessExpectation: {},
            },
            // Aggregate-statistic range expectation.
            {
                column: "number_of_docks",
                dimension: "VALIDITY",
                statisticRangeExpectation: {
                    statistic: "MEAN",
                    minValue: "5",
                    maxValue: "15",
                    strictMinEnabled: true,
                    strictMaxEnabled: true,
                },
            },
            // Row-level SQL condition; both bounds need comparison operators.
            {
                column: "footprint_length",
                dimension: "VALIDITY",
                rowConditionExpectation: {
                    sqlExpression: "footprint_length > 0 AND footprint_length <= 10",
                },
            },
            // Table-level SQL condition.
            {
                dimension: "VALIDITY",
                tableConditionExpectation: {
                    sqlExpression: "COUNT(*) > 0",
                },
            },
            // Free-form SQL assertion statement.
            {
                dimension: "VALIDITY",
                sqlAssertion: {
                    sqlStatement: "select * from bigquery-public-data.austin_bikeshare.bikeshare_stations where station_id is null",
                },
            },
        ],
    },
    project: "my-project-name",
});
import pulumi
import pulumi_gcp as gcp
# Scheduled data quality scan showing one rule of each expectation type,
# plus an e-mail notification report.
full_quality = gcp.dataplex.Datascan("full_quality",
    location="us-central1",
    display_name="Full Datascan Quality",
    data_scan_id="dataquality-full",
    description="Example resource - Full Datascan Quality",
    labels={
        "author": "billing",
    },
    data={
        "resource": "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations",
    },
    execution_spec={
        "trigger": {
            "schedule": {
                "cron": "TZ=America/New_York 1 1 * * *",
            },
        },
        "field": "modified_date",
    },
    data_quality_spec={
        "sampling_percent": 5,
        # The row filter is a SQL predicate, so it needs a comparison operator.
        "row_filter": "station_id > 1000",
        "post_scan_actions": {
            "notification_report": {
                "recipients": {
                    "emails": ["jane.doe@example.com"],
                },
                "score_threshold_trigger": {
                    "score_threshold": 86,
                },
            },
        },
        "rules": [
            # Non-null expectation on a column.
            {
                "column": "address",
                "dimension": "VALIDITY",
                "threshold": 0.99,
                "non_null_expectation": {},
            },
            # Range expectation; bounds are passed as strings.
            {
                "column": "council_district",
                "dimension": "VALIDITY",
                "ignore_null": True,
                "threshold": 0.9,
                "range_expectation": {
                    "min_value": "1",
                    "max_value": "10",
                    "strict_min_enabled": True,
                    "strict_max_enabled": False,
                },
            },
            # Regex expectation.
            {
                "column": "power_type",
                "dimension": "VALIDITY",
                "ignore_null": False,
                "regex_expectation": {
                    "regex": ".*solar.*",
                },
            },
            # Set-membership expectation.
            {
                "column": "property_type",
                "dimension": "VALIDITY",
                "ignore_null": False,
                "set_expectation": {
                    "values": [
                        "sidewalk",
                        "parkland",
                    ],
                },
            },
            # Uniqueness expectation.
            {
                "column": "address",
                "dimension": "UNIQUENESS",
                "uniqueness_expectation": {},
            },
            # Aggregate-statistic range expectation.
            {
                "column": "number_of_docks",
                "dimension": "VALIDITY",
                "statistic_range_expectation": {
                    "statistic": "MEAN",
                    "min_value": "5",
                    "max_value": "15",
                    "strict_min_enabled": True,
                    "strict_max_enabled": True,
                },
            },
            # Row-level SQL condition; both bounds need comparison operators.
            {
                "column": "footprint_length",
                "dimension": "VALIDITY",
                "row_condition_expectation": {
                    "sql_expression": "footprint_length > 0 AND footprint_length <= 10",
                },
            },
            # Table-level SQL condition.
            {
                "dimension": "VALIDITY",
                "table_condition_expectation": {
                    "sql_expression": "COUNT(*) > 0",
                },
            },
            # Free-form SQL assertion statement.
            {
                "dimension": "VALIDITY",
                "sql_assertion": {
                    "sql_statement": "select * from bigquery-public-data.austin_bikeshare.bikeshare_stations where station_id is null",
                },
            },
        ],
    },
    project="my-project-name")
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
// Scheduled data quality scan showing one rule of each expectation type,
// plus an e-mail notification report.
return await Deployment.RunAsync(() =>
{
    var fullQuality = new Gcp.DataPlex.Datascan("full_quality", new()
    {
        Location = "us-central1",
        DisplayName = "Full Datascan Quality",
        DataScanId = "dataquality-full",
        Description = "Example resource - Full Datascan Quality",
        Labels =
        {
            { "author", "billing" },
        },
        Data = new Gcp.DataPlex.Inputs.DatascanDataArgs
        {
            Resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations",
        },
        ExecutionSpec = new Gcp.DataPlex.Inputs.DatascanExecutionSpecArgs
        {
            Trigger = new Gcp.DataPlex.Inputs.DatascanExecutionSpecTriggerArgs
            {
                Schedule = new Gcp.DataPlex.Inputs.DatascanExecutionSpecTriggerScheduleArgs
                {
                    Cron = "TZ=America/New_York 1 1 * * *",
                },
            },
            Field = "modified_date",
        },
        DataQualitySpec = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecArgs
        {
            SamplingPercent = 5,
            // The row filter is a SQL predicate, so it needs a comparison operator.
            RowFilter = "station_id > 1000",
            PostScanActions = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecPostScanActionsArgs
            {
                NotificationReport = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecPostScanActionsNotificationReportArgs
                {
                    Recipients = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecPostScanActionsNotificationReportRecipientsArgs
                    {
                        Emails = new[]
                        {
                            "jane.doe@example.com",
                        },
                    },
                    ScoreThresholdTrigger = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecPostScanActionsNotificationReportScoreThresholdTriggerArgs
                    {
                        ScoreThreshold = 86,
                    },
                },
            },
            Rules = new[]
            {
                // Non-null expectation. The empty args object is required: with null the
                // rule would carry no expectation at all.
                new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
                {
                    Column = "address",
                    Dimension = "VALIDITY",
                    Threshold = 0.99,
                    NonNullExpectation = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleNonNullExpectationArgs(),
                },
                // Range expectation; bounds are passed as strings.
                new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
                {
                    Column = "council_district",
                    Dimension = "VALIDITY",
                    IgnoreNull = true,
                    Threshold = 0.9,
                    RangeExpectation = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleRangeExpectationArgs
                    {
                        MinValue = "1",
                        MaxValue = "10",
                        StrictMinEnabled = true,
                        StrictMaxEnabled = false,
                    },
                },
                // Regex expectation.
                new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
                {
                    Column = "power_type",
                    Dimension = "VALIDITY",
                    IgnoreNull = false,
                    RegexExpectation = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleRegexExpectationArgs
                    {
                        Regex = ".*solar.*",
                    },
                },
                // Set-membership expectation.
                new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
                {
                    Column = "property_type",
                    Dimension = "VALIDITY",
                    IgnoreNull = false,
                    SetExpectation = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleSetExpectationArgs
                    {
                        Values = new[]
                        {
                            "sidewalk",
                            "parkland",
                        },
                    },
                },
                // Uniqueness expectation. Empty args object, not null, for the same reason
                // as the non-null rule above.
                new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
                {
                    Column = "address",
                    Dimension = "UNIQUENESS",
                    UniquenessExpectation = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleUniquenessExpectationArgs(),
                },
                // Aggregate-statistic range expectation.
                new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
                {
                    Column = "number_of_docks",
                    Dimension = "VALIDITY",
                    StatisticRangeExpectation = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleStatisticRangeExpectationArgs
                    {
                        Statistic = "MEAN",
                        MinValue = "5",
                        MaxValue = "15",
                        StrictMinEnabled = true,
                        StrictMaxEnabled = true,
                    },
                },
                // Row-level SQL condition; both bounds need comparison operators.
                new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
                {
                    Column = "footprint_length",
                    Dimension = "VALIDITY",
                    RowConditionExpectation = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleRowConditionExpectationArgs
                    {
                        SqlExpression = "footprint_length > 0 AND footprint_length <= 10",
                    },
                },
                // Table-level SQL condition.
                new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
                {
                    Dimension = "VALIDITY",
                    TableConditionExpectation = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleTableConditionExpectationArgs
                    {
                        SqlExpression = "COUNT(*) > 0",
                    },
                },
                // Free-form SQL assertion statement.
                new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
                {
                    Dimension = "VALIDITY",
                    SqlAssertion = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleSqlAssertionArgs
                    {
                        SqlStatement = "select * from bigquery-public-data.austin_bikeshare.bikeshare_stations where station_id is null",
                    },
                },
            },
        },
        Project = "my-project-name",
    });
});
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/dataplex"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main declares a scheduled data quality scan showing one rule of each
// expectation type, plus an e-mail notification report.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := dataplex.NewDatascan(ctx, "full_quality", &dataplex.DatascanArgs{
			Location:    pulumi.String("us-central1"),
			DisplayName: pulumi.String("Full Datascan Quality"),
			DataScanId:  pulumi.String("dataquality-full"),
			Description: pulumi.String("Example resource - Full Datascan Quality"),
			Labels: pulumi.StringMap{
				"author": pulumi.String("billing"),
			},
			Data: &dataplex.DatascanDataArgs{
				Resource: pulumi.String("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations"),
			},
			ExecutionSpec: &dataplex.DatascanExecutionSpecArgs{
				Trigger: &dataplex.DatascanExecutionSpecTriggerArgs{
					Schedule: &dataplex.DatascanExecutionSpecTriggerScheduleArgs{
						Cron: pulumi.String("TZ=America/New_York 1 1 * * *"),
					},
				},
				Field: pulumi.String("modified_date"),
			},
			DataQualitySpec: &dataplex.DatascanDataQualitySpecArgs{
				SamplingPercent: pulumi.Float64(5),
				// The row filter is a SQL predicate, so it needs a comparison operator.
				RowFilter: pulumi.String("station_id > 1000"),
				PostScanActions: &dataplex.DatascanDataQualitySpecPostScanActionsArgs{
					NotificationReport: &dataplex.DatascanDataQualitySpecPostScanActionsNotificationReportArgs{
						Recipients: &dataplex.DatascanDataQualitySpecPostScanActionsNotificationReportRecipientsArgs{
							Emails: pulumi.StringArray{
								pulumi.String("jane.doe@example.com"),
							},
						},
						ScoreThresholdTrigger: &dataplex.DatascanDataQualitySpecPostScanActionsNotificationReportScoreThresholdTriggerArgs{
							ScoreThreshold: pulumi.Float64(86),
						},
					},
				},
				Rules: dataplex.DatascanDataQualitySpecRuleArray{
					// Non-null expectation on a column.
					&dataplex.DatascanDataQualitySpecRuleArgs{
						Column:             pulumi.String("address"),
						Dimension:          pulumi.String("VALIDITY"),
						Threshold:          pulumi.Float64(0.99),
						NonNullExpectation: &dataplex.DatascanDataQualitySpecRuleNonNullExpectationArgs{},
					},
					// Range expectation; bounds are passed as strings.
					&dataplex.DatascanDataQualitySpecRuleArgs{
						Column:     pulumi.String("council_district"),
						Dimension:  pulumi.String("VALIDITY"),
						IgnoreNull: pulumi.Bool(true),
						Threshold:  pulumi.Float64(0.9),
						RangeExpectation: &dataplex.DatascanDataQualitySpecRuleRangeExpectationArgs{
							MinValue:         pulumi.String("1"),
							MaxValue:         pulumi.String("10"),
							StrictMinEnabled: pulumi.Bool(true),
							StrictMaxEnabled: pulumi.Bool(false),
						},
					},
					// Regex expectation.
					&dataplex.DatascanDataQualitySpecRuleArgs{
						Column:     pulumi.String("power_type"),
						Dimension:  pulumi.String("VALIDITY"),
						IgnoreNull: pulumi.Bool(false),
						RegexExpectation: &dataplex.DatascanDataQualitySpecRuleRegexExpectationArgs{
							Regex: pulumi.String(".*solar.*"),
						},
					},
					// Set-membership expectation.
					&dataplex.DatascanDataQualitySpecRuleArgs{
						Column:     pulumi.String("property_type"),
						Dimension:  pulumi.String("VALIDITY"),
						IgnoreNull: pulumi.Bool(false),
						SetExpectation: &dataplex.DatascanDataQualitySpecRuleSetExpectationArgs{
							Values: pulumi.StringArray{
								pulumi.String("sidewalk"),
								pulumi.String("parkland"),
							},
						},
					},
					// Uniqueness expectation.
					&dataplex.DatascanDataQualitySpecRuleArgs{
						Column:                pulumi.String("address"),
						Dimension:             pulumi.String("UNIQUENESS"),
						UniquenessExpectation: &dataplex.DatascanDataQualitySpecRuleUniquenessExpectationArgs{},
					},
					// Aggregate-statistic range expectation.
					&dataplex.DatascanDataQualitySpecRuleArgs{
						Column:    pulumi.String("number_of_docks"),
						Dimension: pulumi.String("VALIDITY"),
						StatisticRangeExpectation: &dataplex.DatascanDataQualitySpecRuleStatisticRangeExpectationArgs{
							Statistic:        pulumi.String("MEAN"),
							MinValue:         pulumi.String("5"),
							MaxValue:         pulumi.String("15"),
							StrictMinEnabled: pulumi.Bool(true),
							StrictMaxEnabled: pulumi.Bool(true),
						},
					},
					// Row-level SQL condition; both bounds need comparison operators.
					&dataplex.DatascanDataQualitySpecRuleArgs{
						Column:    pulumi.String("footprint_length"),
						Dimension: pulumi.String("VALIDITY"),
						RowConditionExpectation: &dataplex.DatascanDataQualitySpecRuleRowConditionExpectationArgs{
							SqlExpression: pulumi.String("footprint_length > 0 AND footprint_length <= 10"),
						},
					},
					// Table-level SQL condition.
					&dataplex.DatascanDataQualitySpecRuleArgs{
						Dimension: pulumi.String("VALIDITY"),
						TableConditionExpectation: &dataplex.DatascanDataQualitySpecRuleTableConditionExpectationArgs{
							SqlExpression: pulumi.String("COUNT(*) > 0"),
						},
					},
					// Free-form SQL assertion statement.
					&dataplex.DatascanDataQualitySpecRuleArgs{
						Dimension: pulumi.String("VALIDITY"),
						SqlAssertion: &dataplex.DatascanDataQualitySpecRuleSqlAssertionArgs{
							SqlStatement: pulumi.String("select * from bigquery-public-data.austin_bikeshare.bikeshare_stations where station_id is null"),
						},
					},
				},
			},
			Project: pulumi.String("my-project-name"),
		})
		if err != nil {
			return err
		}
		return nil
	})
}
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataplex.Datascan;
import com.pulumi.gcp.dataplex.DatascanArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerScheduleArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecPostScanActionsArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecPostScanActionsNotificationReportArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecPostScanActionsNotificationReportRecipientsArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecPostScanActionsNotificationReportScoreThresholdTriggerArgs;
// Rule builder types referenced in stack(); without these imports the example does not compile.
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleNonNullExpectationArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleRangeExpectationArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleRegexExpectationArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleRowConditionExpectationArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleSetExpectationArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleSqlAssertionArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleStatisticRangeExpectationArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleTableConditionExpectationArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleUniquenessExpectationArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Example program: declares a scheduled Dataplex data-quality Datascan over the
 * public {@code austin_bikeshare.bikeshare_stations} BigQuery table, with a
 * notification report and one rule per expectation type.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the {@code fullQuality} Datascan resource.
     *
     * @param ctx the Pulumi context passed in by {@link Pulumi#run}
     */
    public static void stack(Context ctx) {
        var fullQuality = new Datascan("fullQuality", DatascanArgs.builder()
            .location("us-central1")
            .displayName("Full Datascan Quality")
            .dataScanId("dataquality-full")
            .description("Example resource - Full Datascan Quality")
            .labels(Map.of("author", "billing"))
            .data(DatascanDataArgs.builder()
                .resource("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations")
                .build())
            .executionSpec(DatascanExecutionSpecArgs.builder()
                .trigger(DatascanExecutionSpecTriggerArgs.builder()
                    .schedule(DatascanExecutionSpecTriggerScheduleArgs.builder()
                        .cron("TZ=America/New_York 1 1 * * *")
                        .build())
                    .build())
                .field("modified_date")
                .build())
            .dataQualitySpec(DatascanDataQualitySpecArgs.builder()
                .samplingPercent(5.0)
                // SQL filter expression; the comparison operator is required for it to be valid.
                .rowFilter("station_id > 1000")
                .postScanActions(DatascanDataQualitySpecPostScanActionsArgs.builder()
                    .notificationReport(DatascanDataQualitySpecPostScanActionsNotificationReportArgs.builder()
                        .recipients(DatascanDataQualitySpecPostScanActionsNotificationReportRecipientsArgs.builder()
                            .emails("jane.doe@example.com")
                            .build())
                        .scoreThresholdTrigger(DatascanDataQualitySpecPostScanActionsNotificationReportScoreThresholdTriggerArgs.builder()
                            .scoreThreshold(86.0)
                            .build())
                        .build())
                    .build())
                .rules(
                    // Non-null check on "address".
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("address")
                        .dimension("VALIDITY")
                        .threshold(0.99)
                        .nonNullExpectation(DatascanDataQualitySpecRuleNonNullExpectationArgs.builder()
                            .build())
                        .build(),
                    // Range check on "council_district" (strict min, non-strict max).
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("council_district")
                        .dimension("VALIDITY")
                        .ignoreNull(true)
                        .threshold(0.9)
                        .rangeExpectation(DatascanDataQualitySpecRuleRangeExpectationArgs.builder()
                            .minValue("1")
                            .maxValue("10")
                            .strictMinEnabled(true)
                            .strictMaxEnabled(false)
                            .build())
                        .build(),
                    // Regex check on "power_type".
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("power_type")
                        .dimension("VALIDITY")
                        .ignoreNull(false)
                        .regexExpectation(DatascanDataQualitySpecRuleRegexExpectationArgs.builder()
                            .regex(".*solar.*")
                            .build())
                        .build(),
                    // Set-membership check on "property_type".
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("property_type")
                        .dimension("VALIDITY")
                        .ignoreNull(false)
                        .setExpectation(DatascanDataQualitySpecRuleSetExpectationArgs.builder()
                            .values(
                                "sidewalk",
                                "parkland")
                            .build())
                        .build(),
                    // Uniqueness check on "address".
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("address")
                        .dimension("UNIQUENESS")
                        .uniquenessExpectation(DatascanDataQualitySpecRuleUniquenessExpectationArgs.builder()
                            .build())
                        .build(),
                    // Statistic (MEAN) range check on "number_of_docks".
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("number_of_docks")
                        .dimension("VALIDITY")
                        .statisticRangeExpectation(DatascanDataQualitySpecRuleStatisticRangeExpectationArgs.builder()
                            .statistic("MEAN")
                            .minValue("5")
                            .maxValue("15")
                            .strictMinEnabled(true)
                            .strictMaxEnabled(true)
                            .build())
                        .build(),
                    // Row-level SQL condition on "footprint_length".
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("footprint_length")
                        .dimension("VALIDITY")
                        .rowConditionExpectation(DatascanDataQualitySpecRuleRowConditionExpectationArgs.builder()
                            .sqlExpression("footprint_length > 0 AND footprint_length <= 10")
                            .build())
                        .build(),
                    // Table-level SQL condition.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .dimension("VALIDITY")
                        .tableConditionExpectation(DatascanDataQualitySpecRuleTableConditionExpectationArgs.builder()
                            .sqlExpression("COUNT(*) > 0")
                            .build())
                        .build(),
                    // SQL assertion statement selecting rows with a null station_id.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .dimension("VALIDITY")
                        .sqlAssertion(DatascanDataQualitySpecRuleSqlAssertionArgs.builder()
                            .sqlStatement("select * from bigquery-public-data.austin_bikeshare.bikeshare_stations where station_id is null")
                            .build())
                        .build())
                .build())
            .project("my-project-name")
            .build());
    }
}
# Pulumi YAML program: a scheduled Dataplex data-quality Datascan over the public
# austin_bikeshare.bikeshare_stations BigQuery table.
resources:
  fullQuality:
    type: gcp:dataplex:Datascan
    name: full_quality
    properties:
      location: us-central1
      displayName: Full Datascan Quality
      dataScanId: dataquality-full
      description: Example resource - Full Datascan Quality
      labels:
        author: billing
      data:
        resource: //bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations
      executionSpec:
        trigger:
          schedule:
            cron: TZ=America/New_York 1 1 * * *
        field: modified_date
      dataQualitySpec:
        samplingPercent: 5
        # SQL filter expression; the comparison operator is required for it to be valid.
        rowFilter: station_id > 1000
        postScanActions:
          notificationReport:
            recipients:
              emails:
                - jane.doe@example.com
            scoreThresholdTrigger:
              scoreThreshold: 86
        # One rule per expectation type.
        rules:
          - column: address
            dimension: VALIDITY
            threshold: 0.99
            nonNullExpectation: {}
          - column: council_district
            dimension: VALIDITY
            ignoreNull: true
            threshold: 0.9
            rangeExpectation:
              minValue: 1
              maxValue: 10
              strictMinEnabled: true
              strictMaxEnabled: false
          - column: power_type
            dimension: VALIDITY
            ignoreNull: false
            regexExpectation:
              regex: .*solar.*
          - column: property_type
            dimension: VALIDITY
            ignoreNull: false
            setExpectation:
              values:
                - sidewalk
                - parkland
          - column: address
            dimension: UNIQUENESS
            uniquenessExpectation: {}
          - column: number_of_docks
            dimension: VALIDITY
            statisticRangeExpectation:
              statistic: MEAN
              minValue: 5
              maxValue: 15
              strictMinEnabled: true
              strictMaxEnabled: true
          - column: footprint_length
            dimension: VALIDITY
            rowConditionExpectation:
              sqlExpression: footprint_length > 0 AND footprint_length <= 10
          - dimension: VALIDITY
            tableConditionExpectation:
              sqlExpression: COUNT(*) > 0
          - dimension: VALIDITY
            sqlAssertion:
              sqlStatement: select * from bigquery-public-data.austin_bikeshare.bikeshare_stations where station_id is null
      project: my-project-name
Import
Datascan can be imported using any of these accepted formats:
projects/{{project}}/locations/{{location}}/dataScans/{{data_scan_id}}
{{project}}/{{location}}/{{data_scan_id}}
{{location}}/{{data_scan_id}}
{{data_scan_id}}
When using the `pulumi import` command, Datascan can be imported using one of the formats above. For example:
$ pulumi import gcp:dataplex/datascan:Datascan default projects/{{project}}/locations/{{location}}/dataScans/{{data_scan_id}}
$ pulumi import gcp:dataplex/datascan:Datascan default {{project}}/{{location}}/{{data_scan_id}}
$ pulumi import gcp:dataplex/datascan:Datascan default {{location}}/{{data_scan_id}}
$ pulumi import gcp:dataplex/datascan:Datascan default {{data_scan_id}}
Properties
The time when the scan was created.
The data source for DataScan. Structure is documented below.
DataProfileScan related setting.
DataQualityScan related setting.
DataScan identifier. Must contain only lowercase letters, numbers and hyphens. Must start with a letter. Must end with a number or a letter.
Description of the scan.
User friendly display name.
All of labels (key/value pairs) present on the resource in GCP, including the labels configured through Pulumi, other clients and services.
DataScan execution settings. Structure is documented below.
Status of the data scan execution. Structure is documented below.
The combination of labels configured directly on the resource and default labels configured on the provider.
The time when the scan was last updated.