Datascan

class Datascan : KotlinCustomResource

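Represents a user-visible job which provides the insights for the related data source. To get more information about Datascan, see the Dataplex API documentation (https://cloud.google.com/dataplex/docs/reference/rest) and the official Dataplex documentation (https://cloud.google.com/dataplex/docs).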

Example Usage

Dataplex Datascan Basic Profile

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const basicProfile = new gcp.dataplex.Datascan("basic_profile", {
location: "us-central1",
dataScanId: "dataprofile-basic",
data: {
resource: "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare",
},
executionSpec: {
trigger: {
onDemand: {},
},
},
dataProfileSpec: {},
project: "my-project-name",
});
import pulumi
import pulumi_gcp as gcp
basic_profile = gcp.dataplex.Datascan("basic_profile",
location="us-central1",
data_scan_id="dataprofile-basic",
data={
"resource": "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare",
},
execution_spec={
"trigger": {
"on_demand": {},
},
},
data_profile_spec={},
project="my-project-name")
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var basicProfile = new Gcp.DataPlex.Datascan("basic_profile", new()
{
Location = "us-central1",
DataScanId = "dataprofile-basic",
Data = new Gcp.DataPlex.Inputs.DatascanDataArgs
{
Resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare",
},
ExecutionSpec = new Gcp.DataPlex.Inputs.DatascanExecutionSpecArgs
{
Trigger = new Gcp.DataPlex.Inputs.DatascanExecutionSpecTriggerArgs
{
OnDemand = null,
},
},
DataProfileSpec = null,
Project = "my-project-name",
});
});
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v7/go/gcp/dataplex"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := dataplex.NewDatascan(ctx, "basic_profile", &dataplex.DatascanArgs{
Location: pulumi.String("us-central1"),
DataScanId: pulumi.String("dataprofile-basic"),
Data: &dataplex.DatascanDataArgs{
Resource: pulumi.String("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare"),
},
ExecutionSpec: &dataplex.DatascanExecutionSpecArgs{
Trigger: &dataplex.DatascanExecutionSpecTriggerArgs{
OnDemand: nil,
},
},
DataProfileSpec: nil,
Project: pulumi.String("my-project-name"),
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataplex.Datascan;
import com.pulumi.gcp.dataplex.DatascanArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerOnDemandArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var basicProfile = new Datascan("basicProfile", DatascanArgs.builder()
.location("us-central1")
.dataScanId("dataprofile-basic")
.data(DatascanDataArgs.builder()
.resource("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare")
.build())
.executionSpec(DatascanExecutionSpecArgs.builder()
.trigger(DatascanExecutionSpecTriggerArgs.builder()
.onDemand()
.build())
.build())
.dataProfileSpec()
.project("my-project-name")
.build());
}
}
resources:
basicProfile:
type: gcp:dataplex:Datascan
name: basic_profile
properties:
location: us-central1
dataScanId: dataprofile-basic
data:
resource: //bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare
executionSpec:
trigger:
onDemand: {}
dataProfileSpec: {}
project: my-project-name

Dataplex Datascan Full Profile

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const source = new gcp.bigquery.Dataset("source", {
datasetId: "dataplex_dataset",
friendlyName: "test",
description: "This is a test description",
location: "US",
deleteContentsOnDestroy: true,
});
const fullProfile = new gcp.dataplex.Datascan("full_profile", {
location: "us-central1",
displayName: "Full Datascan Profile",
dataScanId: "dataprofile-full",
description: "Example resource - Full Datascan Profile",
labels: {
author: "billing",
},
data: {
resource: "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare",
},
executionSpec: {
trigger: {
schedule: {
cron: "TZ=America/New_York 1 1 * * *",
},
},
},
dataProfileSpec: {
samplingPercent: 80,
rowFilter: "word_count > 10",
includeFields: {
fieldNames: ["word_count"],
},
excludeFields: {
fieldNames: ["property_type"],
},
postScanActions: {
bigqueryExport: {
resultsTable: "//bigquery.googleapis.com/projects/my-project-name/datasets/dataplex_dataset/tables/profile_export",
},
},
},
project: "my-project-name",
}, {
dependsOn: [source],
});
import pulumi
import pulumi_gcp as gcp
source = gcp.bigquery.Dataset("source",
dataset_id="dataplex_dataset",
friendly_name="test",
description="This is a test description",
location="US",
delete_contents_on_destroy=True)
full_profile = gcp.dataplex.Datascan("full_profile",
location="us-central1",
display_name="Full Datascan Profile",
data_scan_id="dataprofile-full",
description="Example resource - Full Datascan Profile",
labels={
"author": "billing",
},
data={
"resource": "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare",
},
execution_spec={
"trigger": {
"schedule": {
"cron": "TZ=America/New_York 1 1 * * *",
},
},
},
data_profile_spec={
"sampling_percent": 80,
"row_filter": "word_count > 10",
"include_fields": {
"field_names": ["word_count"],
},
"exclude_fields": {
"field_names": ["property_type"],
},
"post_scan_actions": {
"bigquery_export": {
"results_table": "//bigquery.googleapis.com/projects/my-project-name/datasets/dataplex_dataset/tables/profile_export",
},
},
},
project="my-project-name",
opts = pulumi.ResourceOptions(depends_on=[source]))
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var source = new Gcp.BigQuery.Dataset("source", new()
{
DatasetId = "dataplex_dataset",
FriendlyName = "test",
Description = "This is a test description",
Location = "US",
DeleteContentsOnDestroy = true,
});
var fullProfile = new Gcp.DataPlex.Datascan("full_profile", new()
{
Location = "us-central1",
DisplayName = "Full Datascan Profile",
DataScanId = "dataprofile-full",
Description = "Example resource - Full Datascan Profile",
Labels =
{
{ "author", "billing" },
},
Data = new Gcp.DataPlex.Inputs.DatascanDataArgs
{
Resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare",
},
ExecutionSpec = new Gcp.DataPlex.Inputs.DatascanExecutionSpecArgs
{
Trigger = new Gcp.DataPlex.Inputs.DatascanExecutionSpecTriggerArgs
{
Schedule = new Gcp.DataPlex.Inputs.DatascanExecutionSpecTriggerScheduleArgs
{
Cron = "TZ=America/New_York 1 1 * * *",
},
},
},
DataProfileSpec = new Gcp.DataPlex.Inputs.DatascanDataProfileSpecArgs
{
SamplingPercent = 80,
RowFilter = "word_count > 10",
IncludeFields = new Gcp.DataPlex.Inputs.DatascanDataProfileSpecIncludeFieldsArgs
{
FieldNames = new[]
{
"word_count",
},
},
ExcludeFields = new Gcp.DataPlex.Inputs.DatascanDataProfileSpecExcludeFieldsArgs
{
FieldNames = new[]
{
"property_type",
},
},
PostScanActions = new Gcp.DataPlex.Inputs.DatascanDataProfileSpecPostScanActionsArgs
{
BigqueryExport = new Gcp.DataPlex.Inputs.DatascanDataProfileSpecPostScanActionsBigqueryExportArgs
{
ResultsTable = "//bigquery.googleapis.com/projects/my-project-name/datasets/dataplex_dataset/tables/profile_export",
},
},
},
Project = "my-project-name",
}, new CustomResourceOptions
{
DependsOn =
{
source,
},
});
});
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v7/go/gcp/bigquery"
"github.com/pulumi/pulumi-gcp/sdk/v7/go/gcp/dataplex"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
source, err := bigquery.NewDataset(ctx, "source", &bigquery.DatasetArgs{
DatasetId: pulumi.String("dataplex_dataset"),
FriendlyName: pulumi.String("test"),
Description: pulumi.String("This is a test description"),
Location: pulumi.String("US"),
DeleteContentsOnDestroy: pulumi.Bool(true),
})
if err != nil {
return err
}
_, err = dataplex.NewDatascan(ctx, "full_profile", &dataplex.DatascanArgs{
Location: pulumi.String("us-central1"),
DisplayName: pulumi.String("Full Datascan Profile"),
DataScanId: pulumi.String("dataprofile-full"),
Description: pulumi.String("Example resource - Full Datascan Profile"),
Labels: pulumi.StringMap{
"author": pulumi.String("billing"),
},
Data: &dataplex.DatascanDataArgs{
Resource: pulumi.String("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare"),
},
ExecutionSpec: &dataplex.DatascanExecutionSpecArgs{
Trigger: &dataplex.DatascanExecutionSpecTriggerArgs{
Schedule: &dataplex.DatascanExecutionSpecTriggerScheduleArgs{
Cron: pulumi.String("TZ=America/New_York 1 1 * * *"),
},
},
},
DataProfileSpec: &dataplex.DatascanDataProfileSpecArgs{
SamplingPercent: pulumi.Float64(80),
RowFilter: pulumi.String("word_count > 10"),
IncludeFields: &dataplex.DatascanDataProfileSpecIncludeFieldsArgs{
FieldNames: pulumi.StringArray{
pulumi.String("word_count"),
},
},
ExcludeFields: &dataplex.DatascanDataProfileSpecExcludeFieldsArgs{
FieldNames: pulumi.StringArray{
pulumi.String("property_type"),
},
},
PostScanActions: &dataplex.DatascanDataProfileSpecPostScanActionsArgs{
BigqueryExport: &dataplex.DatascanDataProfileSpecPostScanActionsBigqueryExportArgs{
ResultsTable: pulumi.String("//bigquery.googleapis.com/projects/my-project-name/datasets/dataplex_dataset/tables/profile_export"),
},
},
},
Project: pulumi.String("my-project-name"),
}, pulumi.DependsOn([]pulumi.Resource{
source,
}))
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.dataplex.Datascan;
import com.pulumi.gcp.dataplex.DatascanArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerScheduleArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecIncludeFieldsArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecExcludeFieldsArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecPostScanActionsArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecPostScanActionsBigqueryExportArgs;
import com.pulumi.resources.CustomResourceOptions;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var source = new Dataset("source", DatasetArgs.builder()
.datasetId("dataplex_dataset")
.friendlyName("test")
.description("This is a test description")
.location("US")
.deleteContentsOnDestroy(true)
.build());
var fullProfile = new Datascan("fullProfile", DatascanArgs.builder()
.location("us-central1")
.displayName("Full Datascan Profile")
.dataScanId("dataprofile-full")
.description("Example resource - Full Datascan Profile")
.labels(Map.of("author", "billing"))
.data(DatascanDataArgs.builder()
.resource("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare")
.build())
.executionSpec(DatascanExecutionSpecArgs.builder()
.trigger(DatascanExecutionSpecTriggerArgs.builder()
.schedule(DatascanExecutionSpecTriggerScheduleArgs.builder()
.cron("TZ=America/New_York 1 1 * * *")
.build())
.build())
.build())
.dataProfileSpec(DatascanDataProfileSpecArgs.builder()
.samplingPercent(80)
.rowFilter("word_count > 10")
.includeFields(DatascanDataProfileSpecIncludeFieldsArgs.builder()
.fieldNames("word_count")
.build())
.excludeFields(DatascanDataProfileSpecExcludeFieldsArgs.builder()
.fieldNames("property_type")
.build())
.postScanActions(DatascanDataProfileSpecPostScanActionsArgs.builder()
.bigqueryExport(DatascanDataProfileSpecPostScanActionsBigqueryExportArgs.builder()
.resultsTable("//bigquery.googleapis.com/projects/my-project-name/datasets/dataplex_dataset/tables/profile_export")
.build())
.build())
.build())
.project("my-project-name")
.build(), CustomResourceOptions.builder()
.dependsOn(source)
.build());
}
}
resources:
fullProfile:
type: gcp:dataplex:Datascan
name: full_profile
properties:
location: us-central1
displayName: Full Datascan Profile
dataScanId: dataprofile-full
description: Example resource - Full Datascan Profile
labels:
author: billing
data:
resource: //bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare
executionSpec:
trigger:
schedule:
cron: TZ=America/New_York 1 1 * * *
dataProfileSpec:
samplingPercent: 80
rowFilter: word_count > 10
includeFields:
fieldNames:
- word_count
excludeFields:
fieldNames:
- property_type
postScanActions:
bigqueryExport:
resultsTable: //bigquery.googleapis.com/projects/my-project-name/datasets/dataplex_dataset/tables/profile_export
project: my-project-name
options:
dependsOn:
- ${source}
source:
type: gcp:bigquery:Dataset
properties:
datasetId: dataplex_dataset
friendlyName: test
description: This is a test description
location: US
deleteContentsOnDestroy: true

Dataplex Datascan Basic Quality

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const basicQuality = new gcp.dataplex.Datascan("basic_quality", {
location: "us-central1",
dataScanId: "dataquality-basic",
data: {
resource: "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare",
},
executionSpec: {
trigger: {
onDemand: {},
},
},
dataQualitySpec: {
rules: [{
dimension: "VALIDITY",
name: "rule1",
description: "rule 1 for validity dimension",
tableConditionExpectation: {
sqlExpression: "COUNT(*) > 0",
},
}],
},
project: "my-project-name",
});
import pulumi
import pulumi_gcp as gcp
basic_quality = gcp.dataplex.Datascan("basic_quality",
location="us-central1",
data_scan_id="dataquality-basic",
data={
"resource": "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare",
},
execution_spec={
"trigger": {
"on_demand": {},
},
},
data_quality_spec={
"rules": [{
"dimension": "VALIDITY",
"name": "rule1",
"description": "rule 1 for validity dimension",
"table_condition_expectation": {
"sql_expression": "COUNT(*) > 0",
},
}],
},
project="my-project-name")
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var basicQuality = new Gcp.DataPlex.Datascan("basic_quality", new()
{
Location = "us-central1",
DataScanId = "dataquality-basic",
Data = new Gcp.DataPlex.Inputs.DatascanDataArgs
{
Resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare",
},
ExecutionSpec = new Gcp.DataPlex.Inputs.DatascanExecutionSpecArgs
{
Trigger = new Gcp.DataPlex.Inputs.DatascanExecutionSpecTriggerArgs
{
OnDemand = null,
},
},
DataQualitySpec = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecArgs
{
Rules = new[]
{
new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
{
Dimension = "VALIDITY",
Name = "rule1",
Description = "rule 1 for validity dimension",
TableConditionExpectation = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleTableConditionExpectationArgs
{
SqlExpression = "COUNT(*) > 0",
},
},
},
},
Project = "my-project-name",
});
});
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v7/go/gcp/dataplex"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := dataplex.NewDatascan(ctx, "basic_quality", &dataplex.DatascanArgs{
Location: pulumi.String("us-central1"),
DataScanId: pulumi.String("dataquality-basic"),
Data: &dataplex.DatascanDataArgs{
Resource: pulumi.String("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare"),
},
ExecutionSpec: &dataplex.DatascanExecutionSpecArgs{
Trigger: &dataplex.DatascanExecutionSpecTriggerArgs{
OnDemand: nil,
},
},
DataQualitySpec: &dataplex.DatascanDataQualitySpecArgs{
Rules: dataplex.DatascanDataQualitySpecRuleArray{
&dataplex.DatascanDataQualitySpecRuleArgs{
Dimension: pulumi.String("VALIDITY"),
Name: pulumi.String("rule1"),
Description: pulumi.String("rule 1 for validity dimension"),
TableConditionExpectation: &dataplex.DatascanDataQualitySpecRuleTableConditionExpectationArgs{
SqlExpression: pulumi.String("COUNT(*) > 0"),
},
},
},
},
Project: pulumi.String("my-project-name"),
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataplex.Datascan;
import com.pulumi.gcp.dataplex.DatascanArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerOnDemandArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var basicQuality = new Datascan("basicQuality", DatascanArgs.builder()
.location("us-central1")
.dataScanId("dataquality-basic")
.data(DatascanDataArgs.builder()
.resource("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare")
.build())
.executionSpec(DatascanExecutionSpecArgs.builder()
.trigger(DatascanExecutionSpecTriggerArgs.builder()
.onDemand()
.build())
.build())
.dataQualitySpec(DatascanDataQualitySpecArgs.builder()
.rules(DatascanDataQualitySpecRuleArgs.builder()
.dimension("VALIDITY")
.name("rule1")
.description("rule 1 for validity dimension")
.tableConditionExpectation(DatascanDataQualitySpecRuleTableConditionExpectationArgs.builder()
.sqlExpression("COUNT(*) > 0")
.build())
.build())
.build())
.project("my-project-name")
.build());
}
}
resources:
basicQuality:
type: gcp:dataplex:Datascan
name: basic_quality
properties:
location: us-central1
dataScanId: dataquality-basic
data:
resource: //bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare
executionSpec:
trigger:
onDemand: {}
dataQualitySpec:
rules:
- dimension: VALIDITY
name: rule1
description: rule 1 for validity dimension
tableConditionExpectation:
sqlExpression: COUNT(*) > 0
project: my-project-name

Dataplex Datascan Full Quality

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const fullQuality = new gcp.dataplex.Datascan("full_quality", {
location: "us-central1",
displayName: "Full Datascan Quality",
dataScanId: "dataquality-full",
description: "Example resource - Full Datascan Quality",
labels: {
author: "billing",
},
data: {
resource: "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations",
},
executionSpec: {
trigger: {
schedule: {
cron: "TZ=America/New_York 1 1 * * *",
},
},
field: "modified_date",
},
dataQualitySpec: {
samplingPercent: 5,
rowFilter: "station_id > 1000",
rules: [
{
column: "address",
dimension: "VALIDITY",
threshold: 0.99,
nonNullExpectation: {},
},
{
column: "council_district",
dimension: "VALIDITY",
ignoreNull: true,
threshold: 0.9,
rangeExpectation: {
minValue: "1",
maxValue: "10",
strictMinEnabled: true,
strictMaxEnabled: false,
},
},
{
column: "power_type",
dimension: "VALIDITY",
ignoreNull: false,
regexExpectation: {
regex: ".*solar.*",
},
},
{
column: "property_type",
dimension: "VALIDITY",
ignoreNull: false,
setExpectation: {
values: [
"sidewalk",
"parkland",
],
},
},
{
column: "address",
dimension: "UNIQUENESS",
uniquenessExpectation: {},
},
{
column: "number_of_docks",
dimension: "VALIDITY",
statisticRangeExpectation: {
statistic: "MEAN",
minValue: "5",
maxValue: "15",
strictMinEnabled: true,
strictMaxEnabled: true,
},
},
{
column: "footprint_length",
dimension: "VALIDITY",
rowConditionExpectation: {
sqlExpression: "footprint_length > 0 AND footprint_length <= 10",
},
},
{
dimension: "VALIDITY",
tableConditionExpectation: {
sqlExpression: "COUNT(*) > 0",
},
},
{
dimension: "VALIDITY",
sqlAssertion: {
sqlStatement: "select * from bigquery-public-data.austin_bikeshare.bikeshare_stations where station_id is null",
},
},
],
},
project: "my-project-name",
});
import pulumi
import pulumi_gcp as gcp
full_quality = gcp.dataplex.Datascan("full_quality",
location="us-central1",
display_name="Full Datascan Quality",
data_scan_id="dataquality-full",
description="Example resource - Full Datascan Quality",
labels={
"author": "billing",
},
data={
"resource": "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations",
},
execution_spec={
"trigger": {
"schedule": {
"cron": "TZ=America/New_York 1 1 * * *",
},
},
"field": "modified_date",
},
data_quality_spec={
"sampling_percent": 5,
"row_filter": "station_id > 1000",
"rules": [
{
"column": "address",
"dimension": "VALIDITY",
"threshold": 0.99,
"non_null_expectation": {},
},
{
"column": "council_district",
"dimension": "VALIDITY",
"ignore_null": True,
"threshold": 0.9,
"range_expectation": {
"min_value": "1",
"max_value": "10",
"strict_min_enabled": True,
"strict_max_enabled": False,
},
},
{
"column": "power_type",
"dimension": "VALIDITY",
"ignore_null": False,
"regex_expectation": {
"regex": ".*solar.*",
},
},
{
"column": "property_type",
"dimension": "VALIDITY",
"ignore_null": False,
"set_expectation": {
"values": [
"sidewalk",
"parkland",
],
},
},
{
"column": "address",
"dimension": "UNIQUENESS",
"uniqueness_expectation": {},
},
{
"column": "number_of_docks",
"dimension": "VALIDITY",
"statistic_range_expectation": {
"statistic": "MEAN",
"min_value": "5",
"max_value": "15",
"strict_min_enabled": True,
"strict_max_enabled": True,
},
},
{
"column": "footprint_length",
"dimension": "VALIDITY",
"row_condition_expectation": {
"sql_expression": "footprint_length > 0 AND footprint_length <= 10",
},
},
{
"dimension": "VALIDITY",
"table_condition_expectation": {
"sql_expression": "COUNT(*) > 0",
},
},
{
"dimension": "VALIDITY",
"sql_assertion": {
"sql_statement": "select * from bigquery-public-data.austin_bikeshare.bikeshare_stations where station_id is null",
},
},
],
},
project="my-project-name")
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var fullQuality = new Gcp.DataPlex.Datascan("full_quality", new()
{
Location = "us-central1",
DisplayName = "Full Datascan Quality",
DataScanId = "dataquality-full",
Description = "Example resource - Full Datascan Quality",
Labels =
{
{ "author", "billing" },
},
Data = new Gcp.DataPlex.Inputs.DatascanDataArgs
{
Resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations",
},
ExecutionSpec = new Gcp.DataPlex.Inputs.DatascanExecutionSpecArgs
{
Trigger = new Gcp.DataPlex.Inputs.DatascanExecutionSpecTriggerArgs
{
Schedule = new Gcp.DataPlex.Inputs.DatascanExecutionSpecTriggerScheduleArgs
{
Cron = "TZ=America/New_York 1 1 * * *",
},
},
Field = "modified_date",
},
DataQualitySpec = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecArgs
{
SamplingPercent = 5,
RowFilter = "station_id > 1000",
Rules = new[]
{
new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
{
Column = "address",
Dimension = "VALIDITY",
Threshold = 0.99,
NonNullExpectation = null,
},
new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
{
Column = "council_district",
Dimension = "VALIDITY",
IgnoreNull = true,
Threshold = 0.9,
RangeExpectation = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleRangeExpectationArgs
{
MinValue = "1",
MaxValue = "10",
StrictMinEnabled = true,
StrictMaxEnabled = false,
},
},
new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
{
Column = "power_type",
Dimension = "VALIDITY",
IgnoreNull = false,
RegexExpectation = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleRegexExpectationArgs
{
Regex = ".*solar.*",
},
},
new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
{
Column = "property_type",
Dimension = "VALIDITY",
IgnoreNull = false,
SetExpectation = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleSetExpectationArgs
{
Values = new[]
{
"sidewalk",
"parkland",
},
},
},
new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
{
Column = "address",
Dimension = "UNIQUENESS",
UniquenessExpectation = null,
},
new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
{
Column = "number_of_docks",
Dimension = "VALIDITY",
StatisticRangeExpectation = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleStatisticRangeExpectationArgs
{
Statistic = "MEAN",
MinValue = "5",
MaxValue = "15",
StrictMinEnabled = true,
StrictMaxEnabled = true,
},
},
new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
{
Column = "footprint_length",
Dimension = "VALIDITY",
RowConditionExpectation = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleRowConditionExpectationArgs
{
SqlExpression = "footprint_length > 0 AND footprint_length <= 10",
},
},
new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
{
Dimension = "VALIDITY",
TableConditionExpectation = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleTableConditionExpectationArgs
{
SqlExpression = "COUNT(*) > 0",
},
},
new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
{
Dimension = "VALIDITY",
SqlAssertion = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleSqlAssertionArgs
{
SqlStatement = "select * from bigquery-public-data.austin_bikeshare.bikeshare_stations where station_id is null",
},
},
},
},
Project = "my-project-name",
});
});
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v7/go/gcp/dataplex"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := dataplex.NewDatascan(ctx, "full_quality", &dataplex.DatascanArgs{
Location: pulumi.String("us-central1"),
DisplayName: pulumi.String("Full Datascan Quality"),
DataScanId: pulumi.String("dataquality-full"),
Description: pulumi.String("Example resource - Full Datascan Quality"),
Labels: pulumi.StringMap{
"author": pulumi.String("billing"),
},
Data: &dataplex.DatascanDataArgs{
Resource: pulumi.String("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations"),
},
ExecutionSpec: &dataplex.DatascanExecutionSpecArgs{
Trigger: &dataplex.DatascanExecutionSpecTriggerArgs{
Schedule: &dataplex.DatascanExecutionSpecTriggerScheduleArgs{
Cron: pulumi.String("TZ=America/New_York 1 1 * * *"),
},
},
Field: pulumi.String("modified_date"),
},
DataQualitySpec: &dataplex.DatascanDataQualitySpecArgs{
SamplingPercent: pulumi.Float64(5),
RowFilter: pulumi.String("station_id > 1000"),
Rules: dataplex.DatascanDataQualitySpecRuleArray{
&dataplex.DatascanDataQualitySpecRuleArgs{
Column: pulumi.String("address"),
Dimension: pulumi.String("VALIDITY"),
Threshold: pulumi.Float64(0.99),
NonNullExpectation: nil,
},
&dataplex.DatascanDataQualitySpecRuleArgs{
Column: pulumi.String("council_district"),
Dimension: pulumi.String("VALIDITY"),
IgnoreNull: pulumi.Bool(true),
Threshold: pulumi.Float64(0.9),
RangeExpectation: &dataplex.DatascanDataQualitySpecRuleRangeExpectationArgs{
MinValue: pulumi.String("1"),
MaxValue: pulumi.String("10"),
StrictMinEnabled: pulumi.Bool(true),
StrictMaxEnabled: pulumi.Bool(false),
},
},
&dataplex.DatascanDataQualitySpecRuleArgs{
Column: pulumi.String("power_type"),
Dimension: pulumi.String("VALIDITY"),
IgnoreNull: pulumi.Bool(false),
RegexExpectation: &dataplex.DatascanDataQualitySpecRuleRegexExpectationArgs{
Regex: pulumi.String(".*solar.*"),
},
},
&dataplex.DatascanDataQualitySpecRuleArgs{
Column: pulumi.String("property_type"),
Dimension: pulumi.String("VALIDITY"),
IgnoreNull: pulumi.Bool(false),
SetExpectation: &dataplex.DatascanDataQualitySpecRuleSetExpectationArgs{
Values: pulumi.StringArray{
pulumi.String("sidewalk"),
pulumi.String("parkland"),
},
},
},
&dataplex.DatascanDataQualitySpecRuleArgs{
Column: pulumi.String("address"),
Dimension: pulumi.String("UNIQUENESS"),
UniquenessExpectation: nil,
},
&dataplex.DatascanDataQualitySpecRuleArgs{
Column: pulumi.String("number_of_docks"),
Dimension: pulumi.String("VALIDITY"),
StatisticRangeExpectation: &dataplex.DatascanDataQualitySpecRuleStatisticRangeExpectationArgs{
Statistic: pulumi.String("MEAN"),
MinValue: pulumi.String("5"),
MaxValue: pulumi.String("15"),
StrictMinEnabled: pulumi.Bool(true),
StrictMaxEnabled: pulumi.Bool(true),
},
},
&dataplex.DatascanDataQualitySpecRuleArgs{
Column: pulumi.String("footprint_length"),
Dimension: pulumi.String("VALIDITY"),
RowConditionExpectation: &dataplex.DatascanDataQualitySpecRuleRowConditionExpectationArgs{
SqlExpression: pulumi.String("footprint_length > 0 AND footprint_length <= 10"),
},
},
&dataplex.DatascanDataQualitySpecRuleArgs{
Dimension: pulumi.String("VALIDITY"),
TableConditionExpectation: &dataplex.DatascanDataQualitySpecRuleTableConditionExpectationArgs{
SqlExpression: pulumi.String("COUNT(*) > 0"),
},
},
&dataplex.DatascanDataQualitySpecRuleArgs{
Dimension: pulumi.String("VALIDITY"),
SqlAssertion: &dataplex.DatascanDataQualitySpecRuleSqlAssertionArgs{
SqlStatement: pulumi.String("select * from bigquery-public-data.austin_bikeshare.bikeshare_stations where station_id is null"),
},
},
},
},
Project: pulumi.String("my-project-name"),
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataplex.Datascan;
import com.pulumi.gcp.dataplex.DatascanArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerScheduleArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
/**
 * Example Pulumi program: declares one Dataplex {@code Datascan} resource
 * ("dataquality-full") that runs a scheduled data-quality scan against the
 * public {@code austin_bikeshare.bikeshare_stations} BigQuery table.
 */
public class App {
    /** Program entry point; hands the stack definition to the Pulumi runtime. */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the resources of this stack.
     *
     * @param ctx the Pulumi context supplied by {@code Pulumi.run}
     */
    public static void stack(Context ctx) {
        var fullQuality = new Datascan("fullQuality", DatascanArgs.builder()
            .location("us-central1")
            .displayName("Full Datascan Quality")
            .dataScanId("dataquality-full")
            .description("Example resource - Full Datascan Quality")
            .labels(Map.of("author", "billing"))
            // Data source to scan.
            .data(DatascanDataArgs.builder()
                .resource("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations")
                .build())
            // Scheduled (cron) trigger rather than on-demand execution.
            .executionSpec(DatascanExecutionSpecArgs.builder()
                .trigger(DatascanExecutionSpecTriggerArgs.builder()
                    .schedule(DatascanExecutionSpecTriggerScheduleArgs.builder()
                        .cron("TZ=America/New_York 1 1 * * *")
                        .build())
                    .build())
                .field("modified_date")
                .build())
            .dataQualitySpec(DatascanDataQualitySpecArgs.builder()
                .samplingPercent(5)
                // Row filter is a SQL condition; the comparison operator is required.
                .rowFilter("station_id > 1000")
                .rules(
                    // address must be non-null in at least 99% of rows.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("address")
                        .dimension("VALIDITY")
                        .threshold(0.99)
                        .nonNullExpectation()
                        .build(),
                    // council_district range check: strict lower bound, non-strict upper bound.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("council_district")
                        .dimension("VALIDITY")
                        .ignoreNull(true)
                        .threshold(0.9)
                        .rangeExpectation(DatascanDataQualitySpecRuleRangeExpectationArgs.builder()
                            .minValue(1)
                            .maxValue(10)
                            .strictMinEnabled(true)
                            .strictMaxEnabled(false)
                            .build())
                        .build(),
                    // power_type must match the regular expression.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("power_type")
                        .dimension("VALIDITY")
                        .ignoreNull(false)
                        .regexExpectation(DatascanDataQualitySpecRuleRegexExpectationArgs.builder()
                            .regex(".*solar.*")
                            .build())
                        .build(),
                    // property_type must be one of the listed values.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("property_type")
                        .dimension("VALIDITY")
                        .ignoreNull(false)
                        .setExpectation(DatascanDataQualitySpecRuleSetExpectationArgs.builder()
                            .values(
                                "sidewalk",
                                "parkland")
                            .build())
                        .build(),
                    // address values must be unique.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("address")
                        .dimension("UNIQUENESS")
                        .uniquenessExpectation()
                        .build(),
                    // MEAN(number_of_docks) must fall strictly between 5 and 15.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("number_of_docks")
                        .dimension("VALIDITY")
                        .statisticRangeExpectation(DatascanDataQualitySpecRuleStatisticRangeExpectationArgs.builder()
                            .statistic("MEAN")
                            .minValue(5)
                            .maxValue(15)
                            .strictMinEnabled(true)
                            .strictMaxEnabled(true)
                            .build())
                        .build(),
                    // Per-row SQL condition: 0 < footprint_length <= 10.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("footprint_length")
                        .dimension("VALIDITY")
                        .rowConditionExpectation(DatascanDataQualitySpecRuleRowConditionExpectationArgs.builder()
                            .sqlExpression("footprint_length > 0 AND footprint_length <= 10")
                            .build())
                        .build(),
                    // Table-level SQL condition: the table must not be empty.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .dimension("VALIDITY")
                        .tableConditionExpectation(DatascanDataQualitySpecRuleTableConditionExpectationArgs.builder()
                            .sqlExpression("COUNT(*) > 0")
                            .build())
                        .build(),
                    // SQL assertion: the statement selects rows with a null station_id.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .dimension("VALIDITY")
                        .sqlAssertion(DatascanDataQualitySpecRuleSqlAssertionArgs.builder()
                            .sqlStatement("select * from bigquery-public-data.austin_bikeshare.bikeshare_stations where station_id is null")
                            .build())
                        .build())
                .build())
            .project("my-project-name")
            .build());
    }
}
# Pulumi YAML program: one Dataplex Datascan running a scheduled data-quality
# scan against the public austin_bikeshare.bikeshare_stations BigQuery table.
resources:
  fullQuality:
    type: gcp:dataplex:Datascan
    name: full_quality
    properties:
      location: us-central1
      displayName: Full Datascan Quality
      dataScanId: dataquality-full
      description: Example resource - Full Datascan Quality
      labels:
        author: billing
      # Data source to scan.
      data:
        resource: //bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations
      # Scheduled (cron) trigger rather than on-demand execution.
      executionSpec:
        trigger:
          schedule:
            cron: TZ=America/New_York 1 1 * * *
        field: modified_date
      dataQualitySpec:
        samplingPercent: 5
        # Row filter is a SQL condition; the comparison operator is required.
        rowFilter: station_id > 1000
        rules:
          # address must be non-null in at least 99% of rows.
          - column: address
            dimension: VALIDITY
            threshold: 0.99
            nonNullExpectation: {}
          # council_district range check: strict lower bound, non-strict upper bound.
          - column: council_district
            dimension: VALIDITY
            ignoreNull: true
            threshold: 0.9
            rangeExpectation:
              minValue: 1
              maxValue: 10
              strictMinEnabled: true
              strictMaxEnabled: false
          # power_type must match the regular expression.
          - column: power_type
            dimension: VALIDITY
            ignoreNull: false
            regexExpectation:
              regex: .*solar.*
          # property_type must be one of the listed values.
          - column: property_type
            dimension: VALIDITY
            ignoreNull: false
            setExpectation:
              values:
                - sidewalk
                - parkland
          # address values must be unique.
          - column: address
            dimension: UNIQUENESS
            uniquenessExpectation: {}
          # MEAN(number_of_docks) must fall strictly between 5 and 15.
          - column: number_of_docks
            dimension: VALIDITY
            statisticRangeExpectation:
              statistic: MEAN
              minValue: 5
              maxValue: 15
              strictMinEnabled: true
              strictMaxEnabled: true
          # Per-row SQL condition: 0 < footprint_length <= 10.
          - column: footprint_length
            dimension: VALIDITY
            rowConditionExpectation:
              sqlExpression: footprint_length > 0 AND footprint_length <= 10
          # Table-level SQL condition: the table must not be empty.
          - dimension: VALIDITY
            tableConditionExpectation:
              sqlExpression: COUNT(*) > 0
          # SQL assertion: the statement selects rows with a null station_id.
          - dimension: VALIDITY
            sqlAssertion:
              sqlStatement: select * from bigquery-public-data.austin_bikeshare.bikeshare_stations where station_id is null
      project: my-project-name

Import

Datascan can be imported using any of these accepted formats:

  • projects/{{project}}/locations/{{location}}/dataScans/{{data_scan_id}}

  • {{project}}/{{location}}/{{data_scan_id}}

  • {{location}}/{{data_scan_id}}

  • {{data_scan_id}}

When using the pulumi import command, Datascan can be imported using one of the formats above. For example:

$ pulumi import gcp:dataplex/datascan:Datascan default projects/{{project}}/locations/{{location}}/dataScans/{{data_scan_id}}
$ pulumi import gcp:dataplex/datascan:Datascan default {{project}}/{{location}}/{{data_scan_id}}
$ pulumi import gcp:dataplex/datascan:Datascan default {{location}}/{{data_scan_id}}
$ pulumi import gcp:dataplex/datascan:Datascan default {{data_scan_id}}

Properties

Link copied to clipboard
val createTime: Output<String>

The time when the scan was created.

Link copied to clipboard
val data: Output<DatascanData>

The data source for DataScan. Structure is documented below.

Link copied to clipboard

DataProfileScan related setting.

Link copied to clipboard

DataQualityScan related setting.

Link copied to clipboard
val dataScanId: Output<String>

DataScan identifier. Must contain only lowercase letters, numbers and hyphens. Must start with a letter. Must end with a number or a letter.

Link copied to clipboard
val description: Output<String>?

Description of the scan.

Link copied to clipboard
val displayName: Output<String>?

User friendly display name.

Link copied to clipboard

All of labels (key/value pairs) present on the resource in GCP, including the labels configured through Pulumi, other clients and services.

Link copied to clipboard

DataScan execution settings. Structure is documented below.

Link copied to clipboard

Status of the data scan execution. Structure is documented below.

Link copied to clipboard
val id: Output<String>
Link copied to clipboard
val labels: Output<Map<String, String>>?

User-defined labels for the scan. A list of key->value pairs. Note: This field is non-authoritative, and will only manage the labels present in your configuration. Please refer to the field 'effective_labels' for all of the labels present on the resource.

Link copied to clipboard
val location: Output<String>

The location where the data scan should reside.

Link copied to clipboard
val name: Output<String>

The relative resource name of the scan, of the form: projects/{project}/locations/{locationId}/dataScans/{datascan_id}, where project refers to a project_id or project_number and locationId refers to a GCP region.

Link copied to clipboard
val project: Output<String>
Link copied to clipboard
val pulumiChildResources: Set<KotlinResource>
Link copied to clipboard
val pulumiLabels: Output<Map<String, String>>

The combination of labels configured directly on the resource and default labels configured on the provider.

Link copied to clipboard
Link copied to clipboard
Link copied to clipboard
val state: Output<String>

Current state of the DataScan.

Link copied to clipboard
val type: Output<String>

The type of DataScan.

Link copied to clipboard
val uid: Output<String>

System generated globally unique ID for the scan. This ID will be different if the scan is deleted and re-created with the same name.

Link copied to clipboard
val updateTime: Output<String>

The time when the scan was last updated.

Link copied to clipboard
val urn: Output<String>