DatascanArgs

data class DatascanArgs(val data: Output<DatascanDataArgs>? = null, val dataProfileSpec: Output<DatascanDataProfileSpecArgs>? = null, val dataQualitySpec: Output<DatascanDataQualitySpecArgs>? = null, val dataScanId: Output<String>? = null, val description: Output<String>? = null, val displayName: Output<String>? = null, val executionSpec: Output<DatascanExecutionSpecArgs>? = null, val labels: Output<Map<String, String>>? = null, val location: Output<String>? = null, val project: Output<String>? = null) : ConvertibleToJava<DatascanArgs>

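Represents a user-visible job which provides the insights for the related data source. To get more information about Datascan, see the API documentation (https://cloud.google.com/dataplex/docs/reference/rest) and the official Dataplex documentation (https://cloud.google.com/dataplex/docs).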

Example Usage

Dataplex Datascan Basic Profile

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const basicProfile = new gcp.dataplex.Datascan("basic_profile", {
location: "us-central1",
dataScanId: "dataprofile-basic",
data: {
resource: "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare",
},
executionSpec: {
trigger: {
onDemand: {},
},
},
dataProfileSpec: {},
project: "my-project-name",
});
import pulumi
import pulumi_gcp as gcp
basic_profile = gcp.dataplex.Datascan("basic_profile",
location="us-central1",
data_scan_id="dataprofile-basic",
data={
"resource": "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare",
},
execution_spec={
"trigger": {
"on_demand": {},
},
},
data_profile_spec={},
project="my-project-name")
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var basicProfile = new Gcp.DataPlex.Datascan("basic_profile", new()
{
Location = "us-central1",
DataScanId = "dataprofile-basic",
Data = new Gcp.DataPlex.Inputs.DatascanDataArgs
{
Resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare",
},
ExecutionSpec = new Gcp.DataPlex.Inputs.DatascanExecutionSpecArgs
{
Trigger = new Gcp.DataPlex.Inputs.DatascanExecutionSpecTriggerArgs
{
OnDemand = null,
},
},
DataProfileSpec = null,
Project = "my-project-name",
});
});
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v7/go/gcp/dataplex"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := dataplex.NewDatascan(ctx, "basic_profile", &dataplex.DatascanArgs{
Location: pulumi.String("us-central1"),
DataScanId: pulumi.String("dataprofile-basic"),
Data: &dataplex.DatascanDataArgs{
Resource: pulumi.String("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare"),
},
ExecutionSpec: &dataplex.DatascanExecutionSpecArgs{
Trigger: &dataplex.DatascanExecutionSpecTriggerArgs{
OnDemand: nil,
},
},
DataProfileSpec: nil,
Project: pulumi.String("my-project-name"),
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataplex.Datascan;
import com.pulumi.gcp.dataplex.DatascanArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerOnDemandArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var basicProfile = new Datascan("basicProfile", DatascanArgs.builder()
.location("us-central1")
.dataScanId("dataprofile-basic")
.data(DatascanDataArgs.builder()
.resource("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare")
.build())
.executionSpec(DatascanExecutionSpecArgs.builder()
.trigger(DatascanExecutionSpecTriggerArgs.builder()
.onDemand()
.build())
.build())
.dataProfileSpec()
.project("my-project-name")
.build());
}
}
resources:
basicProfile:
type: gcp:dataplex:Datascan
name: basic_profile
properties:
location: us-central1
dataScanId: dataprofile-basic
data:
resource: //bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare
executionSpec:
trigger:
onDemand: {}
dataProfileSpec: {}
project: my-project-name

Dataplex Datascan Full Profile

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const source = new gcp.bigquery.Dataset("source", {
datasetId: "dataplex_dataset",
friendlyName: "test",
description: "This is a test description",
location: "US",
deleteContentsOnDestroy: true,
});
const fullProfile = new gcp.dataplex.Datascan("full_profile", {
location: "us-central1",
displayName: "Full Datascan Profile",
dataScanId: "dataprofile-full",
description: "Example resource - Full Datascan Profile",
labels: {
author: "billing",
},
data: {
resource: "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare",
},
executionSpec: {
trigger: {
schedule: {
cron: "TZ=America/New_York 1 1 * * *",
},
},
},
dataProfileSpec: {
samplingPercent: 80,
rowFilter: "word_count > 10",
includeFields: {
fieldNames: ["word_count"],
},
excludeFields: {
fieldNames: ["property_type"],
},
postScanActions: {
bigqueryExport: {
resultsTable: "//bigquery.googleapis.com/projects/my-project-name/datasets/dataplex_dataset/tables/profile_export",
},
},
},
project: "my-project-name",
}, {
dependsOn: [source],
});
import pulumi
import pulumi_gcp as gcp
source = gcp.bigquery.Dataset("source",
dataset_id="dataplex_dataset",
friendly_name="test",
description="This is a test description",
location="US",
delete_contents_on_destroy=True)
full_profile = gcp.dataplex.Datascan("full_profile",
location="us-central1",
display_name="Full Datascan Profile",
data_scan_id="dataprofile-full",
description="Example resource - Full Datascan Profile",
labels={
"author": "billing",
},
data={
"resource": "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare",
},
execution_spec={
"trigger": {
"schedule": {
"cron": "TZ=America/New_York 1 1 * * *",
},
},
},
data_profile_spec={
"sampling_percent": 80,
"row_filter": "word_count > 10",
"include_fields": {
"field_names": ["word_count"],
},
"exclude_fields": {
"field_names": ["property_type"],
},
"post_scan_actions": {
"bigquery_export": {
"results_table": "//bigquery.googleapis.com/projects/my-project-name/datasets/dataplex_dataset/tables/profile_export",
},
},
},
project="my-project-name",
opts = pulumi.ResourceOptions(depends_on=[source]))
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var source = new Gcp.BigQuery.Dataset("source", new()
{
DatasetId = "dataplex_dataset",
FriendlyName = "test",
Description = "This is a test description",
Location = "US",
DeleteContentsOnDestroy = true,
});
var fullProfile = new Gcp.DataPlex.Datascan("full_profile", new()
{
Location = "us-central1",
DisplayName = "Full Datascan Profile",
DataScanId = "dataprofile-full",
Description = "Example resource - Full Datascan Profile",
Labels =
{
{ "author", "billing" },
},
Data = new Gcp.DataPlex.Inputs.DatascanDataArgs
{
Resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare",
},
ExecutionSpec = new Gcp.DataPlex.Inputs.DatascanExecutionSpecArgs
{
Trigger = new Gcp.DataPlex.Inputs.DatascanExecutionSpecTriggerArgs
{
Schedule = new Gcp.DataPlex.Inputs.DatascanExecutionSpecTriggerScheduleArgs
{
Cron = "TZ=America/New_York 1 1 * * *",
},
},
},
DataProfileSpec = new Gcp.DataPlex.Inputs.DatascanDataProfileSpecArgs
{
SamplingPercent = 80,
RowFilter = "word_count > 10",
IncludeFields = new Gcp.DataPlex.Inputs.DatascanDataProfileSpecIncludeFieldsArgs
{
FieldNames = new[]
{
"word_count",
},
},
ExcludeFields = new Gcp.DataPlex.Inputs.DatascanDataProfileSpecExcludeFieldsArgs
{
FieldNames = new[]
{
"property_type",
},
},
PostScanActions = new Gcp.DataPlex.Inputs.DatascanDataProfileSpecPostScanActionsArgs
{
BigqueryExport = new Gcp.DataPlex.Inputs.DatascanDataProfileSpecPostScanActionsBigqueryExportArgs
{
ResultsTable = "//bigquery.googleapis.com/projects/my-project-name/datasets/dataplex_dataset/tables/profile_export",
},
},
},
Project = "my-project-name",
}, new CustomResourceOptions
{
DependsOn =
{
source,
},
});
});
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v7/go/gcp/bigquery"
"github.com/pulumi/pulumi-gcp/sdk/v7/go/gcp/dataplex"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
source, err := bigquery.NewDataset(ctx, "source", &bigquery.DatasetArgs{
DatasetId: pulumi.String("dataplex_dataset"),
FriendlyName: pulumi.String("test"),
Description: pulumi.String("This is a test description"),
Location: pulumi.String("US"),
DeleteContentsOnDestroy: pulumi.Bool(true),
})
if err != nil {
return err
}
_, err = dataplex.NewDatascan(ctx, "full_profile", &dataplex.DatascanArgs{
Location: pulumi.String("us-central1"),
DisplayName: pulumi.String("Full Datascan Profile"),
DataScanId: pulumi.String("dataprofile-full"),
Description: pulumi.String("Example resource - Full Datascan Profile"),
Labels: pulumi.StringMap{
"author": pulumi.String("billing"),
},
Data: &dataplex.DatascanDataArgs{
Resource: pulumi.String("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare"),
},
ExecutionSpec: &dataplex.DatascanExecutionSpecArgs{
Trigger: &dataplex.DatascanExecutionSpecTriggerArgs{
Schedule: &dataplex.DatascanExecutionSpecTriggerScheduleArgs{
Cron: pulumi.String("TZ=America/New_York 1 1 * * *"),
},
},
},
DataProfileSpec: &dataplex.DatascanDataProfileSpecArgs{
SamplingPercent: pulumi.Float64(80),
RowFilter: pulumi.String("word_count > 10"),
IncludeFields: &dataplex.DatascanDataProfileSpecIncludeFieldsArgs{
FieldNames: pulumi.StringArray{
pulumi.String("word_count"),
},
},
ExcludeFields: &dataplex.DatascanDataProfileSpecExcludeFieldsArgs{
FieldNames: pulumi.StringArray{
pulumi.String("property_type"),
},
},
PostScanActions: &dataplex.DatascanDataProfileSpecPostScanActionsArgs{
BigqueryExport: &dataplex.DatascanDataProfileSpecPostScanActionsBigqueryExportArgs{
ResultsTable: pulumi.String("//bigquery.googleapis.com/projects/my-project-name/datasets/dataplex_dataset/tables/profile_export"),
},
},
},
Project: pulumi.String("my-project-name"),
}, pulumi.DependsOn([]pulumi.Resource{
source,
}))
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.dataplex.Datascan;
import com.pulumi.gcp.dataplex.DatascanArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerScheduleArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecIncludeFieldsArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecExcludeFieldsArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecPostScanActionsArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecPostScanActionsBigqueryExportArgs;
import com.pulumi.resources.CustomResourceOptions;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var source = new Dataset("source", DatasetArgs.builder()
.datasetId("dataplex_dataset")
.friendlyName("test")
.description("This is a test description")
.location("US")
.deleteContentsOnDestroy(true)
.build());
var fullProfile = new Datascan("fullProfile", DatascanArgs.builder()
.location("us-central1")
.displayName("Full Datascan Profile")
.dataScanId("dataprofile-full")
.description("Example resource - Full Datascan Profile")
.labels(Map.of("author", "billing"))
.data(DatascanDataArgs.builder()
.resource("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare")
.build())
.executionSpec(DatascanExecutionSpecArgs.builder()
.trigger(DatascanExecutionSpecTriggerArgs.builder()
.schedule(DatascanExecutionSpecTriggerScheduleArgs.builder()
.cron("TZ=America/New_York 1 1 * * *")
.build())
.build())
.build())
.dataProfileSpec(DatascanDataProfileSpecArgs.builder()
.samplingPercent(80)
.rowFilter("word_count > 10")
.includeFields(DatascanDataProfileSpecIncludeFieldsArgs.builder()
.fieldNames("word_count")
.build())
.excludeFields(DatascanDataProfileSpecExcludeFieldsArgs.builder()
.fieldNames("property_type")
.build())
.postScanActions(DatascanDataProfileSpecPostScanActionsArgs.builder()
.bigqueryExport(DatascanDataProfileSpecPostScanActionsBigqueryExportArgs.builder()
.resultsTable("//bigquery.googleapis.com/projects/my-project-name/datasets/dataplex_dataset/tables/profile_export")
.build())
.build())
.build())
.project("my-project-name")
.build(), CustomResourceOptions.builder()
.dependsOn(source)
.build());
}
}
resources:
fullProfile:
type: gcp:dataplex:Datascan
name: full_profile
properties:
location: us-central1
displayName: Full Datascan Profile
dataScanId: dataprofile-full
description: Example resource - Full Datascan Profile
labels:
author: billing
data:
resource: //bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare
executionSpec:
trigger:
schedule:
cron: TZ=America/New_York 1 1 * * *
dataProfileSpec:
samplingPercent: 80
rowFilter: word_count > 10
includeFields:
fieldNames:
- word_count
excludeFields:
fieldNames:
- property_type
postScanActions:
bigqueryExport:
resultsTable: //bigquery.googleapis.com/projects/my-project-name/datasets/dataplex_dataset/tables/profile_export
project: my-project-name
options:
dependson:
- ${source}
source:
type: gcp:bigquery:Dataset
properties:
datasetId: dataplex_dataset
friendlyName: test
description: This is a test description
location: US
deleteContentsOnDestroy: true

Dataplex Datascan Basic Quality

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const basicQuality = new gcp.dataplex.Datascan("basic_quality", {
location: "us-central1",
dataScanId: "dataquality-basic",
data: {
resource: "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare",
},
executionSpec: {
trigger: {
onDemand: {},
},
},
dataQualitySpec: {
rules: [{
dimension: "VALIDITY",
name: "rule1",
description: "rule 1 for validity dimension",
tableConditionExpectation: {
sqlExpression: "COUNT(*) > 0",
},
}],
},
project: "my-project-name",
});
import pulumi
import pulumi_gcp as gcp
basic_quality = gcp.dataplex.Datascan("basic_quality",
location="us-central1",
data_scan_id="dataquality-basic",
data={
"resource": "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare",
},
execution_spec={
"trigger": {
"on_demand": {},
},
},
data_quality_spec={
"rules": [{
"dimension": "VALIDITY",
"name": "rule1",
"description": "rule 1 for validity dimension",
"table_condition_expectation": {
"sql_expression": "COUNT(*) > 0",
},
}],
},
project="my-project-name")
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var basicQuality = new Gcp.DataPlex.Datascan("basic_quality", new()
{
Location = "us-central1",
DataScanId = "dataquality-basic",
Data = new Gcp.DataPlex.Inputs.DatascanDataArgs
{
Resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare",
},
ExecutionSpec = new Gcp.DataPlex.Inputs.DatascanExecutionSpecArgs
{
Trigger = new Gcp.DataPlex.Inputs.DatascanExecutionSpecTriggerArgs
{
OnDemand = null,
},
},
DataQualitySpec = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecArgs
{
Rules = new[]
{
new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
{
Dimension = "VALIDITY",
Name = "rule1",
Description = "rule 1 for validity dimension",
TableConditionExpectation = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleTableConditionExpectationArgs
{
SqlExpression = "COUNT(*) > 0",
},
},
},
},
Project = "my-project-name",
});
});
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v7/go/gcp/dataplex"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := dataplex.NewDatascan(ctx, "basic_quality", &dataplex.DatascanArgs{
Location: pulumi.String("us-central1"),
DataScanId: pulumi.String("dataquality-basic"),
Data: &dataplex.DatascanDataArgs{
Resource: pulumi.String("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare"),
},
ExecutionSpec: &dataplex.DatascanExecutionSpecArgs{
Trigger: &dataplex.DatascanExecutionSpecTriggerArgs{
OnDemand: nil,
},
},
DataQualitySpec: &dataplex.DatascanDataQualitySpecArgs{
Rules: dataplex.DatascanDataQualitySpecRuleArray{
&dataplex.DatascanDataQualitySpecRuleArgs{
Dimension: pulumi.String("VALIDITY"),
Name: pulumi.String("rule1"),
Description: pulumi.String("rule 1 for validity dimension"),
TableConditionExpectation: &dataplex.DatascanDataQualitySpecRuleTableConditionExpectationArgs{
SqlExpression: pulumi.String("COUNT(*) > 0"),
},
},
},
},
Project: pulumi.String("my-project-name"),
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataplex.Datascan;
import com.pulumi.gcp.dataplex.DatascanArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerOnDemandArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var basicQuality = new Datascan("basicQuality", DatascanArgs.builder()
.location("us-central1")
.dataScanId("dataquality-basic")
.data(DatascanDataArgs.builder()
.resource("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare")
.build())
.executionSpec(DatascanExecutionSpecArgs.builder()
.trigger(DatascanExecutionSpecTriggerArgs.builder()
.onDemand()
.build())
.build())
.dataQualitySpec(DatascanDataQualitySpecArgs.builder()
.rules(DatascanDataQualitySpecRuleArgs.builder()
.dimension("VALIDITY")
.name("rule1")
.description("rule 1 for validity dimension")
.tableConditionExpectation(DatascanDataQualitySpecRuleTableConditionExpectationArgs.builder()
.sqlExpression("COUNT(*) > 0")
.build())
.build())
.build())
.project("my-project-name")
.build());
}
}
resources:
basicQuality:
type: gcp:dataplex:Datascan
name: basic_quality
properties:
location: us-central1
dataScanId: dataquality-basic
data:
resource: //bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare
executionSpec:
trigger:
onDemand: {}
dataQualitySpec:
rules:
- dimension: VALIDITY
name: rule1
description: rule 1 for validity dimension
tableConditionExpectation:
sqlExpression: COUNT(*) > 0
project: my-project-name

Dataplex Datascan Full Quality

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const fullQuality = new gcp.dataplex.Datascan("full_quality", {
location: "us-central1",
displayName: "Full Datascan Quality",
dataScanId: "dataquality-full",
description: "Example resource - Full Datascan Quality",
labels: {
author: "billing",
},
data: {
resource: "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations",
},
executionSpec: {
trigger: {
schedule: {
cron: "TZ=America/New_York 1 1 * * *",
},
},
field: "modified_date",
},
dataQualitySpec: {
samplingPercent: 5,
rowFilter: "station_id > 1000",
rules: [
{
column: "address",
dimension: "VALIDITY",
threshold: 0.99,
nonNullExpectation: {},
},
{
column: "council_district",
dimension: "VALIDITY",
ignoreNull: true,
threshold: 0.9,
rangeExpectation: {
minValue: "1",
maxValue: "10",
strictMinEnabled: true,
strictMaxEnabled: false,
},
},
{
column: "power_type",
dimension: "VALIDITY",
ignoreNull: false,
regexExpectation: {
regex: ".*solar.*",
},
},
{
column: "property_type",
dimension: "VALIDITY",
ignoreNull: false,
setExpectation: {
values: [
"sidewalk",
"parkland",
],
},
},
{
column: "address",
dimension: "UNIQUENESS",
uniquenessExpectation: {},
},
{
column: "number_of_docks",
dimension: "VALIDITY",
statisticRangeExpectation: {
statistic: "MEAN",
minValue: "5",
maxValue: "15",
strictMinEnabled: true,
strictMaxEnabled: true,
},
},
{
column: "footprint_length",
dimension: "VALIDITY",
rowConditionExpectation: {
sqlExpression: "footprint_length > 0 AND footprint_length <= 10",
},
},
{
dimension: "VALIDITY",
tableConditionExpectation: {
sqlExpression: "COUNT(*) > 0",
},
},
{
dimension: "VALIDITY",
sqlAssertion: {
sqlStatement: "select * from bigquery-public-data.austin_bikeshare.bikeshare_stations where station_id is null",
},
},
],
},
project: "my-project-name",
});
import pulumi
import pulumi_gcp as gcp
full_quality = gcp.dataplex.Datascan("full_quality",
location="us-central1",
display_name="Full Datascan Quality",
data_scan_id="dataquality-full",
description="Example resource - Full Datascan Quality",
labels={
"author": "billing",
},
data={
"resource": "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations",
},
execution_spec={
"trigger": {
"schedule": {
"cron": "TZ=America/New_York 1 1 * * *",
},
},
"field": "modified_date",
},
data_quality_spec={
"sampling_percent": 5,
"row_filter": "station_id > 1000",
"rules": [
{
"column": "address",
"dimension": "VALIDITY",
"threshold": 0.99,
"non_null_expectation": {},
},
{
"column": "council_district",
"dimension": "VALIDITY",
"ignore_null": True,
"threshold": 0.9,
"range_expectation": {
"min_value": "1",
"max_value": "10",
"strict_min_enabled": True,
"strict_max_enabled": False,
},
},
{
"column": "power_type",
"dimension": "VALIDITY",
"ignore_null": False,
"regex_expectation": {
"regex": ".*solar.*",
},
},
{
"column": "property_type",
"dimension": "VALIDITY",
"ignore_null": False,
"set_expectation": {
"values": [
"sidewalk",
"parkland",
],
},
},
{
"column": "address",
"dimension": "UNIQUENESS",
"uniqueness_expectation": {},
},
{
"column": "number_of_docks",
"dimension": "VALIDITY",
"statistic_range_expectation": {
"statistic": "MEAN",
"min_value": "5",
"max_value": "15",
"strict_min_enabled": True,
"strict_max_enabled": True,
},
},
{
"column": "footprint_length",
"dimension": "VALIDITY",
"row_condition_expectation": {
"sql_expression": "footprint_length > 0 AND footprint_length <= 10",
},
},
{
"dimension": "VALIDITY",
"table_condition_expectation": {
"sql_expression": "COUNT(*) > 0",
},
},
{
"dimension": "VALIDITY",
"sql_assertion": {
"sql_statement": "select * from bigquery-public-data.austin_bikeshare.bikeshare_stations where station_id is null",
},
},
],
},
project="my-project-name")
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var fullQuality = new Gcp.DataPlex.Datascan("full_quality", new()
{
Location = "us-central1",
DisplayName = "Full Datascan Quality",
DataScanId = "dataquality-full",
Description = "Example resource - Full Datascan Quality",
Labels =
{
{ "author", "billing" },
},
Data = new Gcp.DataPlex.Inputs.DatascanDataArgs
{
Resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations",
},
ExecutionSpec = new Gcp.DataPlex.Inputs.DatascanExecutionSpecArgs
{
Trigger = new Gcp.DataPlex.Inputs.DatascanExecutionSpecTriggerArgs
{
Schedule = new Gcp.DataPlex.Inputs.DatascanExecutionSpecTriggerScheduleArgs
{
Cron = "TZ=America/New_York 1 1 * * *",
},
},
Field = "modified_date",
},
DataQualitySpec = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecArgs
{
SamplingPercent = 5,
RowFilter = "station_id > 1000",
Rules = new[]
{
new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
{
Column = "address",
Dimension = "VALIDITY",
Threshold = 0.99,
NonNullExpectation = null,
},
new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
{
Column = "council_district",
Dimension = "VALIDITY",
IgnoreNull = true,
Threshold = 0.9,
RangeExpectation = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleRangeExpectationArgs
{
MinValue = "1",
MaxValue = "10",
StrictMinEnabled = true,
StrictMaxEnabled = false,
},
},
new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
{
Column = "power_type",
Dimension = "VALIDITY",
IgnoreNull = false,
RegexExpectation = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleRegexExpectationArgs
{
Regex = ".*solar.*",
},
},
new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
{
Column = "property_type",
Dimension = "VALIDITY",
IgnoreNull = false,
SetExpectation = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleSetExpectationArgs
{
Values = new[]
{
"sidewalk",
"parkland",
},
},
},
new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
{
Column = "address",
Dimension = "UNIQUENESS",
UniquenessExpectation = null,
},
new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
{
Column = "number_of_docks",
Dimension = "VALIDITY",
StatisticRangeExpectation = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleStatisticRangeExpectationArgs
{
Statistic = "MEAN",
MinValue = "5",
MaxValue = "15",
StrictMinEnabled = true,
StrictMaxEnabled = true,
},
},
new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
{
Column = "footprint_length",
Dimension = "VALIDITY",
RowConditionExpectation = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleRowConditionExpectationArgs
{
SqlExpression = "footprint_length > 0 AND footprint_length <= 10",
},
},
new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
{
Dimension = "VALIDITY",
TableConditionExpectation = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleTableConditionExpectationArgs
{
SqlExpression = "COUNT(*) > 0",
},
},
new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleArgs
{
Dimension = "VALIDITY",
SqlAssertion = new Gcp.DataPlex.Inputs.DatascanDataQualitySpecRuleSqlAssertionArgs
{
SqlStatement = "select * from bigquery-public-data.austin_bikeshare.bikeshare_stations where station_id is null",
},
},
},
},
Project = "my-project-name",
});
});
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v7/go/gcp/dataplex"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := dataplex.NewDatascan(ctx, "full_quality", &dataplex.DatascanArgs{
Location: pulumi.String("us-central1"),
DisplayName: pulumi.String("Full Datascan Quality"),
DataScanId: pulumi.String("dataquality-full"),
Description: pulumi.String("Example resource - Full Datascan Quality"),
Labels: pulumi.StringMap{
"author": pulumi.String("billing"),
},
Data: &dataplex.DatascanDataArgs{
Resource: pulumi.String("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations"),
},
ExecutionSpec: &dataplex.DatascanExecutionSpecArgs{
Trigger: &dataplex.DatascanExecutionSpecTriggerArgs{
Schedule: &dataplex.DatascanExecutionSpecTriggerScheduleArgs{
Cron: pulumi.String("TZ=America/New_York 1 1 * * *"),
},
},
Field: pulumi.String("modified_date"),
},
DataQualitySpec: &dataplex.DatascanDataQualitySpecArgs{
SamplingPercent: pulumi.Float64(5),
RowFilter: pulumi.String("station_id > 1000"),
Rules: dataplex.DatascanDataQualitySpecRuleArray{
&dataplex.DatascanDataQualitySpecRuleArgs{
Column: pulumi.String("address"),
Dimension: pulumi.String("VALIDITY"),
Threshold: pulumi.Float64(0.99),
NonNullExpectation: nil,
},
&dataplex.DatascanDataQualitySpecRuleArgs{
Column: pulumi.String("council_district"),
Dimension: pulumi.String("VALIDITY"),
IgnoreNull: pulumi.Bool(true),
Threshold: pulumi.Float64(0.9),
RangeExpectation: &dataplex.DatascanDataQualitySpecRuleRangeExpectationArgs{
MinValue: pulumi.String("1"),
MaxValue: pulumi.String("10"),
StrictMinEnabled: pulumi.Bool(true),
StrictMaxEnabled: pulumi.Bool(false),
},
},
&dataplex.DatascanDataQualitySpecRuleArgs{
Column: pulumi.String("power_type"),
Dimension: pulumi.String("VALIDITY"),
IgnoreNull: pulumi.Bool(false),
RegexExpectation: &dataplex.DatascanDataQualitySpecRuleRegexExpectationArgs{
Regex: pulumi.String(".*solar.*"),
},
},
&dataplex.DatascanDataQualitySpecRuleArgs{
Column: pulumi.String("property_type"),
Dimension: pulumi.String("VALIDITY"),
IgnoreNull: pulumi.Bool(false),
SetExpectation: &dataplex.DatascanDataQualitySpecRuleSetExpectationArgs{
Values: pulumi.StringArray{
pulumi.String("sidewalk"),
pulumi.String("parkland"),
},
},
},
&dataplex.DatascanDataQualitySpecRuleArgs{
Column: pulumi.String("address"),
Dimension: pulumi.String("UNIQUENESS"),
UniquenessExpectation: nil,
},
&dataplex.DatascanDataQualitySpecRuleArgs{
Column: pulumi.String("number_of_docks"),
Dimension: pulumi.String("VALIDITY"),
StatisticRangeExpectation: &dataplex.DatascanDataQualitySpecRuleStatisticRangeExpectationArgs{
Statistic: pulumi.String("MEAN"),
MinValue: pulumi.String("5"),
MaxValue: pulumi.String("15"),
StrictMinEnabled: pulumi.Bool(true),
StrictMaxEnabled: pulumi.Bool(true),
},
},
&dataplex.DatascanDataQualitySpecRuleArgs{
Column: pulumi.String("footprint_length"),
Dimension: pulumi.String("VALIDITY"),
RowConditionExpectation: &dataplex.DatascanDataQualitySpecRuleRowConditionExpectationArgs{
SqlExpression: pulumi.String("footprint_length > 0 AND footprint_length <= 10"),
},
},
&dataplex.DatascanDataQualitySpecRuleArgs{
Dimension: pulumi.String("VALIDITY"),
TableConditionExpectation: &dataplex.DatascanDataQualitySpecRuleTableConditionExpectationArgs{
SqlExpression: pulumi.String("COUNT(*) > 0"),
},
},
&dataplex.DatascanDataQualitySpecRuleArgs{
Dimension: pulumi.String("VALIDITY"),
SqlAssertion: &dataplex.DatascanDataQualitySpecRuleSqlAssertionArgs{
SqlStatement: pulumi.String("select * from bigquery-public-data.austin_bikeshare.bikeshare_stations where station_id is null"),
},
},
},
},
Project: pulumi.String("my-project-name"),
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataplex.Datascan;
import com.pulumi.gcp.dataplex.DatascanArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerScheduleArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
/**
 * Example Pulumi program: a Dataplex data-quality scan ("Full Datascan Quality")
 * over the public {@code austin_bikeshare.bikeshare_stations} BigQuery table.
 *
 * The scan runs on a cron schedule, samples 5% of the rows matching the row
 * filter, and evaluates one rule of each supported expectation kind.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the {@code fullQuality} Datascan resource.
     *
     * @param ctx the Pulumi deployment context supplied by {@link Pulumi#run}
     */
    public static void stack(Context ctx) {
        var fullQuality = new Datascan("fullQuality", DatascanArgs.builder()
            .location("us-central1")
            .displayName("Full Datascan Quality")
            .dataScanId("dataquality-full")
            .description("Example resource - Full Datascan Quality")
            .labels(Map.of("author", "billing"))
            .data(DatascanDataArgs.builder()
                .resource("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations")
                .build())
            .executionSpec(DatascanExecutionSpecArgs.builder()
                .trigger(DatascanExecutionSpecTriggerArgs.builder()
                    .schedule(DatascanExecutionSpecTriggerScheduleArgs.builder()
                        .cron("TZ=America/New_York 1 1 * * *")
                        .build())
                    .build())
                .field("modified_date")
                .build())
            .dataQualitySpec(DatascanDataQualitySpecArgs.builder()
                // Sampling percentage is a floating-point property.
                .samplingPercent(5.0)
                // SQL boolean expression; the comparison operator is required.
                .rowFilter("station_id > 1000")
                .rules(
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("address")
                        .dimension("VALIDITY")
                        .threshold(0.99)
                        // An empty args object selects this expectation kind.
                        .nonNullExpectation(DatascanDataQualitySpecRuleNonNullExpectationArgs.builder()
                            .build())
                        .build(),
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("council_district")
                        .dimension("VALIDITY")
                        .ignoreNull(true)
                        .threshold(0.9)
                        .rangeExpectation(DatascanDataQualitySpecRuleRangeExpectationArgs.builder()
                            // Range bounds are string-typed, as in the Go variant of this example.
                            .minValue("1")
                            .maxValue("10")
                            .strictMinEnabled(true)
                            .strictMaxEnabled(false)
                            .build())
                        .build(),
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("power_type")
                        .dimension("VALIDITY")
                        .ignoreNull(false)
                        .regexExpectation(DatascanDataQualitySpecRuleRegexExpectationArgs.builder()
                            .regex(".*solar.*")
                            .build())
                        .build(),
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("property_type")
                        .dimension("VALIDITY")
                        .ignoreNull(false)
                        .setExpectation(DatascanDataQualitySpecRuleSetExpectationArgs.builder()
                            .values(
                                "sidewalk",
                                "parkland")
                            .build())
                        .build(),
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("address")
                        .dimension("UNIQUENESS")
                        .uniquenessExpectation(DatascanDataQualitySpecRuleUniquenessExpectationArgs.builder()
                            .build())
                        .build(),
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("number_of_docks")
                        .dimension("VALIDITY")
                        .statisticRangeExpectation(DatascanDataQualitySpecRuleStatisticRangeExpectationArgs.builder()
                            .statistic("MEAN")
                            .minValue("5")
                            .maxValue("15")
                            .strictMinEnabled(true)
                            .strictMaxEnabled(true)
                            .build())
                        .build(),
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("footprint_length")
                        .dimension("VALIDITY")
                        .rowConditionExpectation(DatascanDataQualitySpecRuleRowConditionExpectationArgs.builder()
                            .sqlExpression("footprint_length > 0 AND footprint_length <= 10")
                            .build())
                        .build(),
                    DatascanDataQualitySpecRuleArgs.builder()
                        .dimension("VALIDITY")
                        .tableConditionExpectation(DatascanDataQualitySpecRuleTableConditionExpectationArgs.builder()
                            .sqlExpression("COUNT(*) > 0")
                            .build())
                        .build(),
                    DatascanDataQualitySpecRuleArgs.builder()
                        .dimension("VALIDITY")
                        .sqlAssertion(DatascanDataQualitySpecRuleSqlAssertionArgs.builder()
                            .sqlStatement("select * from bigquery-public-data.austin_bikeshare.bikeshare_stations where station_id is null")
                            .build())
                        .build())
                .build())
            .project("my-project-name")
            .build());
    }
}
# Pulumi YAML variant of the "Full Datascan Quality" example: a scheduled
# Dataplex data-quality scan with one rule per expectation kind.
resources:
  fullQuality:
    type: gcp:dataplex:Datascan
    name: full_quality
    properties:
      location: us-central1
      displayName: Full Datascan Quality
      dataScanId: dataquality-full
      description: Example resource - Full Datascan Quality
      labels:
        author: billing
      data:
        resource: //bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations
      executionSpec:
        trigger:
          schedule:
            cron: TZ=America/New_York 1 1 * * *
        field: modified_date
      dataQualitySpec:
        samplingPercent: 5
        # SQL boolean expression; the comparison operator is required.
        rowFilter: station_id > 1000
        rules:
          - column: address
            dimension: VALIDITY
            threshold: 0.99
            nonNullExpectation: {}
          - column: council_district
            dimension: VALIDITY
            ignoreNull: true
            threshold: 0.9
            rangeExpectation:
              minValue: 1
              maxValue: 10
              strictMinEnabled: true
              strictMaxEnabled: false
          - column: power_type
            dimension: VALIDITY
            ignoreNull: false
            regexExpectation:
              regex: .*solar.*
          - column: property_type
            dimension: VALIDITY
            ignoreNull: false
            setExpectation:
              values:
                - sidewalk
                - parkland
          - column: address
            dimension: UNIQUENESS
            uniquenessExpectation: {}
          - column: number_of_docks
            dimension: VALIDITY
            statisticRangeExpectation:
              statistic: MEAN
              minValue: 5
              maxValue: 15
              strictMinEnabled: true
              strictMaxEnabled: true
          - column: footprint_length
            dimension: VALIDITY
            rowConditionExpectation:
              sqlExpression: footprint_length > 0 AND footprint_length <= 10
          - dimension: VALIDITY
            tableConditionExpectation:
              sqlExpression: COUNT(*) > 0
          - dimension: VALIDITY
            sqlAssertion:
              sqlStatement: select * from bigquery-public-data.austin_bikeshare.bikeshare_stations where station_id is null
      project: my-project-name

Import

Datascan can be imported using any of these accepted formats:

  • projects/{{project}}/locations/{{location}}/dataScans/{{data_scan_id}}

  • {{project}}/{{location}}/{{data_scan_id}}

  • {{location}}/{{data_scan_id}}

  • {{data_scan_id}}

When using the pulumi import command, Datascan can be imported using one of the formats above. For example:

$ pulumi import gcp:dataplex/datascan:Datascan default projects/{{project}}/locations/{{location}}/dataScans/{{data_scan_id}}
$ pulumi import gcp:dataplex/datascan:Datascan default {{project}}/{{location}}/{{data_scan_id}}
$ pulumi import gcp:dataplex/datascan:Datascan default {{location}}/{{data_scan_id}}
$ pulumi import gcp:dataplex/datascan:Datascan default {{data_scan_id}}

Constructors

Link copied to clipboard
constructor(data: Output<DatascanDataArgs>? = null, dataProfileSpec: Output<DatascanDataProfileSpecArgs>? = null, dataQualitySpec: Output<DatascanDataQualitySpecArgs>? = null, dataScanId: Output<String>? = null, description: Output<String>? = null, displayName: Output<String>? = null, executionSpec: Output<DatascanExecutionSpecArgs>? = null, labels: Output<Map<String, String>>? = null, location: Output<String>? = null, project: Output<String>? = null)

Properties

Link copied to clipboard
val data: Output<DatascanDataArgs>? = null

The data source for DataScan. Structure is documented below.

Link copied to clipboard
val dataProfileSpec: Output<DatascanDataProfileSpecArgs>? = null

DataProfileScan related setting.

Link copied to clipboard
val dataQualitySpec: Output<DatascanDataQualitySpecArgs>? = null

DataQualityScan related setting.

Link copied to clipboard
val dataScanId: Output<String>? = null

DataScan identifier. Must contain only lowercase letters, numbers and hyphens. Must start with a letter. Must end with a number or a letter.

Link copied to clipboard
val description: Output<String>? = null

Description of the scan.

Link copied to clipboard
val displayName: Output<String>? = null

User friendly display name.

Link copied to clipboard
val executionSpec: Output<DatascanExecutionSpecArgs>? = null

DataScan execution settings. Structure is documented below.

Link copied to clipboard
val labels: Output<Map<String, String>>? = null

User-defined labels for the scan. A list of key->value pairs. Note: This field is non-authoritative, and will only manage the labels present in your configuration. Please refer to the field 'effective_labels' for all of the labels present on the resource.

Link copied to clipboard
val location: Output<String>? = null

The location where the data scan should reside.

Link copied to clipboard
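val project: Output<String>? = null

The ID of the project in which the resource belongs. If it is not provided, the provider project is used.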

Functions

Link copied to clipboard
open override fun toJava(): DatascanArgs