Datascan
Represents a user-visible job which provides the insights for the related data source. To get more information about Datascan, see:
How-to Guides
Example Usage
Dataplex Datascan Basic Profile
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataplex.Datascan;
import com.pulumi.gcp.dataplex.DatascanArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerOnDemandArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
/**
 * Pulumi program that declares a minimal Dataplex data-profile scan.
 *
 * <p>The scan points at the public BigQuery table {@code samples.shakespeare}, carries an
 * empty profile spec, and uses an on-demand trigger.
 */
public class App {
    /** Hands the stack definition to the Pulumi runtime. */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the {@code basicProfile} Datascan resource.
     *
     * @param ctx Pulumi deployment context (not read by this example)
     */
    public static void stack(Context ctx) {
        var basicProfile = new Datascan("basicProfile", DatascanArgs.builder()
            .data(DatascanDataArgs.builder()
                .resource("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare")
                .build())
            // The builder setters take an Args value; an empty builder expresses
            // "profile spec present, no options set".
            .dataProfileSpec(DatascanDataProfileSpecArgs.builder()
                .build())
            .dataScanId("dataprofile-basic")
            .executionSpec(DatascanExecutionSpecArgs.builder()
                .trigger(DatascanExecutionSpecTriggerArgs.builder()
                    // Empty on-demand trigger block, passed explicitly for the same reason.
                    .onDemand(DatascanExecutionSpecTriggerOnDemandArgs.builder()
                        .build())
                    .build())
                .build())
            .location("us-central1")
            .project("my-project-name")
            .build());
    }
}
Content copied to clipboard
Dataplex Datascan Full Profile
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.dataplex.Datascan;
import com.pulumi.gcp.dataplex.DatascanArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerScheduleArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecIncludeFieldsArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecExcludeFieldsArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecPostScanActionsArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecPostScanActionsBigqueryExportArgs;
import com.pulumi.resources.CustomResourceOptions;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
/**
 * Pulumi program that declares a fully configured Dataplex data-profile scan.
 *
 * <p>It first creates a BigQuery dataset, then a scheduled profile scan over the public
 * {@code samples.shakespeare} table whose results are exported to a table in that dataset.
 */
public class App {
    /** Hands the stack definition to the Pulumi runtime. */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the {@code source} dataset and the {@code fullProfile} Datascan.
     *
     * @param ctx Pulumi deployment context (not read by this example)
     */
    public static void stack(Context ctx) {
        // Dataset that receives the exported profile results.
        var source = new Dataset("source", DatasetArgs.builder()
            .datasetId("dataplex_dataset")
            .friendlyName("test")
            .description("This is a test description")
            .location("US")
            .deleteContentsOnDestroy(true)
            .build());

        var fullProfile = new Datascan("fullProfile", DatascanArgs.builder()
            .location("us-central1")
            .displayName("Full Datascan Profile")
            .dataScanId("dataprofile-full")
            .description("Example resource - Full Datascan Profile")
            .labels(Map.of("author", "billing"))
            .data(DatascanDataArgs.builder()
                .resource("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare")
                .build())
            .executionSpec(DatascanExecutionSpecArgs.builder()
                .trigger(DatascanExecutionSpecTriggerArgs.builder()
                    .schedule(DatascanExecutionSpecTriggerScheduleArgs.builder()
                        .cron("TZ=America/New_York 1 1 * * *")
                        .build())
                    .build())
                .build())
            .dataProfileSpec(DatascanDataProfileSpecArgs.builder()
                // Floating-point literal: the SDK reference declares this setter as Double,
                // and an int literal does not box to Double.
                .samplingPercent(80.0)
                // The comparison operator had been stripped from this filter,
                // leaving an invalid SQL predicate.
                .rowFilter("word_count > 10")
                .includeFields(DatascanDataProfileSpecIncludeFieldsArgs.builder()
                    .fieldNames("word_count")
                    .build())
                .excludeFields(DatascanDataProfileSpecExcludeFieldsArgs.builder()
                    .fieldNames("property_type")
                    .build())
                .postScanActions(DatascanDataProfileSpecPostScanActionsArgs.builder()
                    .bigqueryExport(DatascanDataProfileSpecPostScanActionsBigqueryExportArgs.builder()
                        .resultsTable("//bigquery.googleapis.com/projects/my-project-name/datasets/dataplex_dataset/tables/profile_export")
                        .build())
                    .build())
                .build())
            .project("my-project-name")
            .build(), CustomResourceOptions.builder()
                // The export table lives in the dataset above, so create that first.
                .dependsOn(source)
                .build());
    }
}
Content copied to clipboard
Dataplex Datascan Basic Quality
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataplex.Datascan;
import com.pulumi.gcp.dataplex.DatascanArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerOnDemandArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
/**
 * Pulumi program that declares a minimal Dataplex data-quality scan.
 *
 * <p>The scan points at the public BigQuery table {@code samples.shakespeare}, defines a
 * single table-level rule, and uses an on-demand trigger.
 *
 * <p>NOTE(review): {@code DatascanDataQualitySpecRuleArgs} and
 * {@code DatascanDataQualitySpecRuleTableConditionExpectationArgs} are not in this
 * program's import list; they presumably live in {@code com.pulumi.gcp.dataplex.inputs}
 * next to the other Args types and need to be imported from there. Confirm against the SDK.
 */
public class App {
    /** Hands the stack definition to the Pulumi runtime. */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the {@code basicQuality} Datascan resource.
     *
     * @param ctx Pulumi deployment context (not read by this example)
     */
    public static void stack(Context ctx) {
        var basicQuality = new Datascan("basicQuality", DatascanArgs.builder()
            .data(DatascanDataArgs.builder()
                .resource("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare")
                .build())
            .dataQualitySpec(DatascanDataQualitySpecArgs.builder()
                .rules(DatascanDataQualitySpecRuleArgs.builder()
                    .description("rule 1 for validity dimension")
                    .dimension("VALIDITY")
                    .name("rule1")
                    .tableConditionExpectation(DatascanDataQualitySpecRuleTableConditionExpectationArgs.builder()
                        // The comparison operator had been stripped from this expression;
                        // the rule checks that the table is non-empty.
                        .sqlExpression("COUNT(*) > 0")
                        .build())
                    .build())
                .build())
            .dataScanId("dataquality-basic")
            .executionSpec(DatascanExecutionSpecArgs.builder()
                .trigger(DatascanExecutionSpecTriggerArgs.builder()
                    // The builder setter takes an Args value; pass an empty on-demand block.
                    .onDemand(DatascanExecutionSpecTriggerOnDemandArgs.builder()
                        .build())
                    .build())
                .build())
            .location("us-central1")
            .project("my-project-name")
            .build());
    }
}
Content copied to clipboard
Dataplex Datascan Full Quality
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataplex.Datascan;
import com.pulumi.gcp.dataplex.DatascanArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleNonNullExpectationArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleRangeExpectationArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleRegexExpectationArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleRowConditionExpectationArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleSetExpectationArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleStatisticRangeExpectationArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleTableConditionExpectationArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleUniquenessExpectationArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerScheduleArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
/**
 * Pulumi program that declares a fully configured Dataplex data-quality scan.
 *
 * <p>The scan runs on a cron schedule over the public
 * {@code austin_bikeshare.bikeshare_stations} table and defines one rule per expectation
 * kind: non-null, range, regex, set, uniqueness, statistic range, row condition and
 * table condition.
 */
public class App {
    /** Hands the stack definition to the Pulumi runtime. */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the {@code fullQuality} Datascan resource.
     *
     * @param ctx Pulumi deployment context (not read by this example)
     */
    public static void stack(Context ctx) {
        var fullQuality = new Datascan("fullQuality", DatascanArgs.builder()
            .data(DatascanDataArgs.builder()
                .resource("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations")
                .build())
            .dataQualitySpec(DatascanDataQualitySpecArgs.builder()
                // The comparison operator had been stripped from this filter.
                .rowFilter("station_id > 1000")
                .rules(
                    // Rule: address must be non-null.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("address")
                        .dimension("VALIDITY")
                        // The builder setter takes an Args value; pass an empty expectation block.
                        .nonNullExpectation(DatascanDataQualitySpecRuleNonNullExpectationArgs.builder()
                            .build())
                        .threshold(0.99)
                        .build(),
                    // Rule: council_district within a numeric range.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("council_district")
                        .dimension("VALIDITY")
                        .ignoreNull(true)
                        .rangeExpectation(DatascanDataQualitySpecRuleRangeExpectationArgs.builder()
                            // Bounds are given as strings: the SDK reference declares
                            // minValue/maxValue as String.
                            .maxValue("10")
                            .minValue("1")
                            .strictMaxEnabled(false)
                            .strictMinEnabled(true)
                            .build())
                        .threshold(0.9)
                        .build(),
                    // Rule: power_type must match a regular expression.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("power_type")
                        .dimension("VALIDITY")
                        .ignoreNull(false)
                        .regexExpectation(DatascanDataQualitySpecRuleRegexExpectationArgs.builder()
                            .regex(".*solar.*")
                            .build())
                        .build(),
                    // Rule: property_type must be one of a fixed set of values.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("property_type")
                        .dimension("VALIDITY")
                        .ignoreNull(false)
                        .setExpectation(DatascanDataQualitySpecRuleSetExpectationArgs.builder()
                            .values(
                                "sidewalk",
                                "parkland")
                            .build())
                        .build(),
                    // Rule: address values must be unique.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("address")
                        .dimension("UNIQUENESS")
                        .uniquenessExpectation(DatascanDataQualitySpecRuleUniquenessExpectationArgs.builder()
                            .build())
                        .build(),
                    // Rule: the MEAN of number_of_docks must fall inside a range.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("number_of_docks")
                        .dimension("VALIDITY")
                        .statisticRangeExpectation(DatascanDataQualitySpecRuleStatisticRangeExpectationArgs.builder()
                            .maxValue("15")
                            .minValue("5")
                            .statistic("MEAN")
                            .strictMaxEnabled(true)
                            .strictMinEnabled(true)
                            .build())
                        .build(),
                    // Rule: per-row SQL condition on footprint_length.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("footprint_length")
                        .dimension("VALIDITY")
                        .rowConditionExpectation(DatascanDataQualitySpecRuleRowConditionExpectationArgs.builder()
                            // Lower-bound operator restored (it had been stripped).
                            .sqlExpression("footprint_length > 0 AND footprint_length <= 10")
                            .build())
                        .build(),
                    // Rule: table-level SQL condition, the table must be non-empty.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .dimension("VALIDITY")
                        .tableConditionExpectation(DatascanDataQualitySpecRuleTableConditionExpectationArgs.builder()
                            .sqlExpression("COUNT(*) > 0")
                            .build())
                        .build())
                // Floating-point literal: the SDK reference declares this setter as Double,
                // and an int literal does not box to Double.
                .samplingPercent(5.0)
                .build())
            .dataScanId("dataquality-full")
            .description("Example resource - Full Datascan Quality")
            .displayName("Full Datascan Quality")
            .executionSpec(DatascanExecutionSpecArgs.builder()
                .field("modified_date")
                .trigger(DatascanExecutionSpecTriggerArgs.builder()
                    .schedule(DatascanExecutionSpecTriggerScheduleArgs.builder()
                        .cron("TZ=America/New_York 1 1 * * *")
                        .build())
                    .build())
                .build())
            .labels(Map.of("author", "billing"))
            .location("us-central1")
            .project("my-project-name")
            .build());
    }
}
Content copied to clipboard
Import
Datascan can be imported using any of these accepted formats:
$ pulumi import gcp:dataplex/datascan:Datascan default projects/{{project}}/locations/{{location}}/dataScans/{{data_scan_id}}
Content copied to clipboard
$ pulumi import gcp:dataplex/datascan:Datascan default {{project}}/{{location}}/{{data_scan_id}}
Content copied to clipboard
$ pulumi import gcp:dataplex/datascan:Datascan default {{location}}/{{data_scan_id}}
Content copied to clipboard
$ pulumi import gcp:dataplex/datascan:Datascan default {{data_scan_id}}
Content copied to clipboard
Properties
Link copied to clipboard
Link copied to clipboard
Link copied to clipboard
Link copied to clipboard