Datascan Args
data class DatascanArgs(val data: Output<DatascanDataArgs>? = null, val dataProfileSpec: Output<DatascanDataProfileSpecArgs>? = null, val dataQualitySpec: Output<DatascanDataQualitySpecArgs>? = null, val dataScanId: Output<String>? = null, val description: Output<String>? = null, val displayName: Output<String>? = null, val executionSpec: Output<DatascanExecutionSpecArgs>? = null, val labels: Output<Map<String, String>>? = null, val location: Output<String>? = null, val project: Output<String>? = null) : ConvertibleToJava<DatascanArgs>
Represents a user-visible job which provides the insights for the related data source. To get more information about Datascan, see:
How-to Guides
Example Usage
Dataplex Datascan Basic Profile
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataplex.Datascan;
import com.pulumi.gcp.dataplex.DatascanArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerOnDemandArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
/**
 * Example Pulumi program: a minimal Dataplex data-profile scan that is run on demand.
 */
public class App {
    /** Program entry point; hands the stack definition to the Pulumi runtime. */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the {@code basicProfile} Datascan over the public BigQuery
     * {@code samples.shakespeare} table.
     *
     * @param ctx the Pulumi context passed in by {@code Pulumi.run}; not used by this example
     */
    public static void stack(Context ctx) {
        var basicProfile = new Datascan("basicProfile", DatascanArgs.builder()
            .data(DatascanDataArgs.builder()
                .resource("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare")
                .build())
            // Empty profile spec: no sampling, filter or field selection is configured.
            // An explicit (empty) args object is passed because the setter takes an argument.
            .dataProfileSpec(DatascanDataProfileSpecArgs.builder().build())
            .dataScanId("dataprofile-basic")
            .executionSpec(DatascanExecutionSpecArgs.builder()
                .trigger(DatascanExecutionSpecTriggerArgs.builder()
                    // Empty on-demand trigger: the scan has no schedule.
                    .onDemand(DatascanExecutionSpecTriggerOnDemandArgs.builder().build())
                    .build())
                .build())
            .location("us-central1")
            .project("my-project-name")
            .build());
    }
}
Content copied to clipboard
Dataplex Datascan Full Profile
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.dataplex.Datascan;
import com.pulumi.gcp.dataplex.DatascanArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerScheduleArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecIncludeFieldsArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecExcludeFieldsArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecPostScanActionsArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataProfileSpecPostScanActionsBigqueryExportArgs;
import com.pulumi.resources.CustomResourceOptions;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
/**
 * Example Pulumi program: a fully configured Dataplex data-profile scan that runs on a
 * cron schedule and exports its results to a BigQuery table.
 */
public class App {
    /** Program entry point; hands the stack definition to the Pulumi runtime. */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares a BigQuery dataset ({@code source}) and the {@code fullProfile} Datascan,
     * which is created only after the dataset.
     *
     * @param ctx the Pulumi context passed in by {@code Pulumi.run}; not used by this example
     */
    public static void stack(Context ctx) {
        // Dataset referenced by the scan's BigQuery export path below.
        var source = new Dataset("source", DatasetArgs.builder()
            .datasetId("dataplex_dataset")
            .friendlyName("test")
            .description("This is a test description")
            .location("US")
            .deleteContentsOnDestroy(true)
            .build());

        var fullProfile = new Datascan("fullProfile", DatascanArgs.builder()
            .location("us-central1")
            .displayName("Full Datascan Profile")
            .dataScanId("dataprofile-full")
            .description("Example resource - Full Datascan Profile")
            .labels(Map.of("author", "billing"))
            .data(DatascanDataArgs.builder()
                .resource("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare")
                .build())
            .executionSpec(DatascanExecutionSpecArgs.builder()
                .trigger(DatascanExecutionSpecTriggerArgs.builder()
                    .schedule(DatascanExecutionSpecTriggerScheduleArgs.builder()
                        .cron("TZ=America/New_York 1 1 * * *")
                        .build())
                    .build())
                .build())
            .dataProfileSpec(DatascanDataProfileSpecArgs.builder()
                .samplingPercent(80)
                // The filter must be a complete boolean expression, so the
                // comparison operator is required.
                .rowFilter("word_count > 10")
                .includeFields(DatascanDataProfileSpecIncludeFieldsArgs.builder()
                    .fieldNames("word_count")
                    .build())
                .excludeFields(DatascanDataProfileSpecExcludeFieldsArgs.builder()
                    .fieldNames("property_type")
                    .build())
                .postScanActions(DatascanDataProfileSpecPostScanActionsArgs.builder()
                    .bigqueryExport(DatascanDataProfileSpecPostScanActionsBigqueryExportArgs.builder()
                        .resultsTable("//bigquery.googleapis.com/projects/my-project-name/datasets/dataplex_dataset/tables/profile_export")
                        .build())
                    .build())
                .build())
            .project("my-project-name")
            .build(), CustomResourceOptions.builder()
                // Explicit ordering: the export table path names the dataset by string,
                // so no dependency on `source` would be inferred otherwise.
                .dependsOn(source)
                .build());
    }
}
Content copied to clipboard
Dataplex Datascan Basic Quality
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataplex.Datascan;
import com.pulumi.gcp.dataplex.DatascanArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerOnDemandArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var basicQuality = new Datascan("basicQuality", DatascanArgs.builder()
.data(DatascanDataArgs.builder()
.resource("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare")
.build())
.dataQualitySpec(DatascanDataQualitySpecArgs.builder()
.rules(DatascanDataQualitySpecRuleArgs.builder()
.description("rule 1 for validity dimension")
.dimension("VALIDITY")
.name("rule1")
.tableConditionExpectation(DatascanDataQualitySpecRuleTableConditionExpectationArgs.builder()
.sqlExpression("COUNT(*) 0")
.build())
.build())
.build())
.dataScanId("dataquality-basic")
.executionSpec(DatascanExecutionSpecArgs.builder()
.trigger(DatascanExecutionSpecTriggerArgs.builder()
.onDemand()
.build())
.build())
.location("us-central1")
.project("my-project-name")
.build());
}
}
Content copied to clipboard
Dataplex Datascan Full Quality
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataplex.Datascan;
import com.pulumi.gcp.dataplex.DatascanArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleNonNullExpectationArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleRangeExpectationArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleRegexExpectationArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleRowConditionExpectationArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleSetExpectationArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleStatisticRangeExpectationArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleTableConditionExpectationArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanDataQualitySpecRuleUniquenessExpectationArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerArgs;
import com.pulumi.gcp.dataplex.inputs.DatascanExecutionSpecTriggerScheduleArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
/**
 * Example Pulumi program: a fully configured Dataplex data-quality scan with one rule
 * per expectation type, run on a cron schedule.
 */
public class App {
    /** Program entry point; hands the stack definition to the Pulumi runtime. */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the {@code fullQuality} Datascan over the public BigQuery
     * {@code austin_bikeshare.bikeshare_stations} table.
     *
     * @param ctx the Pulumi context passed in by {@code Pulumi.run}; not used by this example
     */
    public static void stack(Context ctx) {
        var fullQuality = new Datascan("fullQuality", DatascanArgs.builder()
            .data(DatascanDataArgs.builder()
                .resource("//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations")
                .build())
            .dataQualitySpec(DatascanDataQualitySpecArgs.builder()
                // Filters are boolean SQL expressions, so the comparison operator is required.
                .rowFilter("station_id > 1000")
                .rules(
                    // Non-null check on `address`; the expectation itself carries no settings.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("address")
                        .dimension("VALIDITY")
                        .nonNullExpectation(DatascanDataQualitySpecRuleNonNullExpectationArgs.builder().build())
                        .threshold(0.99)
                        .build(),
                    // Range check on `council_district`.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("council_district")
                        .dimension("VALIDITY")
                        .ignoreNull(true)
                        .rangeExpectation(DatascanDataQualitySpecRuleRangeExpectationArgs.builder()
                            .maxValue(10)
                            .minValue(1)
                            .strictMaxEnabled(false)
                            .strictMinEnabled(true)
                            .build())
                        .threshold(0.9)
                        .build(),
                    // Regex check on `power_type`.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("power_type")
                        .dimension("VALIDITY")
                        .ignoreNull(false)
                        .regexExpectation(DatascanDataQualitySpecRuleRegexExpectationArgs.builder()
                            .regex(".*solar.*")
                            .build())
                        .build(),
                    // Allowed-value set check on `property_type`.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("property_type")
                        .dimension("VALIDITY")
                        .ignoreNull(false)
                        .setExpectation(DatascanDataQualitySpecRuleSetExpectationArgs.builder()
                            .values(
                                "sidewalk",
                                "parkland")
                            .build())
                        .build(),
                    // Uniqueness check on `address`; the expectation itself carries no settings.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("address")
                        .dimension("UNIQUENESS")
                        .uniquenessExpectation(DatascanDataQualitySpecRuleUniquenessExpectationArgs.builder().build())
                        .build(),
                    // Aggregate (MEAN) range check on `number_of_docks`.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("number_of_docks")
                        .dimension("VALIDITY")
                        .statisticRangeExpectation(DatascanDataQualitySpecRuleStatisticRangeExpectationArgs.builder()
                            .maxValue(15)
                            .minValue(5)
                            .statistic("MEAN")
                            .strictMaxEnabled(true)
                            .strictMinEnabled(true)
                            .build())
                        .build(),
                    // Per-row SQL condition on `footprint_length`.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .column("footprint_length")
                        .dimension("VALIDITY")
                        .rowConditionExpectation(DatascanDataQualitySpecRuleRowConditionExpectationArgs.builder()
                            .sqlExpression("footprint_length > 0 AND footprint_length <= 10")
                            .build())
                        .build(),
                    // Table-level SQL condition: the table must be non-empty.
                    DatascanDataQualitySpecRuleArgs.builder()
                        .dimension("VALIDITY")
                        .tableConditionExpectation(DatascanDataQualitySpecRuleTableConditionExpectationArgs.builder()
                            .sqlExpression("COUNT(*) > 0")
                            .build())
                        .build())
                .samplingPercent(5)
                .build())
            .dataScanId("dataquality-full")
            .description("Example resource - Full Datascan Quality")
            .displayName("Full Datascan Quality")
            .executionSpec(DatascanExecutionSpecArgs.builder()
                .field("modified_date")
                .trigger(DatascanExecutionSpecTriggerArgs.builder()
                    .schedule(DatascanExecutionSpecTriggerScheduleArgs.builder()
                        .cron("TZ=America/New_York 1 1 * * *")
                        .build())
                    .build())
                .build())
            .labels(Map.of("author", "billing"))
            .location("us-central1")
            .project("my-project-name")
            .build());
    }
}
Content copied to clipboard
Import
Datascan can be imported using any of these accepted formats:
$ pulumi import gcp:dataplex/datascan:Datascan default projects/{{project}}/locations/{{location}}/dataScans/{{data_scan_id}}
Content copied to clipboard
$ pulumi import gcp:dataplex/datascan:Datascan default {{project}}/{{location}}/{{data_scan_id}}
Content copied to clipboard
$ pulumi import gcp:dataplex/datascan:Datascan default {{location}}/{{data_scan_id}}
Content copied to clipboard
$ pulumi import gcp:dataplex/datascan:Datascan default {{data_scan_id}}
Content copied to clipboard
Constructors
Link copied to clipboard
constructor(data: Output<DatascanDataArgs>? = null, dataProfileSpec: Output<DatascanDataProfileSpecArgs>? = null, dataQualitySpec: Output<DatascanDataQualitySpecArgs>? = null, dataScanId: Output<String>? = null, description: Output<String>? = null, displayName: Output<String>? = null, executionSpec: Output<DatascanExecutionSpecArgs>? = null, labels: Output<Map<String, String>>? = null, location: Output<String>? = null, project: Output<String>? = null)
Properties
Link copied to clipboard
The data source for DataScan. Structure is documented below.
Link copied to clipboard
DataProfileScan related setting. Structure is documented below.
Link copied to clipboard
DataQualityScan related setting. Structure is documented below.
Link copied to clipboard
DataScan identifier. Must contain only lowercase letters, numbers and hyphens. Must start with a letter. Must end with a number or a letter.
Link copied to clipboard
Description of the scan. The maximum length is 1,024 characters.
Link copied to clipboard
User friendly display name.
Link copied to clipboard
DataScan execution settings. Structure is documented below.