Beta - job's task is legacy (spark_submit_task)
This product is not supported for your selected Datadog site.
Id: 375cdab9-3f94-4ae0-b1e3-8fbdf9cdf4d7
Cloud Provider: Databricks
Platform: Terraform
Severity: Medium
Category: Best Practices
Learn More
Description
This rule detects databricks_job resources with task.spark_submit_task and reports an IncorrectValue issue.
Use of spark_submit_task in job tasks is not allowed.
Compliant Code Examples
// Compliant: the job's task uses notebook_task, not the legacy spark_submit_task.
resource "databricks_job" "negative1" {
name = "Job with multiple tasks"
// Shared job cluster definition referenced by tasks via job_cluster_key.
job_cluster {
job_cluster_key = "j"
new_cluster {
num_workers = 2
spark_version = data.databricks_spark_version.latest.id
node_type_id = data.databricks_node_type.smallest.id
}
}
task {
task_key = "a"
// Task-scoped ephemeral cluster for this task only.
new_cluster {
num_workers = 1
spark_version = data.databricks_spark_version.latest.id
node_type_id = data.databricks_node_type.smallest.id
}
// notebook_task is the supported task type here (no spark_submit_task).
notebook_task {
notebook_path = databricks_notebook.this.path
}
}
}
// Compliant: every task uses a supported task type (notebook_task,
// spark_jar_task, pipeline_task) — none uses the legacy spark_submit_task.
resource "databricks_job" "negative1" {
name = "Job with multiple tasks"
// Shared job cluster definition referenced by tasks via job_cluster_key.
job_cluster {
job_cluster_key = "j"
new_cluster {
num_workers = 2
spark_version = data.databricks_spark_version.latest.id
node_type_id = data.databricks_node_type.smallest.id
}
}
task {
task_key = "a"
// Task-scoped ephemeral cluster for this task only.
new_cluster {
num_workers = 1
spark_version = data.databricks_spark_version.latest.id
node_type_id = data.databricks_node_type.smallest.id
}
notebook_task {
notebook_path = databricks_notebook.this.path
}
}
task {
task_key = "b"
//this task will only run after task a
depends_on {
task_key = "a"
}
// Runs on an existing all-purpose cluster instead of a new one.
existing_cluster_id = databricks_cluster.shared.id
// spark_jar_task is the supported replacement for spark_submit_task here.
spark_jar_task {
main_class_name = "com.acme.data.Main"
}
}
task {
task_key = "c"
// Reuses the shared job cluster declared above.
job_cluster_key = "j"
notebook_task {
notebook_path = databricks_notebook.this.path
}
}
//this task starts a Delta Live Tables pipeline update
task {
task_key = "d"
pipeline_task {
pipeline_id = databricks_pipeline.this.id
}
}
}
Non-Compliant Code Examples
// Non-compliant: task "a" uses the legacy spark_submit_task, which this
// rule flags as an IncorrectValue issue.
resource "databricks_job" "positive" {
name = "Job with multiple tasks"
// Shared job cluster definition (not itself a violation).
job_cluster {
job_cluster_key = "j"
new_cluster {
num_workers = 2
spark_version = data.databricks_spark_version.latest.id
node_type_id = data.databricks_node_type.smallest.id
}
}
task {
task_key = "a"
existing_cluster_id = databricks_cluster.shared.id
// VIOLATION: spark_submit_task is legacy; use spark_jar_task (or another
// supported task type) instead.
spark_submit_task {
main_class_name = "com.acme.data.Main"
}
}
}
// Non-compliant: task "b" uses the legacy spark_submit_task, which this
// rule flags as an IncorrectValue issue. The other tasks are fine.
resource "databricks_job" "positive" {
name = "Job with multiple tasks"
// Shared job cluster definition referenced by tasks via job_cluster_key.
job_cluster {
job_cluster_key = "j"
new_cluster {
num_workers = 2
spark_version = data.databricks_spark_version.latest.id
node_type_id = data.databricks_node_type.smallest.id
}
}
task {
task_key = "a"
// Task-scoped ephemeral cluster for this task only.
new_cluster {
num_workers = 1
spark_version = data.databricks_spark_version.latest.id
node_type_id = data.databricks_node_type.smallest.id
}
notebook_task {
notebook_path = databricks_notebook.this.path
}
}
task {
task_key = "b"
//this task will only run after task a
depends_on {
task_key = "a"
}
existing_cluster_id = databricks_cluster.shared.id
// VIOLATION: spark_submit_task is legacy; use spark_jar_task (or another
// supported task type) instead.
spark_submit_task {
main_class_name = "com.acme.data.Main"
}
}
task {
task_key = "c"
// Reuses the shared job cluster declared above.
job_cluster_key = "j"
notebook_task {
notebook_path = databricks_notebook.this.path
}
}
//this task starts a Delta Live Tables pipeline update
task {
task_key = "d"
pipeline_task {
pipeline_id = databricks_pipeline.this.id
}
}
}