From c28c8884048fec093e9113b73e671875d8aaed0d Mon Sep 17 00:00:00 2001 From: oabrivard Date: Fri, 13 Mar 2026 11:55:25 +0100 Subject: [PATCH] Finished D1 Tasks - Subscription provisioning --- .gitignore | 2 + .../environments/prod/.terraform.lock.hcl | 36 +++ .../Terraform/environments/prod/backend.tf | 6 +- .../environments/prod/terraform.tfvars | 4 +- .../Terraform/modules/networking/main.tf | 8 +- Implementation/first_steps.md | 32 +- Implementation/second_steps.md | 303 ++++++++++++++++++ 7 files changed, 366 insertions(+), 25 deletions(-) create mode 100644 .gitignore create mode 100644 Implementation/Terraform/environments/prod/.terraform.lock.hcl create mode 100644 Implementation/second_steps.md diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..449c873 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.env +.terraform/ \ No newline at end of file diff --git a/Implementation/Terraform/environments/prod/.terraform.lock.hcl b/Implementation/Terraform/environments/prod/.terraform.lock.hcl new file mode 100644 index 0000000..96cb99b --- /dev/null +++ b/Implementation/Terraform/environments/prod/.terraform.lock.hcl @@ -0,0 +1,36 @@ +# This file is maintained automatically by "terraform init". +# Manual edits may be lost in future updates. 
+ +provider "registry.terraform.io/databricks/databricks" { + version = "1.111.0" + constraints = "~> 1.60" + hashes = [ + "h1:jAVcFkHrzsQtU8grk5ebWQ0hm+XIiuSi/tI52m6WTxM=", + "zh:1efec838824ce84d276ab3a60a40b06d25d6cb72ce81f6c0981bc9838511da42", + "zh:71fcedf3e129bec81b23cf769d6e176636644c7ff5ab8661601a352507698ffb", + "zh:8518ed14453b0920772067dd8ac26c82e4a3c996ff2200b2b6042e45160d5fef", + "zh:c1b46bbaf5c4a0b253309dad072e05025e24731536719d4408bacd48dc0ccfd9", + "zh:c9bdb775627d280729a758fa7aa09b3fa852d9d5e41612fbdb2a91c0912dc481", + "zh:ce379c424009b01ec4762dee4d0db27cfc554d921b55a0af8e4203b3652259e9", + ] +} + +provider "registry.terraform.io/hashicorp/azurerm" { + version = "4.64.0" + constraints = "~> 4.0" + hashes = [ + "h1:ye1NRDdXYVI8cPL1tNRTol0Payus55wnT3byIu5viuQ=", + "zh:14c016b3814b0820a15a3c265f59da026881e8d49601a6699bea44e04d44c0db", + "zh:16106d132cf01ae6c32d3ced77b745d3ffc437bb1318b587a99cff7eb4b214c1", + "zh:2cc0ba16f21d481e92cfabe24003455a9be4c70d4650c65dc6f659d82f4cb8c7", + "zh:4a2fda3f1e3d82f3f7dfa5746150b767459f50a90585029dde3e33aea913a87a", + "zh:4e9f930e0f47b418216cfdaa01bf26116e355858194157114e1fbea5062c4a5c", + "zh:734814677c4ec38175b0aedd13dcf03d7dad9667747a7bd357b1da4ffe35faa9", + "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", + "zh:7c045cf66604cd196ec2d5e2310dd29e40d5ce8e0144049d26c3f043d4286a1c", + "zh:9fb94995c6fe9b0c16c8d44cb2c84f07985e599b69bfe2f9acc5ed728f4d9d10", + "zh:b328f10eacf5a1a9eae9178e66bf169777f902471bfb7496e4fa6cdee7c9e811", + "zh:be08fcc215d0b3048977a52e701bee4c0f1c9e6ef4d0db1e8fe7bcacefa24352", + "zh:e2d7987e5d47ef175d517fec9abfdc1a6088fe2334c0c6de63d556d7d21bc9e3", + ] +} diff --git a/Implementation/Terraform/environments/prod/backend.tf b/Implementation/Terraform/environments/prod/backend.tf index 780ba2e..41752ba 100644 --- a/Implementation/Terraform/environments/prod/backend.tf +++ b/Implementation/Terraform/environments/prod/backend.tf @@ -5,9 +5,9 @@ terraform { backend "azurerm" { - 
resource_group_name = "rg-terraform-state" - storage_account_name = "stgreenfielttfstate" - container_name = "mdp-prod" + resource_group_name = "rg-mdp-tfstate" + storage_account_name = "staccmdptfstate" + container_name = "tfstate" key = "terraform.tfstate" use_azuread_auth = true } diff --git a/Implementation/Terraform/environments/prod/terraform.tfvars b/Implementation/Terraform/environments/prod/terraform.tfvars index 2c634c1..d67ed72 100644 --- a/Implementation/Terraform/environments/prod/terraform.tfvars +++ b/Implementation/Terraform/environments/prod/terraform.tfvars @@ -3,7 +3,7 @@ # IMPORTANT: Do NOT commit secrets. Use environment variables or Key Vault. ############################################################################### -subscription_id = "REPLACE-WITH-PROD-SUBSCRIPTION-ID" +subscription_id = "52274961-4dda-48cc-a79b-3a0b74f8eaac" databricks_account_id = "REPLACE-WITH-DATABRICKS-ACCOUNT-ID" location = "canadacentral" environment = "prod" @@ -29,7 +29,7 @@ adls_replication_type = "GRS" tags = { project = "mdp" environment = "prod" - cost-center = "REPLACE" + cost-center = "Greenfield-CDO" owner = "data-office" managed-by = "terraform" } diff --git a/Implementation/Terraform/modules/networking/main.tf b/Implementation/Terraform/modules/networking/main.tf index 45d9ea7..110ddf9 100644 --- a/Implementation/Terraform/modules/networking/main.tf +++ b/Implementation/Terraform/modules/networking/main.tf @@ -104,8 +104,8 @@ resource "azurerm_subnet" "private_endpoints" { name = "snet-${local.prefix}-pe" resource_group_name = azurerm_resource_group.network.name virtual_network_name = azurerm_virtual_network.main.name - address_prefixes = [var.private_endpoints_subnet_cidr] - private_endpoint_network_policies_enabled = true + address_prefixes = [var.private_endpoints_subnet_cidr] + private_endpoint_network_policies = "Enabled" } # ============================================================================= @@ -124,8 +124,8 @@ resource "azurerm_subnet" 
"transit" { name = "snet-${local.prefix}-transit" resource_group_name = azurerm_resource_group.network.name virtual_network_name = azurerm_virtual_network.transit.name - address_prefixes = [var.transit_subnet_cidr] - private_endpoint_network_policies_enabled = true + address_prefixes = [var.transit_subnet_cidr] + private_endpoint_network_policies = "Enabled" } # ============================================================================= diff --git a/Implementation/first_steps.md b/Implementation/first_steps.md index 17b2217..22fc350 100644 --- a/Implementation/first_steps.md +++ b/Implementation/first_steps.md @@ -115,7 +115,7 @@ az account set --subscription "" az group create --name rg-mdp-tfstate --location canadacentral az storage account create \ - --name stmdptfstate \ + --name staccmdptfstate \ --resource-group rg-mdp-tfstate \ --location canadacentral \ --sku Standard_GRS \ @@ -125,10 +125,10 @@ az storage account create \ az storage container create \ --name tfstate \ - --account-name stmdptfstate + --account-name staccmdptfstate az storage account blob-service-properties update \ - --account-name stmdptfstate \ + --account-name staccmdptfstate \ --enable-versioning true ``` @@ -169,7 +169,7 @@ terraform plan ```bash az policy assignment list \ - --scope "/subscriptions/" \ + --scope "/subscriptions/52274961-4dda-48cc-a79b-3a0b74f8eaac" \ --query "[].{Name:displayName, Scope:scope, Policy:policyDefinitionId}" \ -o table ``` @@ -183,7 +183,7 @@ az policy assignment create \ --name "mdp-allowed-locations" \ --display-name "MDP - Restrict to Canada regions" \ --policy "e56962a6-4747-49cd-b67b-bf8b01975c4c" \ - --scope "/subscriptions/" \ + --scope "/subscriptions/52274961-4dda-48cc-a79b-3a0b74f8eaac" \ --params '{ "listOfAllowedLocations": { "value": ["canadacentral", "canadaeast", "global"] @@ -195,13 +195,13 @@ az policy assignment create \ ### 2. 
Mandatory Tagging (Initiative Approach) -Built-in policy "Require a tag on resources": `871b6d14-10aa-478d-b466-ce391a2e1549` +Built-in policy "Require a tag on resources": `871b6d14-10aa-478d-b590-94f262ecfa99` **Quick approach — loop per tag:** ```bash -SCOPE="/subscriptions/" -POLICY_ID="871b6d14-10aa-478d-b466-ce391a2e1549" +SCOPE="/subscriptions/52274961-4dda-48cc-a79b-3a0b74f8eaac" +POLICY_ID="871b6d14-10aa-478d-b590-94f262ecfa99" for TAG in project environment cost-center owner data-classification; do az policy assignment create \ @@ -227,23 +227,23 @@ Create `mandatory-tags-initiative.json`: "parameters": {}, "policyDefinitions": [ { - "policyDefinitionId": "/providers/Microsoft.Authorization/policyDefinitions/871b6d14-10aa-478d-b466-ce391a2e1549", + "policyDefinitionId": "/providers/Microsoft.Authorization/policyDefinitions/871b6d14-10aa-478d-b590-94f262ecfa99", "parameters": { "tagName": { "value": "project" } } }, { - "policyDefinitionId": "/providers/Microsoft.Authorization/policyDefinitions/871b6d14-10aa-478d-b466-ce391a2e1549", + "policyDefinitionId": "/providers/Microsoft.Authorization/policyDefinitions/871b6d14-10aa-478d-b590-94f262ecfa99", "parameters": { "tagName": { "value": "environment" } } }, { - "policyDefinitionId": "/providers/Microsoft.Authorization/policyDefinitions/871b6d14-10aa-478d-b466-ce391a2e1549", + "policyDefinitionId": "/providers/Microsoft.Authorization/policyDefinitions/871b6d14-10aa-478d-b590-94f262ecfa99", "parameters": { "tagName": { "value": "cost-center" } } }, { - "policyDefinitionId": "/providers/Microsoft.Authorization/policyDefinitions/871b6d14-10aa-478d-b466-ce391a2e1549", + "policyDefinitionId": "/providers/Microsoft.Authorization/policyDefinitions/871b6d14-10aa-478d-b590-94f262ecfa99", "parameters": { "tagName": { "value": "owner" } } }, { - "policyDefinitionId": "/providers/Microsoft.Authorization/policyDefinitions/871b6d14-10aa-478d-b466-ce391a2e1549", + "policyDefinitionId": 
"/providers/Microsoft.Authorization/policyDefinitions/871b6d14-10aa-478d-b590-94f262ecfa99", "parameters": { "tagName": { "value": "data-classification" } } } ] @@ -256,13 +256,13 @@ az policy set-definition create \ --name "mdp-mandatory-tags" \ --display-name "MDP - Mandatory Tags" \ --definitions @mandatory-tags-initiative.json \ - --subscription "" + --subscription "52274961-4dda-48cc-a79b-3a0b74f8eaac" az policy assignment create \ --name "mdp-mandatory-tags" \ --display-name "MDP - Mandatory Tags" \ --policy-set-definition "mdp-mandatory-tags" \ - --scope "/subscriptions/" + --scope "/subscriptions/52274961-4dda-48cc-a79b-3a0b74f8eaac" ``` ### 3. Allowed VM SKUs @@ -274,7 +274,7 @@ az policy assignment create \ --name "mdp-allowed-vm-skus" \ --display-name "MDP - Allowed VM SKUs" \ --policy "cccc23c7-8427-4f53-ad12-b6a63eb452b3" \ - --scope "/subscriptions/" \ + --scope "/subscriptions/52274961-4dda-48cc-a79b-3a0b74f8eaac" \ --params '{ "listOfAllowedSKUs": { "value": [ diff --git a/Implementation/second_steps.md b/Implementation/second_steps.md new file mode 100644 index 0000000..4d2329f --- /dev/null +++ b/Implementation/second_steps.md @@ -0,0 +1,303 @@ +# MDP Databricks-Primary Architecture — Second Steps + +## Context + +Continuation of the Azure setup for Greenfield's Modern Data Platform (MDP), picking up from `first_steps.md`. This session covers Step 4 through Step 7 of the initial provisioning guide: RBAC setup, Key Vault creation, Entra ID group creation, subscription hierarchy, and Terraform initialization. + +--- + +## Step 4 — Set Up RBAC on the Subscription + +The two `az role assignment create` commands use a placeholder ``. 
Retrieve the actual App ID of `sp-mdp-terraform-prod` first (the service principal and both groups must already exist — see the creation sections below): + +```bash +SP_APP_ID=$(az ad sp list --display-name "sp-mdp-terraform-prod" --query "[0].appId" -o tsv) +echo $SP_APP_ID # verify before assigning +``` + +Then run the assignments: + +```bash +SUB_ID=$(az account show --query id -o tsv) + +# Contributor — allows creating/managing all resources +az role assignment create \ + --assignee $SP_APP_ID \ + --role "Contributor" \ + --scope "/subscriptions/$SUB_ID" + +# User Access Administrator — needed for Terraform to assign roles (e.g., on storage, Key Vault, Databricks) +az role assignment create \ + --assignee $SP_APP_ID \ + --role "User Access Administrator" \ + --scope "/subscriptions/$SUB_ID" +``` + +For group assignments, use Object IDs: + +```bash +ADMIN_GRP_ID=$(az ad group show --group "grp-mdp-account-admins" --query id -o tsv) +ENG_GRP_ID=$(az ad group show --group "grp-mdp-platform-engineers" --query id -o tsv) + +az role assignment create --assignee $ADMIN_GRP_ID --role "Owner" --scope "/subscriptions/$SUB_ID" +az role assignment create --assignee $ENG_GRP_ID --role "Contributor" --scope "/subscriptions/$SUB_ID" +``` + +Verify all assignments: + +```bash +az role assignment list --scope "/subscriptions/$SUB_ID" \ + --query "[].{Principal:principalName, Role:roleDefinitionName}" -o table +``` + +> **Note:** `User Access Administrator` on the Terraform SP is necessary because downstream Terraform modules (Unity Catalog storage, Key Vault, private endpoints) will create their own role assignments. Without it, those `apply` runs will fail with `AuthorizationFailed`. + +--- + +## Service Principal — `sp-mdp-terraform-prod` + +This is the Microsoft Entra ID (formerly Azure AD) service principal (application identity) that Terraform uses to authenticate against Azure and deploy resources. It is a non-human "robot account" with scoped, auditable permissions — required for CI/CD pipelines and to avoid using personal user accounts. 
+ +### Create the SP + +```bash +az ad sp create-for-rbac \ + --name "sp-mdp-terraform-prod" \ + --role "Contributor" \ + --scopes "/subscriptions/52274961-4dda-48cc-a79b-3a0b74f8eaac" \ + --years 1 +``` + +Output: + +```jsonc +{ + "appId": "...", // client_id → save this + "displayName": "sp-mdp-terraform-prod", + "password": "...", // client_secret → shown only once + "tenant": "..." // tenant_id → save this +} +``` + +> **Important:** The `password` is shown only once. Store it in Key Vault immediately — the vault must already exist and you need `Key Vault Administrator` on it (see "Key Vault Creation" below). + +```bash +az keyvault secret set \ + --vault-name "kv-mdp-prod-001" \ + --name "sp-mdp-terraform-prod-secret" \ + --value "<password-from-output>" +``` + +Then assign `User Access Administrator` separately: + +```bash +az role assignment create \ + --assignee "<appId>" \ + --role "User Access Administrator" \ + --scope "/subscriptions/52274961-4dda-48cc-a79b-3a0b74f8eaac" +``` + +> `create-for-rbac` already assigns `Contributor` at creation time — only the second role needs a separate command. + +--- + +## Key Vault Creation + +Based on the existing resource group `rg-mdp-prod-keyvault` from D1-T2: + +```bash +az keyvault create \ + --name "kv-mdp-prod-001" \ + --resource-group "rg-mdp-prod-keyvault" \ + --location "canadacentral" \ + --sku "premium" \ + --enable-purge-protection true \ + --retention-days 90 \ + --enable-rbac-authorization true +``` + +**SKU and option rationale:** +- `premium` — HSM-backed keys (OSFI expectation for secrets at rest) +- `enable-purge-protection` — prevents hard-delete; mandatory in regulated environments +- `retention-days 90` — soft-delete window +- `enable-rbac-authorization` — use Azure RBAC instead of legacy access policies + +Grant your own account access to manage secrets: + +```bash +MY_ID=$(az ad signed-in-user show --query id -o tsv) +KV_ID=$(az keyvault show --name "kv-mdp-prod-001" --query id -o tsv) + +az role assignment create \ + --assignee $MY_ID \ + --role "Key Vault Administrator" \ + --scope $KV_ID +``` + +> Key Vault names must be globally 
unique across Azure — adjust `kv-mdp-prod-001` if it conflicts. + +--- + +## Entra ID Group Creation + +### grp-mdp-account-admins + +```bash +az ad group create \ + --display-name "grp-mdp-account-admins" \ + --mail-nickname "grp-mdp-account-admins" \ + --description "MDP production account administrators - Owner access on greenfield-mdp-prod subscription" + +# Get group Object ID +GRP_ID=$(az ad group show --group "grp-mdp-account-admins" --query id -o tsv) + +# Add a member +az ad group member add \ + --group "grp-mdp-account-admins" \ + --member-id $(az ad user show --id "user@greenfield.ca" --query id -o tsv) + +# Assign Owner on subscription +az role assignment create \ + --assignee $GRP_ID \ + --role "Owner" \ + --scope "/subscriptions/52274961-4dda-48cc-a79b-3a0b74f8eaac" +``` + +### grp-mdp-platform-engineers + +```bash +az ad group create \ + --display-name "grp-mdp-platform-engineers" \ + --mail-nickname "grp-mdp-platform-engineers" \ + --description "MDP production platform engineers - Contributor access on greenfield-mdp-prod subscription" + +GRP_ID=$(az ad group show --group "grp-mdp-platform-engineers" --query id -o tsv) + +az role assignment create \ + --assignee $GRP_ID \ + --role "Contributor" \ + --scope "/subscriptions/52274961-4dda-48cc-a79b-3a0b74f8eaac" + +# Add a member +az ad group member add \ + --group "grp-mdp-platform-engineers" \ + --member-id $(az ad user show --id "engineer@greenfield.ca" --query id -o tsv) +``` + +> **Note:** Group creation in Entra ID requires the **User Administrator** or **Groups Administrator** Entra role. If you get `Authorization_RequestDenied`, this needs to go through your IAM team. 
+ +--- + +## Subscription Hierarchy — Where to Place `shared-infra` + +Recommended structure aligned with Azure Landing Zone / CAF conventions: + +``` +Tenant Root Group +└── mg-greenfield (root MG) + ├── mg-greenfield-platform ← shared-infra goes in this branch (under mg-greenfield-management) + │ ├── mg-greenfield-connectivity (hub network, ExpressRoute, DNS) + │ └── mg-greenfield-management (logging, monitoring, tfstate, shared tools) + ├── mg-greenfield-prod + │ └── greenfield-mdp-prod (workload subscription) + ├── mg-greenfield-nonprod + └── mg-greenfield-sandbox +``` + +The `shared-infra` subscription belongs under **`mg-greenfield-management`**, a child of `mg-greenfield-platform`. It hosts cross-cutting operational resources not tied to any workload: +- Terraform state storage (`staccmdptfstate`) +- Shared Key Vault for pipeline secrets +- Log Analytics workspace +- Azure Monitor / alerting + +The tfstate account must exist before any workload subscription is deployed — placing it under the management MG keeps it lifecycle-independent and avoids a circular dependency. + +> At Greenfield, a management/shared-services subscription may already exist in the EA enrollment. Confirm with Cloud Infra before creating a new one — you may only need a new resource group. 
+ +--- + +## Step 6 — Update `terraform.tfvars` + +### Values updated + +| Variable | Value | +|---|---| +| `subscription_id` | `52274961-4dda-48cc-a79b-3a0b74f8eaac` | +| `databricks_account_id` | Retrieve from `accounts.azuredatabricks.net` → top-right menu | +| `cost-center` tag | `Greenfield-CDO` | + +Get the Databricks account ID from the account console (no az CLI command available for this): + +``` +https://accounts.azuredatabricks.net → top-right menu → your account name +``` + +### `backend.tf` alignment + +The `backend.tf` values must match what was created in Step 5: + +| Parameter | Value | +|---|---| +| `resource_group_name` | `rg-mdp-tfstate` | +| `storage_account_name` | `staccmdptfstate` | +| `container_name` | `tfstate` | + +--- + +## Step 7 — Terraform Init Troubleshooting + +### Error: 403 AuthorizationPermissionMismatch on `terraform init` + +``` +Error: Failed to get existing workspaces: listing blobs: executing request: unexpected status 403 +(403 This request is not authorized to perform this operation using this permission.) +AuthorizationPermissionMismatch +``` + +**Root cause:** `backend.tf` uses `use_azuread_auth = true`. Terraform authenticates to the storage account via Entra ID rather than a storage access key. The `Contributor` role alone only covers the management-plane (ARM) — it does not grant blob read/write access on the data-plane. A `Storage Blob Data*` role is required explicitly. + +**Fix:** + +```bash +MY_ID=$(az ad signed-in-user show --query id -o tsv) + +SA_ID=$(az storage account show \ + --name "staccmdptfstate" \ + --resource-group "rg-mdp-tfstate" \ + --query id -o tsv) + +az role assignment create \ + --assignee $MY_ID \ + --role "Storage Blob Data Contributor" \ + --scope $SA_ID +``` + +Wait ~2 minutes for role assignment propagation, then re-run `terraform init`. 
+ +Also grant the same role to the Terraform SP for CI/CD: + +```bash +SP_APP_ID=$(az ad sp list --display-name "sp-mdp-terraform-prod" --query "[0].appId" -o tsv) + +az role assignment create \ + --assignee $SP_APP_ID \ + --role "Storage Blob Data Contributor" \ + --scope $SA_ID +``` + +--- + +## Summary — Actions Completed This Session + +| Action | Status | +|---|---| +| RBAC assignments for Terraform SP | Done | +| Created `sp-mdp-terraform-prod` service principal | Done | +| Created `kv-mdp-prod-001` Key Vault | Done | +| Stored SP secret in Key Vault | Done | +| Created `grp-mdp-account-admins` | Done | +| Created `grp-mdp-platform-engineers` | Done | +| Updated `terraform.tfvars` with subscription ID and cost-center | Done | +| Aligned `backend.tf` with actual storage account names | Done | +| Resolved `terraform init` 403 error (Storage Blob Data Contributor) | Done | +| `databricks_account_id` in `terraform.tfvars` | Pending — retrieve from account console | \ No newline at end of file