Finished D1 Tasks - Subscription provisioning

master
oabrivard 3 days ago
parent d8e56c0dd9
commit c28c888404

2
.gitignore vendored

@ -0,0 +1,2 @@
.env
.terraform/

@ -0,0 +1,36 @@
# This file is maintained automatically by "terraform init".
# Manual edits may be lost in future updates.
provider "registry.terraform.io/databricks/databricks" {
version = "1.111.0"
constraints = "~> 1.60"
hashes = [
"h1:jAVcFkHrzsQtU8grk5ebWQ0hm+XIiuSi/tI52m6WTxM=",
"zh:1efec838824ce84d276ab3a60a40b06d25d6cb72ce81f6c0981bc9838511da42",
"zh:71fcedf3e129bec81b23cf769d6e176636644c7ff5ab8661601a352507698ffb",
"zh:8518ed14453b0920772067dd8ac26c82e4a3c996ff2200b2b6042e45160d5fef",
"zh:c1b46bbaf5c4a0b253309dad072e05025e24731536719d4408bacd48dc0ccfd9",
"zh:c9bdb775627d280729a758fa7aa09b3fa852d9d5e41612fbdb2a91c0912dc481",
"zh:ce379c424009b01ec4762dee4d0db27cfc554d921b55a0af8e4203b3652259e9",
]
}
provider "registry.terraform.io/hashicorp/azurerm" {
version = "4.64.0"
constraints = "~> 4.0"
hashes = [
"h1:ye1NRDdXYVI8cPL1tNRTol0Payus55wnT3byIu5viuQ=",
"zh:14c016b3814b0820a15a3c265f59da026881e8d49601a6699bea44e04d44c0db",
"zh:16106d132cf01ae6c32d3ced77b745d3ffc437bb1318b587a99cff7eb4b214c1",
"zh:2cc0ba16f21d481e92cfabe24003455a9be4c70d4650c65dc6f659d82f4cb8c7",
"zh:4a2fda3f1e3d82f3f7dfa5746150b767459f50a90585029dde3e33aea913a87a",
"zh:4e9f930e0f47b418216cfdaa01bf26116e355858194157114e1fbea5062c4a5c",
"zh:734814677c4ec38175b0aedd13dcf03d7dad9667747a7bd357b1da4ffe35faa9",
"zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3",
"zh:7c045cf66604cd196ec2d5e2310dd29e40d5ce8e0144049d26c3f043d4286a1c",
"zh:9fb94995c6fe9b0c16c8d44cb2c84f07985e599b69bfe2f9acc5ed728f4d9d10",
"zh:b328f10eacf5a1a9eae9178e66bf169777f902471bfb7496e4fa6cdee7c9e811",
"zh:be08fcc215d0b3048977a52e701bee4c0f1c9e6ef4d0db1e8fe7bcacefa24352",
"zh:e2d7987e5d47ef175d517fec9abfdc1a6088fe2334c0c6de63d556d7d21bc9e3",
]
}

@ -5,9 +5,9 @@
terraform {
backend "azurerm" {
resource_group_name = "rg-terraform-state"
storage_account_name = "stgreenfielttfstate"
container_name = "mdp-prod"
resource_group_name = "rg-mdp-tfstate"
storage_account_name = "staccmdptfstate"
container_name = "tfstate"
key = "terraform.tfstate"
use_azuread_auth = true
}

@ -3,7 +3,7 @@
# IMPORTANT: Do NOT commit secrets. Use environment variables or Key Vault.
###############################################################################
subscription_id = "REPLACE-WITH-PROD-SUBSCRIPTION-ID"
subscription_id = "52274961-4dda-48cc-a79b-3a0b74f8eaac"
databricks_account_id = "REPLACE-WITH-DATABRICKS-ACCOUNT-ID"
location = "canadacentral"
environment = "prod"
@ -29,7 +29,7 @@ adls_replication_type = "GRS"
tags = {
project = "mdp"
environment = "prod"
cost-center = "REPLACE"
cost-center = "Greenfield-CDO"
owner = "data-office"
managed-by = "terraform"
}

@ -105,7 +105,7 @@ resource "azurerm_subnet" "private_endpoints" {
resource_group_name = azurerm_resource_group.network.name
virtual_network_name = azurerm_virtual_network.main.name
address_prefixes = [var.private_endpoints_subnet_cidr]
private_endpoint_network_policies_enabled = true
private_endpoint_network_policies = "Enabled"
}
# =============================================================================
@ -125,7 +125,7 @@ resource "azurerm_subnet" "transit" {
resource_group_name = azurerm_resource_group.network.name
virtual_network_name = azurerm_virtual_network.transit.name
address_prefixes = [var.transit_subnet_cidr]
private_endpoint_network_policies_enabled = true
private_endpoint_network_policies = "Enabled"
}
# =============================================================================

@ -115,7 +115,7 @@ az account set --subscription "<shared-infra-subscription>"
az group create --name rg-mdp-tfstate --location canadacentral
az storage account create \
--name stmdptfstate \
--name staccmdptfstate \
--resource-group rg-mdp-tfstate \
--location canadacentral \
--sku Standard_GRS \
@ -125,10 +125,10 @@ az storage account create \
az storage container create \
--name tfstate \
--account-name stmdptfstate
--account-name staccmdptfstate
az storage account blob-service-properties update \
--account-name stmdptfstate \
--account-name staccmdptfstate \
--enable-versioning true
```
@ -169,7 +169,7 @@ terraform plan
```bash
az policy assignment list \
--scope "/subscriptions/<sub-id>" \
--scope "/subscriptions/52274961-4dda-48cc-a79b-3a0b74f8eaac" \
--query "[].{Name:displayName, Scope:scope, Policy:policyDefinitionId}" \
-o table
```
@ -183,7 +183,7 @@ az policy assignment create \
--name "mdp-allowed-locations" \
--display-name "MDP - Restrict to Canada regions" \
--policy "e56962a6-4747-49cd-b67b-bf8b01975c4c" \
--scope "/subscriptions/<sub-id>" \
--scope "/subscriptions/52274961-4dda-48cc-a79b-3a0b74f8eaac" \
--params '{
"listOfAllowedLocations": {
"value": ["canadacentral", "canadaeast", "global"]
@ -195,13 +195,13 @@ az policy assignment create \
### 2. Mandatory Tagging (Initiative Approach)
Built-in policy "Require a tag on resources": `871b6d14-10aa-478d-b466-ce391a2e1549`
Built-in policy "Require a tag on resources": `871b6d14-10aa-478d-b590-94f262ecfa99`
**Quick approach — loop per tag:**
```bash
SCOPE="/subscriptions/<sub-id>"
POLICY_ID="871b6d14-10aa-478d-b466-ce391a2e1549"
SCOPE="/subscriptions/52274961-4dda-48cc-a79b-3a0b74f8eaac"
POLICY_ID="871b6d14-10aa-478d-b590-94f262ecfa99"
for TAG in project environment cost-center owner data-classification; do
az policy assignment create \
@ -227,23 +227,23 @@ Create `mandatory-tags-initiative.json`:
"parameters": {},
"policyDefinitions": [
{
"policyDefinitionId": "/providers/Microsoft.Authorization/policyDefinitions/871b6d14-10aa-478d-b466-ce391a2e1549",
"policyDefinitionId": "/providers/Microsoft.Authorization/policyDefinitions/871b6d14-10aa-478d-b590-94f262ecfa99",
"parameters": { "tagName": { "value": "project" } }
},
{
"policyDefinitionId": "/providers/Microsoft.Authorization/policyDefinitions/871b6d14-10aa-478d-b466-ce391a2e1549",
"policyDefinitionId": "/providers/Microsoft.Authorization/policyDefinitions/871b6d14-10aa-478d-b590-94f262ecfa99",
"parameters": { "tagName": { "value": "environment" } }
},
{
"policyDefinitionId": "/providers/Microsoft.Authorization/policyDefinitions/871b6d14-10aa-478d-b466-ce391a2e1549",
"policyDefinitionId": "/providers/Microsoft.Authorization/policyDefinitions/871b6d14-10aa-478d-b590-94f262ecfa99",
"parameters": { "tagName": { "value": "cost-center" } }
},
{
"policyDefinitionId": "/providers/Microsoft.Authorization/policyDefinitions/871b6d14-10aa-478d-b466-ce391a2e1549",
"policyDefinitionId": "/providers/Microsoft.Authorization/policyDefinitions/871b6d14-10aa-478d-b590-94f262ecfa99",
"parameters": { "tagName": { "value": "owner" } }
},
{
"policyDefinitionId": "/providers/Microsoft.Authorization/policyDefinitions/871b6d14-10aa-478d-b466-ce391a2e1549",
"policyDefinitionId": "/providers/Microsoft.Authorization/policyDefinitions/871b6d14-10aa-478d-b590-94f262ecfa99",
"parameters": { "tagName": { "value": "data-classification" } }
}
]
@ -256,13 +256,13 @@ az policy set-definition create \
--name "mdp-mandatory-tags" \
--display-name "MDP - Mandatory Tags" \
--definitions @mandatory-tags-initiative.json \
--subscription "<sub-id>"
--subscription "52274961-4dda-48cc-a79b-3a0b74f8eaac"
az policy assignment create \
--name "mdp-mandatory-tags" \
--display-name "MDP - Mandatory Tags" \
--policy-set-definition "mdp-mandatory-tags" \
--scope "/subscriptions/<sub-id>"
--scope "/subscriptions/52274961-4dda-48cc-a79b-3a0b74f8eaac"
```
### 3. Allowed VM SKUs
@ -274,7 +274,7 @@ az policy assignment create \
--name "mdp-allowed-vm-skus" \
--display-name "MDP - Allowed VM SKUs" \
--policy "cccc23c7-8427-4f53-ad12-b6a63eb452b3" \
--scope "/subscriptions/<sub-id>" \
--scope "/subscriptions/52274961-4dda-48cc-a79b-3a0b74f8eaac" \
--params '{
"listOfAllowedSKUs": {
"value": [

@ -0,0 +1,303 @@
# MDP Databricks-Primary Architecture — Second Steps
## Context
Continuation of the Azure setup for Greenfield's Modern Data Platform (MDP), picking up from `first_steps.md`. This session covers Step 4 through Step 7 of the initial provisioning guide: RBAC setup, Key Vault creation, Entra ID group creation, subscription hierarchy, and Terraform initialization.
---
## Step 4 — Set Up RBAC on the Subscription
The two `az role assignment create` commands use a placeholder `<terraform-sp-app-id>`. Retrieve the actual App ID first:
```bash
SP_APP_ID=$(az ad sp list --display-name "sp-mdp-terraform-prod" --query "[0].appId" -o tsv)
echo $SP_APP_ID # verify before assigning
```
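Then run the assignments. `SUB_ID` is read from the currently active CLI context, so confirm with `az account show` that the MDP prod subscription (not the shared-infra one used for the tfstate storage account) is selected first: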
```bash
SUB_ID=$(az account show --query id -o tsv)
# Contributor — allows creating/managing all resources
az role assignment create \
--assignee $SP_APP_ID \
--role "Contributor" \
--scope "/subscriptions/$SUB_ID"
# User Access Administrator — needed for Terraform to assign roles (e.g., on storage, Key Vault, Databricks)
az role assignment create \
--assignee $SP_APP_ID \
--role "User Access Administrator" \
--scope "/subscriptions/$SUB_ID"
```
For group assignments, use Object IDs:
```bash
ADMIN_GRP_ID=$(az ad group show --group "grp-mdp-account-admins" --query id -o tsv)
ENG_GRP_ID=$(az ad group show --group "grp-mdp-platform-engineers" --query id -o tsv)
az role assignment create --assignee $ADMIN_GRP_ID --role "Owner" --scope "/subscriptions/$SUB_ID"
az role assignment create --assignee $ENG_GRP_ID --role "Contributor" --scope "/subscriptions/$SUB_ID"
```
Verify all assignments:
```bash
az role assignment list --scope "/subscriptions/$SUB_ID" \
--query "[].{Principal:principalName, Role:roleDefinitionName}" -o table
```
> **Note:** `User Access Administrator` on the Terraform SP is necessary because downstream Terraform modules (Unity Catalog storage, Key Vault, private endpoints) will create their own role assignments. Without it, those `apply` runs will fail with `AuthorizationFailed`.
---
## Service Principal — `sp-mdp-terraform-prod`
This is the Microsoft Entra ID (formerly Azure AD) service principal (application identity) that Terraform uses to authenticate against Azure and deploy resources. It is a non-human "robot account" with scoped, auditable permissions — required for CI/CD pipelines and to avoid using personal user accounts.
### Create the SP
```bash
az ad sp create-for-rbac \
--name "sp-mdp-terraform-prod" \
--role "Contributor" \
--scopes "/subscriptions/52274961-4dda-48cc-a79b-3a0b74f8eaac" \
--years 1
```
Output:
```jsonc
{
"appId": "...", // client_id → save this
"displayName": "sp-mdp-terraform-prod",
"password": "...", // client_secret → shown only once
"tenant": "..." // tenant_id → save this
}
```
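> **Important:** The `password` is shown only once. Store it in Key Vault immediately. The vault `kv-mdp-prod-001` must already exist (see **Key Vault Creation** below), and because it is created with RBAC authorization enabled, your account needs a data-plane role on it (e.g. `Key Vault Administrator`) before `az keyvault secret set` will succeed.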
```bash
az keyvault secret set \
--vault-name "kv-mdp-prod-001" \
--name "sp-mdp-terraform-prod-secret" \
--value "<password-from-above>"
```
Then assign `User Access Administrator` separately:
```bash
az role assignment create \
--assignee <appId-from-above> \
--role "User Access Administrator" \
--scope "/subscriptions/52274961-4dda-48cc-a79b-3a0b74f8eaac"
```
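> `create-for-rbac` already assigns `Contributor` at creation time — only the second role needs a separate command. If the SP was created this way, the explicit `Contributor` assignment shown in Step 4 is redundant.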
---
## Key Vault Creation
Based on the existing resource group `rg-mdp-prod-keyvault` from D1-T2:
```bash
az keyvault create \
--name "kv-mdp-prod-001" \
--resource-group "rg-mdp-prod-keyvault" \
--location "canadacentral" \
--sku "premium" \
--enable-purge-protection true \
--retention-days 90 \
--enable-rbac-authorization true
```
**SKU and option rationale:**
- `premium` — HSM-backed keys (OSFI expectation for secrets at rest)
- `enable-purge-protection` — prevents hard-delete; mandatory in regulated environments
- `retention-days 90` — soft-delete window
- `enable-rbac-authorization` — use Azure RBAC instead of legacy access policies
Grant your own account access to manage secrets:
```bash
MY_ID=$(az ad signed-in-user show --query id -o tsv)
KV_ID=$(az keyvault show --name "kv-mdp-prod-001" --query id -o tsv)
az role assignment create \
--assignee $MY_ID \
--role "Key Vault Administrator" \
--scope $KV_ID
```
> Key Vault names must be globally unique across Azure — adjust `kv-mdp-prod-001` if it conflicts.
---
## Entra ID Group Creation
### grp-mdp-account-admins
```bash
az ad group create \
--display-name "grp-mdp-account-admins" \
--mail-nickname "grp-mdp-account-admins" \
--description "MDP production account administrators - Owner access on greenfield-mdp-prod subscription"
# Get group Object ID
GRP_ID=$(az ad group show --group "grp-mdp-account-admins" --query id -o tsv)
# Add a member
az ad group member add \
--group "grp-mdp-account-admins" \
--member-id $(az ad user show --id "user@greenfield.ca" --query id -o tsv)
# Assign Owner on subscription
az role assignment create \
--assignee $GRP_ID \
--role "Owner" \
--scope "/subscriptions/52274961-4dda-48cc-a79b-3a0b74f8eaac"
```
### grp-mdp-platform-engineers
```bash
az ad group create \
--display-name "grp-mdp-platform-engineers" \
--mail-nickname "grp-mdp-platform-engineers" \
--description "MDP production platform engineers - Contributor access on greenfield-mdp-prod subscription"
GRP_ID=$(az ad group show --group "grp-mdp-platform-engineers" --query id -o tsv)
az role assignment create \
--assignee $GRP_ID \
--role "Contributor" \
--scope "/subscriptions/52274961-4dda-48cc-a79b-3a0b74f8eaac"
# Add a member
az ad group member add \
--group "grp-mdp-platform-engineers" \
--member-id $(az ad user show --id "engineer@greenfield.ca" --query id -o tsv)
```
> **Note:** Group creation in Entra ID requires the **User Administrator** or **Groups Administrator** Entra role. If you get `Authorization_RequestDenied`, this needs to go through your IAM team.
---
## Subscription Hierarchy — Where to Place `shared-infra`
Recommended structure aligned with Azure Landing Zone / CAF conventions:
```
Tenant Root Group
└── mg-greenfield (root MG)
├── mg-greenfield-platform ← shared-infra goes here
│ ├── mg-greenfield-connectivity (hub network, ExpressRoute, DNS)
│ └── mg-greenfield-management (logging, monitoring, tfstate, shared tools)
├── mg-greenfield-prod
│ └── greenfield-mdp-prod (workload subscription)
├── mg-greenfield-nonprod
└── mg-greenfield-sandbox
```
The `shared-infra` subscription belongs under **`mg-greenfield-management`**, which is a child of `mg-greenfield-platform` in the hierarchy above. It hosts cross-cutting operational resources not tied to any workload:
- Terraform state storage (`staccmdptfstate`)
- Shared Key Vault for pipeline secrets
- Log Analytics workspace
- Azure Monitor / alerting
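The tfstate storage account must exist before any workload subscription is deployed. Placing it under the management MG keeps its lifecycle independent of the workloads and avoids a circular dependency, since a workload's Terraform run cannot create the backend that stores its own state.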
> At Greenfield, a management/shared-services subscription may already exist in the EA enrollment. Confirm with Cloud Infra before creating a new one — you may only need a new resource group.
---
## Step 6 — Update `terraform.tfvars`
### Values updated
| Variable | Value |
|---|---|
| `subscription_id` | `52274961-4dda-48cc-a79b-3a0b74f8eaac` |
| `databricks_account_id` | Retrieve from `accounts.azuredatabricks.net` → top-right menu |
| `cost-center` tag | `Greenfield-CDO` |
Get the Databricks account ID from the account console (no az CLI command available for this):
```
https://accounts.azuredatabricks.net → top-right menu → your account name
```
### `backend.tf` alignment
The `backend.tf` values must match what was created in Step 5:
| Parameter | Value |
|---|---|
| `resource_group_name` | `rg-mdp-tfstate` |
| `storage_account_name` | `staccmdptfstate` |
| `container_name` | `tfstate` |
---
## Step 7 — Terraform Init Troubleshooting
### Error: 403 AuthorizationPermissionMismatch on `terraform init`
```
Error: Failed to get existing workspaces: listing blobs: executing request: unexpected status 403
(403 This request is not authorized to perform this operation using this permission.)
AuthorizationPermissionMismatch
```
**Root cause:** `backend.tf` uses `use_azuread_auth = true`. Terraform authenticates to the storage account via Entra ID rather than a storage access key. The `Contributor` role alone only covers the management-plane (ARM) — it does not grant blob read/write access on the data-plane. A `Storage Blob Data*` role is required explicitly.
**Fix:**
```bash
MY_ID=$(az ad signed-in-user show --query id -o tsv)
SA_ID=$(az storage account show \
--name "staccmdptfstate" \
--resource-group "rg-mdp-tfstate" \
--query id -o tsv)
az role assignment create \
--assignee $MY_ID \
--role "Storage Blob Data Contributor" \
--scope $SA_ID
```
Wait ~2 minutes for role assignment propagation, then re-run `terraform init`.
Also grant the same role to the Terraform SP for CI/CD (this reuses `$SA_ID` from the previous block, so run it in the same shell session):
```bash
SP_APP_ID=$(az ad sp list --display-name "sp-mdp-terraform-prod" --query "[0].appId" -o tsv)
az role assignment create \
--assignee $SP_APP_ID \
--role "Storage Blob Data Contributor" \
--scope $SA_ID
```
---
## Summary — Actions Completed This Session
| Action | Status |
|---|---|
| RBAC assignments for Terraform SP | Done |
| Created `sp-mdp-terraform-prod` service principal | Done |
| Created `kv-mdp-prod-001` Key Vault | Done |
| Stored SP secret in Key Vault | Done |
| Created `grp-mdp-account-admins` | Done |
| Created `grp-mdp-platform-engineers` | Done |
| Updated `terraform.tfvars` with subscription ID and cost-center | Done |
| Aligned `backend.tf` with actual storage account names | Done |
| Resolved `terraform init` 403 error (Storage Blob Data Contributor) | Done |
| `databricks_account_id` in `terraform.tfvars` | Pending — retrieve from account console |
Loading…
Cancel
Save