# Deck builder: Greenfield Modern Data Platform — Databricks-primary architecture (v8.0).
# Generates a 50-slide executive briefing as a .pptx file using python-pptx.
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.dml.color import RGBColor
from pptx.enum.text import PP_ALIGN
from pptx.enum.shapes import MSO_SHAPE

# Destination path for the generated deck.
OUTPUT = "/Users/oabrivard/Projects/mdp/databricks_prime.pptx"

# 16:9 widescreen canvas (13.333 x 7.5 inches).
prs = Presentation()
prs.slide_width = Inches(13.333)
prs.slide_height = Inches(7.5)

# Design system: brand palette shared by every slide builder below.
COLORS = {
    "navy": RGBColor(13, 35, 64),
    "blue": RGBColor(0, 83, 155),
    "teal": RGBColor(0, 122, 128),
    "light_bg": RGBColor(244, 247, 251),
    "text": RGBColor(38, 50, 56),
    "muted": RGBColor(96, 112, 128),
    "white": RGBColor(255, 255, 255),
    "gold": RGBColor(179, 140, 0),
}

# Shared layout geometry for the content area below the header band.
MARGIN_X = Inches(0.6)
CONTENT_TOP = Inches(1.25)
CONTENT_W = Inches(12.1)
CONTENT_H = Inches(5.7)
def set_run_font(run, size=20, bold=False, color=None, name="Segoe UI"):
    """Apply the deck's standard font styling to a text run.

    Args:
        run: python-pptx text run to style.
        size: Font size in points.
        bold: Whether the run is rendered bold.
        color: Optional RGBColor override; defaults to the body text color.
        name: Font family name.
    """
    run.font.size = Pt(size)
    run.font.bold = bold
    run.font.name = name
    # Fall back to the standard body color when no override is supplied.
    run.font.color.rgb = color or COLORS["text"]
def add_brand(slide, title, subtitle=None):
    """Paint the shared slide chrome: background, header band, title, footer.

    Args:
        slide: Slide to decorate.
        title: Header title text (white, bold, in the navy band).
        subtitle: Optional muted subtitle shown just below the header.
    """
    bg = slide.background
    bg.fill.solid()
    bg.fill.fore_color.rgb = COLORS["light_bg"]

    # Navy header band spanning the full slide width.
    header = slide.shapes.add_shape(MSO_SHAPE.RECTANGLE, 0, 0, prs.slide_width, Inches(0.9))
    header.fill.solid()
    header.fill.fore_color.rgb = COLORS["navy"]
    header.line.fill.background()

    title_box = slide.shapes.add_textbox(MARGIN_X, Inches(0.14), Inches(9.8), Inches(0.5))
    tf = title_box.text_frame
    tf.clear()
    p = tf.paragraphs[0]
    r = p.add_run()
    r.text = title
    set_run_font(r, size=24, bold=True, color=COLORS["white"])

    if subtitle:
        sub_box = slide.shapes.add_textbox(MARGIN_X, Inches(0.72), Inches(11.5), Inches(0.4))
        stf = sub_box.text_frame
        stf.clear()
        p = stf.paragraphs[0]
        r = p.add_run()
        r.text = subtitle
        set_run_font(r, size=12, color=COLORS["muted"])

    # White footer strip carrying the confidentiality notice.
    footer = slide.shapes.add_shape(MSO_SHAPE.RECTANGLE, 0, Inches(7.15), prs.slide_width, Inches(0.35))
    footer.fill.solid()
    footer.fill.fore_color.rgb = COLORS["white"]
    footer.line.fill.background()

    left = slide.shapes.add_textbox(MARGIN_X, Inches(7.2), Inches(6), Inches(0.2))
    ltf = left.text_frame
    p = ltf.paragraphs[0]
    r = p.add_run()
    r.text = "Greenfield | Data & AI Solutions | Internal - Confidential"
    set_run_font(r, size=9, color=COLORS["muted"])
def add_section_divider(title, subtitle, accent="blue"):
    """Add a full-bleed navy section-divider slide with an accent band.

    Args:
        title: Large divider heading.
        subtitle: Supporting line rendered under the heading.
        accent: COLORS key used for the bottom accent band.

    Returns:
        The created slide.
    """
    slide = prs.slides.add_slide(prs.slide_layouts[6])
    bg = slide.background
    bg.fill.solid()
    bg.fill.fore_color.rgb = COLORS["navy"]

    # Accent color band along the bottom of the slide.
    band = slide.shapes.add_shape(MSO_SHAPE.RECTANGLE, Inches(0), Inches(5.7), prs.slide_width, Inches(1.8))
    band.fill.solid()
    band.fill.fore_color.rgb = COLORS[accent]
    band.line.fill.background()

    box = slide.shapes.add_textbox(Inches(0.9), Inches(2.2), Inches(11.5), Inches(2.3))
    tf = box.text_frame
    tf.clear()
    p1 = tf.paragraphs[0]
    r1 = p1.add_run()
    r1.text = title
    set_run_font(r1, size=42, bold=True, color=COLORS["white"])

    p2 = tf.add_paragraph()
    r2 = p2.add_run()
    r2.text = subtitle
    set_run_font(r2, size=18, color=COLORS["white"])

    return slide
def add_bullet_slide(title, bullets, subtitle=None, icon_letter=None):
    """Add a branded slide containing a single bulleted list.

    Args:
        title: Header title text.
        bullets: Iterable of bullet strings, one paragraph each.
        subtitle: Optional header subtitle.
        icon_letter: Optional single letter shown in a teal circle badge
            at the top-right of the header.

    Returns:
        The created slide.
    """
    slide = prs.slides.add_slide(prs.slide_layouts[6])
    add_brand(slide, title, subtitle)

    if icon_letter:
        # Small circular badge in the header's right corner.
        icon = slide.shapes.add_shape(MSO_SHAPE.OVAL, Inches(11.65), Inches(0.15), Inches(0.5), Inches(0.5))
        icon.fill.solid()
        icon.fill.fore_color.rgb = COLORS["teal"]
        icon.line.fill.background()
        t = icon.text_frame
        t.clear()
        p = t.paragraphs[0]
        p.alignment = PP_ALIGN.CENTER
        r = p.add_run()
        r.text = icon_letter
        set_run_font(r, size=14, bold=True, color=COLORS["white"])

    box = slide.shapes.add_textbox(MARGIN_X, CONTENT_TOP, CONTENT_W, CONTENT_H)
    tf = box.text_frame
    tf.clear()
    tf.word_wrap = True

    for i, item in enumerate(bullets):
        # Reuse the implicit first paragraph, then append new ones.
        p = tf.paragraphs[0] if i == 0 else tf.add_paragraph()
        p.level = 0
        p.space_after = Pt(8)
        r = p.add_run()
        r.text = item
        set_run_font(r, size=20, color=COLORS["text"])

    return slide
def add_two_col_slide(title, left_title, left_bullets, right_title, right_bullets, subtitle=None):
    """Add a branded slide with two titled card columns of bullets.

    Args:
        title: Header title text.
        left_title / right_title: Card header labels.
        left_bullets / right_bullets: Bullet strings for each card.
        subtitle: Optional header subtitle.

    Returns:
        The created slide.
    """
    slide = prs.slides.add_slide(prs.slide_layouts[6])
    add_brand(slide, title, subtitle)

    lx, rx = MARGIN_X, Inches(6.8)
    w = Inches(5.8)

    # Draw both cards: rounded white body, blue header bar, column title.
    for x, h in [(lx, left_title), (rx, right_title)]:
        card = slide.shapes.add_shape(MSO_SHAPE.ROUNDED_RECTANGLE, x, CONTENT_TOP, w, CONTENT_H)
        card.fill.solid()
        card.fill.fore_color.rgb = COLORS["white"]
        card.line.color.rgb = RGBColor(220, 228, 236)

        hb = slide.shapes.add_shape(MSO_SHAPE.RECTANGLE, x, CONTENT_TOP, w, Inches(0.55))
        hb.fill.solid()
        hb.fill.fore_color.rgb = COLORS["blue"]
        hb.line.fill.background()

        tb = slide.shapes.add_textbox(x + Inches(0.2), CONTENT_TOP + Inches(0.12), w - Inches(0.4), Inches(0.3))
        tf = tb.text_frame
        p = tf.paragraphs[0]
        r = p.add_run()
        r.text = h
        set_run_font(r, size=15, bold=True, color=COLORS["white"])

    def write_bullets(x, items):
        # Bullet text box inset within the card, below the header bar.
        b = slide.shapes.add_textbox(x + Inches(0.25), CONTENT_TOP + Inches(0.7), w - Inches(0.5), CONTENT_H - Inches(0.8))
        tf = b.text_frame
        tf.clear()
        tf.word_wrap = True
        for i, item in enumerate(items):
            p = tf.paragraphs[0] if i == 0 else tf.add_paragraph()
            p.level = 0
            p.space_after = Pt(6)
            r = p.add_run()
            r.text = item
            set_run_font(r, size=15)

    write_bullets(lx, left_bullets)
    write_bullets(rx, right_bullets)
    return slide
def add_image_slide(title, image_path, caption=None, subtitle=None):
    """Add a branded slide whose content area is a full-width picture.

    Args:
        title: Header title text.
        image_path: Filesystem path to the image to embed.
        caption: Optional small muted caption below the image.
        subtitle: Optional header subtitle.
    """
    slide = prs.slides.add_slide(prs.slide_layouts[6])
    add_brand(slide, title, subtitle)
    # Image is stretched to fill the standard content area.
    slide.shapes.add_picture(image_path, MARGIN_X, Inches(1.25), width=Inches(12.1), height=Inches(5.6))
    if caption:
        cap = slide.shapes.add_textbox(MARGIN_X, Inches(6.95), Inches(12), Inches(0.2))
        tf = cap.text_frame
        p = tf.paragraphs[0]
        r = p.add_run()
        r.text = caption
        set_run_font(r, size=10, color=COLORS["muted"])
def add_flow_slide(title, steps, subtitle=None):
    """Add a branded slide rendering *steps* as a left-to-right process flow.

    Each step becomes a rounded rectangle; teal chevrons connect
    consecutive steps.

    Args:
        title: Header title text.
        steps: Non-empty sequence of step labels.
        subtitle: Optional header subtitle.
    """
    slide = prs.slides.add_slide(prs.slide_layouts[6])
    add_brand(slide, title, subtitle)

    x = MARGIN_X
    y = Inches(2.5)
    total_w = Inches(12.1)
    gap = Inches(0.15)
    # Floor division keeps the width an integral EMU; true division would
    # produce a float length that python-pptx cannot serialize cleanly.
    step_w = (total_w - gap * (len(steps) - 1)) // len(steps)

    for i, s in enumerate(steps):
        bx = x + i * (step_w + gap)
        shp = slide.shapes.add_shape(MSO_SHAPE.ROUNDED_RECTANGLE, bx, y, step_w, Inches(1.6))
        shp.fill.solid()
        shp.fill.fore_color.rgb = COLORS["white"]
        shp.line.color.rgb = COLORS["blue"]
        tf = shp.text_frame
        tf.clear()
        p = tf.paragraphs[0]
        p.alignment = PP_ALIGN.CENTER
        r = p.add_run()
        r.text = s
        set_run_font(r, size=14, bold=True)

        # Connector chevron between this step and the next.
        if i < len(steps) - 1:
            arr = slide.shapes.add_shape(MSO_SHAPE.CHEVRON, bx + step_w - Inches(0.02), y + Inches(0.58), Inches(0.2), Inches(0.44))
            arr.fill.solid()
            arr.fill.fore_color.rgb = COLORS["teal"]
            arr.line.fill.background()
def add_table_slide(title, columns, rows, subtitle=None):
    """Add a branded slide containing a styled table.

    The header row is blue with white bold text; body rows alternate a
    light-blue zebra stripe.

    Args:
        title: Header title text.
        columns: Column header labels.
        rows: Sequence of row value sequences (same width as columns).
        subtitle: Optional header subtitle.
    """
    slide = prs.slides.add_slide(prs.slide_layouts[6])
    add_brand(slide, title, subtitle)
    tbl_shape = slide.shapes.add_table(len(rows) + 1, len(columns), MARGIN_X, CONTENT_TOP, CONTENT_W, CONTENT_H)
    table = tbl_shape.table

    # Header row styling.
    for j, col in enumerate(columns):
        cell = table.cell(0, j)
        cell.text = col
        cell.fill.solid()
        cell.fill.fore_color.rgb = COLORS["blue"]
        for p in cell.text_frame.paragraphs:
            for r in p.runs:
                set_run_font(r, size=12, bold=True, color=COLORS["white"])

    # Body rows; even rows (1-indexed) get the zebra stripe fill.
    for i, row in enumerate(rows, start=1):
        for j, val in enumerate(row):
            cell = table.cell(i, j)
            cell.text = val
            if i % 2 == 0:
                cell.fill.solid()
                cell.fill.fore_color.rgb = RGBColor(236, 242, 248)
            for p in cell.text_frame.paragraphs:
                for r in p.runs:
                    set_run_font(r, size=11, color=COLORS["text"])
# Slide 1: title slide (built inline; it doesn't use the standard chrome).
slide = prs.slides.add_slide(prs.slide_layouts[6])
bg = slide.background
bg.fill.solid()
bg.fill.fore_color.rgb = COLORS["navy"]

# Blue hero band across the lower portion of the slide.
hero = slide.shapes.add_shape(MSO_SHAPE.RECTANGLE, Inches(0), Inches(4.8), prs.slide_width, Inches(2.7))
hero.fill.solid()
hero.fill.fore_color.rgb = COLORS["blue"]
hero.line.fill.background()

box = slide.shapes.add_textbox(Inches(0.9), Inches(1.0), Inches(11.5), Inches(3.0))
tf = box.text_frame
tf.clear()
p = tf.paragraphs[0]
r = p.add_run()
r.text = "Greenfield Modern Data Platform"
set_run_font(r, size=46, bold=True, color=COLORS["white"])
p2 = tf.add_paragraph()
r2 = p2.add_run()
r2.text = "Databricks-Primary Architecture (v8.0)"
set_run_font(r2, size=28, color=COLORS["white"])
p3 = tf.add_paragraph()
r3 = p3.add_run()
r3.text = "Executive Architecture Deck | March 2026"
set_run_font(r3, size=16, color=COLORS["white"])

f = slide.shapes.add_textbox(Inches(0.9), Inches(6.95), Inches(11), Inches(0.3))
fr = f.text_frame.paragraphs[0].add_run()
fr.text = "Classification: Internal - Confidential | Owner: VP, Chief Data Officer"
set_run_font(fr, size=10, color=COLORS["white"])
# Slides 2-6: agenda, executive summary, drivers, vision, and decisions.
add_bullet_slide("Presentation Agenda", [
    "1. Strategic context and architecture decisions",
    "2. Logical architecture and medallion implementation",
    "3. Governance, quality, and security/privacy controls",
    "4. Azure infrastructure deployment model",
    "5. Operating model, roadmap, and executive decisions",
], subtitle="50-slide executive briefing", icon_letter="A")

add_bullet_slide("Executive Summary", [
    "Databricks is the primary platform for engineering, warehousing, and AI/ML workloads.",
    "Microsoft Fabric is scoped to BI serving (Power BI Direct Lake) with strict guardrails.",
    "SAS Viya Compute Server remains for regulated actuarial and risk analytics.",
    "Purview + Unity Catalog + Manta delivers full governance and cross-platform lineage.",
    "Architecture is designed for AMF, OSFI, and Law 25 compliance in Canadian regions.",
], icon_letter="E")

add_two_col_slide(
    "Strategic Drivers and Outcomes",
    "Primary Drivers",
    [
        "Regulatory compliance and auditability",
        "Enterprise-scale data democratization",
        "AI-readiness across business units",
        "Cost discipline through clear platform boundaries",
    ],
    "Target Outcomes",
    [
        "Single governed source of truth",
        "Faster delivery of reusable data products",
        "Risk-managed self-service analytics",
        "Scalable architecture for 55,000 employees",
    ],
)

add_bullet_slide("Vision and Guiding Principles", [
    "Data as a Product with clear ownership and quality accountability.",
    "Unified governance with federated execution where capabilities are mature.",
    "Security by design with RLS, CLS, DDM, and zero-trust controls.",
    "Right tool for the right workload to avoid capability overlap.",
    "AI-ready-by-default architecture across engineering and serving layers.",
], icon_letter="V")

add_table_slide(
    "Key Architecture Decisions (AD-01 to AD-09)",
    ["Decision", "Direction", "Executive Intent"],
    [
        ["AD-01", "Delta Lake as canonical format", "Portability, consistency, and open standards"],
        ["AD-02", "Databricks as primary platform", "Consolidate ETL, SQL, and ML execution"],
        ["AD-03", "Fabric as BI serving layer", "Optimize Power BI cost/performance at scale"],
        ["AD-04", "SAS Viya Compute Server", "Support regulated actuarial/risk workloads"],
        ["AD-05", "Purview + UC + Manta", "End-to-end governance and lineage"],
        ["AD-06", "Canadian-only regions", "Satisfy residency and resilience constraints"],
        ["AD-07", "ADLS Gen2 shared substrate", "Single physical data foundation"],
    ],
)
# Slides 7-18: logical architecture section.
add_section_divider("Logical Architecture", "Layered design and platform responsibilities", accent="teal")

add_table_slide(
    "Seven Logical Layers",
    ["Layer", "Purpose", "Primary Technologies"],
    [
        ["Ingestion", "Acquire and track changes", "ADF, Auto Loader, Event Hub"],
        ["Bronze", "Immutable raw storage", "ADLS Gen2 Delta, Unity Catalog"],
        ["Silver", "Conformance and quality", "DLT, dbt, Great Expectations"],
        ["Gold", "Business-ready products", "Databricks SQL, Fabric Direct Lake"],
        ["Semantic", "KPI business language", "Power BI Semantic Models, dbt Metrics"],
        ["Serving", "End-user consumption", "Power BI, SQL, APIs, SAS VA"],
        ["AI/ML", "Model lifecycle", "MLflow, Feature Store, Model Serving"],
    ],
)

add_two_col_slide(
    "Cross-Cutting Controls",
    "Governance & Metadata",
    [
        "Enterprise catalog and business glossary in Purview",
        "Technical governance and grants in Unity Catalog",
        "Unified lineage graph through Manta integration",
    ],
    "Security & Operations",
    [
        "Identity, RBAC/ABAC, masking, and encryption",
        "Observability for pipelines, cost, and SLAs",
        "Policy enforcement via automation and review gates",
    ],
)

add_table_slide(
    "Platform Responsibility Matrix",
    ["Capability", "Databricks", "Fabric", "SAS Viya"],
    [
        ["Data Engineering", "Primary", "Restricted", "Targeted"],
        ["Data Warehousing", "Primary", "BI serving only", "-"],
        ["BI & Reporting", "Secondary", "Primary", "Targeted"],
        ["ML / AI", "Primary", "-", "Specialized"],
        ["Lineage", "Contributing", "Contributing", "Manta source"],
        ["Semantic Intelligence", "Contributing", "Evaluate (H2/H3)", "-"],
    ],
)

add_flow_slide("Data Flow Between Platforms", [
    "Sources",
    "ADF / Auto Loader",
    "ADLS + Delta",
    "Databricks\nSilver/Gold",
    "Fabric Direct Lake\n+ SAS JDBC",
], subtitle="Single source of truth with governed consumption paths")

add_image_slide(
    "Medallion Architecture: End-to-End View",
    "/Users/oabrivard/Projects/mdp/.tmp_assets/medallion1.png",
    caption="Source systems to serving channels with governance, security, and observability overlays.",
)

add_bullet_slide("Ingestion Patterns", [
    "Batch file ingestion with Auto Loader schema inference/evolution.",
    "Database CDC and incremental extraction with Azure Data Factory.",
    "Streaming ingestion through Event Hub and Structured Streaming.",
    "API ingestion orchestrated with Databricks Workflows or ADF.",
    "SAS-native ingestion via JDBC LIBNAME or authorized ADLS paths.",
], icon_letter="I")

add_flow_slide("Pre-Bronze Data Quality Gate", [
    "Land to Staging",
    "Purview DQ Sampling",
    "Pass/Fail Decision",
    "Bronze or Quarantine",
    "Steward Alerting",
], subtitle="Tier-1 DQ before raw persistence")

add_bullet_slide("Bronze Layer Implementation", [
    "Immutable append-only Delta storage by source and entity.",
    "Ingestion metadata columns for provenance and replay.",
    "Schema evolution enabled with controlled drift handling.",
    "90-day Delta time travel baseline with domain-specific retention.",
    "Quarantine sub-tables for structural failures.",
], icon_letter="B")

add_bullet_slide("Silver Layer Implementation", [
    "Standardization, cleansing, deduplication, and key conformance.",
    "DLT expectations enforce quality rules during transformation.",
    "Reference/master data enrichment and survivorship logic.",
    "PII normalization and policy tagging for downstream controls.",
    "Lineage captured to support full audit traceability.",
], icon_letter="S")

add_bullet_slide("Gold Layer and Data Products", [
    "Business-ready dimensional and analytical models.",
    "Certified KPIs with semantic alignment and stewardship signoff.",
    "Feature-ready curated datasets for AI/ML consumption.",
    "Power BI Direct Lake consumption through OneLake shortcuts.",
    "SAS consumption via JDBC path to preserve Unity Catalog controls.",
], icon_letter="G")

add_table_slide(
    "Serving Channels and Interfaces",
    ["Consumer", "Interface", "Governance Pattern"],
    [
        ["Power BI", "Fabric Direct Lake", "Purview + Semantic model governance"],
        ["Analysts", "Databricks SQL", "Unity Catalog grants, row/column controls"],
        ["Applications", "APIs / JDBC / ODBC", "Service principals and contracts"],
        ["Data Science", "Notebooks + Feature Store", "Model governance and lineage"],
        ["Actuarial Teams", "SAS Viya Compute Server", "JDBC first; restricted ADLS exception"],
    ],
)
# Slides 19-26: governance and data quality section.
add_section_divider("Governance & Data Quality", "Three-tier control model across platforms", accent="blue")

add_bullet_slide("Governance Operating Model", [
    "Centralized governance standards with federated delivery execution.",
    "Data ownership defined at domain and product levels.",
    "Stewardship workflows tied to quality and policy exceptions.",
    "Architecture review gates prevent workload drift and duplication.",
], icon_letter="G")

add_flow_slide("Three-Tier Catalog Strategy", [
    "Purview\nEnterprise Plane",
    "Unity Catalog\nTechnical Enforcement",
    "Manta\nCross-Platform Lineage",
], subtitle="Complementary strengths instead of overlapping catalogs")

add_bullet_slide("Tier 1: Microsoft Purview", [
    "Enterprise inventory, glossary, classification, and policy management.",
    "Data quality dashboards and stewardship collaboration workflows.",
    "Visibility layer for business and compliance stakeholders.",
    "Lineage consumption endpoint for integrated Manta metadata.",
], icon_letter="P")

add_bullet_slide("Tier 2: Databricks Unity Catalog", [
    "Central metastore and object-level permissions.",
    "Row filters and column masks enforced at query time.",
    "Audit events and lineage for data engineering and SQL workloads.",
    "Primary technical governance plane for Delta assets.",
], icon_letter="U")

add_bullet_slide("Tier 3: Manta Lineage Engine", [
    "Code-level lineage extraction across Databricks, ADF, and SAS.",
    "Bridges lineage gaps no single platform can cover end-to-end.",
    "Feeds lineage views in Purview and technical diagnostics in UC.",
    "Critical for regulated change impact assessment.",
], icon_letter="M")

add_table_slide(
    "Three-Tier Data Quality Framework",
    ["Tier", "Execution Point", "Gate Outcome"],
    [
        ["Tier 1", "Ingestion (Purview DQ Sampling)", "Block or quarantine defective loads"],
        ["Tier 2", "Medallion transitions (DLT + GE)", "Reject non-conforming records"],
        ["Tier 3", "Gold certification SLA checks", "Publish only certified products"],
        ["Unified", "DQ observability dashboards", "Steward accountability and trend control"],
    ],
)

add_bullet_slide("Master Data and Reference Data", [
    "Golden identifiers and reference dimensions centralized in curated zones.",
    "Conformance rules propagated across domains via reusable pipelines.",
    "Customer 360 and core reference entities treated as enterprise products.",
    "Lifecycle governance aligns with DAMA-DMBOK and DCAM practices.",
], icon_letter="D")
# Slides 27-35: security and privacy section.
add_section_divider("Security & Privacy", "Defense-in-depth controls for regulated operations", accent="gold")

add_bullet_slide("Identity and Authentication Foundation", [
    "Azure Entra ID is the mandatory identity provider across all components.",
    "MFA, compliant device, and conditional access are always enforced.",
    "SCIM/SAML/OAuth integrations standardize workforce access patterns.",
    "Managed identities preferred; static credentials prohibited.",
], icon_letter="I")

add_table_slide(
    "RBAC Model (Azure Resource Layer)",
    ["Security Group", "Primary Role", "Scope"],
    [
        ["sg-data-platform-admins", "Contributor", "Data platform subscriptions"],
        ["sg-data-engineers", "Databricks + Storage Contributor", "Engineering workspaces and ADLS"],
        ["sg-data-scientists", "Sandbox contributor + reader", "Exploration and governed read zones"],
        ["sg-governance-admins", "Purview curator + KV admin", "Governance stack"],
        ["sg-fabric-admins", "Capacity administrator", "Fabric BI capacity"],
    ],
)

add_bullet_slide("ABAC in Unity Catalog", [
    "Policy decisions are based on role, business unit, region, and clearance attributes.",
    "Catalog/schema/table hierarchies simplify consistent grant propagation.",
    "Attribute provisioning aligns with joiner-mover-leaver lifecycle.",
    "ABAC path extends to downstream BI and SAS controls via governed interfaces.",
], icon_letter="A")

add_bullet_slide("Row-Level Security Strategy", [
    "Unity Catalog row filters enforce least-privilege data visibility.",
    "Rules use user context and domain attributes at query runtime.",
    "Power BI RLS complements semantic layer access boundaries.",
    "Security logic is centrally versioned and auditable.",
], icon_letter="R")

add_bullet_slide("Column Security and Dynamic Masking", [
    "Column-level policies protect PII and regulated attributes.",
    "Masking modes include nulling, partial reveal, and deterministic hash.",
    "Purview classification labels drive mask policy assignment.",
    "Sensitive fields remain controlled across SQL, BI, and API paths.",
], icon_letter="C")

add_bullet_slide("Privacy Controls (Law 25 / PIPEDA)", [
    "Consent metadata influences access and downstream usage policies.",
    "DSAR workflows support discover, extract, and evidence generation.",
    "Right-to-erasure handled through orchestrated purge and retention rules.",
    "Pseudonymization/anonymization patterns applied by domain sensitivity.",
], icon_letter="L")

add_two_col_slide(
    "Encryption and Exfiltration Controls",
    "Encryption",
    [
        "AES-256 at rest with MMK/CMK strategy",
        "TLS 1.2+ for all in-transit communications",
        "Key Vault managed key lifecycle and rotation",
    ],
    "Exfiltration Prevention",
    [
        "Private endpoints and disabled public access",
        "Firewall and NSG deny-by-default stance",
        "DLP policies and export restrictions in BI layers",
    ],
)

add_bullet_slide("Security Audit and Monitoring", [
    "Centralized logs from Entra, Databricks, Fabric, SAS, and Azure services.",
    "Security alerting integrated with incident response operations.",
    "Compliance evidence packs generated for internal/external audits.",
    "Policy-as-code controls continuously validated through CI/CD.",
], icon_letter="M")
# Slides 36-45: Azure infrastructure section.
add_section_divider("Azure Infrastructure", "Landing zone, network, compute, and operations", accent="teal")

add_bullet_slide("Landing Zone Architecture", [
    "CAF-aligned enterprise landing zone with policy inheritance.",
    "Hub-spoke topology isolates workloads and centralizes control points.",
    "All spokes route egress through Azure Firewall Premium inspection.",
    "Private DNS and Bastion complete the secure connectivity model.",
], icon_letter="L")

add_image_slide(
    "Azure Infrastructure Topology",
    "/Users/oabrivard/Projects/mdp/.tmp_assets/azure_topology.png",
    caption="Physical deployment across subscriptions, VNets, services, and control planes.",
)

add_table_slide(
    "Region and Disaster Recovery Strategy",
    ["Domain", "Primary", "Secondary / DR"],
    [
        ["Region", "Canada Central", "Canada East"],
        ["Role", "Production and non-production execution", "DR replication and recovery"],
        ["Compliance", "Canadian residency", "Canadian residency maintained"],
        ["Constraint", "No non-Canadian replication", "Enforced by Azure Policy"],
    ],
)

add_table_slide(
    "Subscription and Resource Organization",
    ["Subscription", "Purpose", "Key Controls"],
    [
        ["sub-data-connectivity", "Hub networking", "Firewall, ER, DNS, Bastion"],
        ["sub-data-platform-prod", "Production platform", "Databricks, ADLS, Purview, KV"],
        ["sub-data-platform-nonprod", "Dev/stage/sandbox", "Cost and quota boundaries"],
        ["sub-data-sas-prod", "SAS workloads", "Isolated compute and RBAC"],
        ["sub-data-fabric-prod", "BI capacity", "F-SKU governance"],
        ["sub-data-management", "Ops and monitoring", "DevOps, Log Analytics, Sentinel"],
    ],
)

add_bullet_slide("Networking and Private Endpoints", [
    "Databricks VNet injection uses dedicated /22 host and container subnets.",
    "Private endpoint-only access for ADLS, Key Vault, Purview, ADF, and Event Hub.",
    "NSG baseline is deny-all with explicit allow rules per flow.",
    "No direct spoke-to-spoke communication; all traffic inspectable.",
], icon_letter="N")

add_table_slide(
    "Databricks Compute Sizing and Policies",
    ["Workspace", "Primary Role", "Control Highlights"],
    [
        ["dbw-data-eng-prod", "Pipelines and transformations", "Cluster policy guardrails, Photon, autoscaling"],
        ["dbw-analytics-prod", "SQL and dashboards", "Serverless SQL, autosuspend, workload mgmt"],
        ["dbw-mlops-prod", "Modeling and serving", "GPU pools, model serving autoscale"],
        ["dbw-data-eng-dev", "Dev/test", "Reduced limits and strict auto-terminate"],
        ["dbw-sandbox", "Exploration", "Read-only sensitive zones, budget caps"],
    ],
)

add_two_col_slide(
    "SAS Viya Compute Server Model",
    "Deployment Pattern",
    [
        "Private AKS deployment recommended",
        "Compute Server engine (sequential batch)",
        "Memory-optimized pods for actuarial models",
        "Dedicated license and model manager services",
    ],
    "Data Access Rules",
    [
        "JDBC to Databricks SQL is default path",
        "ADLS direct path only for pre-authorized non-sensitive zones",
        "SAS service principal scoped with minimal RBAC",
        "All access paths are logged for audit and lineage",
    ],
)

add_bullet_slide("Fabric Capacity Guardrails", [
    "Capacity is sized exclusively for BI serving workloads.",
    "No Fabric warehouse/lakehouse duplication of Databricks managed data.",
    "No Fabric Data Factory or notebook shadow engineering layer.",
    "Capacity expansions require architecture board approval.",
], icon_letter="F")

add_table_slide(
    "Observability, FinOps, and DevOps",
    ["Domain", "Tooling", "Expected Outcome"],
    [
        ["Monitoring", "Azure Monitor + Log Analytics + Databricks metrics", "SLA visibility and incident detection"],
        ["Security Ops", "Sentinel + alert rules", "Threat detection and response"],
        ["FinOps", "Tags, budgets, cost dashboards", "Chargeback and optimization"],
        ["IaC", "Terraform modules + CI/CD", "Repeatable and auditable infrastructure"],
    ],
)
# Slides 46-50: roadmap and closing decisions.
add_section_divider("Roadmap and Decisions", "Phased execution with governance gates", accent="blue")

add_bullet_slide("Horizon 1 (0-12 Months): Foundation", [
    "Finalize Databricks-primary architecture baseline and guardrails.",
    "Deploy production landing zone, networking, and core governance stack.",
    "Industrialize Bronze/Silver/Gold pipelines for priority domains.",
    "Implement tiered DQ controls and initial certification process.",
], icon_letter="1")

add_bullet_slide("Horizon 2 (12-24 Months): Scale and Intelligence", [
    "Expand domain data products and semantic KPI coverage.",
    "Increase self-service capabilities within governed boundaries.",
    "Pilot Fabric IQ capabilities under strict access prerequisites.",
    "Strengthen cross-platform lineage and model risk controls.",
], icon_letter="2")

add_two_col_slide(
    "Horizon 3 (24-36 Months): Optimize and Innovate",
    "Optimization Focus",
    [
        "Platform cost/performance rebalancing",
        "Automated policy assurance and controls testing",
        "Expanded AI/ML productization and monitoring",
    ],
    "Innovation Focus",
    [
        "Domain agent use cases with safe data grounding",
        "Advanced semantic intelligence on certified products",
        "Continuous architecture modernization under guardrails",
    ],
)

add_table_slide(
    "Executive Decision Requests",
    ["Decision", "Required Direction", "Impact"],
    [
        ["Architecture Baseline", "Approve Databricks-primary v8.0", "Locks platform scope and investment alignment"],
        ["Fabric Guardrails", "Approve BI-serving-only boundary", "Prevents duplication and cost drift"],
        ["Governance Model", "Approve Purview + UC + Manta operating model", "Enables auditable enterprise control"],
        ["Roadmap Funding", "Approve phased delivery plan", "Accelerates time-to-value with risk control"],
    ],
    subtitle="Final steering decisions",
)
# Validate slide count with an explicit check rather than `assert`,
# which is silently stripped when Python runs with -O.
slide_count = len(prs.slides)
if slide_count != 50:
    raise RuntimeError(f"Expected 50 slides, got {slide_count}")
prs.save(OUTPUT)
print(f"Created {OUTPUT} with {slide_count} slides")