# Deck builder: Greenfield Modern Data Platform — Databricks-primary architecture (v8.0).
# Generates a 50-slide executive briefing as a .pptx file using python-pptx.
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.dml.color import RGBColor
from pptx.enum.text import PP_ALIGN
from pptx.enum.shapes import MSO_SHAPE

# Destination path for the generated deck.
OUTPUT = "/Users/oabrivard/Projects/mdp/databricks_prime.pptx"

# 16:9 widescreen canvas (13.333 x 7.5 inches).
prs = Presentation()
prs.slide_width = Inches(13.333)
prs.slide_height = Inches(7.5)

# Design system: brand palette shared by every slide builder below.
COLORS = {
    "navy": RGBColor(13, 35, 64),
    "blue": RGBColor(0, 83, 155),
    "teal": RGBColor(0, 122, 128),
    "light_bg": RGBColor(244, 247, 251),
    "text": RGBColor(38, 50, 56),
    "muted": RGBColor(96, 112, 128),
    "white": RGBColor(255, 255, 255),
    "gold": RGBColor(179, 140, 0),
}

# Shared layout geometry for the content area below the header band.
MARGIN_X = Inches(0.6)
CONTENT_TOP = Inches(1.25)
CONTENT_W = Inches(12.1)
CONTENT_H = Inches(5.7)
def set_run_font(run, size=20, bold=False, color=None, name="Segoe UI"):
    """Apply the deck's standard font styling to a text run.

    Args:
        run: python-pptx text run to style.
        size: Font size in points.
        bold: Whether the run is rendered bold.
        color: Optional RGBColor override; defaults to the body text color.
        name: Font family name.
    """
    run.font.size = Pt(size)
    run.font.bold = bold
    run.font.name = name
    # Fall back to the standard body color when no override is supplied.
    run.font.color.rgb = color or COLORS["text"]
def add_brand(slide, title, subtitle=None):
    """Paint the shared slide chrome: background, header band, title, footer.

    Args:
        slide: Slide to decorate.
        title: Header title text (white, bold, in the navy band).
        subtitle: Optional muted subtitle shown just below the header.
    """
    bg = slide.background
    bg.fill.solid()
    bg.fill.fore_color.rgb = COLORS["light_bg"]

    # Navy header band spanning the full slide width.
    header = slide.shapes.add_shape(MSO_SHAPE.RECTANGLE, 0, 0, prs.slide_width, Inches(0.9))
    header.fill.solid()
    header.fill.fore_color.rgb = COLORS["navy"]
    header.line.fill.background()

    title_box = slide.shapes.add_textbox(MARGIN_X, Inches(0.14), Inches(9.8), Inches(0.5))
    tf = title_box.text_frame
    tf.clear()
    p = tf.paragraphs[0]
    r = p.add_run()
    r.text = title
    set_run_font(r, size=24, bold=True, color=COLORS["white"])

    if subtitle:
        sub_box = slide.shapes.add_textbox(MARGIN_X, Inches(0.72), Inches(11.5), Inches(0.4))
        stf = sub_box.text_frame
        stf.clear()
        p = stf.paragraphs[0]
        r = p.add_run()
        r.text = subtitle
        set_run_font(r, size=12, color=COLORS["muted"])

    # White footer strip carrying the confidentiality notice.
    footer = slide.shapes.add_shape(MSO_SHAPE.RECTANGLE, 0, Inches(7.15), prs.slide_width, Inches(0.35))
    footer.fill.solid()
    footer.fill.fore_color.rgb = COLORS["white"]
    footer.line.fill.background()

    left = slide.shapes.add_textbox(MARGIN_X, Inches(7.2), Inches(6), Inches(0.2))
    ltf = left.text_frame
    p = ltf.paragraphs[0]
    r = p.add_run()
    r.text = "Greenfield | Data & AI Solutions | Internal - Confidential"
    set_run_font(r, size=9, color=COLORS["muted"])
def add_section_divider(title, subtitle, accent="blue"):
    """Add a full-bleed navy section-divider slide with an accent band.

    Args:
        title: Large divider heading.
        subtitle: Supporting line rendered under the heading.
        accent: COLORS key used for the bottom accent band.

    Returns:
        The created slide.
    """
    slide = prs.slides.add_slide(prs.slide_layouts[6])
    bg = slide.background
    bg.fill.solid()
    bg.fill.fore_color.rgb = COLORS["navy"]

    # Accent color band along the bottom of the slide.
    band = slide.shapes.add_shape(MSO_SHAPE.RECTANGLE, Inches(0), Inches(5.7), prs.slide_width, Inches(1.8))
    band.fill.solid()
    band.fill.fore_color.rgb = COLORS[accent]
    band.line.fill.background()

    box = slide.shapes.add_textbox(Inches(0.9), Inches(2.2), Inches(11.5), Inches(2.3))
    tf = box.text_frame
    tf.clear()
    p1 = tf.paragraphs[0]
    r1 = p1.add_run()
    r1.text = title
    set_run_font(r1, size=42, bold=True, color=COLORS["white"])

    p2 = tf.add_paragraph()
    r2 = p2.add_run()
    r2.text = subtitle
    set_run_font(r2, size=18, color=COLORS["white"])

    return slide
def add_bullet_slide(title, bullets, subtitle=None, icon_letter=None):
    """Add a branded slide containing a single bulleted list.

    Args:
        title: Header title text.
        bullets: Iterable of bullet strings, one paragraph each.
        subtitle: Optional header subtitle.
        icon_letter: Optional single letter shown in a teal circle badge
            at the top-right of the header.

    Returns:
        The created slide.
    """
    slide = prs.slides.add_slide(prs.slide_layouts[6])
    add_brand(slide, title, subtitle)

    if icon_letter:
        # Small circular badge in the header's right corner.
        icon = slide.shapes.add_shape(MSO_SHAPE.OVAL, Inches(11.65), Inches(0.15), Inches(0.5), Inches(0.5))
        icon.fill.solid()
        icon.fill.fore_color.rgb = COLORS["teal"]
        icon.line.fill.background()
        t = icon.text_frame
        t.clear()
        p = t.paragraphs[0]
        p.alignment = PP_ALIGN.CENTER
        r = p.add_run()
        r.text = icon_letter
        set_run_font(r, size=14, bold=True, color=COLORS["white"])

    box = slide.shapes.add_textbox(MARGIN_X, CONTENT_TOP, CONTENT_W, CONTENT_H)
    tf = box.text_frame
    tf.clear()
    tf.word_wrap = True

    for i, item in enumerate(bullets):
        # Reuse the implicit first paragraph, then append new ones.
        p = tf.paragraphs[0] if i == 0 else tf.add_paragraph()
        p.level = 0
        p.space_after = Pt(8)
        r = p.add_run()
        r.text = item
        set_run_font(r, size=20, color=COLORS["text"])

    return slide
def add_two_col_slide(title, left_title, left_bullets, right_title, right_bullets, subtitle=None):
    """Add a branded slide with two titled card columns of bullets.

    Args:
        title: Header title text.
        left_title / right_title: Card header labels.
        left_bullets / right_bullets: Bullet strings for each card.
        subtitle: Optional header subtitle.

    Returns:
        The created slide.
    """
    slide = prs.slides.add_slide(prs.slide_layouts[6])
    add_brand(slide, title, subtitle)

    lx, rx = MARGIN_X, Inches(6.8)
    w = Inches(5.8)

    # Draw both cards: rounded white body, blue header bar, column title.
    for x, h in [(lx, left_title), (rx, right_title)]:
        card = slide.shapes.add_shape(MSO_SHAPE.ROUNDED_RECTANGLE, x, CONTENT_TOP, w, CONTENT_H)
        card.fill.solid()
        card.fill.fore_color.rgb = COLORS["white"]
        card.line.color.rgb = RGBColor(220, 228, 236)

        hb = slide.shapes.add_shape(MSO_SHAPE.RECTANGLE, x, CONTENT_TOP, w, Inches(0.55))
        hb.fill.solid()
        hb.fill.fore_color.rgb = COLORS["blue"]
        hb.line.fill.background()

        tb = slide.shapes.add_textbox(x + Inches(0.2), CONTENT_TOP + Inches(0.12), w - Inches(0.4), Inches(0.3))
        tf = tb.text_frame
        p = tf.paragraphs[0]
        r = p.add_run()
        r.text = h
        set_run_font(r, size=15, bold=True, color=COLORS["white"])

    def write_bullets(x, items):
        # Bullet text box inset within the card, below the header bar.
        b = slide.shapes.add_textbox(x + Inches(0.25), CONTENT_TOP + Inches(0.7), w - Inches(0.5), CONTENT_H - Inches(0.8))
        tf = b.text_frame
        tf.clear()
        tf.word_wrap = True
        for i, item in enumerate(items):
            p = tf.paragraphs[0] if i == 0 else tf.add_paragraph()
            p.level = 0
            p.space_after = Pt(6)
            r = p.add_run()
            r.text = item
            set_run_font(r, size=15)

    write_bullets(lx, left_bullets)
    write_bullets(rx, right_bullets)
    return slide
def add_image_slide(title, image_path, caption=None, subtitle=None):
    """Add a branded slide whose content area is a full-width picture.

    Args:
        title: Header title text.
        image_path: Filesystem path to the image to embed.
        caption: Optional small muted caption below the image.
        subtitle: Optional header subtitle.
    """
    slide = prs.slides.add_slide(prs.slide_layouts[6])
    add_brand(slide, title, subtitle)
    # Image is stretched to fill the standard content area.
    slide.shapes.add_picture(image_path, MARGIN_X, Inches(1.25), width=Inches(12.1), height=Inches(5.6))
    if caption:
        cap = slide.shapes.add_textbox(MARGIN_X, Inches(6.95), Inches(12), Inches(0.2))
        tf = cap.text_frame
        p = tf.paragraphs[0]
        r = p.add_run()
        r.text = caption
        set_run_font(r, size=10, color=COLORS["muted"])
def add_flow_slide(title, steps, subtitle=None):
    """Add a branded slide rendering *steps* as a left-to-right process flow.

    Each step becomes a rounded rectangle; teal chevrons connect
    consecutive steps.

    Args:
        title: Header title text.
        steps: Non-empty sequence of step labels.
        subtitle: Optional header subtitle.
    """
    slide = prs.slides.add_slide(prs.slide_layouts[6])
    add_brand(slide, title, subtitle)

    x = MARGIN_X
    y = Inches(2.5)
    total_w = Inches(12.1)
    gap = Inches(0.15)
    # Floor division keeps the width an integral EMU; true division would
    # produce a float length that python-pptx cannot serialize cleanly.
    step_w = (total_w - gap * (len(steps) - 1)) // len(steps)

    for i, s in enumerate(steps):
        bx = x + i * (step_w + gap)
        shp = slide.shapes.add_shape(MSO_SHAPE.ROUNDED_RECTANGLE, bx, y, step_w, Inches(1.6))
        shp.fill.solid()
        shp.fill.fore_color.rgb = COLORS["white"]
        shp.line.color.rgb = COLORS["blue"]
        tf = shp.text_frame
        tf.clear()
        p = tf.paragraphs[0]
        p.alignment = PP_ALIGN.CENTER
        r = p.add_run()
        r.text = s
        set_run_font(r, size=14, bold=True)

        # Connector chevron between this step and the next.
        if i < len(steps) - 1:
            arr = slide.shapes.add_shape(MSO_SHAPE.CHEVRON, bx + step_w - Inches(0.02), y + Inches(0.58), Inches(0.2), Inches(0.44))
            arr.fill.solid()
            arr.fill.fore_color.rgb = COLORS["teal"]
            arr.line.fill.background()
def add_table_slide(title, columns, rows, subtitle=None):
    """Add a branded slide containing a styled table.

    The header row is blue with white bold text; body rows alternate a
    light-blue zebra stripe.

    Args:
        title: Header title text.
        columns: Column header labels.
        rows: Sequence of row value sequences (same width as columns).
        subtitle: Optional header subtitle.
    """
    slide = prs.slides.add_slide(prs.slide_layouts[6])
    add_brand(slide, title, subtitle)
    tbl_shape = slide.shapes.add_table(len(rows) + 1, len(columns), MARGIN_X, CONTENT_TOP, CONTENT_W, CONTENT_H)
    table = tbl_shape.table

    # Header row styling.
    for j, col in enumerate(columns):
        cell = table.cell(0, j)
        cell.text = col
        cell.fill.solid()
        cell.fill.fore_color.rgb = COLORS["blue"]
        for p in cell.text_frame.paragraphs:
            for r in p.runs:
                set_run_font(r, size=12, bold=True, color=COLORS["white"])

    # Body rows; even rows (1-indexed) get the zebra stripe fill.
    for i, row in enumerate(rows, start=1):
        for j, val in enumerate(row):
            cell = table.cell(i, j)
            cell.text = val
            if i % 2 == 0:
                cell.fill.solid()
                cell.fill.fore_color.rgb = RGBColor(236, 242, 248)
            for p in cell.text_frame.paragraphs:
                for r in p.runs:
                    set_run_font(r, size=11, color=COLORS["text"])
# Slide 1: title slide (built inline; it doesn't use the standard chrome).
slide = prs.slides.add_slide(prs.slide_layouts[6])
bg = slide.background
bg.fill.solid()
bg.fill.fore_color.rgb = COLORS["navy"]

# Blue hero band across the lower portion of the slide.
hero = slide.shapes.add_shape(MSO_SHAPE.RECTANGLE, Inches(0), Inches(4.8), prs.slide_width, Inches(2.7))
hero.fill.solid()
hero.fill.fore_color.rgb = COLORS["blue"]
hero.line.fill.background()

box = slide.shapes.add_textbox(Inches(0.9), Inches(1.0), Inches(11.5), Inches(3.0))
tf = box.text_frame
tf.clear()
p = tf.paragraphs[0]
r = p.add_run()
r.text = "Greenfield Modern Data Platform"
set_run_font(r, size=46, bold=True, color=COLORS["white"])
p2 = tf.add_paragraph()
r2 = p2.add_run()
r2.text = "Databricks-Primary Architecture (v8.0)"
set_run_font(r2, size=28, color=COLORS["white"])
p3 = tf.add_paragraph()
r3 = p3.add_run()
r3.text = "Executive Architecture Deck | March 2026"
set_run_font(r3, size=16, color=COLORS["white"])

f = slide.shapes.add_textbox(Inches(0.9), Inches(6.95), Inches(11), Inches(0.3))
fr = f.text_frame.paragraphs[0].add_run()
fr.text = "Classification: Internal - Confidential | Owner: VP, Chief Data Officer"
set_run_font(fr, size=10, color=COLORS["white"])
# Slides 2-6: agenda, executive summary, drivers, vision, and decisions.
add_bullet_slide("Presentation Agenda", [
    "1. Strategic context and architecture decisions",
    "2. Logical architecture and medallion implementation",
    "3. Governance, quality, and security/privacy controls",
    "4. Azure infrastructure deployment model",
    "5. Operating model, roadmap, and executive decisions",
], subtitle="50-slide executive briefing", icon_letter="A")

add_bullet_slide("Executive Summary", [
    "Databricks is the primary platform for engineering, warehousing, and AI/ML workloads.",
    "Microsoft Fabric is scoped to BI serving (Power BI Direct Lake) with strict guardrails.",
    "SAS Viya Compute Server remains for regulated actuarial and risk analytics.",
    "Purview + Unity Catalog + Manta delivers full governance and cross-platform lineage.",
    "Architecture is designed for AMF, OSFI, and Law 25 compliance in Canadian regions.",
], icon_letter="E")

add_two_col_slide(
    "Strategic Drivers and Outcomes",
    "Primary Drivers",
    [
        "Regulatory compliance and auditability",
        "Enterprise-scale data democratization",
        "AI-readiness across business units",
        "Cost discipline through clear platform boundaries",
    ],
    "Target Outcomes",
    [
        "Single governed source of truth",
        "Faster delivery of reusable data products",
        "Risk-managed self-service analytics",
        "Scalable architecture for 55,000 employees",
    ],
)

add_bullet_slide("Vision and Guiding Principles", [
    "Data as a Product with clear ownership and quality accountability.",
    "Unified governance with federated execution where capabilities are mature.",
    "Security by design with RLS, CLS, DDM, and zero-trust controls.",
    "Right tool for the right workload to avoid capability overlap.",
    "AI-ready-by-default architecture across engineering and serving layers.",
], icon_letter="V")

add_table_slide(
    "Key Architecture Decisions (AD-01 to AD-09)",
    ["Decision", "Direction", "Executive Intent"],
    [
        ["AD-01", "Delta Lake as canonical format", "Portability, consistency, and open standards"],
        ["AD-02", "Databricks as primary platform", "Consolidate ETL, SQL, and ML execution"],
        ["AD-03", "Fabric as BI serving layer", "Optimize Power BI cost/performance at scale"],
        ["AD-04", "SAS Viya Compute Server", "Support regulated actuarial/risk workloads"],
        ["AD-05", "Purview + UC + Manta", "End-to-end governance and lineage"],
        ["AD-06", "Canadian-only regions", "Satisfy residency and resilience constraints"],
        ["AD-07", "ADLS Gen2 shared substrate", "Single physical data foundation"],
    ],
)
# Slides 7-18: logical architecture section.
add_section_divider("Logical Architecture", "Layered design and platform responsibilities", accent="teal")

add_table_slide(
    "Seven Logical Layers",
    ["Layer", "Purpose", "Primary Technologies"],
    [
        ["Ingestion", "Acquire and track changes", "ADF, Auto Loader, Event Hub"],
        ["Bronze", "Immutable raw storage", "ADLS Gen2 Delta, Unity Catalog"],
        ["Silver", "Conformance and quality", "DLT, dbt, Great Expectations"],
        ["Gold", "Business-ready products", "Databricks SQL, Fabric Direct Lake"],
        ["Semantic", "KPI business language", "Power BI Semantic Models, dbt Metrics"],
        ["Serving", "End-user consumption", "Power BI, SQL, APIs, SAS VA"],
        ["AI/ML", "Model lifecycle", "MLflow, Feature Store, Model Serving"],
    ],
)

add_two_col_slide(
    "Cross-Cutting Controls",
    "Governance & Metadata",
    [
        "Enterprise catalog and business glossary in Purview",
        "Technical governance and grants in Unity Catalog",
        "Unified lineage graph through Manta integration",
    ],
    "Security & Operations",
    [
        "Identity, RBAC/ABAC, masking, and encryption",
        "Observability for pipelines, cost, and SLAs",
        "Policy enforcement via automation and review gates",
    ],
)

add_table_slide(
    "Platform Responsibility Matrix",
    ["Capability", "Databricks", "Fabric", "SAS Viya"],
    [
        ["Data Engineering", "Primary", "Restricted", "Targeted"],
        ["Data Warehousing", "Primary", "BI serving only", "-"],
        ["BI & Reporting", "Secondary", "Primary", "Targeted"],
        ["ML / AI", "Primary", "-", "Specialized"],
        ["Lineage", "Contributing", "Contributing", "Manta source"],
        ["Semantic Intelligence", "Contributing", "Evaluate (H2/H3)", "-"],
    ],
)

add_flow_slide("Data Flow Between Platforms", [
    "Sources",
    "ADF / Auto Loader",
    "ADLS + Delta",
    "Databricks\nSilver/Gold",
    "Fabric Direct Lake\n+ SAS JDBC",
], subtitle="Single source of truth with governed consumption paths")

add_image_slide(
    "Medallion Architecture: End-to-End View",
    "/Users/oabrivard/Projects/mdp/.tmp_assets/medallion1.png",
    caption="Source systems to serving channels with governance, security, and observability overlays.",
)

add_bullet_slide("Ingestion Patterns", [
    "Batch file ingestion with Auto Loader schema inference/evolution.",
    "Database CDC and incremental extraction with Azure Data Factory.",
    "Streaming ingestion through Event Hub and Structured Streaming.",
    "API ingestion orchestrated with Databricks Workflows or ADF.",
    "SAS-native ingestion via JDBC LIBNAME or authorized ADLS paths.",
], icon_letter="I")

add_flow_slide("Pre-Bronze Data Quality Gate", [
    "Land to Staging",
    "Purview DQ Sampling",
    "Pass/Fail Decision",
    "Bronze or Quarantine",
    "Steward Alerting",
], subtitle="Tier-1 DQ before raw persistence")

add_bullet_slide("Bronze Layer Implementation", [
    "Immutable append-only Delta storage by source and entity.",
    "Ingestion metadata columns for provenance and replay.",
    "Schema evolution enabled with controlled drift handling.",
    "90-day Delta time travel baseline with domain-specific retention.",
    "Quarantine sub-tables for structural failures.",
], icon_letter="B")

add_bullet_slide("Silver Layer Implementation", [
    "Standardization, cleansing, deduplication, and key conformance.",
    "DLT expectations enforce quality rules during transformation.",
    "Reference/master data enrichment and survivorship logic.",
    "PII normalization and policy tagging for downstream controls.",
    "Lineage captured to support full audit traceability.",
], icon_letter="S")

add_bullet_slide("Gold Layer and Data Products", [
    "Business-ready dimensional and analytical models.",
    "Certified KPIs with semantic alignment and stewardship signoff.",
    "Feature-ready curated datasets for AI/ML consumption.",
    "Power BI Direct Lake consumption through OneLake shortcuts.",
    "SAS consumption via JDBC path to preserve Unity Catalog controls.",
], icon_letter="G")

add_table_slide(
    "Serving Channels and Interfaces",
    ["Consumer", "Interface", "Governance Pattern"],
    [
        ["Power BI", "Fabric Direct Lake", "Purview + Semantic model governance"],
        ["Analysts", "Databricks SQL", "Unity Catalog grants, row/column controls"],
        ["Applications", "APIs / JDBC / ODBC", "Service principals and contracts"],
        ["Data Science", "Notebooks + Feature Store", "Model governance and lineage"],
        ["Actuarial Teams", "SAS Viya Compute Server", "JDBC first; restricted ADLS exception"],
    ],
)
# Slides 19-26: governance and data quality section.
add_section_divider("Governance & Data Quality", "Three-tier control model across platforms", accent="blue")

add_bullet_slide("Governance Operating Model", [
    "Centralized governance standards with federated delivery execution.",
    "Data ownership defined at domain and product levels.",
    "Stewardship workflows tied to quality and policy exceptions.",
    "Architecture review gates prevent workload drift and duplication.",
], icon_letter="G")

add_flow_slide("Three-Tier Catalog Strategy", [
    "Purview\nEnterprise Plane",
    "Unity Catalog\nTechnical Enforcement",
    "Manta\nCross-Platform Lineage",
], subtitle="Complementary strengths instead of overlapping catalogs")

add_bullet_slide("Tier 1: Microsoft Purview", [
    "Enterprise inventory, glossary, classification, and policy management.",
    "Data quality dashboards and stewardship collaboration workflows.",
    "Visibility layer for business and compliance stakeholders.",
    "Lineage consumption endpoint for integrated Manta metadata.",
], icon_letter="P")

add_bullet_slide("Tier 2: Databricks Unity Catalog", [
    "Central metastore and object-level permissions.",
    "Row filters and column masks enforced at query time.",
    "Audit events and lineage for data engineering and SQL workloads.",
    "Primary technical governance plane for Delta assets.",
], icon_letter="U")

add_bullet_slide("Tier 3: Manta Lineage Engine", [
    "Code-level lineage extraction across Databricks, ADF, and SAS.",
    "Bridges lineage gaps no single platform can cover end-to-end.",
    "Feeds lineage views in Purview and technical diagnostics in UC.",
    "Critical for regulated change impact assessment.",
], icon_letter="M")

add_table_slide(
    "Three-Tier Data Quality Framework",
    ["Tier", "Execution Point", "Gate Outcome"],
    [
        ["Tier 1", "Ingestion (Purview DQ Sampling)", "Block or quarantine defective loads"],
        ["Tier 2", "Medallion transitions (DLT + GE)", "Reject non-conforming records"],
        ["Tier 3", "Gold certification SLA checks", "Publish only certified products"],
        ["Unified", "DQ observability dashboards", "Steward accountability and trend control"],
    ],
)

add_bullet_slide("Master Data and Reference Data", [
    "Golden identifiers and reference dimensions centralized in curated zones.",
    "Conformance rules propagated across domains via reusable pipelines.",
    "Customer 360 and core reference entities treated as enterprise products.",
    "Lifecycle governance aligns with DAMA-DMBOK and DCAM practices.",
], icon_letter="D")
# Slides 27-35: security and privacy section.
add_section_divider("Security & Privacy", "Defense-in-depth controls for regulated operations", accent="gold")

add_bullet_slide("Identity and Authentication Foundation", [
    "Azure Entra ID is the mandatory identity provider across all components.",
    "MFA, compliant device, and conditional access are always enforced.",
    "SCIM/SAML/OAuth integrations standardize workforce access patterns.",
    "Managed identities preferred; static credentials prohibited.",
], icon_letter="I")

add_table_slide(
    "RBAC Model (Azure Resource Layer)",
    ["Security Group", "Primary Role", "Scope"],
    [
        ["sg-data-platform-admins", "Contributor", "Data platform subscriptions"],
        ["sg-data-engineers", "Databricks + Storage Contributor", "Engineering workspaces and ADLS"],
        ["sg-data-scientists", "Sandbox contributor + reader", "Exploration and governed read zones"],
        ["sg-governance-admins", "Purview curator + KV admin", "Governance stack"],
        ["sg-fabric-admins", "Capacity administrator", "Fabric BI capacity"],
    ],
)

add_bullet_slide("ABAC in Unity Catalog", [
    "Policy decisions are based on role, business unit, region, and clearance attributes.",
    "Catalog/schema/table hierarchies simplify consistent grant propagation.",
    "Attribute provisioning aligns with joiner-mover-leaver lifecycle.",
    "ABAC path extends to downstream BI and SAS controls via governed interfaces.",
], icon_letter="A")

add_bullet_slide("Row-Level Security Strategy", [
    "Unity Catalog row filters enforce least-privilege data visibility.",
    "Rules use user context and domain attributes at query runtime.",
    "Power BI RLS complements semantic layer access boundaries.",
    "Security logic is centrally versioned and auditable.",
], icon_letter="R")

add_bullet_slide("Column Security and Dynamic Masking", [
    "Column-level policies protect PII and regulated attributes.",
    "Masking modes include nulling, partial reveal, and deterministic hash.",
    "Purview classification labels drive mask policy assignment.",
    "Sensitive fields remain controlled across SQL, BI, and API paths.",
], icon_letter="C")

add_bullet_slide("Privacy Controls (Law 25 / PIPEDA)", [
    "Consent metadata influences access and downstream usage policies.",
    "DSAR workflows support discover, extract, and evidence generation.",
    "Right-to-erasure handled through orchestrated purge and retention rules.",
    "Pseudonymization/anonymization patterns applied by domain sensitivity.",
], icon_letter="L")

add_two_col_slide(
    "Encryption and Exfiltration Controls",
    "Encryption",
    [
        "AES-256 at rest with MMK/CMK strategy",
        "TLS 1.2+ for all in-transit communications",
        "Key Vault managed key lifecycle and rotation",
    ],
    "Exfiltration Prevention",
    [
        "Private endpoints and disabled public access",
        "Firewall and NSG deny-by-default stance",
        "DLP policies and export restrictions in BI layers",
    ],
)

add_bullet_slide("Security Audit and Monitoring", [
    "Centralized logs from Entra, Databricks, Fabric, SAS, and Azure services.",
    "Security alerting integrated with incident response operations.",
    "Compliance evidence packs generated for internal/external audits.",
    "Policy-as-code controls continuously validated through CI/CD.",
], icon_letter="M")
# Slides 36-45: Azure infrastructure section.
add_section_divider("Azure Infrastructure", "Landing zone, network, compute, and operations", accent="teal")

add_bullet_slide("Landing Zone Architecture", [
    "CAF-aligned enterprise landing zone with policy inheritance.",
    "Hub-spoke topology isolates workloads and centralizes control points.",
    "All spokes route egress through Azure Firewall Premium inspection.",
    "Private DNS and Bastion complete the secure connectivity model.",
], icon_letter="L")

add_image_slide(
    "Azure Infrastructure Topology",
    "/Users/oabrivard/Projects/mdp/.tmp_assets/azure_topology.png",
    caption="Physical deployment across subscriptions, VNets, services, and control planes.",
)

add_table_slide(
    "Region and Disaster Recovery Strategy",
    ["Domain", "Primary", "Secondary / DR"],
    [
        ["Region", "Canada Central", "Canada East"],
        ["Role", "Production and non-production execution", "DR replication and recovery"],
        ["Compliance", "Canadian residency", "Canadian residency maintained"],
        ["Constraint", "No non-Canadian replication", "Enforced by Azure Policy"],
    ],
)

add_table_slide(
    "Subscription and Resource Organization",
    ["Subscription", "Purpose", "Key Controls"],
    [
        ["sub-data-connectivity", "Hub networking", "Firewall, ER, DNS, Bastion"],
        ["sub-data-platform-prod", "Production platform", "Databricks, ADLS, Purview, KV"],
        ["sub-data-platform-nonprod", "Dev/stage/sandbox", "Cost and quota boundaries"],
        ["sub-data-sas-prod", "SAS workloads", "Isolated compute and RBAC"],
        ["sub-data-fabric-prod", "BI capacity", "F-SKU governance"],
        ["sub-data-management", "Ops and monitoring", "DevOps, Log Analytics, Sentinel"],
    ],
)

add_bullet_slide("Networking and Private Endpoints", [
    "Databricks VNet injection uses dedicated /22 host and container subnets.",
    "Private endpoint-only access for ADLS, Key Vault, Purview, ADF, and Event Hub.",
    "NSG baseline is deny-all with explicit allow rules per flow.",
    "No direct spoke-to-spoke communication; all traffic inspectable.",
], icon_letter="N")

add_table_slide(
    "Databricks Compute Sizing and Policies",
    ["Workspace", "Primary Role", "Control Highlights"],
    [
        ["dbw-data-eng-prod", "Pipelines and transformations", "Cluster policy guardrails, Photon, autoscaling"],
        ["dbw-analytics-prod", "SQL and dashboards", "Serverless SQL, autosuspend, workload mgmt"],
        ["dbw-mlops-prod", "Modeling and serving", "GPU pools, model serving autoscale"],
        ["dbw-data-eng-dev", "Dev/test", "Reduced limits and strict auto-terminate"],
        ["dbw-sandbox", "Exploration", "Read-only sensitive zones, budget caps"],
    ],
)

add_two_col_slide(
    "SAS Viya Compute Server Model",
    "Deployment Pattern",
    [
        "Private AKS deployment recommended",
        "Compute Server engine (sequential batch)",
        "Memory-optimized pods for actuarial models",
        "Dedicated license and model manager services",
    ],
    "Data Access Rules",
    [
        "JDBC to Databricks SQL is default path",
        "ADLS direct path only for pre-authorized non-sensitive zones",
        "SAS service principal scoped with minimal RBAC",
        "All access paths are logged for audit and lineage",
    ],
)

add_bullet_slide("Fabric Capacity Guardrails", [
    "Capacity is sized exclusively for BI serving workloads.",
    "No Fabric warehouse/lakehouse duplication of Databricks managed data.",
    "No Fabric Data Factory or notebook shadow engineering layer.",
    "Capacity expansions require architecture board approval.",
], icon_letter="F")

add_table_slide(
    "Observability, FinOps, and DevOps",
    ["Domain", "Tooling", "Expected Outcome"],
    [
        ["Monitoring", "Azure Monitor + Log Analytics + Databricks metrics", "SLA visibility and incident detection"],
        ["Security Ops", "Sentinel + alert rules", "Threat detection and response"],
        ["FinOps", "Tags, budgets, cost dashboards", "Chargeback and optimization"],
        ["IaC", "Terraform modules + CI/CD", "Repeatable and auditable infrastructure"],
    ],
)
# Slides 46-50: roadmap and closing decisions.
add_section_divider("Roadmap and Decisions", "Phased execution with governance gates", accent="blue")

add_bullet_slide("Horizon 1 (0-12 Months): Foundation", [
    "Finalize Databricks-primary architecture baseline and guardrails.",
    "Deploy production landing zone, networking, and core governance stack.",
    "Industrialize Bronze/Silver/Gold pipelines for priority domains.",
    "Implement tiered DQ controls and initial certification process.",
], icon_letter="1")

add_bullet_slide("Horizon 2 (12-24 Months): Scale and Intelligence", [
    "Expand domain data products and semantic KPI coverage.",
    "Increase self-service capabilities within governed boundaries.",
    "Pilot Fabric IQ capabilities under strict access prerequisites.",
    "Strengthen cross-platform lineage and model risk controls.",
], icon_letter="2")

add_two_col_slide(
    "Horizon 3 (24-36 Months): Optimize and Innovate",
    "Optimization Focus",
    [
        "Platform cost/performance rebalancing",
        "Automated policy assurance and controls testing",
        "Expanded AI/ML productization and monitoring",
    ],
    "Innovation Focus",
    [
        "Domain agent use cases with safe data grounding",
        "Advanced semantic intelligence on certified products",
        "Continuous architecture modernization under guardrails",
    ],
)

add_table_slide(
    "Executive Decision Requests",
    ["Decision", "Required Direction", "Impact"],
    [
        ["Architecture Baseline", "Approve Databricks-primary v8.0", "Locks platform scope and investment alignment"],
        ["Fabric Guardrails", "Approve BI-serving-only boundary", "Prevents duplication and cost drift"],
        ["Governance Model", "Approve Purview + UC + Manta operating model", "Enables auditable enterprise control"],
        ["Roadmap Funding", "Approve phased delivery plan", "Accelerates time-to-value with risk control"],
    ],
    subtitle="Final steering decisions",
)
# Validate slide count with an explicit check rather than `assert`,
# which is silently stripped when Python runs with -O.
slide_count = len(prs.slides)
if slide_count != 50:
    raise RuntimeError(f"Expected 50 slides, got {slide_count}")
prs.save(OUTPUT)
print(f"Created {OUTPUT} with {slide_count} slides")