|
|
const { Document, Packer, Paragraph, Table, TableRow, TableCell, TextRun, PageBreak,
|
|
|
HeadingLevel, BorderStyle, VerticalAlign, AlignmentType, UnderlineType, ShadingType } = require('docx');
|
|
|
const fs = require('fs');
|
|
|
const path = require('path');
|
|
|
|
|
|
// Color scheme (hex RGB, no leading '#', as expected by the docx library)
const HEADING_COLOR = '1F4E79'; // dark navy used for headings and title text

const HEADER_BG = 'D5E8F0'; // light blue fill for table header rows

const BORDER_COLOR = 'CCCCCC'; // light grey used for all table borders
|
|
|
|
|
|
/**
 * Builds a level-1 heading paragraph in the document accent colour.
 * @param {string} text - Heading text.
 * @returns {Paragraph} Paragraph styled as HEADING_1.
 */
function createHeading1(text) {
  return new Paragraph({
    // `color` is a run-level property and is ignored when set directly on a
    // Paragraph, so the text is wrapped in an explicit TextRun.
    children: [new TextRun({ text: text, color: HEADING_COLOR })],
    heading: HeadingLevel.HEADING_1,
    alignment: AlignmentType.LEFT,
    spacing: { before: 240, after: 120 },
  });
}
|
|
|
|
|
|
/**
 * Builds a level-2 heading paragraph in the document accent colour.
 * @param {string} text - Heading text.
 * @returns {Paragraph} Paragraph styled as HEADING_2.
 */
function createHeading2(text) {
  return new Paragraph({
    // `color` is a run-level property and is ignored when set directly on a
    // Paragraph, so the text is wrapped in an explicit TextRun.
    children: [new TextRun({ text: text, color: HEADING_COLOR })],
    heading: HeadingLevel.HEADING_2,
    alignment: AlignmentType.LEFT,
    spacing: { before: 160, after: 100 },
  });
}
|
|
|
|
|
|
/**
 * Builds a body-text paragraph (1.5 line spacing, left-aligned).
 * @param {string} text - Paragraph text.
 * @param {object} [options={}] - Extra Paragraph options; these override the defaults.
 * @returns {Paragraph}
 */
function createParagraph(text, options = {}) {
  const defaults = {
    text,
    alignment: AlignmentType.LEFT,
    spacing: { line: 360, after: 120 },
  };
  // Caller-supplied options win over the defaults (spread order matters).
  return new Paragraph({ ...defaults, ...options });
}
|
|
|
|
|
|
/**
 * Builds a bulleted list paragraph at the given nesting level.
 * @param {string} text - Bullet text.
 * @param {number} [level=0] - Zero-based nesting depth.
 * @returns {Paragraph}
 */
function createBulletPoint(text, level = 0) {
  // Each extra nesting level shifts the bullet 360 twips further right;
  // the hanging indent keeps wrapped lines aligned under the text.
  const leftIndent = 720 + level * 360;
  return new Paragraph({
    text,
    alignment: AlignmentType.LEFT,
    spacing: { after: 80 },
    indent: { left: leftIndent, hanging: 360 },
    bullet: { level },
  });
}
|
|
|
|
|
|
/**
 * Builds a full-width table with a shaded header row and bordered body cells.
 * @param {string[]} headerCells - Header labels, one per column.
 * @param {string[][]} rows - Body rows; each inner array holds one row's cell text.
 * @returns {Table}
 */
function createTable(headerCells, rows) {
  // Shared single light-grey border and cell padding, used for the table
  // frame, the inside grid, and every body cell.
  const border = { style: BorderStyle.SINGLE, size: 3, color: BORDER_COLOR };
  const cellMargins = { top: 80, bottom: 120, left: 80, right: 80 };

  const headerRow = new TableRow({
    children: headerCells.map((cell) => new TableCell({
      // `bold`/`color` are run-level properties and are ignored when set on a
      // Paragraph, so the header text is wrapped in an explicit TextRun.
      // HEADING_COLOR is used instead of white ('FFFFFF'), which would be
      // unreadable on the light HEADER_BG fill.
      children: [new Paragraph({
        children: [new TextRun({ text: cell, bold: true, color: HEADING_COLOR })],
      })],
      shading: { fill: HEADER_BG, type: ShadingType.CLEAR },
      margins: cellMargins,
    })),
  });

  const bodyRows = rows.map((row) => new TableRow({
    children: row.map((cell) => new TableCell({
      children: [new Paragraph({
        text: cell,
        spacing: { after: 60 },
      })],
      margins: cellMargins,
      borders: { top: border, bottom: border, left: border, right: border },
    })),
  }));

  return new Table({
    rows: [headerRow, ...bodyRows],
    width: { size: 100, type: 'pct' },
    borders: {
      top: border,
      bottom: border,
      left: border,
      right: border,
      insideHorizontal: border,
      insideVertical: border,
    },
  });
}
|
|
|
|
|
|
/**
 * Renders a multi-line code snippet as a bordered, grey-shaded table with
 * one row per source line, in a monospaced font.
 * @param {string} code - Snippet text; split on '\n' into table rows.
 * @returns {Table}
 */
function createCodeBlock(code) {
  const border = { style: BorderStyle.SINGLE, size: 3, color: BORDER_COLOR };
  const lines = code.split('\n');
  return new Table({
    rows: lines.map(line => new TableRow({
      children: [
        new TableCell({
          // `font`/`size` are run-level properties and are ignored when set
          // on a Paragraph, so the line is wrapped in an explicit TextRun.
          // size is in half-points (20 = 10pt). Empty lines are rendered as
          // a single space so the row keeps its height.
          children: [new Paragraph({
            children: [new TextRun({
              text: line || ' ',
              font: 'Courier New',
              size: 20,
            })],
          })],
          shading: { fill: 'F5F5F5', type: ShadingType.CLEAR },
          margins: { top: 60, bottom: 60, left: 80, right: 80 },
        }),
      ],
    })),
    width: { size: 100, type: 'pct' },
    borders: { top: border, bottom: border, left: border, right: border },
  });
}
|
|
|
|
|
|
// Build children array
|
|
|
// Build children array (document body; later sections are appended below).
const children = [
  // Title block. `size`/`bold`/`italics`/`color` are run-level properties and
  // are ignored when set on a Paragraph, so each title line wraps its text in
  // an explicit TextRun.
  new Paragraph({
    children: [new TextRun({
      text: 'Appendix A: Security & Privacy Model',
      color: HEADING_COLOR,
      size: 28,
      bold: true,
    })],
    heading: HeadingLevel.HEADING_1,
    alignment: AlignmentType.CENTER,
    spacing: { before: 0, after: 80 },
  }),
  new Paragraph({
    children: [new TextRun({
      text: 'Fabric-Primary Architecture (v1.0)',
      color: HEADING_COLOR,
      size: 24,
    })],
    heading: HeadingLevel.HEADING_2,
    alignment: AlignmentType.CENTER,
    spacing: { before: 0, after: 240 },
  }),
  new Paragraph({
    children: [new TextRun({ text: 'Modern Data Platform – Greenfield', italics: true })],
    alignment: AlignmentType.CENTER,
    spacing: { after: 60 },
  }),
  new Paragraph({
    children: [new TextRun({ text: 'March 2026', italics: true })],
    alignment: AlignmentType.CENTER,
    spacing: { after: 480 },
  }),

  // Table of Contents
  createHeading1('Table of Contents'),
  createBulletPoint('1. Introduction & Guiding Principles'),
  createBulletPoint('2. Authentication Framework'),
  createBulletPoint('3. Role-Based Access Control (RBAC)'),
  createBulletPoint('4. OneLake Security as Primary Enforcement Plane'),
  createBulletPoint('5. Unity Catalog – Retained for Sensitive Datasets'),
  createBulletPoint('6. Dual-Enforcement Model & Dataset Classification'),
  createBulletPoint('7. Data Loss Prevention & Exfiltration Controls'),
  createBulletPoint('8. Privacy, Consent & Regulatory Compliance'),
  createBulletPoint('9. Security Audit & Monitoring'),
  createBulletPoint('10. End-to-End Security Flow – Worked Examples'),
  createBulletPoint('11. Security Governance & Operating Model'),
  createBulletPoint('12. Comparison with Databricks-Primary Model'),

  // PageBreak is a run-level element and must be wrapped in a Paragraph to be
  // a valid section child; a bare `new PageBreak()` here is invalid.
  new Paragraph({ children: [new PageBreak()] }),

  // Section 1
  createHeading1('1. Introduction & Guiding Principles'),
  createParagraph(
    'The Fabric-primary architecture fundamentally shifts the security enforcement paradigm for Greenfield\'s Modern Data Platform. Unlike the Databricks-primary model where Unity Catalog is the universal governance plane, Fabric-primary distributes security enforcement across multiple engines, with OneLake workspace and item-level security serving as the PRIMARY enforcement mechanism for the majority of analytics workloads.'
  ),
  createParagraph(
    'This appendix describes a dual-enforcement security model: OneLake security (via Fabric workspace RBAC, item-level permissions, Power BI RLS/CLS, SQL DDM) protects the bulk of data flows for Public and Internal datasets, while Unity Catalog is retained in a transitional capacity for Confidential and Restricted datasets requiring row-level filtering, column-level masking, or attribute-based access control (ABAC) finer than Fabric alone provides.'
  ),

  createHeading2('Guiding Principles'),
  createBulletPoint('Zero Trust: Verify every access request at every layer; assume breach in design.'),
  createBulletPoint('Least Privilege: Grant only the minimum permissions required for a user or service to fulfill their role.'),
  createBulletPoint('Defense in Depth: Layer multiple independent security controls (identity, RBAC, attribute-based filtering, audit).'),
  createBulletPoint('Unified Identity: All platform access routed through Azure Entra ID with conditional access policies.'),
  createBulletPoint('Policy as Code: Security configurations (role assignments, row filters, column masks) stored as IaC; changes tracked in Git.'),
  createBulletPoint('Auditability: All access, changes, and sensitive operations logged; audit trail immutable and queryable.'),

  new Paragraph({ children: [new PageBreak()] }),

  // Section 2
  createHeading1('2. Authentication Framework'),
  createParagraph(
    'Authentication is the foundation of the zero-trust model. All access to Fabric, Databricks, SAS Viya, and supporting services is mediated by Azure Entra ID (formerly Azure Active Directory), ensuring a single authoritative source of identity and enabling policy enforcement at scale.'
  ),

  createHeading2('2.1 Azure Entra ID Integration'),
  createBulletPoint('All users authenticate via Azure Entra ID using modern protocols (OAuth 2.0, OIDC, SAML 2.0).'),
  createBulletPoint('Conditional Access policies enforce device compliance, network location, and risk-based MFA.'),
  createBulletPoint('Example policy: Require MFA for any access from outside Canada or from unmanaged devices.'),
  createBulletPoint('Service-to-service authentication uses managed identities (system-assigned and user-assigned) to eliminate credential storage.'),

  createHeading2('2.2 Platform-Specific Authentication'),
  createParagraph('Each platform (Fabric, Databricks, SAS Viya) implements Entra ID authentication through native connectors:'),
  createBulletPoint('Fabric: Native Entra ID integration; users sign in with corporate identity; MFA enforced by Conditional Access.'),
  createBulletPoint('Databricks: OAuth 2.0 via Entra ID; service principals authenticate with OAuth tokens stored in Azure Key Vault.'),
  createBulletPoint('SAS Viya: LDAP sync with Entra ID; SAS identities mapped to Entra groups for seamless federation.'),

  createHeading2('2.3 Managed Identities & Service Principals'),
  createParagraph('Automation pipelines, data integration jobs, and inter-service communication use managed identities and service principals, eliminating long-lived passwords:'),
  createBulletPoint('System-assigned managed identity: Automatically provisioned for each Fabric/Databricks resource; lifecycle tied to the resource.'),
  createBulletPoint('User-assigned managed identity: Created explicitly for shared scenarios (e.g., a data pipeline used by multiple ADF jobs); more flexible for cross-resource access.'),
  createBulletPoint('Service principal credentials stored exclusively in Azure Key Vault, rotated automatically every 90 days.'),
  createBulletPoint('Example: A Data Factory pipeline integrating external data uses a user-assigned managed identity with RBAC permissions scoped to specific Fabric workspaces and Databricks clusters.'),

  new Paragraph({ children: [new PageBreak()] }),

  // Section 3
  createHeading1('3. Role-Based Access Control (RBAC)'),
  createParagraph(
    'RBAC in Fabric-primary leverages Azure Entra security groups to map users to predefined roles at the workspace and item level. This simplifies administration by reducing the need to manage individual user permissions and enabling group-based policies.'
  ),

  createHeading2('3.1 Entra Security Group Model'),
  createParagraph('All access is granted through Entra security groups, following the pattern: [Org]-[Domain]-[Role]. Example groups:'),
  createBulletPoint('Greenfield-Analytics-FabricAdmin: Members who administer Fabric workspaces.'),
  createBulletPoint('Greenfield-Risk-FabricMember: Members of the Risk domain who can create and edit content in Fabric.'),
  createBulletPoint('Greenfield-Sales-FabricViewer: Members of the Sales domain who can view reports and dashboards.'),
  createBulletPoint('Greenfield-DataEng-DatabricksAdmin: Members who administer Databricks workspaces and SQL warehouses.'),

  createHeading2('3.2 Fabric Workspace RBAC'),
  createParagraph('Fabric workspaces are assigned to business domains (Risk, Sales, Operations, etc.) and protected by four role levels:'),
];
|
|
|
|
|
|
// Add workspace roles table
// Section 3.2 data: the four Fabric workspace roles mapped to per-domain
// Entra groups, rendered through createTable (header row + four body rows).
const workspaceRolesTable = createTable(
['Role', 'Entra Group', 'Permissions', 'Typical Users'],
[
['Admin', 'Greenfield-[Domain]-FabricAdmin', 'Create, edit, delete workspaces; manage members; configure settings; manage Premium capacity; audit logs.', 'Data platform engineers, workspace owners'],
['Member', 'Greenfield-[Domain]-FabricMember', 'Create, edit, delete items (datasets, reports, models); manage item permissions; consume items.', 'Analytics engineers, data scientists, report developers'],
['Contributor', 'Greenfield-[Domain]-FabricContributor', 'Create and edit items but cannot delete; no user management.', 'Junior analysts, contractors (limited scope)'],
['Viewer', 'Greenfield-[Domain]-FabricViewer', 'View and consume reports, dashboards, and datasets; cannot edit or create.', 'Business users, stakeholders'],
]
);
|
|
|
|
|
|
// Append the section 3.2 roles table, then sections 3.3 through 4.2.
children.push(workspaceRolesTable);
children.push(
  createHeading2('3.3 Fabric Capacity Administration'),
  createParagraph('Fabric Premium capacities (F-series) are managed separately from workspace RBAC:'),
  createBulletPoint('Capacity Admin role (Entra group: Greenfield-Fabric-CapacityAdmin) controls resource allocation, autoscale policies, and compute throttling.'),
  createBulletPoint('Capacity admins are distinct from workspace admins; separation of duties prevents a single role from controlling both governance and resource consumption.'),

  createHeading2('3.4 Databricks Workspace Permissions (Residual Workloads)'),
  createParagraph('Databricks workspaces retained for sensitive datasets and specialized compute enforce role-based access via Databricks-native RBAC and AAD integration:'),
  createBulletPoint('Databricks Admin: Full control; manage workspace users, jobs, clusters, compute policies.'),
  createBulletPoint('Databricks User: Create and run notebooks; access clusters assigned to their role; execute SQL queries.'),
  createBulletPoint('Databricks Viewer: Read-only access to notebooks and job results; no edit or execution capability.'),
  createBulletPoint('Cluster Access Control: Clusters (especially those with sensitive data) are restricted to specific Entra groups; users outside the group cannot attach.'),

  // PageBreak is a run-level element and must be wrapped in a Paragraph to be
  // a valid section child; a bare `new PageBreak()` here is invalid.
  new Paragraph({ children: [new PageBreak()] }),

  // Section 4
  createHeading1('4. OneLake Security as Primary Enforcement Plane'),
  createParagraph(
    'OneLake is Fabric\'s unified data lake built on Azure Data Lake Storage Gen2. In Fabric-primary, OneLake security—enforced through workspace RBAC, item-level permissions, and engine-specific row/column-level security—is the primary defense for the majority of data.'
  ),
  createParagraph(
    'CRITICAL GAP: Fabric\'s security is engine-specific. Power BI RLS only protects data viewed through Power BI; SQL Analytics Endpoint security only applies to T-SQL queries; Spark notebooks in Fabric see unmasked data. Organizations must architect around this fundamental limitation.'
  ),

  createHeading2('4.1 Fabric Workspace & Item-Level Permissions'),
  createParagraph('OneLake items (Lakehouse, Warehouse, Dataset, Report, Dashboard, Notebook) are secured via item-level permissions:'),
  createBulletPoint('Workspace-level role grants baseline permissions (e.g., all Members can edit all items in the workspace).'),
  createBulletPoint('Item-level permissions override workspace role; a Viewer can be given "Editor" on a specific sensitive dataset.'),
  createBulletPoint('Permissions assigned via Entra groups for scalability.'),
  createBulletPoint('Example: A Power BI report in the Risk workspace is restricted to the Greenfield-Risk-ModelReviewers Entra group, even though the workspace itself is accessible to all Risk members.'),

  createHeading2('4.2 Power BI Row-Level Security (RLS)'),
  createParagraph(
    'Power BI RLS uses DAX expressions to filter data at query time based on the logged-in user\'s identity. The RLS applies ONLY when users access the report through Power BI; if the underlying dataset is queried via SQL or Spark, RLS is bypassed.'
  ),
  createParagraph('RLS configuration uses two key DAX functions:'),
  createBulletPoint('USERPRINCIPALNAME(): Returns the user\'s email (e.g., john.doe@greenfield.ca).'),
  createBulletPoint('USERNAME(): Returns the user\'s domain username (e.g., john.doe).'),
  createParagraph('Example RLS rule for a Sales dataset:')
);
|
|
|
|
|
|
// Add code block for RLS
// Section 4.2 example: Power BI RLS role definition (DAX). The template
// literal is rendered verbatim by createCodeBlock, one table row per line.
children.push(createCodeBlock(`-- Role: SalesPersonRLS
CREATE ROLE [SalesPersonRLS]

-- DAX filter: User sees only their own region
IF (
PATHCONTAINS([DimSalesPerson][SalesPersonEmail], USERPRINCIPALNAME()),
1,
0
)`));
|
|
|
|
|
|
// Section 4.2 wrap-up and section 4.3 narrative (SQL Analytics Endpoint security).
children.push(
createParagraph(
'This rule ensures that a salesperson in Region A cannot view sales data from Region B, even if both are in the same semantic model. The rule is enforced transparently at report refresh.'
),

createHeading2('4.3 Fabric SQL Analytics Endpoint – T-SQL Row/Column-Level Security'),
createParagraph(
'Lakehouses in Fabric expose a SQL Analytics Endpoint that supports standard T-SQL operations. For structured data (curated tables), T-SQL row-level security (RLS) and column-level permissions can be applied.'
),

createHeading2('4.3.1 Row-Level Security Predicates'),
createParagraph('Row-level security predicates filter rows dynamically based on the executing user:')
);
|
|
|
|
|
|
children.push(createCodeBlock(`-- Create a row-level security predicate
|
|
|
CREATE SECURITY POLICY rls_policy
|
|
|
ADD FILTER PREDICATE rls_by_business_unit([business_unit]) ON [sales_fact]
|
|
|
|
|
|
-- Predicate function
|
|
|
CREATE FUNCTION rls_by_business_unit(@business_unit NVARCHAR(50))
|
|
|
RETURNS TABLE
|
|
|
WITH SCHEMABINDING
|
|
|
AS
|
|
|
RETURN
|
|
|
SELECT 1 AS result
|
|
|
WHERE @business_unit IN (
|
|
|
SELECT business_unit FROM user_permissions
|
|
|
WHERE user_email = USER_NAME()
|
|
|
)
|
|
|
|
|
|
-- Enable the policy
|
|
|
ALTER SECURITY POLICY rls_policy WITH (STATE = ON)`));
|
|
|
|
|
|
// Section 4.3.2 narrative (column-level permissions via GRANT/DENY).
children.push(
createHeading2('4.3.2 Column-Level Permissions'),
createParagraph('T-SQL GRANT and DENY statements control which columns users can access:')
);
|
|
|
|
|
|
children.push(createCodeBlock(`-- Deny access to sensitive column for all but Data Privacy Officer
|
|
|
DENY SELECT ON [customer_master].[ssn_encrypted] TO [sales_analyst_role]
|
|
|
GRANT SELECT ON [customer_master].[ssn_encrypted] TO [data_privacy_officer_role]`));
|
|
|
|
|
|
// Section 4.4 narrative: Dynamic Data Masking mask types on Fabric SQL endpoints.
children.push(
createHeading2('4.4 Fabric SQL DDM (Dynamic Data Masking)'),
createParagraph(
'Fabric Warehouse and SQL endpoints support SQL Dynamic Data Masking (DDM), which obfuscates sensitive data in query results without modifying the underlying data.'
),
createParagraph('DDM mask types:'),
createBulletPoint('Default: Display only the first letter and last letter (e.g., A****E for email).'),
createBulletPoint('Email: Expose only domain (e.g., aX@XXXX.com for john.doe@greenfield.ca).'),
createBulletPoint('Random: Replace with random value from range (e.g., phone numbers masked as random integers).'),
createBulletPoint('Custom string: Replace with a fixed pattern (e.g., XXX-XX-XXXX for SSN).'),

createParagraph('Example DDM rule:')
);
|
|
|
|
|
|
children.push(createCodeBlock(`ALTER TABLE [customer_master]
|
|
|
ALTER COLUMN [phone_number] NVARCHAR(20) MASKED WITH (
|
|
|
FUNCTION = 'default()'
|
|
|
)
|
|
|
|
|
|
ALTER TABLE [customer_master]
|
|
|
ALTER COLUMN [email] NVARCHAR(255) MASKED WITH (
|
|
|
FUNCTION = 'email()'
|
|
|
)`));
|
|
|
|
|
|
// Sections 4.5 (Spark security gap) through 5.2 (Unity Catalog RLS).
children.push(
  createHeading2('4.5 THE CRITICAL GAP: Spark Notebooks Bypass Fabric Security'),
  createParagraph(
    'Fabric Spark notebooks and Python notebooks access Lakehouse tables directly via Apache Spark APIs. These notebooks bypass RLS, column-level permissions, and DDM—users see the complete unmasked dataset.'
  ),
  createParagraph('Impact:'),
  createBulletPoint('A data scientist with access to a Fabric Spark notebook can see all rows and columns of a Lakehouse table, regardless of RLS or DDM rules applied to the SQL endpoint.'),
  createBulletPoint('This is a fundamental architectural limitation of Spark in Fabric; there is no Spark-native RLS equivalent.'),

  createHeading2('4.5.1 Mitigations for Spark Data Leakage'),
  createParagraph('Greenfield must implement architectural controls to prevent unintended data exposure through Spark:'),
  createBulletPoint('Separate Lakehouses: Divide sensitive data into a distinct Lakehouse that only data engineers (not analysts) can access; analysts access aggregated or masked views through SQL endpoints.'),
  createBulletPoint('SQL-Only Access: Route all access to sensitive data through SQL Analytics Endpoints with RLS/DDM; Spark is restricted to development and data preparation on non-sensitive layers.'),
  createBulletPoint('Lakehouse Ownership & ACLs: Assign sensitive Lakehouses to a restricted Entra group; regular analysts have Member role in the workspace but cannot attach to sensitive Lakehouses.'),
  createBulletPoint('Notebook Audit & Governance: All Spark notebooks logged and reviewed monthly; notebooks accessing sensitive data require additional approval and sign-off.'),

  // PageBreak is a run-level element and must be wrapped in a Paragraph to be
  // a valid section child; a bare `new PageBreak()` here is invalid.
  new Paragraph({ children: [new PageBreak()] }),

  // Section 5
  createHeading1('5. Unity Catalog – Retained for Sensitive Datasets'),
  createParagraph(
    'Datasets classified as Confidential or Restricted cannot be adequately protected by Fabric\'s engine-specific security. These datasets are governed by Unity Catalog on Databricks, which provides unified row-level and column-level enforcement across all compute engines.'
  ),

  createHeading2('5.1 Datasets Requiring Unity Catalog'),
  createParagraph('Unity Catalog is mandatory for datasets containing:'),
  createBulletPoint('Personal Identifiable Information (PII): Customer SSN, date of birth, address, financial account numbers.'),
  createBulletPoint('Regulatory Data: Law 25 regulated personal information; PIPEDA-protected employee records; OSFI-regulated financial metrics.'),
  createBulletPoint('Competitive Data: Proprietary customer models, pricing algorithms, underwriting rules.'),
  createBulletPoint('Restricted Operational Data: Fraud case details, undercover investigation logs, executive compensation records.'),

  createHeading2('5.2 Unity Catalog Row-Level Security (RLS) for Databricks'),
  createParagraph(
    'Row filters in Unity Catalog are stored as SQL predicates and evaluated uniformly, regardless of whether data is accessed via Spark, SQL, or external BI tools. RLS guarantees that a user with SQL access sees the same filtered data as a Spark user.'
  ),
  createParagraph('Example row filter (same pattern as Databricks-primary):')
);
|
|
|
|
|
|
children.push(createCodeBlock(`-- Row filter catalog: rls_by_business_unit
|
|
|
CREATE OR REPLACE FUNCTION rls_by_business_unit()
|
|
|
RETURNS TABLE(
|
|
|
predicate_func_result BOOLEAN
|
|
|
)
|
|
|
RETURN
|
|
|
SELECT current_user() IN (
|
|
|
SELECT authorized_user FROM greenfield_master.access_control.approved_users
|
|
|
WHERE approved_users.business_unit = CURRENT_BUSINESS_UNIT()
|
|
|
)`));
|
|
|
|
|
|
// Section 5.3 narrative (Unity Catalog column masking).
children.push(
createHeading2('5.3 Unity Catalog Column Masking for PII'),
createParagraph('Column masks dynamically obfuscate sensitive columns for users without explicit access:')
);
|
|
|
|
|
|
children.push(createCodeBlock(`-- Column mask for SIN (Social Insurance Number)
|
|
|
CREATE OR REPLACE FUNCTION mask_sin(sin_value VARCHAR(11))
|
|
|
RETURNS VARCHAR(11)
|
|
|
RETURN CASE
|
|
|
WHEN current_user() IN (
|
|
|
SELECT authorized_user FROM greenfield_master.access_control.approved_users
|
|
|
WHERE role = 'data_privacy_officer'
|
|
|
)
|
|
|
THEN sin_value
|
|
|
ELSE CONCAT('XXX-XX-', SUBSTRING(sin_value, 8, 4))
|
|
|
END
|
|
|
|
|
|
-- Apply mask to column
|
|
|
ALTER TABLE greenfield_curated.customer_sensitive.customer_identity
|
|
|
MODIFY COLUMN sin SET MASK mask_sin()`));
|
|
|
|
|
|
// Sections 5.4 (ABAC) through 6.1 (classification matrix heading).
children.push(
  createHeading2('5.4 ABAC (Attribute-Based Access Control) via Unity Catalog'),
  createParagraph(
    'For complex access scenarios (e.g., a user can see Customer 360 data only if their department matches the data\'s owning department AND they are on an approved project), Unity Catalog supports attribute-based access control through Manta (Coherent Catalog) metadata integration.'
  ),
  createBulletPoint('Attributes stored in Manta: business_unit, data_sensitivity_level, owner_department, approved_projects.'),
  createBulletPoint('Row filter logic incorporates Manta attributes: Evaluate access based on user attributes vs. data attributes.'),

  createHeading2('5.5 SAS Viya & Fabric Access to Unity Catalog–Protected Data'),
  createParagraph(
    'SAS Viya and Fabric workloads requiring access to sensitive data classified in Unity Catalog must route through Databricks SQL Warehouses, not directly to the Lakehouse.'
  ),
  createParagraph('Architecture:'),
  createBulletPoint('Unity Catalog table (e.g., greenfield_curated.customer.customer_identity) is NOT published to OneLake.'),
  createBulletPoint('A Databricks SQL Warehouse (with RLS + column masks applied) exposes a public view or external table.'),
  createBulletPoint('SAS Viya connects via JDBC to the Databricks SQL Warehouse; Fabric SQL Endpoint can be configured with a linked connection to the same warehouse.'),
  createBulletPoint('Access is logged in Databricks audit tables; no bypass possible.'),

  // PageBreak is a run-level element and must be wrapped in a Paragraph to be
  // a valid section child; a bare `new PageBreak()` here is invalid.
  new Paragraph({ children: [new PageBreak()] }),

  // Section 6
  createHeading1('6. Dual-Enforcement Model & Dataset Classification'),
  createParagraph(
    'Fabric-primary employs a dual-enforcement model: OneLake security for the majority of data, Unity Catalog as a secondary enforcement plane for the most sensitive datasets. The model is anchored in a dataset classification matrix that determines the security path for each dataset.'
  ),

  createHeading2('6.1 Dataset Classification Matrix')
);
|
|
|
|
|
|
// Section 6.1 data: classification tiers (Public/Internal/Confidential/Restricted)
// with the security plane and access route prescribed for each tier.
const classificationMatrix = createTable(
['Classification', 'Data Examples', 'Primary Security', 'OneLake Controls', 'Unity Catalog', 'Access Route'],
[
['Public', 'Published market data, aggregate statistics, product brochures.', 'OneLake only', 'Workspace role + item permissions.', 'Not used.', 'Fabric SQL, Power BI, Spark.'],
['Internal', 'Operational metrics, department dashboards, internal project docs.', 'OneLake only', 'Workspace role + item permissions + optional RLS.', 'Not used.', 'Fabric SQL, Power BI, Spark.'],
['Confidential', 'Customer aggregates, financial models, PII subsets (with column masking).', 'OneLake primary + optional UC for sensitive subsets.', 'Workspace role + item permissions + RLS + DDM on SQL endpoint.', 'Optional (for columns requiring ABAC).', 'Fabric SQL endpoint (RLS applied) or Databricks SQL Warehouse (UC).'],
['Restricted', 'Full PII, fraud case details, regulatory investigation records, executive comp.', 'Unity Catalog only.', 'Not used.', 'Mandatory. Row filter + column mask enforced.', 'Databricks SQL Warehouse only (via JDBC). No Spark unless approved by compliance.'],
]
);
|
|
|
|
|
|
// Append the section 6.1 classification matrix table to the document body.
children.push(classificationMatrix);
|
|
|
children.push(
|
|
|
createHeading2('6.2 Decision Flow: Which Security Plane?'),
|
|
|
createParagraph('For each dataset, determine the appropriate security plane:'),
|
|
|
createBulletPoint('Step 1: Classify the dataset (Public, Internal, Confidential, Restricted) using Greenfield\'s data classification taxonomy.'),
|
|
|
createBulletPoint('Step 2: If Restricted, MANDATORY Unity Catalog; publish to Databricks only.'),
|
|
|
createBulletPoint('Step 3: If Confidential, evaluate whether OneLake RLS + DDM is sufficient.'),
|
|
|
createBulletPoint('Step 4: If row-level filtering is insufficient (e.g., ABAC with complex department/project attributes), add Unity Catalog as dual-enforcement.'),
|
|
|
createBulletPoint('Step 5: If Internal or Public, use OneLake security only; no Databricks governance overhead.'),
|
|
|
|
|
|
createHeading2('6.3 Metadata Synchronization: Unity Catalog to Purview'),
|
|
|
createParagraph(
|
|
|
'When datasets are governed by Unity Catalog, their metadata (table names, columns, RLS rules, column masks, ownership) must be synchronized to Purview for enterprise governance and DLP integration.'
|
|
|
),
|
|
|
createBulletPoint('Databricks Purview Connector: Automated sync from Unity Catalog to Purview Data Catalog.'),
|
|
|
createBulletPoint('Sync frequency: Real-time (changes to UC metadata propagated within minutes).'),
|
|
|
createBulletPoint('Purview displays: Table ownership, column sensitivity labels, RLS rules as lineage annotations.'),
|
|
|
createBulletPoint('Impact: DLP policies in Microsoft 365 can flag attempts to export Purview-cataloged data outside approved channels.'),
|
|
|
|
|
|
new PageBreak(),
|
|
|
|
|
|
// Section 7
|
|
|
createHeading1('7. Data Loss Prevention & Exfiltration Controls'),
|
|
|
createParagraph(
|
|
|
'Data loss prevention (DLP) combines policy controls (preventing downloads, exports, and sharing) with technical enforcements (managed virtual networks, private endpoints, egress filtering).'
|
|
|
),
|
|
|
|
|
|
createHeading2('7.1 Fabric Tenant-Level Export Restrictions'),
|
|
|
createParagraph('Fabric tenant settings control whether users can export data outside the platform:'),
|
|
|
createBulletPoint('Disable "Export to PDF" for Premium workspaces containing sensitive reports.'),
|
|
|
createBulletPoint('Disable "Export to Excel" for datasets classified as Confidential or Restricted.'),
|
|
|
createBulletPoint('Disable "Copy to clipboard" for dashboards in regulated workspaces.'),
|
|
|
createParagraph('These restrictions are enforced at the Fabric service level and cannot be bypassed by users with local admin privileges.'),
|
|
|
|
|
|
createHeading2('7.2 Fabric Managed Virtual Network (Mandatory for Prod)'),
|
|
|
createParagraph(
|
|
|
'Production Fabric Premium capacities are deployed within Fabric-managed VNets. Traffic is isolated and inspected:'
|
|
|
),
|
|
|
createBulletPoint('Outbound traffic restricted to approved destinations (e.g., Azure Storage, Databricks, SAS Viya only).'),
|
|
|
createBulletPoint('Internet egress blocked by default; exceptions require explicit approval.'),
|
|
|
createBulletPoint('Private endpoints for Databricks and SAS ensure traffic does not traverse the public internet.'),
|
|
|
|
|
|
createHeading2('7.3 Private Endpoints for Databricks & SAS Viya'),
|
|
|
createParagraph('Sensitive workloads (Unity Catalog access, regulated analytics) connect via private endpoints:'),
|
|
|
createBulletPoint('Databricks Private Endpoint: Fabric/SAS reaches Databricks SQL Warehouse without crossing the public internet.'),
|
|
|
createBulletPoint('SAS Private Endpoint: Viya services reached via Azure private link.'),
|
|
|
createBulletPoint('Network traffic logged in Azure Network Watcher; DLP tools monitor for suspicious egress patterns.'),
|
|
|
|
|
|
createHeading2('7.4 Azure Firewall for Databricks Egress'),
|
|
|
createParagraph(
|
|
|
'For Databricks clusters running in non-private mode, Azure Firewall enforces egress policies:'
|
|
|
),
|
|
|
createBulletPoint('Outbound destinations for Databricks clusters are whitelisted (e.g., Azure Storage, Greenfield APIs, approved data sources).'),
|
|
|
createBulletPoint('Attempts to exfiltrate data via curl, wget, or Python requests to external IPs are blocked and logged.'),
|
|
|
createBulletPoint('Example blocked egress: A user attempting to export a sensitive table to a personal AWS S3 bucket is denied.'),
|
|
|
|
|
|
createHeading2('7.5 DLP Integration with Microsoft 365 & Purview'),
|
|
|
createParagraph(
|
|
|
'Microsoft Purview DLP policies extend beyond Microsoft 365 to protect sensitive data exfiltrated through email or file sharing:'
|
|
|
),
|
|
|
createBulletPoint('Sensitive data labels (from Purview) applied to reports/datasets.'),
|
|
|
createBulletPoint('When a user attempts to email a Power BI report labeled "Restricted," Outlook DLP blocks the send.'),
|
|
|
createBulletPoint('When a user attempts to upload a CSV export to OneDrive, OneDrive DLP scans for Purview-labeled data and blocks if policy prohibits it.'),
|
|
|
|
|
|
new PageBreak(),
|
|
|
|
|
|
// Section 8
|
|
|
createHeading1('8. Privacy, Consent & Regulatory Compliance'),
|
|
|
createParagraph(
|
|
|
'Greenfield operates under strict Canadian privacy regulations (Law 25, PIPEDA) and banking regulations (OSFI, AMF). The security architecture must embed privacy controls directly.'
|
|
|
),
|
|
|
|
|
|
createHeading2('8.1 Law 25 & PIPEDA Compliance'),
|
|
|
createParagraph('Law 25 (Quebec) and PIPEDA (federal) require:'),
|
|
|
createBulletPoint('Consent management: Record and enforce user consent for data collection and use.'),
|
|
|
createBulletPoint('Purpose limitation: Data used only for purposes for which consent was granted.'),
|
|
|
createBulletPoint('Data minimization: Collect and retain only necessary data.'),
|
|
|
createBulletPoint('DSAR (Data Subject Access Request): Provide users with a copy of their personal data within 30 days.'),
|
|
|
createBulletPoint('Right to erasure: Delete user data upon request (with regulatory exceptions, e.g., audit logs).'),
|
|
|
|
|
|
createHeading2('8.2 Consent Management Integration'),
|
|
|
createParagraph(
|
|
|
'Greenfield integrates a consent management platform (CMP) with the data platform to enforce use-case restrictions:'
|
|
|
),
|
|
|
createBulletPoint('Consent record: Each customer has a consent record indicating which products and services they have consented to.'),
|
|
|
createBulletPoint('Data lineage integration: Datasets tagged with the business purposes they serve (e.g., "Risk Analytics," "Marketing Personalization").'),
|
|
|
createBulletPoint('Enforcement: When a user queries a dataset for an unapproved purpose, access is denied.'),
|
|
|
createBulletPoint('Example: A customer has consented to "Risk Analytics" but not "Marketing Personalization." Queries to the customer segment dataset from the Marketing team are logged and rejected.'),
|
|
|
|
|
|
createHeading2('8.3 DSAR (Data Subject Access Request) Automation'),
|
|
|
createParagraph(
|
|
|
'When a customer requests their data (via customer service), the system must locate all personal data and compile it within 30 days.'
|
|
|
),
|
|
|
createBulletPoint('Purview asset search: Automated query of Purview to find all assets tagged with a customer\'s ID.'),
|
|
|
createBulletPoint('Data lineage extraction: Trace data flow from source systems through Bronze, Silver, Gold layers; identify all transformations.'),
|
|
|
createBulletPoint('Aggregation & anonymization: Remove sensitive operational fields (e.g., database user IDs, internal notes).'),
|
|
|
createBulletPoint('Delivery: Package data as CSV/PDF and deliver securely to the customer.'),
|
|
|
|
|
|
createHeading2('8.4 Right to Erasure & Pseudonymization'),
|
|
|
createParagraph('When a customer requests deletion of their data:'),
|
|
|
createBulletPoint('Transactional data (Bronze/Silver): Records deleted or masked depending on retention policy and regulatory hold.'),
|
|
|
createBulletPoint('Analytical data (Gold): Customer records removed from aggregates or replaced with pseudonymous equivalents.'),
|
|
|
createBulletPoint('Audit logs: Retention per audit policy (typically 7 years); not deleted.'),
|
|
|
createBulletPoint('Tracking: Each deletion logged in Purview; data lineage updated to reflect the removal.'),
|
|
|
|
|
|
new PageBreak(),
|
|
|
|
|
|
// Section 9
|
|
|
createHeading1('9. Security Audit & Monitoring'),
|
|
|
createParagraph(
|
|
|
'Comprehensive audit logging and monitoring enable detection and response to security threats and compliance violations.'
|
|
|
),
|
|
|
|
|
|
createHeading2('9.1 Fabric Audit Logs'),
|
|
|
createParagraph('Fabric records all administrative actions and user activities:'),
|
|
|
createBulletPoint('Workspace creation/deletion, user additions/removals, role changes.'),
|
|
|
createBulletPoint('Item creation, modification, deletion.'),
|
|
|
createBulletPoint('Data refresh operations (including source connections and credentials used).'),
|
|
|
createBulletPoint('Power BI report views, row-level security changes.'),
|
|
|
createBulletPoint('Audit logs stored in the Audit Log Activity table (accessible via Power BI REST API and Log Analytics).'),
|
|
|
|
|
|
createHeading2('9.2 Databricks Audit Logs'),
|
|
|
createParagraph('Databricks records detailed access and governance events in the system audit log:'),
|
|
|
createBulletPoint('User login/logout, cluster creation/termination.'),
|
|
|
createBulletPoint('Notebook execution, SQL query runs.'),
|
|
|
createBulletPoint('Unity Catalog RLS/column mask changes.'),
|
|
|
createBulletPoint('Delta table modifications (INSERT, UPDATE, DELETE).'),
|
|
|
createBulletPoint('Logs streamed to Azure Log Analytics for downstream analysis and alerting.'),
|
|
|
|
|
|
createHeading2('9.3 Azure Activity Logs & Log Analytics'),
|
|
|
createParagraph('All Azure resource operations (Fabric Premium provisioning, Databricks workspace creation, Key Vault access) logged to Azure Activity Log:'),
|
|
|
createBulletPoint('Centralized ingestion to Azure Log Analytics workspace.'),
|
|
|
createBulletPoint('Queries via Kusto Query Language (KQL) for investigation and reporting.'),
|
|
|
createBulletPoint('Example query: Retrieve all user logins from outside Canada in the past 7 days.'),
|
|
|
|
|
|
createHeading2('9.4 Security Alerting Rules'),
|
|
|
createParagraph('Critical events trigger automated alerts:'),
|
|
|
createBulletPoint('Bulk user additions to privileged Entra groups.'),
|
|
|
createBulletPoint('RLS or column mask rules disabled or modified without change request approval.'),
|
|
|
createBulletPoint('Attempted access from suspicious IP addresses or non-compliant devices.'),
|
|
|
createBulletPoint('Databricks SQL Warehouse accessed outside business hours.'),
|
|
|
createBulletPoint('Alerts routed to Greenfield\'s Security Operations Center (SOC) for investigation.'),
|
|
|
|
|
|
new PageBreak(),
|
|
|
|
|
|
// Section 10
|
|
|
createHeading1('10. End-to-End Security Flow – Worked Examples'),
|
|
|
|
|
|
createHeading2('10.1 Scenario A: Analyst Queries Customer 360 via Fabric SQL Endpoint (OneLake Security)'),
|
|
|
createParagraph('An analyst in the Sales domain queries customer aggregates via Fabric SQL Analytics Endpoint:'),
|
|
|
createBulletPoint('User: Sarah (Sarah.Smith@greenfield.ca, member of Greenfield-Sales-FabricMember).'),
|
|
|
createBulletPoint('Dataset: Customer 360 (Internal classification), stored in a Lakehouse, exposed via SQL Analytics Endpoint.'),
|
|
|
createBulletPoint('Query: SELECT customer_id, annual_revenue, product_count FROM sales.customer_360 WHERE region = \'East\';'),
|
|
|
|
|
|
createParagraph('Security flow:'),
|
|
|
createBulletPoint('1. Authentication: Sarah signs in via Entra ID. Conditional Access verifies device compliance and MFA.'),
|
|
|
createBulletPoint('2. Workspace access: Entra checks if Sarah is a member of Greenfield-Sales-FabricMember. She is; workspace access granted.'),
|
|
|
createBulletPoint('3. Item permissions: Fabric checks if Sarah has "Editor" permission on the Customer 360 Lakehouse. Default workspace Member role grants this.'),
|
|
|
createBulletPoint('4. RLS evaluation: If an RLS rule is defined (e.g., "only Sales members see their region"), Fabric evaluates the predicate. Sarah\'s region is "East"; the filter applies.'),
|
|
|
createBulletPoint('5. Query execution: SQL Analytics Endpoint executes the query with the RLS filter applied.'),
|
|
|
createBulletPoint('6. Result return: 127 rows returned (only East region). Result cached in Fabric cache for 60 minutes.'),
|
|
|
createBulletPoint('7. Audit log: Fabric logs the query execution (who, what, when) to the Fabric audit table.'),
|
|
|
|
|
|
createParagraph('Security outcome: Sarah sees only the data she is authorized for, enforced transparently by RLS.'),
|
|
|
|
|
|
createHeading2('10.2 Scenario B: Data Engineer Accesses Restricted Customer PII via Databricks Unity Catalog'),
|
|
|
createParagraph('A data engineer in the Data Engineering domain needs to access customer SINs and addresses for a regulatory report. This data is classified as Restricted and governed by Unity Catalog:'),
|
|
|
createBulletPoint('User: Alex (Alex.Johnson@greenfield.ca, member of Greenfield-DataEng-DatabricksAdmin).'),
|
|
|
createBulletPoint('Dataset: customer_identity (Restricted classification), stored in Unity Catalog, with row filter and column mask applied.'),
|
|
|
createBulletPoint('Compute: Databricks SQL Warehouse (not Spark, due to no RLS in Spark).'),
|
|
|
|
|
|
createParagraph('Security flow:'),
|
|
|
createBulletPoint('1. Entra ID authentication: Alex signs in to Databricks via Entra ID SSO.'),
|
|
|
createBulletPoint('2. Workspace access: Databricks verifies Alex is a member of Greenfield-DataEng-DatabricksAdmin. Access granted.'),
|
|
|
createBulletPoint('3. Unity Catalog permission: Databricks checks if Alex has "USE_CATALOG" permission on the greenfield_curated catalog. As a member of the admin group, he does.'),
|
|
|
createBulletPoint('4. Row filter evaluation: Unity Catalog evaluates the rls_by_business_unit() predicate. Alex\'s business unit is "Data Engineering"; the filter returns only rows owned by Data Engineering.'),
|
|
|
createBulletPoint('5. Column mask evaluation: Unity Catalog evaluates the mask_sin() function. Alex is NOT a Data Privacy Officer, so his result sees "XXX-XX-1234" instead of the full SIN.'),
|
|
|
createBulletPoint('6. SQL query execution: The query executes with both row filter and column mask applied.'),
|
|
|
createBulletPoint('7. Result return: 45 rows (Data Engineering-owned customers only); SIN columns show masked values.'),
|
|
|
createBulletPoint('8. Audit log: Databricks logs the query to the system audit log, including the fact that RLS and DDM were applied.'),
|
|
|
|
|
|
createParagraph('Security outcome: Alex can access the data he needs for his role, but sensitive columns are masked. Row-level filtering prevents access to other business units\' data. All access is audited.'),
|
|
|
|
|
|
createHeading2('10.3 Scenario C: Attempt to Bypass Security via Spark Notebook (Prevented)'),
|
|
|
createParagraph('An analyst attempts to load sensitive data via a Spark notebook:'),
|
|
|
createBulletPoint('User: Jordan (Jordan.Lee@greenfield.ca, member of Greenfield-Risk-FabricMember).'),
|
|
|
createBulletPoint('Attempted access: Spark notebook in Fabric attempting to load the Unity Catalog table greenfield_curated.customer.customer_identity.'),
|
|
|
createBulletPoint('Code: df = spark.read.table("greenfield_curated.customer.customer_identity")'),
|
|
|
|
|
|
createParagraph('Security enforcement:'),
|
|
|
createBulletPoint('1. Workspace access: Jordan has Member role in Risk workspace; access granted to the notebook.'),
|
|
|
createBulletPoint('2. Spark initialization: Notebook attempts to attach to a Fabric Spark cluster.'),
|
|
|
createBulletPoint('3. Unity Catalog connectivity: Notebook requests a connection string to the Unity Catalog table.'),
|
|
|
createBulletPoint('4. Access denial: Databricks checks if Jordan has "SELECT" permission on the table. Jordan is a member of Greenfield-Risk-FabricMember, but the table is restricted to Greenfield-DataEng-DatabricksAdmin. Access denied.'),
|
|
|
createBulletPoint('5. Spark error: Notebook execution fails with error: "PERMISSION_DENIED: User does not have the SELECT privilege."'),
|
|
|
createBulletPoint('6. Audit: Denied access attempt logged to Databricks audit log.'),
|
|
|
|
|
|
createParagraph('Mitigation effectiveness: The combination of Databricks workspace permissions and Unity Catalog RBAC prevents the analyst from ever seeing the data, even in Spark where RLS/DDM would not apply.'),
|
|
|
|
|
|
new PageBreak(),
|
|
|
|
|
|
// Section 11
|
|
|
createHeading1('11. Security Governance & Operating Model'),
|
|
|
createParagraph(
|
|
|
'Security is not a one-time configuration; it requires ongoing governance, periodic audits, and rapid incident response. This section outlines roles, processes, and tools for continuous security management.'
|
|
|
),
|
|
|
|
|
|
createHeading2('11.1 Roles & Responsibilities')
|
|
|
);
|
|
|
|
|
|
// Section 11.1 — security governance roles, their responsibilities, and
// cadence, rendered as a three-column table.
const governanceRoleHeaders = ['Role', 'Responsibility', 'Frequency'];
const governanceRoleRows = [
  ['Chief Information Security Officer (CISO)', 'Sets security policy and risk tolerance; approves major security decisions.', 'Quarterly reviews'],
  ['Data Security Officer (DSO)', 'Owns data classification, access reviews, and DLP policies.', 'Monthly audits'],
  ['Fabric Workspace Admin', 'Creates/deletes workspaces; manages workspace-level RBAC; configures audit logging.', 'Ad-hoc (change management)'],
  ['Databricks Admin', 'Creates/manages Databricks clusters and jobs; configures Unity Catalog; manages Databricks audit logs.', 'Ad-hoc (change management)'],
  ['Data Steward (per domain)', 'Owns a domain\'s data assets; approves data access requests; responds to DLP alerts.', 'Weekly (as needed)'],
  ['Security Operations Center (SOC)', 'Monitors security alerts; investigates suspicious activity; responds to incidents.', 'Continuous'],
];
const governanceRolesTable = createTable(governanceRoleHeaders, governanceRoleRows);
|
|
|
|
|
|
// Append the Section 11.1 roles table, then the remaining Section 11
// sub-sections and the Section 12 introduction.
children.push(governanceRolesTable);

children.push(
  createHeading2('11.2 Quarterly Access Reviews'),
  createParagraph(
    'Quarterly reviews ensure that RBAC assignments remain appropriate and that no users retain unnecessary privileges:'
  ),
  createBulletPoint('Process: Generate a report of all Entra group memberships and Fabric/Databricks role assignments.'),
  createBulletPoint('Owner review: Each data steward reviews their domain\'s assignments.'),
  createBulletPoint('Remediation: Remove any assignments that are no longer justified.'),
  createBulletPoint('Attestation: Owner signs off on the review; record archived for audit.'),
  createBulletPoint('Tool: Automated via Azure Access Reviews; assignments flagged if unchanged for >6 months.'),

  createHeading2('11.3 Change Management for Security Policies'),
  createParagraph(
    'Changes to RLS rules, column masks, and Entra group assignments are tracked in Git and applied via Infrastructure-as-Code (IaC):'
  ),
  createBulletPoint('Git repository: All policy changes (YAML manifests for RBAC, T-SQL for RLS, DAX for Power BI roles) stored in Git.'),
  createBulletPoint('Code review: Changes require peer review and approval from a data steward before merge.'),
  createBulletPoint('Automated apply: Merge to main triggers Azure DevOps pipeline to apply policies to Fabric/Databricks.'),
  createBulletPoint('Audit trail: Git history + pipeline logs provide immutable record of who changed what and when.'),

  createHeading2('11.4 Incident Response'),
  createParagraph('When a security incident is detected (e.g., unauthorized access, attempted data exfiltration):'),
  createBulletPoint('Alert triage: SOC examines alert; determines severity and scope.'),
  createBulletPoint('Containment: Immediately revoke access for affected user; isolate affected cluster/notebook.'),
  createBulletPoint('Investigation: Query audit logs (Fabric, Databricks, Azure) to determine extent of access and data touched.'),
  createBulletPoint('Notification: Notify data steward and CISO; determine if regulatory reporting required.'),
  createBulletPoint('Remediation: Update policies to prevent recurrence; user re-trained or off-boarded.'),
  createBulletPoint('Post-incident review: Document findings and lessons learned; update security runbooks.'),

  // PageBreak is run-level in docx; it must be wrapped in a Paragraph
  // to be a valid section child.
  new Paragraph({ children: [new PageBreak()] }),

  // Section 12
  createHeading1('12. Comparison with Databricks-Primary Security Model'),
  createParagraph(
    'The Fabric-primary and Databricks-primary architectures differ fundamentally in their security enforcement planes. This section highlights key differences to inform architecture selection and migration planning.'
  )
);
|
|
|
|
|
|
// Section 12 — side-by-side comparison of the Databricks-primary and
// Fabric-primary security models, rendered as a four-column table.
const comparisonHeaders = ['Aspect', 'Databricks-Primary', 'Fabric-Primary', 'Implication'];
const comparisonRows = [
  ['Primary enforcement', 'Unity Catalog (row filters, column masks) on all data.', 'OneLake security (Fabric RBAC, RLS, DDM) for majority; Unity Catalog retained for sensitive.', 'Fabric-primary reduces operational complexity for non-sensitive data but introduces engine-specific gaps.'],
  ['RLS mechanism', 'SQL row filter predicates in Unity Catalog; uniform across all engines.', 'Power BI RLS (DAX); SQL RLS predicates (T-SQL); Spark has no equivalent.', 'Fabric-primary requires architecture to route sensitive data around Spark.'],
  ['Column masking', 'Unity Catalog column masks; applied uniformly.', 'Fabric DDM on SQL endpoint + Power BI default; Spark sees unmasked data.', 'Same gap: Spark notebooks bypass masking; mitigate via separate Lakehouses.'],
  ['Public data storage', 'Still in Databricks; UC rows/cols define access.', 'Stored in OneLake; lower governance overhead.', 'Fabric-primary simpler for low-sensitivity workloads.'],
  ['Restricted data path', 'Databricks SQL Warehouse + Unity Catalog.', 'Databricks SQL Warehouse + Unity Catalog (same).', 'No difference for truly sensitive data.'],
  ['BI integration', 'Power BI with Databricks connector; RLS policy per BI model.', 'Native Power BI + Fabric SQL; RLS built into DAX.', 'Fabric-primary tighter integration; easier to govern.'],
  ['Operational cost', 'Higher: Unity Catalog governance on all datasets.', 'Lower: OneLake RBAC simpler than UC for 80% of data.', 'Fabric-primary reduces TCO for low-sensitivity assets.'],
  ['Audit trail', 'Databricks audit logs comprehensive.', 'Fabric audit logs + Databricks audit logs (dual system).', 'Fabric-primary requires monitoring two platforms.'],
];
const comparisonTable = createTable(comparisonHeaders, comparisonRows);
|
|
|
|
|
|
// Append the Section 12 comparison table, then Section 12.1 and the
// concluding Section 13.
children.push(comparisonTable);

children.push(
  createHeading2('12.1 Migration Implications: Databricks-Primary to Fabric-Primary'),
  createParagraph(
    'Organizations currently on Databricks-primary considering a transition to Fabric-primary must address the architectural gaps:'
  ),
  createBulletPoint('Step 1: Classify all datasets; identify which rely on Spark for core analytics.'),
  createBulletPoint('Step 2: For datasets moving to Fabric, replace Spark dependencies with SQL or Power BI; test RLS rules thoroughly.'),
  createBulletPoint('Step 3: Move a non-critical domain (e.g., Marketing analytics) to Fabric first; validate security controls.'),
  createBulletPoint('Step 4: Migrate remaining domains in waves; parallel run Databricks until cutover proven.'),
  createBulletPoint('Step 5: Sunset Databricks security rules for migrated datasets; maintain UC-governed tables in "legacy" Databricks workspace for historical queries only.'),

  // PageBreak is run-level in docx; it must be wrapped in a Paragraph
  // to be a valid section child.
  new Paragraph({ children: [new PageBreak()] }),

  // Conclusion
  createHeading1('13. Conclusion'),
  createParagraph(
    'The Fabric-primary security model rebalances Greenfield\'s data platform architecture, shifting the bulk of enforcement to Fabric\'s OneLake and Power BI layers while retaining Databricks Unity Catalog as a specialized enforcement plane for the most sensitive datasets. This dual-enforcement approach reduces operational overhead for the majority of analytics workloads while maintaining the granular access control required for regulated and PII-heavy data.'
  ),
  createParagraph(
    'The critical architectural gap—that Fabric Spark notebooks bypass RLS, DDM, and column-level permissions—must be managed through deliberate design choices: separate Lakehouses for sensitive data, SQL-only access paths for restricted datasets, and rigorous notebook governance. When these controls are properly implemented, the Fabric-primary architecture delivers a more integrated, maintainable, and cost-effective security posture than Databricks-primary for a large regulated financial institution like Greenfield.'
  ),
  createParagraph(
    'Implementation of this security model requires close collaboration between data engineering, security, compliance, and business stakeholders. Quarterly access reviews, change management via IaC, and continuous monitoring via Fabric and Databricks audit logs ensure that security policies remain effective and audit-ready.'
  )
);
|
|
|
|
|
|
// Assemble the final document. In the docx library, page margins belong
// under `properties.page.margin` for each section; a top-level `margins`
// key on the section object is not part of the API and is silently
// ignored (which would leave the document with default margins).
const doc = new Document({
  sections: [
    {
      properties: {
        page: {
          // 1440 twips = 1 inch on every side.
          margin: {
            top: 1440,
            right: 1440,
            bottom: 1440,
            left: 1440,
          },
        },
      },
      children: children,
    },
  ],
});
|
|
|
|
|
|
// Ensure the output directory exists. With `recursive: true`,
// fs.mkdirSync is a no-op when the directory already exists, so the
// separate existsSync guard is unnecessary.
const outputDir = '/sessions/dreamy-great-hypatia/mnt/mdp/Fabric prime';
fs.mkdirSync(outputDir, { recursive: true });
|
|
|
|
|
|
// Serialize the document and write it to disk. Build the path from
// outputDir via path.join so the directory string is defined once
// (avoids the directory/file paths drifting apart).
const outputPath = path.join(outputDir, '02_Appendix_A_Security_Privacy_Model_Fabric_Primary_v1.docx');

Packer.toBuffer(doc).then((buffer) => {
  fs.writeFileSync(outputPath, buffer);
  console.log(`Document created successfully: ${outputPath}`);
  process.exit(0);
}).catch((err) => {
  // Surface the failure and exit non-zero so CI/automation can detect it.
  console.error('Error creating document:', err);
  process.exit(1);
});
|