Recently I got a question from our team member on how are we managing, delta data merging to master data and how do we do synch with source data periodically. It is very big topic, but How do I put them all in 15 minute discussion…The following example I showed to him and he got the insight into it.
The following script, which talks about common scenario for most of the projects. Hope any one who is get starting with Warehouse projects.
-------------------------------------
-- Assume scenario where you get source data as
-- single flat table, you need to extract the masters
-- create surrogate keys and handle INSERT / UPDATE for repeated runs.
-- Source Table: tblSource
-- Target Table: dimPlan, factTransaction
-- Temp tables: wrkSource, wrkPlan
-------------------------------------
----------------
-- SOURCE TABLE
----------------
IF OBJECT_ID('tblSource') IS NOT NULL DROP TABLE tblSource
CREATE TABLE tblSource
(
Id INT,
Name VARCHAR(100),
PlanCode CHAR(2),
PlanDesc VARCHAR(200),
)
----------------
-- TEMP TABLE
----------------
IF OBJECT_ID('wrkSource') IS NOT NULL DROP TABLE wrkSource
-- 1. Transaction
CREATE TABLE wrkSource
(
Id INT,
Name VARCHAR(100),
PlanKey INT
)
IF OBJECT_ID('wrkPlan') IS NOT NULL DROP TABLE wrkPlan
-- 2. Master
CREATE TABLE wrkPlan
(
PlanCode CHAR(2),
PlanDesc VARCHAR(200)
)
----------------
-- TARGET TABLE
----------------
IF OBJECT_ID('dimPlan') IS NOT NULL DROP TABLE dimPlan
-- 1. Master
CREATE TABLE dimPlan
(
Id INT IDENTITY(1,1),
PlanCode CHAR(2),
PlanDesc VARCHAR(200)
)
IF OBJECT_ID('factTransaction') IS NOT NULL DROP TABLE factTransaction
-- 2. Transaction
CREATE TABLE factTransaction
(
Id INT,
Name VARCHAR(100),
PlanKey INT
)
----------------
-- MERGE SP
----------------
DROP PROC MG
GO
CREATE PROC MG
AS
BEGIN
-- Extract Master
TRUNCATE TABLE wrkPlan
INSERT INTO wrkPlan(PlanCode, PlanDesc)
SELECT PlanCode, PlanDesc
FROM (
SELECT ROW_NUMBER() OVER (PARTITION BY PlanCode ORDER BY PlanDesc) RNO, PlanCode, PlanDesc
FROM tblSource) S
WHERE RNO =1
-- Merge master
;MERGE dimPlan AS T
USING wrkPlan AS S
ON T.PlanCode = S.PlanCode
WHEN MATCHED THEN UPDATE SET T.PlanDesc = S.PlanDesc
WHEN NOT MATCHED THEN INSERT (PlanCode, PlanDesc) VALUES(S.PlanCode, S.PlanDesc);
-- Merge master --- IF TYPE 2 ON INCLUDE DESC
--;MERGE dimPlan AS T
--USING wrkPlan AS S
--ON T.PlanCode = S.PlanCode
--WHEN MATCHED THEN UPDATE SET T.PlanDesc = S.PlanDesc
--WHEN NOT MATCHED THEN INSERT (PlanCode, PlanDesc) VALUES(S.PlanCode, S.PlanDesc);
-- Extract transaction
TRUNCATE TABLE wrkSource
INSERT INTO wrkSource (id,Name, PlanKey)
SELECT s.id, S.Name, P.Id
FROM tblSource S
JOIN dimPlan P ON S.PlanCode = P.PlanCode
-- Merge transaction
MERGE factTransaction AS T
USING wrkSource AS S
ON T.ID = S.ID
WHEN MATCHED THEN UPDATE SET id = s.id, Name = S.Name, PlanKey = S.PlanKey
WHEN NOT MATCHED THEN INSERT (id,NAME, plankey) VALUES (s.ID,S.NAME, s.plankey);
END
go
----------------
-- UNIT TEST
----------------
SELECT 'Unit test 1. New data' UNIT_TEST
INSERT INTO tblSource VALUES
(1,'RAM','aa','aaaaaaa'),
(2,'RAMA','aa','aaaaaaa'),
(3,'RAMAN','bb','bbbbbbb'),
(4,'RAMU','bb','bbbbbbb')
go
SELECT * FROM tblSource
EXEC MG
SELECT * FROM factTransaction
SELECT * FROM dimPlan
SELECT 'Unit test 2. Data change' UNIT_TEST
INSERT INTO tblSource VALUES (5,'RAddM','aa','aaaaaaa')
UPDATE tblSource SET NAME = 'UPDATED...', PlanCode = 'BB', PlanDesc = 'aaUPDATED TO BBBBB' WHERE ID =2
SELECT * FROM tblSource
EXEC MG
SELECT * FROM factTransaction
SELECT * FROM dimPlan
No comments:
Post a Comment