Wednesday, 28 January 2015

Insert unknown dimension record for all dimension tables

Case
I have a lot of dimension packages in SSIS that all insert a default record for unknown dimension values. It's a lot of repetitive and boring work. Is there an alternative for creating an insert query manually?
A typical dimension package





















Solution
Instead of creating an insert query manually for each dimension table you could also create a Stored Procedure to do this for you. Instead of the insert query in the Execute SQL Task you execute this Stored Procedure in the Execute SQL Task.
-- TSQL code
USE [datamart]
GO

/****** datamart:  StoredProcedure [dbo].[InsertUnknownDimensionRow]   ******/
SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO

CREATE PROCEDURE [dbo].[InsertUnknownDimensionRow](@TableName nvarchar(128))
AS
BEGIN

 -- This Stored Procedure inserts a record in the dimension table
 -- for unknown dimension values. It generates an insert statement
 -- based on the column datatypes and executes it.
 -- The integer column with identity enabled gets the value -1 and
 -- all other columns get a default value based on their datatype.
 -- Columns with a default value are ignored.

 -- Create temporary table for column specs of dimension table
 DECLARE @TableSpecs TABLE (
        COLUMN_ID     int identity,
        COLUMN_NAME     nvarchar(128),
        DATA_TYPE     nvarchar(128),
        CHARACTER_MAXIMUM_LENGTH int,
        COLUMN_IS_IDENTITY   bit
          )

 -- Use the information schema to get column info and insert it
 -- to the temporary table.
 INSERT              @TableSpecs
 SELECT              C.COLUMN_NAME
 ,                   C.DATA_TYPE
 ,                   C.CHARACTER_MAXIMUM_LENGTH
 ,                   columnproperty(object_id(C.TABLE_SCHEMA + '.' + C.TABLE_NAME)
      , C.COLUMN_NAME, 'IsIdentity') AS COLUMN_IS_IDENTITY
 FROM                INFORMATION_SCHEMA.COLUMNS C
 WHERE               QUOTENAME(C.TABLE_NAME) = QUOTENAME(@TableName)
 AND                 C.COLUMN_DEFAULT IS NULL
 ORDER BY            C.ORDINAL_POSITION

 -- Variables to keep track of the number of columns
 DECLARE @ColumnId INT
 SET @ColumnId = -1

 DECLARE @ColumnCount INT
 SET @ColumnCount = 0

 -- Variables to create the insert query
 DECLARE @INSERTSTATEMENT_START nvarchar(max)
 DECLARE @INSERTSTATEMENT_END nvarchar(max)

 SET @INSERTSTATEMENT_START = 'INSERT INTO ' + QUOTENAME(@TableName) + ' ('
 SET @INSERTSTATEMENT_END = 'VALUES ('

 -- Variables to complete the insert query with
 -- extra enable and disable identity statements
 -- You could add an extra check in the loop to
 -- make sure there is an identity column in the
 -- table. Otherwise the SET IDENTITY_INSERT
 -- statement will fail.
 DECLARE @IDENITYSTATEMENT_ON nvarchar(255)
 DECLARE @IDENITYSTATEMENT_OFF nvarchar(255)

 SET @IDENITYSTATEMENT_ON = 'SET IDENTITY_INSERT ' + QUOTENAME(@TableName) + ' ON;'
 SET @IDENITYSTATEMENT_OFF = 'SET IDENTITY_INSERT ' + QUOTENAME(@TableName) + ' OFF;'

 -- Variables filled and use the WHILE loop
 DECLARE @COLUMN_NAME VARCHAR(50)
 DECLARE @DATA_TYPE VARCHAR(50)
 DECLARE @CHARACTER_MAXIMUM_LENGTH INT
 DECLARE @COLUMN_IS_IDENTITY BIT

 -- WHILE loop to loop through all columns and
 -- create a insert query with the columns
 WHILE @ColumnId IS NOT NULL
 BEGIN
   -- Keep track of the number of columns
   SELECT @ColumnId = MIN(COLUMN_ID)
   ,       @ColumnCount = @ColumnCount + 1
   FROM    @TableSpecs
   WHERE   COLUMN_ID > @ColumnCount

   -- Check if there are any columns left
   IF @ColumnId IS NULL
   BEGIN
    -- No columns left, break loop
    BREAK
   END
   ELSE
   BEGIN
    -- Get info for column number x
    SELECT       @COLUMN_NAME = COLUMN_NAME
    ,            @DATA_TYPE = DATA_TYPE
    ,            @CHARACTER_MAXIMUM_LENGTH = CHARACTER_MAXIMUM_LENGTH
    ,            @COLUMN_IS_IDENTITY = COLUMN_IS_IDENTITY
    FROM         @TableSpecs
    WHERE        COLUMN_ID = @ColumnCount
   END
       
   -- Start building the begin of the statement (same for each column)
   SET @INSERTSTATEMENT_START = @INSERTSTATEMENT_START + @COLUMN_NAME + ','

   -- Start building the end of the statement (the default values)
   IF @COLUMN_IS_IDENTITY = 1
   BEGIN
    -- Default value if the current column is the identity column
    SET @INSERTSTATEMENT_END = @INSERTSTATEMENT_END + '-1,'
   END
             
   IF @DATA_TYPE IN ('int', 'numeric', 'decimal', 'money', 'float', 'real', 'bigint', 'smallint', 'tinyint', 'smallmoney') AND (@COLUMN_IS_IDENTITY = 0)
   BEGIN
    -- Default value if the current column is a numeric column,
    -- but not an identity: zero
    SET @INSERTSTATEMENT_END = @INSERTSTATEMENT_END + '0,'
   END

   IF @DATA_TYPE IN ('char', 'nchar', 'varchar', 'nvarchar')
   BEGIN
    -- Default value if the current column is a text column
    -- Part of the text "unknown" depending on the length
    SET @INSERTSTATEMENT_END = @INSERTSTATEMENT_END + '''' + LEFT('Unknown', @CHARACTER_MAXIMUM_LENGTH) + ''','
   END

   IF @DATA_TYPE IN ('datetime', 'date', 'timestamp', 'datatime2', 'datetimeoffset', 'smalldatetime', 'time') 
   BEGIN
    -- Default value if the current column is a datetime column
    -- First of january 1900
    SET @INSERTSTATEMENT_END = @INSERTSTATEMENT_END + '''' + CONVERT(varchar, CONVERT(date, 'Jan 1 1900')) + ''','
   END

   IF @DATA_TYPE = 'bit' 
   BEGIN
    -- Default value if the current column is a boolean 
    SET @INSERTSTATEMENT_END = @INSERTSTATEMENT_END + '0,'
   END
 END

 -- Remove last comma from start and end part of the insert statement
 SET @INSERTSTATEMENT_START = LEFT(@INSERTSTATEMENT_START, LEN(@INSERTSTATEMENT_START) - 1) + ')'
 SET @INSERTSTATEMENT_END = LEFT(@INSERTSTATEMENT_END, LEN(@INSERTSTATEMENT_END) - 1) + ');'

 -- Execute the complete statement
 EXEC (@IDENITYSTATEMENT_ON + ' ' + @INSERTSTATEMENT_START + ' ' + @INSERTSTATEMENT_END + ' ' + @IDENITYSTATEMENT_OFF)
      
END

GO
-- Tweak the code for your own needs and standards
-- Optional extra check if you don't want to truncate
-- your dimensions: is there already a default/unknown
-- record available

Execute Stored Procedure


















Note: only the most common datatypes are handled. Add more if-statements if you expect data types like varbinary, xml, image or sql_variant

1 comment:

Please use the SSIS MSDN forum for general SSIS questions that are not about this post. I'm a regular reader of that forum and will gladly answer those questions over there.

All comments are moderated manually to prevent spam.