1

MERGE特定のディメンション/ステージング テーブル ダンプ間の SCD Type 1 更新を処理する TSQL ステートメントを動的に生成するストアド プロシージャがあります。

T-SQL コードは次のとおりです (元のコードは Alex Whittles によって変更されています)。

USE [OPPY_DWUSD]
GO

ALTER PROCEDURE [dbo].[GenerateMerge]
/***************************************************************
*                                                              *
*   Script for use with blog post                              *
*     "Automating T-SQL Merge to load Dimensions (SCD)"        *
*     http://www.purplefrogsystems.com/blog/2012/04/automating-t-sql-merge-to-load-dimensions-scd
*                                                              *   
*   Posted: 6th April 2012                                     *
*                                                              *
*   By: Alex Whittles - Purple Frog Business Intelligence      *
*       www.PurpleFrogSystems.com                              *
*                                                              *
*   All code samples are provided “AS IS” without warranty of  *
*   any kind, either express or implied, including but not     *
*   limited to the implied warranties of merchantability       *
*   and/or fitness for a particular purpose.                   *
*                                                              *
***************************************************************/

@Dimension  varchar(50),
@Schema     varchar(50),
@ETLTable   varchar(50),
@ETLSchema  varchar(50),
@Execute    bit=0  --Should the resulting merge be returned or executed
AS
BEGIN
SET NOCOUNT ON;

--Create Carriage return variable to format the resulting query
DECLARE @crlf char(2)
SET @crlf = CHAR(13)


--Find out which Audit fields are used
DECLARE @UseIsInferred bit
DECLARE @UseFirstCreated bit
DECLARE @UseValidTo bit
DECLARE @UseIsRowCurrent bit
DECLARE @UseLastUpdated bit

SET @UseIsInferred = ISNULL((SELECT MAX(1) 
FROM sys.columns c
        INNER JOIN sys.tables t on c.object_id = t.object_id
        INNER JOIN sys.schemas s ON t.schema_id = s.schema_id
WHERE       s.name = @Schema
        AND t.name = @Dimension
        AND c.name = 'IsInferred'
),0)

SET @UseFirstCreated= ISNULL((SELECT MAX(1) 
FROM sys.columns c
        INNER JOIN sys.tables t on c.object_id = t.object_id
        INNER JOIN sys.schemas s ON t.schema_id = s.schema_id
WHERE       s.name = @Schema
        AND t.name = @Dimension
        AND c.name = 'FirstCreated'
),0)

SET @UseValidTo = ISNULL((SELECT MAX(1) 
FROM sys.columns c
        INNER JOIN sys.tables t on c.object_id = t.object_id
        INNER JOIN sys.schemas s ON t.schema_id = s.schema_id
WHERE       s.name = @Schema
        AND t.name = @Dimension
        AND c.name = 'ValidTo'
),0)

SET @UseIsRowCurrent = ISNULL((SELECT MAX(1) 
FROM sys.columns c
        INNER JOIN sys.tables t on c.object_id = t.object_id
        INNER JOIN sys.schemas s ON t.schema_id = s.schema_id
WHERE       s.name = @Schema
        AND t.name = @Dimension
        AND c.name = 'IsRowCurrent'
),0)

SET @UseLastUpdated = ISNULL((SELECT MAX(1) 
FROM sys.columns c
        INNER JOIN sys.tables t on c.object_id = t.object_id
        INNER JOIN sys.schemas s ON t.schema_id = s.schema_id
WHERE       s.name = @Schema
        AND t.name = @Dimension
        AND c.name = 'LastUpdated'
),0)


--Identify the business key column(s)
--Also define what the null replacement should be
DECLARE myCurBK Cursor FOR 
SELECT c.name
    , CASE WHEN ty.name IN ('text', 'ntext', 'varchar', 'char', 'nvarchar', 'nchar') THEN ''''''
        WHEN ty.name IN ('tinyint', 'smallint', 'int', 'real', 'money', 'float', 'bit', 'decimal', 'numeric','smallmoney','bigint') THEN '0'
        WHEN ty.name IN ('date', 'datetime') THEN '''19000101'''
        ELSE 'NULL' END AS NullRep
FROM sys.columns c
    INNER JOIN sys.tables t on c.object_id = t.object_id
    INNER JOIN sys.schemas s ON t.schema_id = s.schema_id
    INNER JOIN sys.types ty ON c.user_type_id = ty.user_type_id
    INNER JOIN sys.extended_properties ep 
         ON t.object_id=ep.major_id
        AND c.column_id=ep.minor_id
        AND ep.class=1
        AND ep.name='SCD'
WHERE s.name = @Schema
    AND t.name = @Dimension
    AND ep.value = 'BK'
ORDER BY c.column_id

--Identify all fields to be merged (Exclude Type 0)
DECLARE myCurType1 Cursor
FOR SELECT c.name
    , CASE WHEN ty.name IN ('text', 'ntext', 'varchar', 'char', 'nvarchar', 'nchar') THEN ''''''
        WHEN ty.name IN ('tinyint', 'smallint', 'int', 'real', 'money', 'float', 'bit', 'decimal', 'numeric','smallmoney','bigint') THEN '0'
        WHEN ty.name IN ('date', 'datetime') THEN '''19000101'''
        ELSE 'NULL' END AS NullRep
    FROM sys.columns c
        INNER JOIN sys.tables t on c.object_id = t.object_id
        INNER JOIN sys.schemas s ON t.schema_id = s.schema_id
        INNER JOIN sys.types ty ON c.user_type_id = ty.user_type_id
        LEFT JOIN sys.extended_properties ep 
             ON t.object_id=ep.major_id
            AND c.column_id=ep.minor_id
            AND ep.class=1
            AND ep.name='SCD'
    WHERE s.name = @Schema
        AND t.name = @Dimension
        AND c.is_identity=0
        AND ISNULL(ep.value,'1') NOT IN ('0', 'Audit', 'BK')
    ORDER BY c.column_id ASC

--Identify all fields for insert
DECLARE myCurAll Cursor
FOR SELECT c.name
    , CASE WHEN ty.name IN ('text', 'ntext', 'varchar', 'char', 'nvarchar', 'nchar') THEN ''''''
        WHEN ty.name IN ('tinyint', 'smallint', 'int', 'real', 'money', 'float', 'bit', 'decimal', 'numeric','smallmoney','bigint') THEN '0'
        WHEN ty.name IN ('date', 'datetime') THEN '''19000101'''
        ELSE 'NULL' END AS NullRep
    FROM sys.columns c
        INNER JOIN sys.tables t on c.object_id = t.object_id
        INNER JOIN sys.schemas s ON t.schema_id = s.schema_id
        INNER JOIN sys.types ty ON c.user_type_id = ty.user_type_id
    WHERE s.name = @Schema
        AND t.name = @Dimension
        AND c.name NOT IN ('LastUpdated', 'IsInferred', 'FirstCreated', 'ValidTo', 'IsRowCurrent')
        AND c.is_identity=0
    ORDER BY c.column_id ASC

DECLARE @Field varchar(255)
DECLARE @NullRep varchar(20)
DECLARE @SQL varchar(max)
DECLARE @SQL2 varchar(max)
DECLARE @SQL3 varchar(max)

--Now start building up the dynamic SQL

SET @SQL ='MERGE [' + @Schema + '].[' + @Dimension + '] AS Target'
SET @SQL = @SQL + @crlf + 'USING [' + @ETLSchema + '].[' + @ETLTable + '] AS Source'

OPEN myCurBK
FETCH NEXT FROM myCurBK INTO @Field, @NullRep
IF (@@FETCH_STATUS>=0)
    BEGIN
        SET @SQL = @SQL + @crlf + '       ON Target.' + '[' + @Field + ']' +  ' = Source.' + '[' + @Field + ']'
        FETCH NEXT FROM myCurBK INTO @Field, @NullRep
    END
WHILE (@@FETCH_STATUS<>-1)
BEGIN
    IF (@@FETCH_STATUS<>-2)
        SET @SQL = @SQL + @crlf + '       AND Target.' + '[' + @Field + ']' +  ' = Source.' + '[' + @Field + ']'
    FETCH NEXT FROM myCurBK INTO @Field, @NullRep
END
CLOSE myCurBK

IF @UseIsRowCurrent>0   SET @SQL = @SQL + @crlf + '       AND Target.IsRowCurrent=1'

SET @SQL = @SQL + @crlf + '    WHEN MATCHED'

OPEN myCurType1
FETCH NEXT FROM myCurType1 INTO @Field, @NullRep

IF (@@FETCH_STATUS>=0)
    BEGIN
        SET @SQL = @SQL + @crlf + '       AND (ISNULL(Target.' + '[' + @Field + ']' + ',' + @NullRep + ') <> ISNULL(Source.' + '[' + @Field + ']' + ',' + @NullRep + ')'
        FETCH NEXT FROM myCurType1 INTO @Field, @NullRep
    END
WHILE (@@FETCH_STATUS<>-1)
BEGIN
    IF (@@FETCH_STATUS<>-2)
        SET @SQL = @SQL + @crlf + '       OR ISNULL(Target.' + '[' + @Field + ']' + ',' + @NullRep + ') <> ISNULL(Source.' + '[' + @Field + ']' + ',' + @NullRep + ')'
    FETCH NEXT FROM myCurType1 INTO @Field, @NullRep
END
CLOSE myCurType1

SET @SQL = @SQL + @crlf + '      )'
SET @SQL2 = '    THEN UPDATE SET'

OPEN myCurType1
FETCH NEXT FROM myCurType1 INTO @Field, @NullRep
IF (@@FETCH_STATUS>=0)
    BEGIN
        SET @SQL2 = @SQL2 + @crlf + '       ' + '[' + @Field + ']' + ' = Source.' + '[' + @Field + ']'
        FETCH NEXT FROM myCurType1 INTO @Field, @NullRep
    END
WHILE (@@FETCH_STATUS<>-1)
BEGIN
    IF (@@FETCH_STATUS<>-2)
        SET @SQL2 = @SQL2 + @crlf + '      ,' + '[' + @Field + ']' + ' = Source.' + '[' + @Field + ']'
    FETCH NEXT FROM myCurType1 INTO @Field, @NullRep
END
CLOSE myCurType1

IF @UseLastUpdated>0   SET @SQL2 = @SQL2 + @crlf + '      ,LastUpdated = GetDate()'

SET @SQL3 = '    WHEN NOT MATCHED THEN'
SET @SQL3 = @SQL3 + @crlf + '         INSERT ('

OPEN myCurAll
FETCH NEXT FROM myCurAll INTO @Field, @NullRep
IF (@@FETCH_STATUS>=0)
    BEGIN
        SET @SQL3 = @SQL3 + @crlf + '           ' + '[' + @Field + ']'
        FETCH NEXT FROM myCurAll INTO @Field, @NullRep
    END
WHILE (@@FETCH_STATUS<>-1)
BEGIN
    IF (@@FETCH_STATUS<>-2)
        SET @SQL3 = @SQL3 + @crlf + '           ,' + '[' + @Field + ']'
    FETCH NEXT FROM myCurAll INTO @Field, @NullRep
END
CLOSE myCurAll


IF @UseIsInferred>0     SET @SQL3 = @SQL3 + @crlf + '           ,IsInferred'
IF @UseFirstCreated>0   SET @SQL3 = @SQL3 + @crlf + '           ,FirstCreated'
IF @UseValidTo>0        SET @SQL3 = @SQL3 + @crlf + '           ,ValidTo'
IF @UseIsRowCurrent>0   SET @SQL3 = @SQL3 + @crlf + '           ,IsRowCurrent'
IF @UseLastUpdated>0    SET @SQL3 = @SQL3 + @crlf + '           ,LastUpdated'
SET @SQL3 = @SQL3 + @crlf + '         ) VALUES ('

OPEN myCurAll
FETCH NEXT FROM myCurAll INTO @Field, @NullRep
IF (@@FETCH_STATUS>=0)
    BEGIN
        SET @SQL3 = @SQL3 + @crlf + '            Source.' + '[' + @Field + ']'
        FETCH NEXT FROM myCurAll INTO @Field, @NullRep
    END
WHILE (@@FETCH_STATUS<>-1)
BEGIN
    IF (@@FETCH_STATUS<>-2)
        SET @SQL3 = @SQL3 + @crlf + '           ,Source.' + '[' + @Field + ']'
    FETCH NEXT FROM myCurAll INTO @Field, @NullRep
END
CLOSE myCurAll    

IF @UseIsInferred>0     SET @SQL3 = @SQL3 + @crlf + '           ,0'
IF @UseFirstCreated>0   SET @SQL3 = @SQL3 + @crlf + '           ,GetDate()'
IF @UseValidTo>0        SET @SQL3 = @SQL3 + @crlf + '           ,NULL'
IF @UseIsRowCurrent>0   SET @SQL3 = @SQL3 + @crlf + '           ,1'
IF @UseLastUpdated>0    SET @SQL3 = @SQL3 + @crlf + '           ,GetDate()'
SET @SQL3 = @SQL3 + @crlf + '         );'

--clean up
DEALLOCATE myCurType1
DEALLOCATE myCurAll
DEALLOCATE myCurBK

IF @Execute = 1
BEGIN
    EXEC(@SQL + @SQL2 + @SQL3)           
END
ELSE
BEGIN    
    PRINT @SQL
    PRINT @SQL2
    PRINT @SQL3
END

END

上記のストアド プロシージャで変数を渡すと、次のコードが生成されます。

MERGE [DIM].[COMPANY] AS Target
USING [DBO].[DWUSD_LIVE] AS Source
    ON Target.[comp] = Source.[comp]
 WHEN MATCHED
    AND (ISNULL(Target.[comp name],'') <> ISNULL(Source.[comp name],'')
    OR ISNULL(Target.[comp description],'') <> ISNULL(Source.[comp description],'')
   )
THEN UPDATE SET
    [comp name] = Source.[comp name]
   ,[comp description] = Source.[comp description]
   ,LastUpdated = GetDate()
WHEN NOT MATCHED THEN
      INSERT (
        [comp]
        ,[comp name]
        ,[comp description]
        ,FirstCreated
        ,LastUpdated
      ) VALUES (
         Source.[comp]
        ,Source.[comp name]
        ,Source.[comp description]
        ,GetDate()
        ,GetDate()
      );

私ができる必要があるDISTINCT SELECTのは、最初にサブクエリを追加することです...

MERGE [dim].[Company] AS Target
USING (
SELECT DISTINCT 
[COMP NAME],
[COMP DESCRIPTION],
[COMP]
FROM [dbo].[DWUSD_LIVE]
) AS Source

そのため、ソース (ステージング テーブル) を見るとSELECT DISTINCT、ディメンションにあるが監査列 ( FirstCreated/ LastUpdated) を含まない同じ列に対して実行されます。

私のステージング テーブルには重複したレコードがあるため、SELECT DISTINCTそれ以外の場合はディメンションに複数のレコードが作成されるだけで済みます。

4

1 に答える 1

1

これらの変数で、ソースとして使用するテーブルをストアド プロシージャに渡しているようです。

@ETLTable   varchar(50),
@ETLSchema  varchar(50),

ソースの定義には、これらと同じ変数が使用されます。

--Now start building up the dynamic SQL

SET @SQL ='MERGE [' + @Schema + '].[' + @Dimension + '] AS Target'
SET @SQL = @SQL + @crlf + 'USING [' + @ETLSchema + '].[' + @ETLTable + '] AS Source'

実際のソース テーブルから必要な列が同じである限り、使用したい別のクエリを取得し、それを使用してビューを作成し、そのテーブルをストアド プロシージャに渡すことができると思います。

CREATE VIEW dbo.vwDWUSD_LIVE
AS 
SELECT DISTINCT 
[COMP NAME],
[COMP DESCRIPTION],
[COMP]
FROM [dbo].[DWUSD_LIVE]
GO

exec ステートメントは次のようになります。

EXEC [dbo].[GenerateMerge] 'COMPANY', 'DIM', 'vwDWUSD_LIVE', 'DBO', 1

また、ストアド プロシージャを変更する必要はありません。これは、他のプロセスによって呼び出される可能性があるように見えます。他のテーブルで同じ状況が発生した場合は、同様のソリューションを適用できます。

于 2012-10-29T01:19:46.337 に答える