2

この問題には、認めたくないほど多くの時間を費やしてきました。カーソルを使った解決策(以下)はありますが、他の方法でも実現できるでしょうか?SQLには配列などの一般的なデータ構造がないため、私にとっては特に難しい問題でした。

再帰を使うのに適した問題のようにも思えますが、その方法を見つけられませんでした。プラットフォームは MS SQL Server 2008(T-SQL)です。

一意でない識別子の2つの列と日付の列を持つテーブルについて考えてみます。日付ごとに、1つの列(X)の識別子を、2番目の列(Y)の識別子が一意であるセットに統合またはグループ化します。

  • X識別子は一意ではありません
  • Y識別子はX識別子の各セット内で一意ですが、全体的に一意ではありません
  • 組み合わせる場合、可能な限り最小のX識別子が使用されます
  • X識別子は複数の日付にまたがることはありません

おそらく、開始するのに最適な場所は、いくつかのサンプルデータを使用することです。ソリューションには、いくつかの拡張サンプルデータもあります。私がこれを使用している実際の実装では、通常200行未満であり、ほとんどの場合100行未満になります。

Dt         X     Y     newX
6/1/2012   1     1     1
6/1/2012   1     2     1
6/1/2012   2     3     1
6/1/2012   3     1     3   <-- because Y=1 is already in X=1
6/1/2012   3     4     3
6/1/2012   4     5     1
6/1/2012   5     4     1   <-- Y=4 is in X=3 but not X=1
6/1/2012   5     6     1
6/1/2012   6     4     6   <-- Y=4 is in X=1 and X=3
6/1/2012   6     7     6

ソリューション...

-- task: combine/condense/reassign/coalesce/collapse/consolidate sets of X identifiers into groups with unique Y identifiers, and by date
--  - X identifiers are non-unique
--  - Y identifiers are unique within each set of X identifiers, but non-unique overall
--  - When combining, the minimum possible X identifier is used
--  - An X identifier will not span more than a single date

-- Make the script re-runnable in the same session: drop any #tmpA left over
-- from a previous run before creating it again.
IF OBJECT_ID('tempdb..#tmpA') IS NOT NULL
    DROP TABLE #tmpA

-- Working table for the consolidation.
--   Dt    : date the row belongs to
--   X     : group identifier; the cursor loop later overwrites it with newX
--   Y     : identifier that must be unique within a consolidated group
--   newX  : consolidated group identifier; 0 means "not assigned yet"
--   origX : the X value as loaded, kept so the original grouping stays visible
CREATE TABLE #tmpA (Dt DATETIME, X INT, Y INT, newX INT, origX INT)

-- sample data
-- Dates use the unseparated YYYYMMDD form, which SQL Server parses the same way
-- regardless of SET DATEFORMAT / SET LANGUAGE ('2012-06-01' is read as year-day-month
-- for DATETIME under a dmy setting).
-- Column order of every row below: Dt, X, Y, newX, origX
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 1, 2, 0, 1)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 1, 1, 0, 1)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 2, 5, 0, 2)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 3, 2, 0, 3)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 3, 3, 0, 3)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 4, 3, 0, 4)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 5, 5, 0, 5)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 6, 5, 0, 6)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 7, 2, 0, 7)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 7, 1, 0, 7)       -- causes a debug 4

INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 1, 2, 0, 1)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 1, 1, 0, 1)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 2, 5, 0, 2)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 3, 2, 0, 3)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 3, 3, 0, 3)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 4, 3, 0, 4)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 5, 5, 0, 5)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 6, 5, 0, 6)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 7, 7, 0, 7)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 7, 1, 0, 7)       -- causes a debug 3 if below not used
--INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 7, 6, 0, 7)     -- causes a debug 8 if above not used
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 7, 5, 0, 7)

-- Scalar state shared by the cursor walk over #tmpA.
DECLARE
    @Dt         DATETIME,   -- Dt of the row just fetched
    @X          INT,        -- X of the row just fetched
    @Y          INT,        -- Y of the row just fetched
    @newX       INT,        -- newX of the row just fetched (0 = not assigned yet)
    @tDt        DATETIME = CAST('1900-01-01' AS DATETIME),  -- date being processed; starts at a sentinel
    @tX         INT,        -- X of the group being processed, used to detect a change in X
    @min_X      INT,        -- scratch: minimum X (or newX) found by the lookups in the loop
    @min_newX   INT         -- scratch: minimum candidate group found by the lookups in the loop

-- Walk the rows date by date and, within a date, group by group.
DECLARE CursorA CURSOR FOR
    SELECT Dt, X, Y, newX
    FROM #tmpA
    ORDER BY Dt, X

OPEN CursorA

FETCH NEXT FROM CursorA
    INTO @Dt, @X, @Y, @newX

-- Prime the group tracker with the first row's X so the first iteration
-- does not register as a change in X.
SET @tX = @X

-- Main pass: visit every row of CursorA (ordered by Dt, X) and decide the newX
-- of the current row. Each branch below ends by fetching the next row; the
-- numbered "select N as debug" statements emit a trace row identifying which
-- branch handled the current row.
WHILE (@@FETCH_STATUS = 0)
BEGIN

-- a change in X?
IF (@tX != @X)
BEGIN
    -- change in X, update all prior X to their newX (which should all be the same)
    -- NOTE(review): the change is detected on X alone. If the last X of one date
    -- equals the first X of the next date, this flush is skipped for the earlier
    -- date's group -- confirm the input cannot contain that.
    UPDATE #tmpA SET X = newX WHERE Dt = @tDt AND X = @tX
select 1 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
    SET @tX = @X
END

-- newX = 0 is the "not assigned yet" marker written by the sample inserts
IF (@newX != 0)
BEGIN
    -- newX for this X and Y already assigned, move on
    FETCH NEXT FROM CursorA INTO @Dt, @X, @Y, @newX
    CONTINUE
END

IF (@Dt != @tDt)
BEGIN
    -- date change
    SET @tDt = @Dt

    -- all for this first X are simply the same identifier
    UPDATE #tmpA SET newX = @X WHERE Dt = @Dt AND X = @X
select 2 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
    FETCH NEXT FROM CursorA INTO @Dt, @X, @Y, @newX
    CONTINUE
END

-- still on same date
-- is there any duplicate Y already assigned a newX?
-- (MIN over zero rows yields NULL, so @min_X is NULL when there is none)
SELECT @min_X = MIN(X) FROM #tmpA
    WHERE Dt = @Dt AND X != @X AND Y = @Y AND newX != 0

IF @min_X IS NOT NULL
BEGIN

    -- there is a Y duplicate within this date
    -- find the earliest X which does not have a duplicate Y:
    -- the smallest other X with assigned rows, excluding every X that already holds this Y
    SELECT @min_newX = MIN(X) FROM #tmpA
        WHERE Dt = @Dt AND X != @X AND Y != @Y AND newX != 0
        AND X NOT IN (SELECT X FROM #tmpA
            WHERE Dt = @Dt AND X != @X AND Y = @Y AND newX != 0)

    IF @min_newX IS NOT NULL
    BEGIN

        -- is there an "earlier" X already assigned a newX?
        -- (@min_X is reused here: it now holds the smallest newX already given to rows of this X)
        SELECT @min_X = MIN(newX) FROM #tmpA
            WHERE Dt = @Dt AND X = @X AND newX !=0

        IF @min_X IS NOT NULL
        BEGIN

            -- there is another X already assigned a newX
            IF @min_newX >= @min_X
            BEGIN
                -- set the other one to this one:
                -- move this X's rows that carried @min_X, and the current row, to @min_newX
                UPDATE #tmpA SET newX = @min_newX
                    WHERE Dt = @Dt AND X = @X AND newX = @min_X
                UPDATE #tmpA SET newX = @min_newX
                    WHERE Dt = @Dt AND X = @X AND Y = @Y
select 3 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
            END
            ELSE
            BEGIN
                -- the newX already used by this X is larger than the candidate; only the
                -- current row is updated, and it takes that existing newX
                UPDATE #tmpA SET newX = @min_X
                    WHERE Dt = @Dt AND X = @X AND Y = @Y
select 4 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
            END

            FETCH NEXT FROM CursorA INTO @Dt, @X, @Y, @newX
            CONTINUE
        END

        -- no row of this X has a newX yet: give the current row the candidate group
        UPDATE #tmpA SET newX = @min_newX
            WHERE Dt = @Dt AND X = @X AND Y = @Y
select 5 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
        FETCH NEXT FROM CursorA INTO @Dt, @X, @Y, @newX
        CONTINUE

    END

    -- no other X without a duplicate Y already assigned a newX so assign this entire X set to itself
    UPDATE #tmpA SET newX = @X WHERE Dt = @Dt AND X = @X
select 6 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
    FETCH NEXT FROM CursorA INTO @Dt, @X, @Y, @newX
    CONTINUE
END

-- no other Y but it's possible that another newX for this X is set to something different
SELECT @min_newX = MIN(newX) FROM #tmpA
    WHERE Dt = @Dt AND X = @X AND Y != @Y AND newX != 0

-- also find the min X among the other groups' assigned rows that carry a different Y
SELECT @min_X = MIN(X) FROM #tmpA
    WHERE Dt = @Dt AND X != @X AND Y != @Y AND newX != 0

IF @min_newX IS NULL
BEGIN
    -- no other Y for this X is assigned so set it to the minimum X already found
    UPDATE #tmpA SET newX = @min_X
        WHERE Dt = @Dt AND X = @X AND Y = @Y
select 7 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
    FETCH NEXT FROM CursorA INTO @Dt, @X, @Y, @newX
    CONTINUE
END

-- there is another of the same X with a newX
-- (the "@min_newX IS NULL" test is redundant here: that case already continued above)
IF (@min_X = @min_newX OR @min_X > @min_newX OR @min_newX IS NULL)
BEGIN
    -- there is a different Y for this X which has already been assigned the same newX as this one should be
    -- or a later one was found
    UPDATE #tmpA SET newX = @min_X
        WHERE Dt = @Dt AND X = @X AND Y = @Y
select 8 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
    FETCH NEXT FROM CursorA INTO @Dt, @X, @Y, @newX
    CONTINUE
END

-- remaining case: @min_X is smaller than @min_newX (or NULL); the current row
-- takes the newX already used by another row of this X
UPDATE #tmpA SET newX = @min_newX
    WHERE Dt = @Dt AND X = @X AND Y = @Y
select 9 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
FETCH NEXT FROM CursorA INTO @Dt, @X, @Y, @newX

END

-- The loop rewrites a group's X only when the following group begins, so the
-- last group processed still has to be flushed once the cursor is exhausted.
UPDATE #tmpA
SET X = newX
WHERE Dt = @Dt
  AND X = @tX

-- The cursor is no longer needed; release it before reporting.
CLOSE CursorA
DEALLOCATE CursorA

-- Report every column of the working table.
SELECT Dt, X, Y, newX, origX
FROM #tmpA
--  ORDER BY Dt, X, Y

RETURN

出力は次のようになります...

Dt                      X   Y   newX    origX
2012-06-01 00:00:00.000 1   2   1   1
2012-06-01 00:00:00.000 1   1   1   1
2012-06-01 00:00:00.000 1   5   1   2
2012-06-01 00:00:00.000 3   2   3   3
2012-06-01 00:00:00.000 3   3   3   3
2012-06-01 00:00:00.000 1   3   1   4
2012-06-01 00:00:00.000 3   5   3   5
2012-06-01 00:00:00.000 6   5   6   6
2012-06-01 00:00:00.000 6   2   6   7
2012-06-01 00:00:00.000 6   1   6   7
2012-06-02 00:00:00.000 1   2   1   1
2012-06-02 00:00:00.000 1   1   1   1
2012-06-02 00:00:00.000 1   5   1   2
2012-06-02 00:00:00.000 3   2   3   3
2012-06-02 00:00:00.000 3   3   3   3
2012-06-02 00:00:00.000 1   3   1   4
2012-06-02 00:00:00.000 3   5   3   5
2012-06-02 00:00:00.000 6   5   6   6
2012-06-02 00:00:00.000 7   7   7   7
2012-06-02 00:00:00.000 7   1   7   7
2012-06-02 00:00:00.000 7   5   7   7
4

回答 1 件

0

お求めのものは、次のようなクエリだと思います。一時テーブルへの INSERT 文の後に貼り付けてください。

-- Number the rows inside each X group and expose that number as newX.
-- Column names are written exactly as declared on #tmpA (X, not x) so the query
-- also resolves when tempdb uses a case-sensitive collation.
-- The window orders by the partition column itself, so every row in a group
-- ties and the numbering within a group is arbitrary.
SELECT
    results.X,
    results.Y,
    results.origX,
    results.newX
FROM (
    SELECT
        X,
        Y,
        origX,
        ROW_NUMBER() OVER (PARTITION BY X ORDER BY X) AS newX
    FROM #tmpA
) AS results
回答日時: 2012-07-16T17:20:56.607