この問題には、認めたくないほど多くの時間を費やしてきました。カーソルを使った解決策(以下)はありますが、他の方法でも実現できるのでしょうか?SQLには配列のような一般的なデータ構造がないため、私にとっては特に難しい問題でした。
再帰を使うのに適した問題のようにも思えますが、その方法は思いつきませんでした。プラットフォームはMSSQL 2008(T-SQL)です。
一意でない識別子の列が2つと、日付の列が1つあるテーブルを考えます。日付ごとに、一方の列(X)の識別子を、もう一方の列(Y)の識別子が重複しない集合へ統合(グループ化)します。
- X識別子は一意ではありません
- Y識別子はX識別子の各セット内で一意ですが、全体的に一意ではありません
- 組み合わせる場合、可能な限り最小のX識別子が使用されます
- X識別子は複数の日付にまたがることはありません
まずはサンプルデータを見るのが一番分かりやすいでしょう。以下の解決策には、より多くのサンプルデータも含まれています。実際にこれを使用している環境では、行数は通常200行未満で、ほとんどの場合100行未満です。
Dt X Y newX
6/1/2012 1 1 1
6/1/2012 1 2 1
6/1/2012 2 3 1
6/1/2012 3 1 3 <-- because Y=1 is already in X=1
6/1/2012 3 4 3
6/1/2012 4 5 1
6/1/2012 5 4 1 <-- Y=4 is in X=3 but not X=1
6/1/2012 5 6 1
6/1/2012 6 4 6 <-- Y=4 is in X=1 and X=3
6/1/2012 6 7 6
ソリューション...
-- Task: consolidate sets of X identifiers into groups whose Y identifiers are
-- unique, separately for each date.
-- - X identifiers are non-unique
-- - Y identifiers are unique within each set of X identifiers, but non-unique overall
-- - When combining, the minimum possible X identifier is used
-- - An X identifier will not span more than a single date

-- Drop any leftover copy first so the script can be re-run in the same session
-- without failing on CREATE TABLE.
IF OBJECT_ID('tempdb..#tmpA') IS NOT NULL
    DROP TABLE #tmpA

-- Working table:
--   Dt    date of the row
--   X     identifier to be consolidated
--   Y     identifier that must stay unique within a consolidated group
--   newX  consolidated identifier; every sample row starts at 0
--   origX copy of the X value the row was inserted with
CREATE TABLE #tmpA (Dt DATETIME, X INT, Y INT, newX INT, origX INT)

-- Sample data.
-- Dates use the unseparated 'YYYYMMDD' form, which DATETIME parses the same way
-- regardless of the session's DATEFORMAT / language settings.
-- Columns are listed explicitly so the inserts do not depend on column order.
-- "debug N" notes refer to the numbered trace SELECTs in the cursor loop that follows.
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 1, 2, 0, 1)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 1, 1, 0, 1)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 2, 5, 0, 2)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 3, 2, 0, 3)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 3, 3, 0, 3)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 4, 3, 0, 4)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 5, 5, 0, 5)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 6, 5, 0, 6)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 7, 2, 0, 7)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 7, 1, 0, 7) -- causes a debug 4
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 1, 2, 0, 1)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 1, 1, 0, 1)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 2, 5, 0, 2)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 3, 2, 0, 3)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 3, 3, 0, 3)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 4, 3, 0, 4)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 5, 5, 0, 5)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 6, 5, 0, 6)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 7, 7, 0, 7)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 7, 1, 0, 7) -- causes a debug 3 if below not used
-- Alternative test row (swap with the row above to exercise the debug 8 branch):
--INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 7, 6, 0, 7) -- causes a debug 8 if above not used
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 7, 5, 0, 7)
-- Consolidation pass over #tmpA.
-- Walks the rows in (Dt, X) order with a cursor, assigns each row a newX, and
-- each time the X value changes rewrites X = newX for the X set just finished.
-- Every branch emits a numbered "debug" SELECT so the path taken per row can be traced.
DECLARE @X INT
DECLARE @tX INT -- X of the set currently being processed (used to detect a change in X)
DECLARE @Y INT
DECLARE @Dt DATETIME
DECLARE @tDt DATETIME = CAST('1900-01-01' AS DATETIME) -- date currently being processed; starts at a sentinel so the first row takes the date-change branch
DECLARE @newX INT
DECLARE @min_X INT -- scratch: reused for several different MIN() lookups below
DECLARE @min_newX INT -- scratch: candidate newX found by the MIN() lookups below
-- NOTE(review): rows with the same (Dt, X) have no defined relative order here,
-- because Y is not part of the ORDER BY.
DECLARE CursorA CURSOR FOR SELECT Dt, X, Y, newX FROM #tmpA
ORDER BY Dt, X
OPEN CursorA
FETCH NEXT FROM CursorA INTO @Dt, @X, @Y, @newX
SET @tX = @X -- initialize for change in X detection
WHILE (@@FETCH_STATUS = 0)
BEGIN
-- a change in X?
-- NOTE(review): only X is compared, so if the last X of one date equals the
-- first X of the next date no change is detected here — confirm whether that can occur.
IF (@tX != @X)
BEGIN
-- change in X, update all prior X to their newX (which should all be the same)
-- @tDt still holds the previous set's date at this point: it is only advanced
-- in the date-change branch further down.
UPDATE #tmpA SET X = newX WHERE Dt = @tDt AND X = @tX
select 1 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
SET @tX = @X
END
-- NOTE(review): @newX is the value delivered by the cursor; whether it reflects
-- UPDATEs made earlier in this loop depends on the cursor type SQL Server
-- chooses for this declaration — confirm.
IF (@newX != 0)
BEGIN
-- newX for this X and Y already assigned, move on
FETCH NEXT FROM CursorA INTO @Dt, @X, @Y, @newX
CONTINUE
END
IF (@Dt != @tDt)
BEGIN
-- date change
SET @tDt = @Dt
-- all for this first X are simply the same identifier
UPDATE #tmpA SET newX = @X WHERE Dt = @Dt AND X = @X
select 2 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
FETCH NEXT FROM CursorA INTO @Dt, @X, @Y, @newX
CONTINUE
END
-- still on same date
-- is there any duplicate Y already assigned a newX?
-- (smallest other X on this date that has the same Y and a non-zero newX; NULL if none)
SELECT @min_X = MIN(X) FROM #tmpA
WHERE Dt = @Dt AND X != @X AND Y = @Y AND newX != 0
IF @min_X IS NOT NULL
BEGIN
-- there is a Y duplicate within this date
-- find the earliest X which does not have a duplicate Y
-- (smallest other, already-assigned X on this date none of whose assigned rows carries this Y)
SELECT @min_newX = MIN(X) FROM #tmpA
WHERE Dt = @Dt AND X != @X AND Y != @Y AND newX != 0
AND X NOT IN (SELECT X FROM #tmpA
WHERE Dt = @Dt AND X != @X AND Y = @Y AND newX != 0)
IF @min_newX IS NOT NULL
BEGIN
-- is there an "earlier" X already assigned a newX?
-- (@min_X is reused: now the smallest newX already given to any row of this same X)
SELECT @min_X = MIN(newX) FROM #tmpA
WHERE Dt = @Dt AND X = @X AND newX !=0
IF @min_X IS NOT NULL
BEGIN
-- there is another X already assigned a newX
IF @min_newX >= @min_X
BEGIN
-- set the other one to this one
-- (rows of this X that held @min_X are moved to @min_newX, then this row as well)
UPDATE #tmpA SET newX = @min_newX
WHERE Dt = @Dt AND X = @X AND newX = @min_X
UPDATE #tmpA SET newX = @min_newX
WHERE Dt = @Dt AND X = @X AND Y = @Y
select 3 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
END
ELSE
BEGIN
-- the newX already used by this X is larger than the candidate: this row takes @min_X
UPDATE #tmpA SET newX = @min_X
WHERE Dt = @Dt AND X = @X AND Y = @Y
select 4 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
END
FETCH NEXT FROM CursorA INTO @Dt, @X, @Y, @newX
CONTINUE
END
-- no row of this X has a newX yet: this row takes the candidate found above
UPDATE #tmpA SET newX = @min_newX
WHERE Dt = @Dt AND X = @X AND Y = @Y
select 5 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
FETCH NEXT FROM CursorA INTO @Dt, @X, @Y, @newX
CONTINUE
END
-- no other X without a duplicate Y already assigned a newX so assign this entire X set to itself
UPDATE #tmpA SET newX = @X WHERE Dt = @Dt AND X = @X
select 6 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
FETCH NEXT FROM CursorA INTO @Dt, @X, @Y, @newX
CONTINUE
END
-- no other Y but it's possible that another newX for this X is set to something different
-- (smallest newX already assigned to a different-Y row of this same X; NULL if none)
SELECT @min_newX = MIN(newX) FROM #tmpA
WHERE Dt = @Dt AND X = @X AND Y != @Y AND newX != 0
-- also find the smallest other, already-assigned X on this date (restricted to rows with a different Y)
SELECT @min_X = MIN(X) FROM #tmpA
WHERE Dt = @Dt AND X != @X AND Y != @Y AND newX != 0
IF @min_newX IS NULL
BEGIN
-- no other Y for this X is assigned so set it to the minimum X already found
UPDATE #tmpA SET newX = @min_X
WHERE Dt = @Dt AND X = @X AND Y = @Y
select 7 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
FETCH NEXT FROM CursorA INTO @Dt, @X, @Y, @newX
CONTINUE
END
-- there is another of the same X with a newX
-- (the "@min_newX IS NULL" term cannot be true here: that case was handled and CONTINUEd above)
IF (@min_X = @min_newX OR @min_X > @min_newX OR @min_newX IS NULL)
BEGIN
-- there is a different Y for this X which has already been assigned the same newX as this one should be
-- or a later one was found
UPDATE #tmpA SET newX = @min_X
WHERE Dt = @Dt AND X = @X AND Y = @Y
select 8 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
FETCH NEXT FROM CursorA INTO @Dt, @X, @Y, @newX
CONTINUE
END
-- otherwise keep the newX already used by this X's other rows
UPDATE #tmpA SET newX = @min_newX
WHERE Dt = @Dt AND X = @X AND Y = @Y
select 9 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
FETCH NEXT FROM CursorA INTO @Dt, @X, @Y, @newX
END
-- flush the final X set: it never sees a following change in X inside the loop
UPDATE #tmpA SET X = newX WHERE Dt = @Dt AND X = @tX
-- final result; there is no ORDER BY, so the row order of this SELECT is not specified
SELECT * FROM #tmpA
-- ORDER BY Dt, X, Y
CLOSE CursorA
DEALLOCATE CursorA
RETURN
出力は次のようになります...
Dt X Y newX origX
2012-06-01 00:00:00.000 1 2 1 1
2012-06-01 00:00:00.000 1 1 1 1
2012-06-01 00:00:00.000 1 5 1 2
2012-06-01 00:00:00.000 3 2 3 3
2012-06-01 00:00:00.000 3 3 3 3
2012-06-01 00:00:00.000 1 3 1 4
2012-06-01 00:00:00.000 3 5 3 5
2012-06-01 00:00:00.000 6 5 6 6
2012-06-01 00:00:00.000 6 2 6 7
2012-06-01 00:00:00.000 6 1 6 7
2012-06-02 00:00:00.000 1 2 1 1
2012-06-02 00:00:00.000 1 1 1 1
2012-06-02 00:00:00.000 1 5 1 2
2012-06-02 00:00:00.000 3 2 3 3
2012-06-02 00:00:00.000 3 3 3 3
2012-06-02 00:00:00.000 1 3 1 4
2012-06-02 00:00:00.000 3 5 3 5
2012-06-02 00:00:00.000 6 5 6 6
2012-06-02 00:00:00.000 7 7 7 7
2012-06-02 00:00:00.000 7 1 7 7
2012-06-02 00:00:00.000 7 5 7 7