私は一連の日付間隔で作業しています。各間隔にはバージョン番号があり、新しい間隔は古い間隔と頻繁に重複するか、それらのサブセットになります。このデータから、各時点での最新のバージョン番号を示す新しい間隔のセットを計算する必要があります。この問題に対するセットベースの解決策はありますか?
これがイラストです:
Interval 1: 11111111111111111111111
Interval 2: 2222222222
Interval 3: 33333333333333
Interval 4: 444444444
Interval 5: 555555555
Result : 11333333333333331155555555544
これが私が扱っているデータのサンプルです:
groupId startDate endDate version
-------- --------- ---------- ------
1 1/1/2010 1/1/2011 1
1 10/1/2010 7/5/2011 2
1 7/5/2011 8/13/2012 3
1 8/13/2012 12/31/2012 6
1 10/1/2012 11/1/2012 8
...および目的の出力:
groupId startDate endDate version
-------- --------- ---------- ------
1 1/1/2010 10/1/2010 1
1 10/1/2010 7/5/2011 2
1 7/5/2011 8/13/2012 3
1 8/13/2012 10/1/2012 6
1 10/1/2012 11/1/2012 8 << note how version 8 supersedes version 6
1 11/1/2012 12/31/2012 6 << version 6 is split into two records
この問題の他の例は見つかりませんでした。グーグルで検索すると、ギャップや島、またはカバーセットを識別するクエリのみが表示されます。
反復的な解決策はすでにあります(SQL Server 2008)。この方法では、まず結果セットの間隔を保持する一時テーブルを用意し、特別なバージョン番号を持つレコードを挿入して、カバーする範囲の開始点と終了点を定義します。次に、結果セットの間隔間のギャップを繰り返し特定し、ギャップがなくなるか、追加できるレコードがなくなるまで、元のデータセットの最新のレコードでギャップを埋めていきます。
GO
-- Build the sample input: one row per versioned date interval.
-- Within a group, a higher versionId is meant to supersede lower ones wherever
-- their date ranges overlap.
CREATE TABLE #Data (
    groupId    INT
    ,startDate DATE
    ,endDate   DATE
    ,versionId INT
)

-- All sample rows are loaded with a single table value constructor.
INSERT INTO #Data (groupId, startDate, endDate, versionId)
VALUES
    -- group 1: yearly intervals, one embedded override (500), and several
    -- later versions that share the same end date
     (1, '2007-12-22', '2008-12-22', 8)
    ,(1, '2008-12-22', '2009-12-22', 9)
    ,(1, '2009-12-22', '2010-12-22', 10)
    ,(1, '2010-12-22', '2011-12-22', 11)
    ,(1, '2011-01-01', '2011-11-30', 500)
    ,(1, '2011-12-22', '2012-12-22', 12)
    ,(1, '2012-01-22', '2012-12-22', 13)
    ,(1, '2012-01-22', '2012-12-22', 14)
    ,(1, '2012-04-22', '2012-12-22', 17)
    ,(1, '2012-04-22', '2012-12-22', 19)
    -- group 2: the five-row example from the question text
    ,(2, '2010-01-01', '2011-01-01', 1)
    ,(2, '2010-10-01', '2011-07-05', 2)
    ,(2, '2011-07-05', '2012-08-13', 3)
    ,(2, '2012-08-13', '2012-12-31', 6)
    ,(2, '2012-10-01', '2012-11-01', 8)
-- Results table: holds the flattened, non-overlapping intervals as they are
-- built up. groupId is INT so that it matches #Data.groupId (joins then need no
-- implicit conversion, and ORDER BY groupId sorts numerically). versionId is
-- BIGINT so it can also hold the placeholder id, which does not fit in an INT.
CREATE TABLE #Results (
    groupId    INT
    ,startDate DATE
    ,endDate   DATE
    ,versionId BIGINT
)

-- Bounds of the overall range to cover, and the sentinel version id that marks
-- the two boundary ("placeholder") rows seeded per group below.
DECLARE @startDate DATE
DECLARE @endDate DATE
DECLARE @placeholderId BIGINT
SET @startDate = '20030101'
SET @endDate = '20121231'
SET @placeholderId = 999999999999999

-- Seed two placeholder rows per group:
--   leading  row: from the earlier of (MIN(startDate), @startDate) to the later of the two
--   trailing row: from the earlier of (MAX(endDate), @endDate) to the later of the two
-- GROUP BY groupId already yields one row per group in each branch, so no
-- DISTINCT is needed.
INSERT INTO #Results (groupId, startDate, endDate, versionId)
SELECT
    groupId
    ,CASE WHEN MIN(startDate) < @startDate THEN MIN(startDate) ELSE @startDate END
    ,CASE WHEN MIN(startDate) < @startDate THEN @startDate ELSE MIN(startDate) END
    ,@placeholderId
FROM #Data
GROUP BY groupId
UNION ALL
SELECT
    groupId
    ,CASE WHEN MAX(endDate) < @endDate THEN MAX(endDate) ELSE @endDate END
    ,CASE WHEN MAX(endDate) < @endDate THEN @endDate ELSE MAX(endDate) END
    ,@placeholderId
FROM #Data
GROUP BY groupId
GO
-- Fill gaps in results table
--
-- Each pass of the loop:
--   1. finds the current gaps between #Results rows of a group (CTE Gaps),
--   2. picks, per gap, the highest-version #Data row that overlaps the gap,
--      has a lower version than the #Results rows bordering the gap, and is
--      not completely covered by a higher-version #Data row,
--   3. inserts that row into #Results, clipped to the gap boundaries.
-- The loop stops after 10 passes, or earlier once a pass inserts nothing.
--
-- Variables do not survive the preceding GO, so they are declared again here.
-- The bounds must match the values used when the placeholder rows were seeded;
-- otherwise the placeholder filters in Gaps can misclassify a leading
-- placeholder as a trailing one and leave that group unfilled.
DECLARE @startDate DATE
DECLARE @endDate DATE
DECLARE @placeholderId BIGINT
SET @startDate = '20030101'
SET @endDate = '20121231'
SET @placeholderId = 999999999999999
DECLARE @counter INT
SET @counter = 0
-- NOTE(review): the cap of 10 passes is kept from the original; data that needs
-- more passes than that would be left partially filled -- confirm it is enough.
WHILE @counter < 10
BEGIN
    SET @counter = @counter + 1;
    WITH Gaps AS (
        -- One row per gap: a gap start paired with the nearest gap end at or
        -- after it (MIN), numbered across all groups by gapId.
        SELECT
            gs.groupId
            ,gs.startDate
            ,MIN(ge.endDate) as endDate
            ,ROW_NUMBER() OVER (ORDER BY gs.groupId, gs.startDate) as gapId
        FROM (
            -- Gap starts: end dates of #Results rows that no other-version row
            -- of the same group extends past. The trailing placeholder is
            -- excluded because nothing needs filling after it.
            SELECT groupId, endDate as startDate
            FROM #Results r1
            WHERE NOT EXISTS (
                    SELECT *
                    FROM #Results r2
                    WHERE r2.groupId = r1.groupId
                        AND r2.versionId <> r1.versionId
                        AND r2.startDate <= r1.endDate
                        AND r2.endDate > r1.endDate
                )
                AND NOT (endDate >= @endDate AND versionId = @placeholderId)
        ) gs
        INNER JOIN (
            -- Gap ends: start dates of #Results rows that no other-version row
            -- of the same group reaches from before. The leading placeholder is
            -- excluded because nothing needs filling before it.
            SELECT groupId, startDate as endDate
            FROM #Results r1
            WHERE NOT EXISTS (
                    SELECT *
                    FROM #Results r2
                    WHERE r2.groupId = r1.groupId
                        AND r2.versionId <> r1.versionId
                        AND r2.endDate >= r1.startDate
                        AND r2.startDate < r1.startDate
                )
                AND NOT (startDate <= @startDate AND versionId = @placeholderId)
        ) ge
            ON ge.groupId = gs.groupId
            AND ge.endDate >= gs.startDate
        GROUP BY gs.groupId, gs.startDate
    )
    INSERT #Results (
        groupId
        ,startDate
        ,endDate
        ,versionId
    )
    SELECT
        d.groupId
        -- clip the source interval to the gap it is filling
        ,CASE WHEN d.startDate < g.startDate THEN g.startDate ELSE d.startDate END
        ,CASE WHEN d.endDate > g.endDate THEN g.endDate ELSE d.endDate END
        ,d.versionId
    FROM #Data d
    INNER JOIN Gaps g
        ON g.groupId = d.groupId
        AND g.startDate <= d.endDate
        AND g.endDate >= d.startDate
    INNER JOIN (
        -- mg: per gap, the highest eligible versionId among overlapping #Data rows.
        SELECT
            d.groupId
            ,gapId
            ,MAX(d.versionId) as versionId
        FROM #Data d
        INNER JOIN Gaps g
            ON g.groupId = d.groupId
            AND g.startDate <= d.endDate
            AND g.endDate >= d.startDate
        -- eligible only if its version is below the lowest version of the
        -- #Results rows that touch either edge of the gap
        WHERE d.versionId < (
                SELECT MIN(versionId)
                FROM #Results r
                WHERE r.groupId = d.groupId
                    AND (r.startDate = g.endDate OR r.endDate = g.startDate)
            )
            -- and only if no higher-version #Data row covers it completely
            AND NOT EXISTS (
                SELECT *
                FROM #Data dsup
                WHERE dsup.groupId = d.groupId
                    AND dsup.versionId > d.versionId
                    AND dsup.startDate <= d.startDate
                    AND dsup.endDate >= d.endDate
            )
        GROUP BY
            d.groupId
            ,g.gapId
    ) mg
        ON mg.groupId = g.groupId
        AND mg.gapId = g.gapId
        AND mg.versionId = d.versionId;

    -- Nothing inserted means #Results is unchanged, so every further pass
    -- would compute the same gaps and insert nothing again: stop early.
    IF @@ROWCOUNT = 0
        BREAK
END

-- Final output: the flattened intervals, without the placeholder rows.
SELECT
    groupId
    ,startDate
    ,endDate
    ,versionId
FROM #Results
WHERE versionId <> @placeholderId
ORDER BY groupId, startDate
セットベースのソリューションの方がはるかに便利ですが、私はそれを見つけるのに苦労しました。何か案は?