sql-server - 島とギャップ tsql

Question

実際にはとても単純なはずの問題に苦戦しています。1 週間かけて資料を読み、検索し、実験を重ねましたが、同僚も私も適切な解決策を見つけられていません。 :(

問題: 2 つの列を持つテーブルがあります。

- 従業員番号 (P_ID, int) <--- 従業員の ID
- 日付 (starttime, datetime) <--- 従業員がチェックインした時刻

各従業員がどの期間働いているかを知る必要があります。
2 つの日付の間隔が @gap 日未満の場合、それらは同じ期間に属します
従業員ごとに、特定の日に複数のレコードが存在する可能性がありますが、彼が働いた日付を知る必要があるだけで、時間の部分には興味がありません
ギャップが @gap 日を超えるとすぐに、次の日付が新しい範囲の開始と見なされます
範囲は少なくとも 1 日 (例: 21-9-2011 | 21-09-2011) ですが、最大長はありません。(従業員が @gap - 1 日ごとにチェックインすると、最初にチェックインした日から今日までの期間になります)

私たちが必要だと考えているのは、この表で日数のギャップが @variable より大きい島です (@gap = 30 は 30 日を意味します)。

例：

ソーステーブル:

P_ID  | starttime
------|------------------
12121 | 24-03-2009 7:30
12121 | 24-03-2009 14:25 
12345 | 27-06-2011 10:00
99999 | 01-05-2012 4:50 
12345 | 27-06-2011 10:30
12345 | 28-06-2011 11:00
98765 | 13-04-2012 10:00
12345 | 21-07-2011 9:00
99999 | 03-05-2012 23:15
12345 | 21-09-2011 12:00
45454 | 12-07-2010 8:00
12345 | 21-09-2011 17:00
99999 | 06-05-2012 11:05
99999 | 20-05-2012 12:45
98765 | 26-04-2012 16:00
12345 | 07-07-2012 14:00
99999 | 01-06-2012 13:55
12345 | 13-08-2012 13:00

結果として必要なのは次のとおりです。

期間:

P_ID  |   Start    |    End
-------------------------------
12121 | 24-03-2009 | 24-03-2009
12345 | 27-06-2011 | 21-07-2011
12345 | 21-09-2011 | 21-09-2011
12345 | 07-07-2012 | (today) OR 13-08-2012  <-- (less than @gap days ago) OR (last date in table)
45454 | 12-07-2010 | 12-07-2010
45454 | 17-06-2012 | 17-06-2012 
98765 | 13-04-2012 | 26-04-2012
99999 | 01-05-2012 | 01-06-2012

これがこのように明確であることを願っています。ここまで読んでくれてありがとうございます。貢献していただければ幸いです。

score 1 · Accepted Answer

とりあえず出発点となる大まかなスクリプトを作成しました。日時の調整までは行っておらず、端点の比較には微調整が必要になるかもしれません。

-- Rough first pass: returns every check-in that has no earlier check-in by the
-- same employee inside the @gap-day window before it, together with an end time.
-- The last two output columns are the raw OUTER APPLY results (debugging aid).
SELECT
    src.P_ID,
    src.starttime,
    -- Use the latest check-in found by "lst" unless it is this very row and it
    -- is not older than @gap days; in that case report the current time.
    CASE
        WHEN src.starttime <> lst.starttime
          OR lst.starttime < DATEADD(DAY, -1 * @gap, GETDATE())
            THEN lst.starttime
        ELSE GETDATE()
    END AS endtime,
    frst.starttime,
    lst.starttime
FROM @SOURCETABLE AS src
-- Earliest check-in of the same employee later than (src.starttime - @gap days).
OUTER APPLY (
    SELECT MIN(early.starttime) AS starttime
    FROM @SOURCETABLE AS early
    WHERE early.p_id = src.p_id
      AND early.starttime > DATEADD(DAY, -1 * @gap, src.starttime)
) AS frst
-- Latest check-in of the same employee earlier than (src.starttime + @gap days).
OUTER APPLY (
    SELECT MAX(late.starttime) AS starttime
    FROM @SOURCETABLE AS late
    WHERE late.p_id = src.p_id
      AND src.starttime > DATEADD(DAY, -1 * @gap, late.starttime)
) AS lst
WHERE src.starttime = frst.starttime
ORDER BY src.P_ID, src.starttime;

次の出力が得られます。あなたの期待する結果とは少し異なりますが、問題ないと思います。

P_ID        starttime               endtime                 starttime               starttime
----------- ----------------------- ----------------------- ----------------------- -----------------------
12121       2009-03-24 07:30:00.000 2009-03-24 14:25:00.000 2009-03-24 07:30:00.000 2009-03-24 14:25:00.000
12345       2011-06-27 10:00:00.000 2011-07-21 09:00:00.000 2011-06-27 10:00:00.000 2011-07-21 09:00:00.000
12345       2011-09-21 12:00:00.000 2011-09-21 17:00:00.000 2011-09-21 12:00:00.000 2011-09-21 17:00:00.000
12345       2012-07-07 14:00:00.000 2012-07-07 14:00:00.000 2012-07-07 14:00:00.000 2012-07-07 14:00:00.000
12345       2012-08-13 13:00:00.000 2012-08-16 11:23:25.787 2012-08-13 13:00:00.000 2012-08-13 13:00:00.000
45454       2010-07-12 08:00:00.000 2010-07-12 08:00:00.000 2010-07-12 08:00:00.000 2010-07-12 08:00:00.000
98765       2012-04-13 10:00:00.000 2012-04-26 16:00:00.000 2012-04-13 10:00:00.000 2012-04-26 16:00:00.000

最後の 2 つの出力列はouter applyセクションの結果であり、デバッグ用です。

これは、次の設定に基づいています。

-- Test fixture: gap threshold (in days) and the sample check-in rows.
-- The date strings below are day-month-year, so DATEFORMAT must be dmy
-- before they are converted to datetime.
SET DATEFORMAT dmy;

DECLARE @gap int;
SET @gap = 30;

DECLARE @SOURCETABLE TABLE (P_ID int, starttime datetime);

INSERT INTO @SOURCETABLE (P_ID, starttime)
VALUES
    (12121, '24-03-2009 7:30'),
    (12121, '24-03-2009 14:25'),
    (12345, '27-06-2011 10:00'),
    (12345, '27-06-2011 10:30'),
    (12345, '28-06-2011 11:00'),
    (98765, '13-04-2012 10:00'),
    (12345, '21-07-2011 9:00'),
    (12345, '21-09-2011 12:00'),
    (45454, '12-07-2010 8:00'),
    (12345, '21-09-2011 17:00'),
    (98765, '26-04-2012 16:00'),
    (12345, '07-07-2012 14:00'),
    (12345, '13-08-2012 13:00');

更新: 少し考え直しました。CTE を使用して、各レコードから次および前のレコードまでのギャップを計算し、それらを集計します。

-- Collapse each employee's check-ins into islands (periods of activity).
--
-- CTE_Gaps: for every check-in, the day distance to the same employee's next
-- and previous check-in. A missing neighbour is represented by NULL rather
-- than a magic number, so the result stays correct for any value of @gap
-- (a numeric sentinel such as 999 silently breaks once @gap reaches it).
-- prevgap and isold are not used by the final query; they are kept so that
-- "SELECT * FROM CTE_Gaps" remains useful when debugging.
;WITH CTE_Gaps AS (
    SELECT
        src.p_id,
        src.starttime,
        DATEDIFF(dd, src.starttime, nxt.starttime) AS nextgap, -- days to the next entry, NULL if none
        DATEDIFF(dd, prv.starttime, src.starttime) AS prevgap, -- days since the previous entry, NULL if none
        CASE
            WHEN DATEDIFF(dd, src.starttime, GETDATE()) > @gap THEN 1
            ELSE 0
        END AS isold -- 1 when this check-in is more than @gap days ago
    FROM @SOURCETABLE AS src
    -- An aggregate without GROUP BY always yields exactly one row, so
    -- CROSS APPLY never drops a source row here.
    CROSS APPLY (
        SELECT MIN(sub.starttime) AS starttime
        FROM @SOURCETABLE AS sub
        WHERE sub.p_id = src.p_id
          AND sub.starttime > src.starttime
    ) AS nxt
    CROSS APPLY (
        SELECT MAX(sub.starttime) AS starttime
        FROM @SOURCETABLE AS sub
        WHERE sub.p_id = src.p_id
          AND sub.starttime < src.starttime
    ) AS prv
)
SELECT
    gap.p_id,
    MIN(gap.starttime) AS starttime,
    nxt.starttime AS endtime
FROM CTE_Gaps AS gap
-- For every check-in, find the first check-in at or after it that closes an
-- island: either nothing follows it (nextgap IS NULL) or the next check-in
-- is more than @gap days away.
CROSS APPLY (
    SELECT MIN(sub.starttime) AS starttime
    FROM CTE_Gaps AS sub
    WHERE sub.p_id = gap.p_id
      AND sub.starttime >= gap.starttime
      AND (sub.nextgap IS NULL OR sub.nextgap > @gap)
) AS nxt
-- All check-ins that share the same closing check-in belong to one island.
GROUP BY gap.p_id, nxt.starttime
ORDER BY gap.p_id, nxt.starttime;

score 0 · Accepted Answer

ジョンは間違いなく正しい方向性を示してくれました。ただし、パフォーマンスはひどいものでした (データベースには 400 万件以上のレコードがあります)。また、いくつかの情報が不足しているようでした。皆さんから学んだことをもとに、以下の解決策にたどり着きました。提案されたすべての回答の要素を取り入れており、最終的な結果を出力するまでに 3 つの一時テーブルを経由しますが、パフォーマンスも生成されるデータも十分なものです。

-- Script parameters.
-- @gap is the gap threshold in days; @Employee_id is declared but not
-- referenced anywhere in the visible part of this script.
DECLARE @gap int
DECLARE @Employee_id int

SET @gap = 30
SET DATEFORMAT dmy
--------------------------------------------------------------- #temp1 --------------------------------------------------
-- #temp1: one row per employee per calendar day with at least one check-in
-- (the time part is dropped by the CAST, duplicates removed by DISTINCT),
-- restricted to check-ins whose team has productive = 1.
CREATE TABLE #temp1 (EmployeeID int, starttime date)

INSERT INTO #temp1 (EmployeeID, starttime)
SELECT DISTINCT
    ck.Employee_id,
    CAST(ck.starttime AS date)
-- The alias must be "ck": every column reference in this statement uses it.
FROM SERVER1.DB1.dbo.checkins AS ck
INNER JOIN SERVER1.DB1.dbo.Team AS t
    ON ck.team_id = t.id
WHERE t.productive = 1

--------------------------------------------------------------- #temp2 --------------------------------------------------
-- #temp2: one numbered row per period boundary for each employee.
--   ROWNR = 1    : ENDOFCHECKIN is NULL, FIRSTCHECKIN is the employee's first check-in day.
--   ROWNR = 2..n : one row per gap longer than @gap days, ordered by date;
--                  ENDOFCHECKIN is the day after the last check-in before the gap,
--                  FIRSTCHECKIN is the first check-in day after the gap.
-- The threshold comes from @gap (declared at the top of the script) instead
-- of a hard-coded 30, so changing the parameter actually changes the result.
CREATE TABLE #temp2 (ROWNR int, Employeeid int, ENDOFCHECKIN datetime, FIRSTCHECKIN datetime)

INSERT INTO #temp2 (ROWNR, Employeeid, ENDOFCHECKIN, FIRSTCHECKIN)
SELECT
    ROW_NUMBER() OVER (PARTITION BY t.EmployeeID ORDER BY t.prev_day) + 1 AS ROWNR,
    t.EmployeeID,
    DATEADD(DAY, 1, t.prev_day) AS ENDOFCHECKIN,
    t.next_day AS FIRSTCHECKIN
FROM (
    -- Pair every check-in day with the same employee's next check-in day.
    SELECT
        a.EmployeeID,
        a.starttime AS prev_day,
        (
            SELECT MIN(b.starttime)
            FROM #temp1 AS b
            WHERE b.starttime > a.starttime
              AND b.EmployeeID = a.EmployeeID
        ) AS next_day
    FROM #temp1 AS a
) AS t
-- A NULL next_day (last check-in of an employee) yields NULL here and is dropped.
WHERE DATEDIFF(DAY, t.prev_day, t.next_day) > @gap
GROUP BY
    t.EmployeeID,
    t.prev_day,
    t.next_day

UNION -- add first known date for Employee

SELECT
    1 AS ROWNR,
    ct.EmployeeID,
    NULL,
    MIN(ct.starttime)
FROM #temp1 AS ct
GROUP BY ct.EmployeeID

--------------------------------------------------------------- #temp3 --------------------------------------------------
-- #temp3: every row of #temp2, plus one closing row per employee numbered
-- (number of that employee's #temp2 rows + 1) whose ENDOFCHECKIN is the day
-- after the employee's last check-in day in #temp1 and whose STARTOFCHECKIN
-- is NULL.
CREATE TABLE #temp3 (ROWNR int, Employeeid int, ENDOFCHECKIN datetime, STARTOFCHECKIN datetime)

INSERT INTO #temp3 (ROWNR, Employeeid, ENDOFCHECKIN, STARTOFCHECKIN)
SELECT
    boundary.ROWNR,
    boundary.Employeeid,
    boundary.ENDOFCHECKIN,
    boundary.FIRSTCHECKIN
FROM #temp2 AS boundary

UNION -- add last known date for Employee

SELECT
    (
        SELECT COUNT(*)
        FROM #temp2 AS b
        WHERE b.Employeeid = ct.Employeeid
    ) + 1 AS ROWNR,
    ct.Employeeid,
    (
        SELECT DATEADD(DAY, 1, MAX(c.starttime))
        FROM #temp1 AS c
        WHERE c.EmployeeID = ct.Employeeid
    ),
    NULL
FROM #temp2 AS ct
GROUP BY ct.Employeeid

---------------------------------------finally check our data-------------------------------------------------
-- Final report: one row per employee period.
-- Each #temp3 row (a1) is paired with the same employee's next-numbered row
-- (b1): a1 supplies the period start, b1 normally supplies the period end.
-- When b1.ENDOFCHECKIN is not after a1.STARTOFCHECKIN, a1.ENDOFCHECKIN is used
-- instead; every *ENDOFCHECKIN column applies that same rule.
-- Column prefixes are Dutch: Jaar = year, Maand = month, JaarMaand = year*100 + month.
SELECT
    a1.Employeeid,
    a1.STARTOFCHECKIN AS STARTOFCHECKIN,
    ENDOFCHECKIN = CASE
        WHEN b1.ENDOFCHECKIN <= a1.STARTOFCHECKIN THEN a1.ENDOFCHECKIN
        ELSE b1.ENDOFCHECKIN
    END,
    YEAR(a1.STARTOFCHECKIN) AS JaarSTARTOFCHECKIN,
    JaarENDOFCHECKIN = CASE
        WHEN b1.ENDOFCHECKIN <= a1.STARTOFCHECKIN THEN YEAR(a1.ENDOFCHECKIN)
        ELSE YEAR(b1.ENDOFCHECKIN)
    END,
    MONTH(a1.STARTOFCHECKIN) AS MaandSTARTOFCHECKIN,
    MaandENDOFCHECKIN = CASE
        WHEN b1.ENDOFCHECKIN <= a1.STARTOFCHECKIN THEN MONTH(a1.ENDOFCHECKIN)
        ELSE MONTH(b1.ENDOFCHECKIN)
    END,
    (YEAR(a1.STARTOFCHECKIN) * 100) + MONTH(a1.STARTOFCHECKIN) AS JaarMaandSTARTOFCHECKIN,
    -- Year and month must both come from the same end date; taking the month
    -- from STARTOFCHECKIN here would produce a year/month that matches neither date.
    JaarMaandENDOFCHECKIN = CASE
        WHEN b1.ENDOFCHECKIN <= a1.STARTOFCHECKIN THEN (YEAR(a1.ENDOFCHECKIN) * 100) + MONTH(a1.ENDOFCHECKIN)
        ELSE (YEAR(b1.ENDOFCHECKIN) * 100) + MONTH(b1.ENDOFCHECKIN)
    END,
    DATEDIFF(M, a1.STARTOFCHECKIN, b1.ENDOFCHECKIN) AS MONTHSCHECKEDIN
FROM #temp3 AS a1
FULL OUTER JOIN #temp3 AS b1
    ON a1.ROWNR = b1.ROWNR - 1
   AND a1.Employeeid = b1.Employeeid
-- Drop the unmatched halves of the full join (no start and no end).
WHERE NOT (a1.STARTOFCHECKIN IS NULL AND b1.ENDOFCHECKIN IS NULL)
ORDER BY a1.Employeeid, a1.STARTOFCHECKIN

sql-server - 島とギャップ tsql

回答 2 件

Related

Reference