sql - SQL: 一定期間にわたって計算された SUM 間の差異

Question

次のようなテーブルがあります。

-- Drink log: each row is one dated entry for a user, with a separate
-- counter column per kind of drink. All columns are mandatory.
CREATE TABLE foobar (
    id                 SERIAL,
    data_entry_date    DATE    NOT NULL,
    user_id            INTEGER NOT NULL,
    wine_glasses_drunk INTEGER NOT NULL,
    whisky_shots_drunk INTEGER NOT NULL,
    beer_bottle_drunk  INTEGER NOT NULL,
    PRIMARY KEY (id)
);

-- Sample data for user 1, 2011-01-01 through 2011-02-11.
-- The dates are deliberately non-contiguous (e.g. no rows for 2011-01-06,
-- 2011-01-09 or 2011-01-10). Rows are listed in date order so the
-- generated ids follow the same sequence as one INSERT per row would give.
insert into foobar
    (data_entry_date, user_id, wine_glasses_drunk, whisky_shots_drunk, beer_bottle_drunk)
values
    ('2011-01-01', 1, 1, 0, 1),
    ('2011-01-02', 1, 4, 0, 1),
    ('2011-01-03', 1, 0, 0, 1),
    ('2011-01-04', 1, 1, 0, 1),
    ('2011-01-05', 1, 2, 1, 1),
    ('2011-01-07', 1, 1, 2, 1),
    ('2011-01-08', 1, 4, 0, 1),
    ('2011-01-11', 1, 1, 1, 1),
    ('2011-01-12', 1, 1, 0, 1),
    ('2011-01-13', 1, 2, 0, 1),
    ('2011-01-14', 1, 1, 0, 1),
    ('2011-01-15', 1, 9, 3, 1),
    ('2011-01-16', 1, 0, 4, 2),
    ('2011-01-17', 1, 0, 5, 3),
    ('2011-01-18', 1, 2, 2, 5),
    ('2011-01-20', 1, 1, 1, 1),
    ('2011-01-23', 1, 1, 3, 1),
    ('2011-01-24', 1, 0, 0, 1),
    ('2011-02-01', 1, 1, 1, 1),
    ('2011-02-02', 1, 2, 3, 4),
    ('2011-02-05', 1, 1, 2, 2),
    ('2011-02-09', 1, 0, 0, 1),
    ('2011-02-10', 1, 1, 1, 1),
    ('2011-02-11', 1, 3, 6, 3);

特定の期間の wine_glasses_drunk、whisky_shots_drunk、および beer_bottle_drunk それぞれの合計 (TOTAL) について、前の期間の合計との差を表示するクエリを作成したいと考えています。

実際よりも複雑に聞こえるかもしれません。たとえば期間として 1 週間 (= 7 日) を使用する場合、クエリは、今週消費された合計と先週消費された合計との差を返す必要があります。

少し複雑なのは、表の日付が連続していないことです。つまり、欠落している日付があるため、クエリは期間計算の日付を決定するときに最も関連性の高い日付を見つける必要があります。

This is what I have so far:

-- using hard coded dates
-- Asker's first attempt: f1 holds the rows of the current window and f2 the
-- rows of the previous window, and the result is SUM(f1) - SUM(f2) per drink.
-- NOTE: the self-join pairs each f1 row with every f2 row of the same user
-- whose date is 6 to 9 days earlier. A row that takes part in several pairs
-- is added into its SUM once per pair, and a row with no partner is dropped.
-- NOTE: BETWEEN is inclusive on both ends, so 2011-01-08 satisfies both
-- date-range predicates.

SELECT (SUM(f1.wine_glasses_drunk) - SUM(f2.wine_glasses_drunk)) as wine_diff, 
(SUM(f1.whisky_shots_drunk) - SUM(f2.whisky_shots_drunk)) as whisky_diff, 
(SUM(f1.beer_bottle_drunk) - SUM(f2.beer_bottle_drunk)) as beer_diff 
FROM foobar f1 INNER JOIN foobar f2 ON f2.user_id=f1.user_id
WHERE f1.user_id=1 
AND f1.data_entry_date BETWEEN '2011-01-08' AND '2011-01-15'
AND f2.data_entry_date BETWEEN '2011-01-01' AND '2011-01-08'
AND f1.data_entry_date - f2.data_entry_date between 6 and 9;

上記の SQL は明らかにハックです (特に `f1.data_entry_date - f2.data_entry_date between 6 and 9` という条件)。Excel で結果を確認したところ、上記のクエリの結果は (当然のことながら) 間違っていました。

このクエリを作成するにはどうすればよいですか?また、データベース内の連続していない日付を処理できるように変更するにはどうすればよいですか?

私は PostgreSQL を使用していますが、できればデータベースに依存しない (つまり ANSI) SQL を使用したいと考えています。

score 2 · Accepted Answer

あなたが与えた説明から、私がこれを正しい方法で行っているかどうかは完全にはわかりませんが、2 つの異なる関数を使用して、必要な結果を得ることができます。

まず、date_trunc 関数を見てください。これにより、週の最初の日の日付を取得でき、それをグループ化してその週の合計を取得できます。週の最初の日が希望どおりでない場合は、日付演算を使用してそれを整理できます。この週の最初の日は月曜日だと思います。

次に、lag ウィンドウ関数を使用して、前の行の合計を取得できます。ある週のデータが丸ごと欠落している場合、この関数は前の週ではなく前の行を参照することに注意してください。そのため、データベースが正しい行 (ちょうど 1 週間前の行) を見ていることを確認するチェックをクエリに入れてあります。

-- Week-over-week change in drinks per user.
-- Step 1 totals each user's drinks per date_trunc('week', ...) bucket.
-- Step 2 fetches the previous row's totals with lag() and flags whether that
-- row starts exactly 7 days earlier.
-- Step 3 subtracts the previous totals only when the flag is true; otherwise
-- (first row for the user, or a gap of more than one week) it subtracts 0.
with weekly_totals as (
  select
    user_id,
    date_trunc('week', data_entry_date) as week_start_date,
    sum(wine_glasses_drunk) as this_week_wine_glasses_drunk,
    sum(whisky_shots_drunk) as this_week_whisky_shots_drunk,
    sum(beer_bottle_drunk) as this_week_beer_bottle_drunk
  from foobar
  group by
    user_id,
    date_trunc('week', data_entry_date)
),
with_previous_row as (
  select
    user_id,
    week_start_date,
    this_week_wine_glasses_drunk,
    this_week_whisky_shots_drunk,
    this_week_beer_bottle_drunk,
    -- NULL for the first row of a user (no previous row), which the CASE
    -- expressions in the final select treat the same as false
    ((lag(week_start_date) over prev_week) + interval '7' day)
      = week_start_date as is_consecutive_weeks,
    lag(this_week_wine_glasses_drunk) over prev_week as last_week_wine_glasses_drunk,
    lag(this_week_whisky_shots_drunk) over prev_week as last_week_whisky_shots_drunk,
    lag(this_week_beer_bottle_drunk) over prev_week as last_week_beer_bottle_drunk
  from weekly_totals
  window prev_week as (partition by user_id order by week_start_date)
)
select
  user_id,
  week_start_date,
  this_week_wine_glasses_drunk
    - case when is_consecutive_weeks
        then last_week_wine_glasses_drunk else 0 end as wine_glasses_drunk,
  this_week_whisky_shots_drunk
    - case when is_consecutive_weeks
        then last_week_whisky_shots_drunk else 0 end as whisky_shots_drunk,
  this_week_beer_bottle_drunk
    - case when is_consecutive_weeks
        then last_week_beer_bottle_drunk else 0 end as beer_bottle_drunk
from with_previous_row

SQL フィドルを参照できます。

ところで、私は Oracle のバックグラウンドを持っており、PostgreSQL のドキュメントと SQL Fiddle を頼りにこれを書き上げました。これがお求めのものであれば幸いです。

score 1 · Accepted Answer

少し異なるアプローチ (日付パラメーターを入力させます):

-- Difference of drink totals between two caller-supplied date ranges
-- (T-SQL variable syntax). Result is one row: period 1 total minus
-- period 2 total for each drink.
-- Each variable in DECLARE carries its own data type; a single trailing
-- "AS Date" does not apply to the whole list.
DECLARE @StartDate1 date, @EndDate1 date, @StartDate2 date, @EndDate2 date;
-- ISO yyyy-mm-dd literals, so the values do not depend on the session's
-- language / date-format settings.
SET @StartDate1 = '2012-06-01';
SET @EndDate1   = '2012-06-15';
SET @StartDate2 = '2012-06-16';
SET @EndDate2   = '2012-06-30';

SELECT
    SUM(U.WineP1)    - SUM(U.WineP2)    AS WineDiff,
    SUM(U.WhiskeyP1) - SUM(U.WhiskeyP2) AS WhiskeyDiff,
    SUM(U.BeerP1)    - SUM(U.BeerP2)    AS BeerDiff
FROM
(
    -- period 1 totals; the period 2 columns are padded with 0
    SELECT
        SUM(wine_glasses_drunk) AS WineP1,
        SUM(whisky_shots_drunk) AS WhiskeyP1,
        SUM(beer_bottle_drunk)  AS BeerP1,
        0 AS WineP2,
        0 AS WhiskeyP2,
        0 AS BeerP2
    FROM foobar
    WHERE data_entry_date BETWEEN @StartDate1 AND @EndDate1

    UNION ALL

    -- period 2 totals; the period 1 columns are padded with 0
    SELECT
        0 AS WineP1,
        0 AS WhiskeyP1,
        0 AS BeerP1,
        SUM(wine_glasses_drunk) AS WineP2,
        SUM(whisky_shots_drunk) AS WhiskeyP2,
        SUM(beer_bottle_drunk)  AS BeerP2
    FROM foobar
    WHERE data_entry_date BETWEEN @StartDate2 AND @EndDate2
) AS U;

score 0 · Accepted Answer

これを他の回答の編集として追加するつもりでしたが、実際には別の方法であるため、別の回答にする必要があります。

私自身は先に投稿したもう一方の回答の方が好みですが、こちらはデータにギャップがあっても機能するはずです。

クエリのパラメータを設定するには、with 句の query_params 部分にある period_start_date および period_days の値を変更します。

-- Period-over-period change in drinks per user, for fixed-length periods
-- counted from a configurable start date.
-- Each period's totals are emitted twice: once as-is under their own period
-- number, and once negated under the following period number. Summing per
-- period number therefore yields "this period minus previous period", and a
-- missing previous period simply contributes nothing.
with query_params as (
  -- edit these two values to change the reporting window
  select
    date '2011-01-01' as period_start_date,
    7 as period_days
),
summary_data as (
  select
    user_id,
    (data_entry_date - period_start_date) / period_days as period_number,
    sum(wine_glasses_drunk) as wine_glasses_drunk,
    sum(whisky_shots_drunk) as whisky_shots_drunk,
    sum(beer_bottle_drunk) as beer_bottle_drunk
  from foobar
    cross join query_params
  -- Integer division truncates toward zero, so without this filter rows up
  -- to (period_days - 1) days BEFORE the start date would also get period
  -- number 0 and be mixed into the first period.
  where data_entry_date >= period_start_date
  group by
    user_id,
    (data_entry_date - period_start_date) / period_days
)
select
  user_id,
  period_number,
  period_start_date + period_number * period_days as period_start_date,
  sum(wine_glasses_drunk) as wine_glasses_drunk,
  sum(whisky_shots_drunk) as whisky_shots_drunk,
  sum(beer_bottle_drunk) as beer_bottle_drunk
from (
  -- this period's data
  select
    user_id,
    period_number,
    wine_glasses_drunk,
    whisky_shots_drunk,
    beer_bottle_drunk
  from summary_data
  union all
  -- previous period's data, negated and shifted forward by one period
  select
    user_id,
    period_number + 1 as period_number,
    -wine_glasses_drunk as wine_glasses_drunk,
    -whisky_shots_drunk as whisky_shots_drunk,
    -beer_bottle_drunk as beer_bottle_drunk
  from summary_data
) a
cross join query_params
-- drop the shifted copy of the last period, which has no "current" data
where period_number <= (select max(period_number) from summary_data)
group by
  user_id,
  period_number,
  period_start_date + period_number * period_days
order by
  user_id,
  period_number;

また、SQLフィドルが利用可能です。

score 0 · Accepted Answer

これらのクエリを作成するときは、原則として、ピースで構築してから結合します。最初に適切な構造を見つけてから、必要なすべてのピースを個別に構築して、各ピースが独自にどのように機能するかを理解できるようにします.

ここでは、より明確な方法を見つけるために、より多くのサブクエリを使用する必要があると思います。これらの行に沿って何かを試すことができると思います：

必要な日付範囲を計算し、それらを変数として保持します。(上記のコードの代わりに、次の期間を見つけるために日付に日数を追加することもできます。)

-- Sketch only (T-SQL variable syntax): three boundary dates held in variables.
-- Each variable in DECLARE needs its own data type.
-- (SQL1) is a placeholder, presumably for a query that returns the date;
-- the remaining variables are assigned the same way.
Declare @SQL1 Date, @SQL2 Date, @SQL3 Date
Set @SQL1=(SQL1)
...

次に、日付をパラメーターとして使用する方法で、1 週間あたりの合計を見つけます。

-- Totals per period, using the boundary variables @SQL1, @SQL2 and @SQL3.
-- CASE takes the first matching branch, so a row dated exactly @SQL2 is
-- counted in period 1. Rows outside both ranges fall into a group whose
-- period_number is NULL.
Select
  sum(wine_glasses_drunk) as wine_totals,
  sum(whisky_shots_drunk) as whiskey_totals,  -- table column is spelled "whisky"
  sum(beer_bottle_drunk) as beer_totals,
  case
    when data_entry_date between @SQL1 and @SQL2 then 1
    when data_entry_date between @SQL2 and @SQL3 then 2
  end as period_number
from foobar
-- the non-aggregated period expression must be grouped on for the sums
-- to be computed per period
group by
  case
    when data_entry_date between @SQL1 and @SQL2 then 1
    when data_entry_date between @SQL2 and @SQL3 then 2
  end

次に、データが簡単な形式であり、同じ値の合計を何度も使用する必要がないため、必要な集計クエリを作成します。

sql - SQL: 一定期間にわたって計算された SUM 間の差異

回答 4 件

Related

Reference