EMR 上の PrestoDB で実行しようとしているクエリがあります。このクエリは S3 上の Glue ORC テーブルを読み取ります。「クエリに失敗しました: ステートメントが大きすぎます (分析中のスタック オーバーフロー)」というエラーが表示されますが、クエリ自体はそれほど長くなく (1364 行)、Athena では正常に実行できます。PrestoDB で直接実行しても失敗することを確認済みです。
クエリは 1364 行で、144 KB です (オブジェクトの一部を匿名化する必要があったため、以下の例は短くなっています)。
プロキシ ジャンプ SSH 経由で DataGrip IDE から実行しています。HTTP 関連の問題があるのではないかと疑っています。
以下はクエリの例です。StackOverflow には 30000 文字の制限があるため、実際のクエリはこの例よりも長くなります。
-- Creates new_schema.test (ORC) from schema.table, keeping rows whose snapshot_date is
-- after 2019-01-01. For each row it emits the dimension columns, five concatenated
-- "window partition" key strings, and b_count metrics computed with row-based window frames.
--
-- Every window is partitioned by grain_window_partition and ordered by snapshot_date.
-- The partition key is intentionally NOT repeated in ORDER BY: it is constant within a
-- partition, so it cannot influence row order and only enlarges the statement.
--
-- NOTE(review): all frames are ROWS frames, i.e. offsets count rows, not calendar months.
-- The metric names suggest one row per month per grain_window_partition -- TODO confirm;
-- with gaps or duplicate snapshot_date values the offsets would not line up with months.
create table new_schema.test
with (format = 'ORC')
as (
    select distinct
        sr2_ln
        , sr3_ln
        , sr4_ln
        , sr5_ln
        , sr2_lin
        , sr3_lin
        , sr4_lin
        , sr5_lin
        , is_t1_t3
        , is_t4_t7
        , is_t8
        , is_t10
        , egroup
        , is_f
        , is_v
        , is_d
        , is_t
        , is_b
        , ry
        , snapshot_month_int
        , snapshot_year

        -- Window partitions
        -- Each key is a plain concatenation of the listed columns (NULL replaced by '_')
        -- followed by snapshot_date rendered as varchar.
        -- NOTE(review): the parts are joined without a separator, so different column
        -- values can produce the same key (e.g. 'ab' || 'c' vs 'a' || 'bc'). Left unchanged
        -- here because consumers of these columns may depend on the exact values.
        , (
            coalesce(sr2_lin, '_')
            || coalesce(is_t1_t3, '_')
            || coalesce(is_t4_t7, '_')
            || coalesce(is_t8, '_')
            || coalesce(is_t10, '_')
            || coalesce(ry, '_')
            || cast(snapshot_date as varchar)
        ) as ol2_ry_window_partition
        , (
            coalesce(sr2_lin, '_')
            || coalesce(sr3_lin, '_')
            || coalesce(is_t1_t3, '_')
            || coalesce(is_t4_t7, '_')
            || coalesce(is_t8, '_')
            || coalesce(is_t10, '_')
            || coalesce(ry, '_')
            || cast(snapshot_date as varchar)
        ) as ol3_ry_window_partition
        , (
            coalesce(sr2_lin, '_')
            || coalesce(sr3_lin, '_')
            || coalesce(sr4_lin, '_')
            || coalesce(is_t1_t3, '_')
            || coalesce(is_t4_t7, '_')
            || coalesce(is_t8, '_')
            || coalesce(is_t10, '_')
            || coalesce(ry, '_')
            || cast(snapshot_date as varchar)
        ) as ol4_ry_window_partition
        -- Same as the f_all_up key below except that is_f is used where that key uses egroup.
        , (
            coalesce(sr2_lin, '_')
            || coalesce(sr3_lin, '_')
            || coalesce(sr4_lin, '_')
            || coalesce(sr5_lin, '_')
            || coalesce(is_t1_t3, '_')
            || coalesce(is_t4_t7, '_')
            || coalesce(is_t8, '_')
            || coalesce(is_t10, '_')
            || coalesce(is_f, '_')
            || coalesce(is_v, '_')
            || coalesce(is_d, '_')
            || coalesce(is_t, '_')
            || coalesce(is_b, '_')
            || coalesce(ry, '_')
            || cast(snapshot_date as varchar)
        ) as e_all_up_window_partition
        , (
            coalesce(sr2_lin, '_')
            || coalesce(sr3_lin, '_')
            || coalesce(sr4_lin, '_')
            || coalesce(sr5_lin, '_')
            || coalesce(is_t1_t3, '_')
            || coalesce(is_t4_t7, '_')
            || coalesce(is_t8, '_')
            || coalesce(is_t10, '_')
            || coalesce(egroup, '_')
            || coalesce(is_v, '_')
            || coalesce(is_d, '_')
            || coalesce(is_t, '_')
            || coalesce(is_b, '_')
            || coalesce(ry, '_')
            || cast(snapshot_date as varchar)
        ) as f_all_up_window_partition

        -- b metrics
        , b_count as b_current_month
        -- sum() over a single-row frame "k preceding and k preceding" yields b_count of the
        -- row k positions earlier in the partition (NULL when no such row exists).
        , sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and 1 preceding) as b_1_month_prior_original
        , sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding) as b_1_year_prior

        -- Average over the current row plus the (snapshot_month_int - 1) preceding rows.
        -- No ELSE branch: the result is NULL when snapshot_month_int is outside 1..12.
        , case
            when snapshot_month_int = 1 then b_count
            when snapshot_month_int = 2 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and current row)
            when snapshot_month_int = 3 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 2 preceding and current row)
            when snapshot_month_int = 4 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 3 preceding and current row)
            when snapshot_month_int = 5 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 4 preceding and current row)
            when snapshot_month_int = 6 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 5 preceding and current row)
            when snapshot_month_int = 7 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 6 preceding and current row)
            when snapshot_month_int = 8 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 7 preceding and current row)
            when snapshot_month_int = 9 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 8 preceding and current row)
            when snapshot_month_int = 10 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 9 preceding and current row)
            when snapshot_month_int = 11 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 10 preceding and current row)
            when snapshot_month_int = 12 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and current row)
        end as b_avg_ytd

        -- Same frame lengths as b_avg_ytd, shifted back by 12 rows.
        , case
            when snapshot_month_int = 1 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding)
            when snapshot_month_int = 2 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 13 preceding and 12 preceding)
            when snapshot_month_int = 3 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 14 preceding and 12 preceding)
            when snapshot_month_int = 4 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 15 preceding and 12 preceding)
            when snapshot_month_int = 5 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 16 preceding and 12 preceding)
            when snapshot_month_int = 6 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 17 preceding and 12 preceding)
            when snapshot_month_int = 7 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 18 preceding and 12 preceding)
            when snapshot_month_int = 8 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 19 preceding and 12 preceding)
            when snapshot_month_int = 9 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 20 preceding and 12 preceding)
            when snapshot_month_int = 10 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 21 preceding and 12 preceding)
            when snapshot_month_int = 11 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 22 preceding and 12 preceding)
            when snapshot_month_int = 12 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 23 preceding and 12 preceding)
        end as b_avg_ly_ytd

        -- Average over the current row and the 11 preceding rows.
        , avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and current row) as b_avg_ttm_as_of_snapshot_date

        -- b_count of the row snapshot_month_int positions earlier.
        , case
            when snapshot_month_int = 1 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and 1 preceding)
            when snapshot_month_int = 2 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 2 preceding and 2 preceding)
            when snapshot_month_int = 3 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 3 preceding and 3 preceding)
            when snapshot_month_int = 4 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 4 preceding and 4 preceding)
            when snapshot_month_int = 5 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 5 preceding and 5 preceding)
            when snapshot_month_int = 6 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 6 preceding and 6 preceding)
            when snapshot_month_int = 7 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 7 preceding and 7 preceding)
            when snapshot_month_int = 8 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 8 preceding and 8 preceding)
            when snapshot_month_int = 9 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 9 preceding and 9 preceding)
            when snapshot_month_int = 10 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 10 preceding and 10 preceding)
            when snapshot_month_int = 11 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and 11 preceding)
            when snapshot_month_int = 12 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding)
        end as b_end_of_ly

        -- "_for_rt" variants: the frame restarts at month 4 (current row only) and grows
        -- through month 3 of the following year.
        -- NOTE(review): presumably a reporting year that starts in April -- confirm.
        , case
            when snapshot_month_int = 1 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 9 preceding and current row)
            when snapshot_month_int = 2 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 10 preceding and current row)
            when snapshot_month_int = 3 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and current row)
            when snapshot_month_int = 4 then b_count
            when snapshot_month_int = 5 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and current row)
            when snapshot_month_int = 6 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 2 preceding and current row)
            when snapshot_month_int = 7 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 3 preceding and current row)
            when snapshot_month_int = 8 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 4 preceding and current row)
            when snapshot_month_int = 9 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 5 preceding and current row)
            when snapshot_month_int = 10 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 6 preceding and current row)
            when snapshot_month_int = 11 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 7 preceding and current row)
            when snapshot_month_int = 12 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 8 preceding and current row)
        end as b_avg_ytd_for_rt

        -- Same frame lengths as b_avg_ytd_for_rt, shifted back by 12 rows.
        , case
            when snapshot_month_int = 1 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 21 preceding and 12 preceding)
            when snapshot_month_int = 2 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 22 preceding and 12 preceding)
            when snapshot_month_int = 3 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 23 preceding and 12 preceding)
            when snapshot_month_int = 4 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding)
            when snapshot_month_int = 5 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 13 preceding and 12 preceding)
            when snapshot_month_int = 6 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 14 preceding and 12 preceding)
            when snapshot_month_int = 7 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 15 preceding and 12 preceding)
            when snapshot_month_int = 8 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 16 preceding and 12 preceding)
            when snapshot_month_int = 9 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 17 preceding and 12 preceding)
            when snapshot_month_int = 10 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 18 preceding and 12 preceding)
            when snapshot_month_int = 11 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 19 preceding and 12 preceding)
            when snapshot_month_int = 12 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 20 preceding and 12 preceding)
        end as b_avg_ly_ytd_for_rt

        -- b_count of the row at the far end of the b_avg_ly_ytd_for_rt frame.
        , case
            when snapshot_month_int = 1 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 21 preceding and 21 preceding)
            when snapshot_month_int = 2 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 22 preceding and 22 preceding)
            when snapshot_month_int = 3 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 23 preceding and 23 preceding)
            when snapshot_month_int = 4 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding)
            when snapshot_month_int = 5 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 13 preceding and 13 preceding)
            when snapshot_month_int = 6 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 14 preceding and 14 preceding)
            when snapshot_month_int = 7 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 15 preceding and 15 preceding)
            when snapshot_month_int = 8 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 16 preceding and 16 preceding)
            when snapshot_month_int = 9 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 17 preceding and 17 preceding)
            when snapshot_month_int = 10 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 18 preceding and 18 preceding)
            when snapshot_month_int = 11 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 19 preceding and 19 preceding)
            when snapshot_month_int = 12 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 20 preceding and 20 preceding)
        end as b_beg_of_ly_for_rt

        -- b_count of the row one position before the start of the b_avg_ytd_for_rt frame.
        , case
            when snapshot_month_int = 1 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 10 preceding and 10 preceding)
            when snapshot_month_int = 2 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and 11 preceding)
            when snapshot_month_int = 3 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding)
            when snapshot_month_int = 4 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and 1 preceding)
            when snapshot_month_int = 5 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 2 preceding and 2 preceding)
            when snapshot_month_int = 6 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 3 preceding and 3 preceding)
            when snapshot_month_int = 7 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 4 preceding and 4 preceding)
            when snapshot_month_int = 8 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 5 preceding and 5 preceding)
            when snapshot_month_int = 9 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 6 preceding and 6 preceding)
            when snapshot_month_int = 10 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 7 preceding and 7 preceding)
            when snapshot_month_int = 11 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 8 preceding and 8 preceding)
            when snapshot_month_int = 12 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 9 preceding and 9 preceding)
        end as b_end_of_ly_for_rt

        -- b_count of the first row of the b_avg_ytd_for_rt frame (the current row in month 4).
        , case
            when snapshot_month_int = 1 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 9 preceding and 9 preceding)
            when snapshot_month_int = 2 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 10 preceding and 10 preceding)
            when snapshot_month_int = 3 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and 11 preceding)
            when snapshot_month_int = 4 then b_count
            when snapshot_month_int = 5 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and 1 preceding)
            when snapshot_month_int = 6 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 2 preceding and 2 preceding)
            when snapshot_month_int = 7 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 3 preceding and 3 preceding)
            when snapshot_month_int = 8 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 4 preceding and 4 preceding)
            when snapshot_month_int = 9 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 5 preceding and 5 preceding)
            when snapshot_month_int = 10 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 6 preceding and 6 preceding)
            when snapshot_month_int = 11 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 7 preceding and 7 preceding)
            when snapshot_month_int = 12 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 8 preceding and 8 preceding)
        end as b_beg_of_ty_for_rt

        -- "t_" variants: same calculations as above, but NULL unless is_t = 'Y'.
        , case when is_t = 'Y' then b_count end as t_b_current_month
        , case when is_t = 'Y' then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and 1 preceding) end as t_b_1_month_prior
        , case when is_t = 'Y' then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding) end as t_b_1_year_prior
        , case
            when snapshot_month_int = 1 and is_t = 'Y' then b_count
            when snapshot_month_int = 2 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and current row)
            when snapshot_month_int = 3 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 2 preceding and current row)
            when snapshot_month_int = 4 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 3 preceding and current row)
            when snapshot_month_int = 5 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 4 preceding and current row)
            when snapshot_month_int = 6 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 5 preceding and current row)
            when snapshot_month_int = 7 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 6 preceding and current row)
            when snapshot_month_int = 8 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 7 preceding and current row)
            when snapshot_month_int = 9 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 8 preceding and current row)
            when snapshot_month_int = 10 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 9 preceding and current row)
            when snapshot_month_int = 11 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 10 preceding and current row)
            when snapshot_month_int = 12 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and current row)
        end as t_b_avg_ytd

        , snapshot_date
        , snapshot_date as snapshot_date_partition
    from schema.table
    -- Both sides are compared as timestamps; the date literal is cast explicitly instead of
    -- relying on implicit date -> timestamp coercion.
    where cast(snapshot_date as timestamp) > cast(date '2019-01-01' as timestamp)
) -- end of CTAS