0

EMR 上の PrestoDB で実行しようとしているクエリがあります。このクエリは S3 上の Glue ORC テーブルを読み取ります。実行すると「クエリに失敗しました: ステートメントが大きすぎます (分析中のスタック オーバーフロー)」というエラーが表示されます。しかし、クエリ自体は 1364 行とそれほど長くなく、Athena では正常に実行できます。PrestoDB に対して直接実行した場合にも失敗することを確認しました。

クエリは 1364 行で、144 KB です (オブジェクトの一部を匿名化する必要があったため、以下の例は短くなっています)。

プロキシ ジャンプ (SSH) 経由で DataGrip IDE から実行しています。HTTP 関連の問題なのではないかと考えています。

以下はクエリの例です。StackOverflow には 30000 文字の制限があるため一部を省略しており、実際のクエリはこれよりも長くなります。

-- Creates new_schema.test (ORC) from schema.table.
-- Output: the distinct combinations of the dimension columns, five concatenated
-- "window partition" key strings, and b_count look-back / rolling metrics, for rows
-- whose snapshot_date is later than 2019-01-01 00:00:00.
--
-- Notes on the window metrics:
--   * Every frame is ROWS-based, so "12 preceding" means 12 rows back within
--     grain_window_partition, not 12 calendar months.
--     NOTE(review): the month arithmetic below presumably assumes exactly one row per
--     month per grain_window_partition with no gaps -- TODO confirm against the source.
--   * The where clause is evaluated before the window functions, so look-backs can never
--     reach rows at or before 2019-01-01. NOTE(review): confirm this is intended.
--   * The windows order by snapshot_date only: grain_window_partition is constant inside
--     its own partition, so repeating it as a sort key changes nothing and only makes the
--     statement larger.
create
table new_schema.test
with (format = 'ORC')
as (
        select distinct
            sr2_ln
            , sr3_ln
            , sr4_ln
            , sr5_ln
            , sr2_lin
            , sr3_lin
            , sr4_lin
            , sr5_lin
            , is_t1_t3
            , is_t4_t7
            , is_t8
            , is_t10
            , egroup
            , is_f
            , is_v
            , is_d
            , is_t
            , is_b
            , ry
            , snapshot_month_int
            , snapshot_year

            -- Window partition keys: each key concatenates a set of dimension columns
            -- (null replaced by '_') followed by the snapshot date rendered as varchar.
            , (
                coalesce(sr2_lin,'_')
                || coalesce(is_t1_t3,'_')
                || coalesce(is_t4_t7,'_')
                || coalesce(is_t8,'_')
                || coalesce(is_t10,'_')
                || coalesce(ry,'_')
                || cast(snapshot_date as varchar)
                ) as ol2_ry_window_partition
            , (
                coalesce(sr2_lin,'_')
                || coalesce(sr3_lin,'_')
                || coalesce(is_t1_t3,'_')
                || coalesce(is_t4_t7,'_')
                || coalesce(is_t8,'_')
                || coalesce(is_t10,'_')
                || coalesce(ry,'_')
                || cast(snapshot_date as varchar)
                ) as ol3_ry_window_partition
            , (
                coalesce(sr2_lin,'_')
                || coalesce(sr3_lin,'_')
                || coalesce(sr4_lin,'_')
                || coalesce(is_t1_t3,'_')
                || coalesce(is_t4_t7,'_')
                || coalesce(is_t8,'_')
                || coalesce(is_t10,'_')
                || coalesce(ry,'_')
                || cast(snapshot_date as varchar)
                ) as ol4_ry_window_partition
            -- Same as the sr2..sr5 key plus the is_f/is_v/is_d/is_t/is_b flags (no egroup).
            , (
                coalesce(sr2_lin,'_')
                || coalesce(sr3_lin,'_')
                || coalesce(sr4_lin,'_')
                || coalesce(sr5_lin,'_')
                || coalesce(is_t1_t3,'_')
                || coalesce(is_t4_t7,'_')
                || coalesce(is_t8,'_')
                || coalesce(is_t10,'_')
                || coalesce(is_f,'_')
                || coalesce(is_v,'_')
                || coalesce(is_d,'_')
                || coalesce(is_t, '_')
                || coalesce(is_b, '_')
                || coalesce(ry,'_')
                || cast(snapshot_date as varchar)
                ) as e_all_up_window_partition
            -- Same as above but with egroup in place of is_f.
            , (
                coalesce(sr2_lin,'_')
                || coalesce(sr3_lin,'_')
                || coalesce(sr4_lin,'_')
                || coalesce(sr5_lin,'_')
                || coalesce(is_t1_t3,'_')
                || coalesce(is_t4_t7,'_')
                || coalesce(is_t8,'_')
                || coalesce(is_t10,'_')
                || coalesce(egroup,'_')
                || coalesce(is_v,'_')
                || coalesce(is_d,'_')
                || coalesce(is_t, '_')
                || coalesce(is_b, '_')
                || coalesce(ry,'_')
                || cast(snapshot_date as varchar)
                ) as f_all_up_window_partition

            -- b metrics
            , b_count as b_current_month
            -- Single-row frames (k preceding and k preceding) pick the value k rows back;
            -- the result is null when the partition has no such row.
            , sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and 1 preceding) as b_1_month_prior_original
            , sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding) as b_1_year_prior
            -- Average over the current row and the (month - 1) rows before it.
            , case
                when snapshot_month_int = 1 then b_count
                when snapshot_month_int = 2 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and current row)
                when snapshot_month_int = 3 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 2 preceding and current row)
                when snapshot_month_int = 4 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 3 preceding and current row)
                when snapshot_month_int = 5 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 4 preceding and current row)
                when snapshot_month_int = 6 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 5 preceding and current row)
                when snapshot_month_int = 7 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 6 preceding and current row)
                when snapshot_month_int = 8 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 7 preceding and current row)
                when snapshot_month_int = 9 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 8 preceding and current row)
                when snapshot_month_int = 10 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 9 preceding and current row)
                when snapshot_month_int = 11 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 10 preceding and current row)
                when snapshot_month_int = 12 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and current row)
                end as b_avg_ytd
            -- Same-length average shifted back 12 rows: from (month + 11) rows back to 12 rows back.
            , case
                when snapshot_month_int = 1 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding)
                when snapshot_month_int = 2 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 13 preceding and 12 preceding)
                when snapshot_month_int = 3 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 14 preceding and 12 preceding)
                when snapshot_month_int = 4 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 15 preceding and 12 preceding)
                when snapshot_month_int = 5 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 16 preceding and 12 preceding)
                when snapshot_month_int = 6 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 17 preceding and 12 preceding)
                when snapshot_month_int = 7 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 18 preceding and 12 preceding)
                when snapshot_month_int = 8 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 19 preceding and 12 preceding)
                when snapshot_month_int = 9 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 20 preceding and 12 preceding)
                when snapshot_month_int = 10 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 21 preceding and 12 preceding)
                when snapshot_month_int = 11 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 22 preceding and 12 preceding)
                when snapshot_month_int = 12 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 23 preceding and 12 preceding)
                end as b_avg_ly_ytd
            , avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and current row) as b_avg_ttm_as_of_snapshot_date
            -- Value `month` rows back (presumably the previous December -- TODO confirm).
            , case
                when snapshot_month_int = 1 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and 1 preceding)
                when snapshot_month_int = 2 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 2 preceding and 2 preceding)
                when snapshot_month_int = 3 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 3 preceding and 3 preceding)
                when snapshot_month_int = 4 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 4 preceding and 4 preceding)
                when snapshot_month_int = 5 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 5 preceding and 5 preceding)
                when snapshot_month_int = 6 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 6 preceding and 6 preceding)
                when snapshot_month_int = 7 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 7 preceding and 7 preceding)
                when snapshot_month_int = 8 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 8 preceding and 8 preceding)
                when snapshot_month_int = 9 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 9 preceding and 9 preceding)
                when snapshot_month_int = 10 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 10 preceding and 10 preceding)
                when snapshot_month_int = 11 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and 11 preceding)
                when snapshot_month_int = 12 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding)
                end as b_end_of_ly
            -- "_for_rt" variants: same shapes as above, but the period starts at month 4
            -- (month 4 uses the current row only; months 1-3 are the last three rows of the period).
            , case
                when snapshot_month_int = 1 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 9 preceding and current row)
                when snapshot_month_int = 2 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 10 preceding and current row)
                when snapshot_month_int = 3 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and current row)
                when snapshot_month_int = 4 then b_count
                when snapshot_month_int = 5 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and current row)
                when snapshot_month_int = 6 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 2 preceding and current row)
                when snapshot_month_int = 7 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 3 preceding and current row)
                when snapshot_month_int = 8 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 4 preceding and current row)
                when snapshot_month_int = 9 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 5 preceding and current row)
                when snapshot_month_int = 10 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 6 preceding and current row)
                when snapshot_month_int = 11 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 7 preceding and current row)
                when snapshot_month_int = 12 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 8 preceding and current row)
                end as b_avg_ytd_for_rt
            -- b_avg_ytd_for_rt's frame shifted back 12 rows.
            , case
                when snapshot_month_int = 1 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 21 preceding and 12 preceding)
                when snapshot_month_int = 2 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 22 preceding and 12 preceding)
                when snapshot_month_int = 3 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 23 preceding and 12 preceding)
                when snapshot_month_int = 4 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding)
                when snapshot_month_int = 5 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 13 preceding and 12 preceding)
                when snapshot_month_int = 6 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 14 preceding and 12 preceding)
                when snapshot_month_int = 7 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 15 preceding and 12 preceding)
                when snapshot_month_int = 8 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 16 preceding and 12 preceding)
                when snapshot_month_int = 9 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 17 preceding and 12 preceding)
                when snapshot_month_int = 10 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 18 preceding and 12 preceding)
                when snapshot_month_int = 11 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 19 preceding and 12 preceding)
                when snapshot_month_int = 12 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 20 preceding and 12 preceding)
                end as b_avg_ly_ytd_for_rt
            -- Value at the first row of the prior period (12 rows before the current period's first row).
            , case
                when snapshot_month_int = 1 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 21 preceding and 21 preceding)
                when snapshot_month_int = 2 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 22 preceding and 22 preceding)
                when snapshot_month_int = 3 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 23 preceding and 23 preceding)
                when snapshot_month_int = 4 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding)
                when snapshot_month_int = 5 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 13 preceding and 13 preceding)
                when snapshot_month_int = 6 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 14 preceding and 14 preceding)
                when snapshot_month_int = 7 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 15 preceding and 15 preceding)
                when snapshot_month_int = 8 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 16 preceding and 16 preceding)
                when snapshot_month_int = 9 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 17 preceding and 17 preceding)
                when snapshot_month_int = 10 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 18 preceding and 18 preceding)
                when snapshot_month_int = 11 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 19 preceding and 19 preceding)
                when snapshot_month_int = 12 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 20 preceding and 20 preceding)
                end as b_beg_of_ly_for_rt
            -- Value at the row immediately before the current period's first row.
            , case
                when snapshot_month_int = 1 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 10 preceding and 10 preceding)
                when snapshot_month_int = 2 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and 11 preceding)
                when snapshot_month_int = 3 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding)
                when snapshot_month_int = 4 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and 1 preceding)
                when snapshot_month_int = 5 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 2 preceding and 2 preceding)
                when snapshot_month_int = 6 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 3 preceding and 3 preceding)
                when snapshot_month_int = 7 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 4 preceding and 4 preceding)
                when snapshot_month_int = 8 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 5 preceding and 5 preceding)
                when snapshot_month_int = 9 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 6 preceding and 6 preceding)
                when snapshot_month_int = 10 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 7 preceding and 7 preceding)
                when snapshot_month_int = 11 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 8 preceding and 8 preceding)
                when snapshot_month_int = 12 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 9 preceding and 9 preceding)
                end as b_end_of_ly_for_rt
            -- Value at the current period's first row (the month-4 row).
            , case
                when snapshot_month_int = 1 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 9 preceding and 9 preceding)
                when snapshot_month_int = 2 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 10 preceding and 10 preceding)
                when snapshot_month_int = 3 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and 11 preceding)
                when snapshot_month_int = 4 then b_count
                when snapshot_month_int = 5 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and 1 preceding)
                when snapshot_month_int = 6 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 2 preceding and 2 preceding)
                when snapshot_month_int = 7 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 3 preceding and 3 preceding)
                when snapshot_month_int = 8 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 4 preceding and 4 preceding)
                when snapshot_month_int = 9 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 5 preceding and 5 preceding)
                when snapshot_month_int = 10 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 6 preceding and 6 preceding)
                when snapshot_month_int = 11 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 7 preceding and 7 preceding)
                when snapshot_month_int = 12 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 8 preceding and 8 preceding)
                end as b_beg_of_ty_for_rt

            -- t_b metrics: the same b metrics, populated only on rows where is_t = 'Y' (null otherwise).
            , case when is_t = 'Y' then b_count end as t_b_current_month
            , case when is_t = 'Y' then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and 1 preceding) end as t_b_1_month_prior
            , case when is_t = 'Y' then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding) end as t_b_1_year_prior
            , case
                when snapshot_month_int = 1 and is_t = 'Y' then b_count
                when snapshot_month_int = 2 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and current row)
                when snapshot_month_int = 3 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 2 preceding and current row)
                when snapshot_month_int = 4 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 3 preceding and current row)
                when snapshot_month_int = 5 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 4 preceding and current row)
                when snapshot_month_int = 6 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 5 preceding and current row)
                when snapshot_month_int = 7 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 6 preceding and current row)
                when snapshot_month_int = 8 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 7 preceding and current row)
                when snapshot_month_int = 9 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 8 preceding and current row)
                when snapshot_month_int = 10 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 9 preceding and current row)
                when snapshot_month_int = 11 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 10 preceding and current row)
                when snapshot_month_int = 12 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and current row)
                end as t_b_avg_ytd

            , snapshot_date
            , snapshot_date as snapshot_date_partition

        from schema.table
        -- Compare timestamp to timestamp explicitly instead of relying on date -> timestamp coercion.
        where cast(snapshot_date as timestamp) > timestamp '2019-01-01 00:00:00'

) -- end of CTAS

4

0 に答える 0