xml - XML / JSON説明プランからクエリの列（およびテーブル）のリストを取得します

Question

約200個のSQLステートメントがあり、それらで使用されている列とテーブルを分析する必要があります。PostgreSQL 9.0以降では、XML形式のEXPLAINプランが利用できることがわかりました。

そのプランから、使用されている列とテーブルのリストを取得する方法をご存じですか？

更新版：

テストデータ

-- Test fixture: five identically shaped tables (tmp.a .. tmp.e), each with an
-- id column plus eight integer payload columns b..i.
-- NOTE: the tmp schema must already exist; it is not created here.
create table tmp.a (id integer, b integer, c integer, d integer, e integer, f integer, g integer, h integer, i integer);
create table tmp.b (id integer, b integer, c integer, d integer, e integer, f integer, g integer, h integer, i integer);
create table tmp.c (id integer, b integer, c integer, d integer, e integer, f integer, g integer, h integer, i integer);
create table tmp.d (id integer, b integer, c integer, d integer, e integer, f integer, g integer, h integer, i integer);
create table tmp.e (id integer, b integer, c integer, d integer, e integer, f integer, g integer, h integer, i integer);

-- Explicit column lists keep the inserts correct even if the column order of
-- the tables changes; one multi-row VALUES per table replaces the original
-- single-row statements and loads exactly the same rows.
insert into tmp.a (id, b, c, d, e, f, g, h, i) values
    (1, 1, 1, 1, 1, 1, 1, 1, 1),
    (2, 1, 1, 1, 1, 1, 1, 1, 1),
    (3, 1, 1, 1, 1, 1, 1, 1, 1),
    (4, 1, 1, 1, 1, 1, 1, 1, 1);

insert into tmp.b (id, b, c, d, e, f, g, h, i) values
    (1, 1, 1, 1, 1, 1, 1, 1, 1),
    (2, 1, 1, 1, 1, 1, 1, 1, 1),
    (3, 1, 1, 1, 1, 1, 1, 1, 1),
    (4, 1, 1, 1, 1, 1, 1, 1, 1);

insert into tmp.c (id, b, c, d, e, f, g, h, i) values
    (1, 1, 1, 1, 1, 1, 1, 1, 1),
    (2, 1, 1, 1, 1, 1, 1, 1, 1),
    (3, 1, 1, 1, 1, 1, 1, 1, 1),
    (4, 1, 1, 1, 1, 1, 1, 1, 1);

-- tmp.d has no row for id 4 and tmp.e has no row for id 1 (presumably on
-- purpose, so that left joins against them yield unmatched rows).
insert into tmp.d (id, b, c, d, e, f, g, h, i) values
    (1, 1, 1, 1, 1, 1, 1, 1, 1),
    (2, 1, 1, 1, 1, 1, 1, 1, 1),
    (3, 1, 1, 1, 1, 1, 1, 1, 1);

insert into tmp.e (id, b, c, d, e, f, g, h, i) values
    (2, 1, 1, 1, 1, 1, 1, 1, 1),
    (3, 1, 1, 1, 1, 1, 1, 1, 1),
    (4, 1, 1, 1, 1, 1, 1, 1, 1);

サンプルSQLとそのEXPLAINプラン

-- Produce the XML-format plan for the five-table sample join.
-- VERBOSE makes each plan node list its output columns; cost estimates are
-- switched off so the XML contains only structural information.
explain (verbose, format xml, costs off)
select
    a.b,
    a.c,
    b.c,
    d.b,
    e.f
from tmp.a as a
inner join tmp.b as b using (id)
inner join tmp.c as c using (id)
left join tmp.d as d on a.id = d.id
left join tmp.e as e on b.id = e.id
where c.d = 1
  and (d.f > 0 or e.g is null);

テーブルに保存されたXML結果

-- Holds the raw text of an EXPLAIN (FORMAT XML) result in a single text column.
create table tmp.file (
    fcontent text
);

insert into tmp.file values ('
    <explain xmlns="http://www.postgresql.org/2009/explain">
      <Query>
        <Plan>
          <Node-Type>Merge Join</Node-Type>
          <Join-Type>Left</Join-Type>
          <Output>
            <Item>a.b</Item>
            <Item>a.c</Item>
            <Item>b.c</Item>
            <Item>d.b</Item>
            <Item>e.f</Item>
          </Output>
          <Merge-Cond>(b.id = e.id)</Merge-Cond>
          <Filter>((d.f &gt; 0) OR (e.g IS NULL))</Filter>
          <Plans>
            <Plan>
              <Node-Type>Merge Join</Node-Type>
              <Parent-Relationship>Outer</Parent-Relationship>
              <Join-Type>Left</Join-Type>
              <Output>
                <Item>a.b</Item>
                <Item>a.c</Item>
                <Item>b.c</Item>
                <Item>b.id</Item>
                <Item>d.b</Item>
                <Item>d.f</Item>
              </Output>
              <Merge-Cond>(a.id = d.id)</Merge-Cond>
              <Plans>
                <Plan>
                  <Node-Type>Sort</Node-Type>
                  <Parent-Relationship>Outer</Parent-Relationship>
                  <Output>
                    <Item>a.b</Item>
                    <Item>a.c</Item>
                    <Item>a.id</Item>
                    <Item>b.c</Item>
                    <Item>b.id</Item>
                  </Output>
                  <Sort-Key>
                    <Item>a.id</Item>
                  </Sort-Key>
                  <Plans>
                    <Plan>
                      <Node-Type>Hash Join</Node-Type>
                      <Parent-Relationship>Outer</Parent-Relationship>
                      <Join-Type>Inner</Join-Type>
                      <Output>
                        <Item>a.b</Item>
                        <Item>a.c</Item>
                        <Item>a.id</Item>
                        <Item>b.c</Item>
                        <Item>b.id</Item>
                      </Output>
                      <Hash-Cond>(b.id = a.id)</Hash-Cond>
                      <Plans>
                        <Plan>
                          <Node-Type>Seq Scan</Node-Type>
                          <Parent-Relationship>Outer</Parent-Relationship>
                          <Relation-Name>b</Relation-Name>
                          <Schema>tmp</Schema>
                          <Alias>b</Alias>
                          <Output>
                            <Item>b.id</Item>
                            <Item>b.b</Item>
                            <Item>b.c</Item>
                            <Item>b.d</Item>
                            <Item>b.e</Item>
                            <Item>b.f</Item>
                            <Item>b.g</Item>
                            <Item>b.h</Item>
                            <Item>b.i</Item>
                          </Output>
                        </Plan>
                        <Plan>
                          <Node-Type>Hash</Node-Type>
                          <Parent-Relationship>Inner</Parent-Relationship>
                          <Output>
                            <Item>a.b</Item>
                            <Item>a.c</Item>
                            <Item>a.id</Item>
                            <Item>c.id</Item>
                          </Output>
                          <Plans>
                            <Plan>
                              <Node-Type>Hash Join</Node-Type>
                              <Parent-Relationship>Outer</Parent-Relationship>
                              <Join-Type>Inner</Join-Type>
                              <Output>
                                <Item>a.b</Item>
                                <Item>a.c</Item>
                                <Item>a.id</Item>
                                <Item>c.id</Item>
                              </Output>
                              <Hash-Cond>(a.id = c.id)</Hash-Cond>
                              <Plans>
                                <Plan>
                                  <Node-Type>Seq Scan</Node-Type>
                                  <Parent-Relationship>Outer</Parent-Relationship>
                                  <Relation-Name>a</Relation-Name>
                                  <Schema>tmp</Schema>
                                  <Alias>a</Alias>
                                  <Output>
                                    <Item>a.id</Item>
                                    <Item>a.b</Item>
                                    <Item>a.c</Item>
                                    <Item>a.d</Item>
                                    <Item>a.e</Item>
                                    <Item>a.f</Item>
                                    <Item>a.g</Item>
                                    <Item>a.h</Item>
                                    <Item>a.i</Item>
                                  </Output>
                                </Plan>
                                <Plan>
                                  <Node-Type>Hash</Node-Type>
                                  <Parent-Relationship>Inner</Parent-Relationship>
                                  <Output>
                                    <Item>c.id</Item>
                                  </Output>
                                  <Plans>
                                    <Plan>
                                      <Node-Type>Seq Scan</Node-Type>
                                      <Parent-Relationship>Outer</Parent-Relationship>
                                      <Relation-Name>c</Relation-Name>
                                      <Schema>tmp</Schema>
                                      <Alias>c</Alias>
                                      <Output>
                                        <Item>c.id</Item>
                                      </Output>
                                      <Filter>(c.d = 1)</Filter>
                                    </Plan>
                                  </Plans>
                                </Plan>
                              </Plans>
                            </Plan>
                          </Plans>
                        </Plan>
                      </Plans>
                    </Plan>
                  </Plans>
                </Plan>
                <Plan>
                  <Node-Type>Sort</Node-Type>
                  <Parent-Relationship>Inner</Parent-Relationship>
                  <Output>
                    <Item>d.b</Item>
                    <Item>d.id</Item>
                    <Item>d.f</Item>
                  </Output>
                  <Sort-Key>
                    <Item>d.id</Item>
                  </Sort-Key>
                  <Plans>
                    <Plan>
                      <Node-Type>Seq Scan</Node-Type>
                      <Parent-Relationship>Outer</Parent-Relationship>
                      <Relation-Name>d</Relation-Name>
                      <Schema>tmp</Schema>
                      <Alias>d</Alias>
                      <Output>
                        <Item>d.b</Item>
                        <Item>d.id</Item>
                        <Item>d.f</Item>
                      </Output>
                    </Plan>
                  </Plans>
                </Plan>
              </Plans>
            </Plan>
            <Plan>
              <Node-Type>Sort</Node-Type>
              <Parent-Relationship>Inner</Parent-Relationship>
              <Output>
                <Item>e.f</Item>
                <Item>e.id</Item>
                <Item>e.g</Item>
              </Output>
              <Sort-Key>
                <Item>e.id</Item>
              </Sort-Key>
              <Plans>
                <Plan>
                  <Node-Type>Seq Scan</Node-Type>
                  <Parent-Relationship>Outer</Parent-Relationship>
                  <Relation-Name>e</Relation-Name>
                  <Schema>tmp</Schema>
                  <Alias>e</Alias>
                  <Output>
                    <Item>e.f</Item>
                    <Item>e.id</Item>
                    <Item>e.g</Item>
                  </Output>
                </Plan>
              </Plans>
            </Plan>
          </Plans>
        </Plan>
      </Query>
    </explain>
    ');

EXPLAINプランに含まれるItem要素の一覧

-- List every distinct <Item> value found in the stored plan text, sorted.
-- The text is split on line feeds (chr(10)) and each line is trimmed so that
-- indentation does not matter; only lines that begin with <Item> are kept,
-- and the surrounding tags are stripped from them.
with plan_lines as (
    select trim(raw.line) as elem
    from (
        select unnest(string_to_array(fcontent, chr(10))) as line
        from tmp.file
    ) as raw
)
select distinct
    regexp_replace(elem, E'<Item>|</Item>', '', 'g') as sql_line
from plan_lines
where elem like '<Item>%'
order by sql_line

Itemタグには25個の列が含まれています。しかし、このクエリの実行に必要なのは次の13個だけです: a.b, a.c, b.c, d.b, e.f, a.id, b.id, c.id, d.id, e.id, c.d, d.f, e.g。EXPLAINプランから、これらの列だけを取得する方法はありますか？

元のバージョン

たとえば、次のようなクエリがあります（あくまで説明用の例なので、内容を理解する必要はありません）：

-- Illustrative query: databox messages joined to their attached letter
-- documents, the related communication act, solver and responsible party.
-- Only rows for solver 41 whose communication act was performed after
-- yesterday's date are returned.
select
    dd.id_databox_data,
    dd.recipient_id,
    dd.sender_id,
    lp.business_name,
    fa.repayment_identification,
    ca.perform_time,    -- was unqualified; the plan resolves it to the same view column as ca.perform_time
    dd.subject as dd_subject,
    ca.subject as ca_subject,
    d.unique_name,
    s.name,
    s.long_name,
    lld.legal_template,
    lld.issue_date,
    ca.perform_time,
    dd.id_recipient_document_ident,
    dd.id_sender_document_ident,
    dci1.ref_number,    -- reference number of the recipient document ident
    dci2.ref_number     -- reference number of the sender document ident
from
    databox_data as dd
    join databox_data_attachments as dda on (dd.id_databox_data = dda.id_databox_data)
    join databox_attachment as da on (da.id_databox_attachment = dda.id_databox_attachment)
    join document as d on (d.id_document = da.id_document)
    join external_file_letter_data as fld on (fld.id_document = d.id_document)
    join letter_data as ld on (ld.id_letter_data = fld.id_letter_data)
    join legal_letter_data lld on (ld.id_letter_data = lld.id_letter_data)
    join legal_instrument li using (id_legal_instrument)
    left join execution e using (id_legal_instrument)
    join v_communication_act as ca on (ca.id_letter_data = ld.id_letter_data)
    join solver s using (id_solver)
    join responsibility as r on (r.id_responsibility = ca.id_related_responsibility)
    join party pr on (r.id_responsible = pr.id_party)
    join financial_accountability fa using (id_accountability)
    join flight f using (id_flight)
    join portfolio p using (id_portfolio)
    join legal_person lp on (pr.id_source = lp.id_party)
    left join v_authority va on (dd.recipient_id = va.data_box_id)
    left join databox_document_ident dci1 on (dd.id_recipient_document_ident = dci1.id_databox_document_ident)
    -- Fixed: dci2 was joined on id_recipient_document_ident, the same key as
    -- dci1, so dci2.ref_number always duplicated dci1.ref_number.
    -- NOTE(review): assumes dci2 was meant to be the sender's ident; confirm.
    left join databox_document_ident dci2 on (dd.id_sender_document_ident = dci2.id_databox_document_ident)
where
    ca.perform_time > (now()::date - 1)
    and s.id_solver = 41

私は explain (verbose true, format xml, costs false) を使用しています。

これにより、次のEXPLAINプラン（XML形式）が生成されます。残念ながら、プラン全体をここに貼り付けることはできません（SOの投稿文字数制限のため）。完全なEXPLAINプランが必要な場合は、このペーストビンを参照してください。

<explain xmlns="http://www.postgresql.org/2009/explain">
  <Query>
    <Plan>
      <Node-Type>Nested Loop</Node-Type>
      <Join-Type>Left</Join-Type>
      <Output>
        <Item>dd.id_databox_data</Item>
        <Item>dd.recipient_id</Item>
        <Item>dd.sender_id</Item>
        <Item>lp.business_name</Item>
        <Item>fa.repayment_identification</Item>
        <Item>act.perform_time</Item>
        <Item>dd.subject</Item>
        <Item>communication_act.subject</Item>
        <Item>d.unique_name</Item>
        <Item>s.name</Item>
        <Item>s.long_name</Item>
        <Item>lld.legal_template</Item>
        <Item>lld.issue_date</Item>
        <Item>act.perform_time</Item>
        <Item>dd.id_recipient_document_ident</Item>
        <Item>dd.id_sender_document_ident</Item>
        <Item>dci1.ref_number</Item>
        <Item>dci2.ref_number</Item>
      </Output>
      <Plans>
        <Plan>
          <Node-Type>Nested Loop</Node-Type>
          <Parent-Relationship>Outer</Parent-Relationship>
          <Join-Type>Left</Join-Type>
          <Output>
            <Item>dd.id_databox_data</Item>
            <Item>dd.recipient_id</Item>
            <Item>dd.sender_id</Item>
            <Item>dd.subject</Item>
            <Item>dd.id_recipient_document_ident</Item>
            <Item>dd.id_sender_document_ident</Item>
            <Item>d.unique_name</Item>
            <Item>lld.legal_template</Item>
            <Item>lld.issue_date</Item>
            <Item>communication_act.subject</Item>
            <Item>act.perform_time</Item>
            <Item>s.name</Item>
            <Item>s.long_name</Item>
            <Item>fa.repayment_identification</Item>
            <Item>lp.business_name</Item>
            <Item>dci1.ref_number</Item>
          </Output>
          <Plans>
            <Plan>
              <Node-Type>Nested Loop</Node-Type>
              <Parent-Relationship>Outer</Parent-Relationship>
              <Join-Type>Inner</Join-Type>
              <Output>
                <Item>dd.id_databox_data</Item>
                <Item>dd.recipient_id</Item>
                <Item>dd.sender_id</Item>
                <Item>dd.subject</Item>
                <Item>dd.id_recipient_document_ident</Item>
                <Item>dd.id_sender_document_ident</Item>
                <Item>d.unique_name</Item>
                <Item>lld.legal_template</Item>
                <Item>lld.issue_date</Item>
                <Item>communication_act.subject</Item>
                <Item>act.perform_time</Item>
                <Item>s.name</Item>
                <Item>s.long_name</Item>
                <Item>fa.repayment_identification</Item>

このようなプランから、使用されている列とテーブルのリストを取得する方法（できればSQLで）はありますか？<Item> の行を抽出するだけでは不十分です。テーブルがEXPLAINプランに最初に現れる箇所（最下位レベル）では、クエリの実行に必要のない列も含めて、すべての列が <Item> タグに列挙されるためです。

次のSQLを使用して、一意の<Item>タグを一覧表示しました。

-- List every distinct <Item> value found in the stored plan text, sorted.
-- The text is split on line feeds (chr(10)) and each line is trimmed so that
-- indentation does not matter; only lines that begin with <Item> are kept,
-- and the surrounding tags are stripped from them.
with plan_lines as (
    select trim(raw.line) as elem
    from (
        select unnest(string_to_array(fcontent, chr(10))) as line
        from tmp.file
    ) as raw
)
select distinct
    regexp_replace(elem, E'<Item>|</Item>', '', 'g') as sql_line
from plan_lines
where elem like '<Item>%'
order by sql_line

score 1 · Accepted Answer

XML の 3 つの領域を解析する必要があるように思えます。

クエリから返される列は、最初の output 要素の下（item タグ内）にあります。

結合の各サブノードには、プランナーが必要と判断した列で構成される output があります。結合列はここに含まれます。これで得られるのが上位集合（スーパーセット）である場合は、情報を取得するために join-filter の内容を解析する必要があるかもしれません。

この 2 つだけでは、必要な情報がすべて得られるわけではありません。列を取り出すには、filter 要素も解析する必要があります。

これを SQL で行う場合は、PostgreSQLのxml 関数を確認する必要があります。PostgreSQL の xml で xpath クエリを実行するか、xslt を介して実行できます。これは、正規表現で解析しようとするよりもはるかに優れています。

残念ながら、この問題は複雑で、実際に動作する例を示すのは少々難しいのですが、取りかかるための手がかりとしては十分であることを願っています。

xml - XML / JSON説明プランからクエリの列（およびテーブル）のリストを取得します

1 に答える 1

Related

Reference