AWS S3 の場所を指すベースの場所を持つハイブ テーブルを作成しました。ただし、'Insert Overwrite' クエリを使用して HDFS クラスターにパーティションを作成したいと考えています。
以下の手順:
-- Create intermediate table
create table test_int_ash
( loc string)
partitioned by (name string, age int)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
stored as textfile
location '/user/ash/test_int';
-- Insert into intermedate table with two names 'rash' and 'nash'
INSERT INTO test_int_ash partition (name="rash",age=20) values ('brisbane');
INSERT INTO test_int_ash partition (name="rash",age=30) values ('Sydney');
INSERT INTO test_int_ash partition (name="rash",age=40) values ('Melbourne');
INSERT INTO test_int_ash partition (name="rash",age=50) values ('Perth');
INSERT INTO test_int_ash partition (name="nash",age=50) values ('Auckland');
INSERT INTO test_int_ash partition (name="nash",age=40) values ('Wellington');
-- create curated table
create external table test_curated_ash
( loc string)
partitioned by (name string, age int)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
stored as textfile
location 's3a://mybucket/tmp/test_curated/';
-- load curated table from intermedate table, using dynamic partition method, creates partitions on aws s3.
insert overwrite table test_curated_ash partition(name='rash',age)
select loc,age from test_int_ash where name='rash' ;
-- I want to keep this partition on HDFS cluster, below query doesnt work
insert overwrite table test_curated_ash partition(name='nash',age) location 'hdfs://mynamenode/user/ash/test_curated_new'
select loc,age from test_int_ash where name='nash';
以下のクエリは機能しますが、「静的パーティション」メソッドで処理したくありません。
alter table test_curated_ash add partition(name='nash',age=40) location 'hdfs://swmcdh1/user/contexti/ash/test_curated_new/name=nash/age=40';
alter table test_curated_ash add partition(name='nash',age=50) location 'hdfs://swmcdh1/user/contexti/ash/test_curated_new/name=nash/age=50';
insert overwrite table test_curated_ash partition(name='nash',age)
select loc,age from test_int_ash where name='nash'
「挿入上書き」動的クエリでパーティションの場所を設定する方法を教えてください。