0

外部 MapReduce ジョブを実行するために、いくつかの Pig 変数を Hadoop SequenceFile に保存したいと考えています。

データに (chararray, int) スキーマがあるとします。

(hello,1)
(test,2)
(example,3)

私はこの保存関数を書きました:

import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.pig.StoreFunc;
import org.apache.pig.data.Tuple;


public class StoreTest extends StoreFunc {

    private String storeLocation;
    private RecordWriter writer;
    private Job job;

    public StoreTest(){

    }

    @Override
    public OutputFormat getOutputFormat() throws IOException {
        //return new TextOutputFormat();
        return new SequenceFileOutputFormat();
    }

    @Override
    public void setStoreLocation(String location, Job job) throws IOException {
        this.storeLocation = location;
        this.job = job;
        System.out.println("Load location is " + storeLocation);
        FileOutputFormat.setOutputPath(job, new Path(location));        
        System.out.println("Out path " + FileOutputFormat.getOutputPath(job));
    }

    @Override
    public void prepareToWrite(RecordWriter writer) throws IOException {
        this.writer = writer;
    }

    @Override
    public void putNext(Tuple tuple) throws IOException {
        try {
            Text k = new Text(((String)tuple.get(0)));
            IntWritable v = new IntWritable((Integer)tuple.get(1));
            writer.write(k, v);
        } catch (InterruptedException ex) {
            Logger.getLogger(StoreTest.class.getName()).log(Level.SEVERE, null, ex);
        }

    }
}

そして、この豚のコード:

register MyUDFs.jar;
x = load '/user/pinoli/input' as (a:chararray,b:int);
store x into '/user/pinoli/output/' using StoreTest(); 

ただし、保存に失敗し、次のエラーが発生します。

ERROR org.apache.pig.tools.pigstats.PigStats - ERROR 0: java.io.IOException: wrong key class: org.apache.hadoop.io.Text is not class org.apache.hadoop.io.LongWritable

それを修正する方法はありますか??

4

1 に答える 1