3

map reduce の例を実行したい:

    package my.test;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.MultiTableOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;




    /**
     * Demonstrates the use of the MultiTableOutputFormat class.
     * Using this class we can write the output of a Hadoop map reduce program
     * into different HBase tables.
     *
     * <p>The job reads a CSV file of {@code brand,model,size} lines, stores every
     * car in the {@code TestCars} table and stores, per brand, the number of cars
     * of each size in the {@code TestBrandsSizes} table.
     *
     * <p>The class is a {@link Tool}, so generic Hadoop options (for example
     * {@code -libjars}) are consumed by {@link ToolRunner} before {@link #run}
     * sees the remaining arguments.
     *
     * @version 1.0 19 Jul 2011
     * @author  Wildnove
     */
    public class TestMultiTable extends Configured implements Tool {

        private static final Logger LOG = Logger.getLogger(TestMultiTable.class);

        // Usage line: lists only the options that run() actually registers.
        private static final String CMDLINE = "my.test.TestMultiTable <inputFile> [-h] [-n name]";

        // Job name used when -n is not given (trailing space kept from the original default).
        private static final String DEFAULT_JOB_NAME = "wildnove.com - Tutorial MultiTableOutputFormat ";

        public static void main(String[] args) throws Exception {
            int res = ToolRunner.run(new TestMultiTable(), args);
            System.exit(res);
        }

        /**
         * Parses the command line, builds the job and waits for it to finish.
         *
         * @param args command line arguments left over after generic option parsing;
         *             the first positional argument is the input file
         * @return 0 when the job succeeded or only help was requested, -1 otherwise
         */
        @Override
        public int run(String[] args) throws Exception {
            HelpFormatter help = new HelpFormatter();
            Options options = new Options();
            options.addOption("h", "help", false, "print program usage");
            options.addOption("n", "name", true, "sets job name");

            CommandLineParser parser = new BasicParser();
            CommandLine cline;
            try {
                cline = parser.parse(options, args);
            } catch (ParseException e) {
                System.err.println(e.getMessage());
                help.printHelp(CMDLINE, options);
                return -1;
            }

            // -h was registered but never honoured before: print usage and stop.
            if (cline.hasOption('h')) {
                help.printHelp(CMDLINE, options);
                return 0;
            }

            String[] positional = cline.getArgs();
            if (positional.length < 1) {
                help.printHelp(CMDLINE, options);
                return -1;
            }

            String name = cline.hasOption('n') ? cline.getOptionValue('n') : DEFAULT_JOB_NAME;
            try {
                FileSystem fs = FileSystem.get(getConf());
                // Resolve the argument against the file system, then keep only the path part.
                Path inputFile = new Path(fs.makeQualified(new Path(positional[0])).toUri().getPath());
                return getMultiTableOutputJob(name, inputFile).waitForCompletion(true) ? 0 : -1;
            } catch (InterruptedException e) {
                // Preserve the interrupt for callers further up the stack.
                Thread.currentThread().interrupt();
                LOG.error(name + " was interrupted", e);
                return -1;
            } catch (Exception e) {
                // A runtime failure is not a usage error, so the usage text is not printed here.
                LOG.error(name + " failed", e);
                return -1;
            }
        }

        /**
         * Here we configure our job to use MultiTableOutputFormat class as map reduce output.
         * Note that we use 1 reduce only for debugging purpose, but you can use more than 1 reduce.
         *
         * @param name      job name
         * @param inputFile CSV file to process
         * @return the configured, not yet submitted job
         * @throws IOException if the job cannot be created or its dependency jars cannot be resolved
         */
        private Job getMultiTableOutputJob(String name, Path inputFile) throws IOException {
            if (LOG.isInfoEnabled()) {
                LOG.info(name + " starting...");
                LOG.info("computing file: " + inputFile);
            }
            Job job = new Job(getConf(), name);
            job.setJarByClass(TestMultiTable.class);
            job.setMapperClass(Mapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            FileInputFormat.addInputPath(job, inputFile);
            job.setOutputFormatClass(MultiTableOutputFormat.class);
            job.setNumReduceTasks(1);
            job.setReducerClass(Reducer.class);

            // Ship the HBase jar (and its ZooKeeper/protobuf/guava dependencies) with the job.
            // HADOOP_CLASSPATH only affects the client JVM; without this call the task JVMs
            // fail with ClassNotFoundException for MultiTableOutputFormat.
            // Must run after the output format class has been set.
            org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(job);

            return job;
        }

        private static class Mapper extends org.apache.hadoop.mapreduce.Mapper<LongWritable, Text, Text, Text> {

            // Reused across map() calls to avoid allocating two Text objects per record.
            private Text outKey = new Text();
            private Text outValue = new Text();

            /**
             * The map method splits the csv file according to this structure
             * brand,model,size (e.g. Cadillac,Seville,Midsize) and output all data using
             * brand as key and the couple model,size as value.
             * Lines that do not have exactly three fields are skipped.
             */
            @Override
            public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
                String[] fields = value.toString().split(",");
                if (fields.length != 3) {
                    return;
                }
                String brand = fields[0];
                String model = fields[1];
                String size = fields[2];

                outKey.set(brand);
                outValue.set(model + "," + size);
                context.write(outKey, outValue);
            }
        }

        private static class Reducer extends org.apache.hadoop.mapreduce.Reducer<Text, Text, ImmutableBytesWritable, Writable> {

            // Destination tables; the output key of MultiTableOutputFormat is the table name.
            private static final ImmutableBytesWritable CARS_TABLE =
                    new ImmutableBytesWritable(Bytes.toBytes("TestCars"));
            private static final ImmutableBytesWritable BRAND_SIZES_TABLE =
                    new ImmutableBytesWritable(Bytes.toBytes("TestBrandsSizes"));

            // Column families and fixed qualifiers, encoded once instead of once per record.
            private static final byte[] CAR_FAMILY = Bytes.toBytes("Car");
            private static final byte[] BRAND_SIZES_FAMILY = Bytes.toBytes("BrandSizes");
            private static final byte[] BRAND_QUALIFIER = Bytes.toBytes("brand");
            private static final byte[] MODEL_QUALIFIER = Bytes.toBytes("model");
            private static final byte[] SIZE_QUALIFIER = Bytes.toBytes("size");

            /**
             * The reduce method fill the TestCars table with all csv data,
             * compute some counters and save those counters into the TestBrandsSizes table.
             * So we use two different HBase table as output for the reduce method.
             * Values that are not of the form model,size are skipped.
             */
            @Override
            protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
                Map<String, Integer> statsSizeCounters = new HashMap<String, Integer>();
                String brand = key.toString();
                byte[] brandBytes = Bytes.toBytes(brand);

                // We are receiving all models,size grouped by brand.
                for (Text value : values) {
                    String[] fields = value.toString().split(",");
                    if (fields.length != 2) {
                        continue;
                    }
                    String model = fields[0];
                    String size = fields[1];

                    // Fill the TestCars table: row key is "brand,model".
                    Put carPut = new Put(Bytes.toBytes(brand + "," + model));
                    carPut.add(CAR_FAMILY, BRAND_QUALIFIER, brandBytes);
                    carPut.add(CAR_FAMILY, MODEL_QUALIFIER, Bytes.toBytes(model));
                    carPut.add(CAR_FAMILY, SIZE_QUALIFIER, Bytes.toBytes(size));
                    context.write(CARS_TABLE, carPut);

                    // Compute some counters: number of cars of each size for this brand.
                    Integer seen = statsSizeCounters.get(size);
                    statsSizeCounters.put(size, seen == null ? 1 : seen + 1);
                }

                // Nothing valid was seen for this brand, so there is nothing to store.
                if (statsSizeCounters.isEmpty()) {
                    return;
                }

                // Fill the TestBrandsSizes table: one row per brand, one column per size.
                // All counters share the same row, so they are written as a single Put.
                Put statsPut = new Put(brandBytes);
                for (Entry<String, Integer> entry : statsSizeCounters.entrySet()) {
                    // The size is the qualifier; the count is stored as a 4-byte int.
                    statsPut.add(BRAND_SIZES_FAMILY,
                            Bytes.toBytes(entry.getKey()),
                            Bytes.toBytes(entry.getValue().intValue()));
                }
                context.write(BRAND_SIZES_TABLE, statsPut);
            }
        }
    }

Eclipse のエクスポート機能(JAR ファイル)を使って mt.jar としてビルドしました。

mapreduce を実行します。

[zhouhh@Hadoop48 ~]$ HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase classpath`:`${HADOOP_HOME}/bin/hadoop classpath` ${HADOOP_HOME}/bin/hadoop jar mt.jar cars.csv
12/06/11 20:14:33 INFO test.TestMultiTable: wildnove.com - Tutorial MultiTableOutputFormat  starting...
12/06/11 20:14:33 INFO test.TestMultiTable: computing file: /user/zhouhh/cars.csv
12/06/11 20:14:34 INFO input.FileInputFormat: Total input paths to process : 1
12/06/11 20:14:34 INFO util.NativeCodeLoader: Loaded the native-hadoop library
12/06/11 20:14:34 WARN snappy.LoadSnappy: Snappy native library not loaded
12/06/11 20:14:35 INFO mapred.JobClient: Running job: job_201206111811_0012
12/06/11 20:14:36 INFO mapred.JobClient:  map 0% reduce 0%
12/06/11 20:14:42 INFO mapred.JobClient: Task Id : attempt_201206111811_0012_m_000002_0, Status : FAILED
java.lang.RuntimeException: java.lang.ClassNotFoundException: org.apache.hadoop.hbase.mapreduce.MultiTableOutputFormat
    at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:867)
    at org.apache.hadoop.mapreduce.JobContext.getOutputFormatClass(JobContext.java:235)
    at org.apache.hadoop.mapred.Task.initialize(Task.java:513)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:353)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)
    at org.apache.hadoop.mapred.Child.main(Child.java:249)
Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.hbase.mapreduce.MultiTableOutputFormat
    at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
    at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:423)
    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:356)
    at java.lang.Class.forName0(Native Method)
    at java.lang.Class.forName(Class.java:264)
    at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:820)
    at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:865)

cars.csv:

[zhouhh@Hadoop48 ~]$ cat cars.csv
Acura,Integra,Small
Acura,Legend,Midsize
Audi,90,Compact
Audi,100,Midsize
BMW,535i,Midsize
Buick,Century,Midsize
Buick,LeSabre,Large
Buick,Roadmaster,Large
Buick,Riviera,Midsize
Cadillac,DeVille,Large
Cadillac,Seville,Midsize

MultiTableOutputFormat.class は hbase-0.94.0.jar に含まれています。

[zhouhh@Hadoop48 ~]$ echo $HADOOP_CLASSPATH | tr ':' '\n' | grep hbase
/home/zhouhh/hbase-0.94.0/conf
/home/zhouhh/hbase-0.94.0
/home/zhouhh/hbase-0.94.0/hbase-0.94.0.jar
/home/zhouhh/hbase-0.94.0/hbase-0.94.0-tests.jar
/home/zhouhh/hbase-0.94.0/lib/activation-1.1.jar
/home/zhouhh/hbase-0.94.0/lib/asm-3.1.jar
/home/zhouhh/hbase-0.94.0/lib/avro-1.5.3.jar
/home/zhouhh/hbase-0.94.0/lib/avro-ipc-1.5.3.jar
/home/zhouhh/hbase-0.94.0/lib/commons-beanutils-1.7.0.jar
/home/zhouhh/hbase-0.94.0/lib/commons-beanutils-core-1.8.0.jar
/home/zhouhh/hbase-0.94.0/lib/commons-cli-1.2.jar
/home/zhouhh/hbase-0.94.0/lib/commons-codec-1.4.jar
/home/zhouhh/hbase-0.94.0/lib/commons-collections-3.2.1.jar
/home/zhouhh/hbase-0.94.0/lib/commons-configuration-1.6.jar
/home/zhouhh/hbase-0.94.0/lib/commons-digester-1.8.jar
...

私は多くの方法を試しましたが、同じエラーがまだあります。

誰でも私を助けることができますか?ありがとう

4

4 に答える 4

4

2 つの簡単なオプションがあります。

1) hbase-0.94.0.jar を mt.jar に含めたファット jar を作成します(`mvn package -Dfatjar` で作成できます)。

2) GenericOptionsParser を使用し(Tool を実装しているので、そうしようとしているのだと思います)、コマンドラインで -libjars パラメータを指定します。

于 2012-06-12T15:31:52.157 に答える
1
 `hadoop classpath`

`hbase classpath` 

上記の 2 つのコマンドはクラスターのクラスパスを出力します。これを HADOOP_CLASSPATH にエクスポートしてください。(クラスターのローカル環境を利用する標準的な方法です)。

  • 上記の出力を変数に取り込み、Linux コマンドで整形する必要があります。探している jar がそこに含まれていない場合は、mapreduce の -libjars オプションに追加してください。
于 2015-11-13T12:41:06.787 に答える
0

次のスクリプトを使用して、ジョブの依存関係を lib フォルダーに追加し、hbase の依存関係をジョブのクラスパスに追加しています。

# Collect every jar below the current directory as a comma-separated list,
# which is the format -libjars expects (each entry ends with a comma).
cp=$(find "$(pwd)" -name '*.jar' | tr '\n' ',')
# Append the HBase MapReduce dependency jars. Only the last output line is
# used so that any warnings printed before the classpath are dropped.
cp=$cp$(hbase mapredcp 2>&1 | tail -1 | tr ':' ',')
# The client JVM needs the same jars, but as a colon-separated classpath.
export HADOOP_CLASSPATH=$(echo "${cp}" | sed 's/,/:/g')
# -libjars ships the jars to the task JVMs; "$@" forwards the caller's
# arguments unchanged, including arguments that contain whitespace.
hadoop jar "$(pwd)/bin/mr.jar" \
-libjars "${cp}" \
"$@"
于 2015-07-16T14:31:31.970 に答える