hadoop - MRUnitでAvroを使用するとシリアル化でNullPointerExceptionが発生する

Question

MRUnitを使用して、AvroをつかったHadoopのmapreduceジョブをテストしようとしています。以下に示すように、NullPointerExceptionが発生します。pomとソースコードの一部を添付しました。ご助力いただければ幸いです。

ありがとう

私が得ているエラーは次のとおりです：

java.lang.NullPointerException
at org.apache.hadoop.mrunit.internal.io.Serialization.copy(Serialization.java:73)
at org.apache.hadoop.mrunit.internal.io.Serialization.copy(Serialization.java:91)
at org.apache.hadoop.mrunit.internal.io.Serialization.copyWithConf(Serialization.java:104)
at org.apache.hadoop.mrunit.TestDriver.copy(TestDriver.java:608)
at org.apache.hadoop.mrunit.MapDriverBase.setInputKey(MapDriverBase.java:64)
at org.apache.hadoop.mrunit.MapDriverBase.setInput(MapDriverBase.java:104)
at org.apache.hadoop.mrunit.MapDriverBase.withInput(MapDriverBase.java:218)
at org.lab41.project.mapreduce.ParseMetadataAsTextIntoAvroTest.testMap(ParseMetadataAsTextIntoAvroTest.java:115)
.....

pomスニペット：

<dependency>
    <groupId>org.apache.mrunit</groupId>
    <artifactId>mrunit</artifactId>
    <version>0.9.0-incubating</version>
    <classifier>hadoop2</classifier>
    <scope>test</scope>
</dependency>


<avro.version>1.7.4</avro.version>
<hadoop.version>2.0.0-mr1-cdh4.1.3</hadoop.version>

<dependency>
    <groupId>org.apache.avro</groupId>
    <artifactId>avro</artifactId>
    <version>${avro.version}</version>
</dependency>

<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>${hadoop.version}</version>
    <scope>provided</scope>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-core</artifactId>
    <version>${hadoop.version}</version>
    <scope>provided</scope>
 </dependency>
 <dependency>
    <groupId>org.apache.avro</groupId>
    <artifactId>avro-mapred</artifactId>
    <version>${avro.version}</version>
    <classifier>hadoop2</classifier>
 </dependency>

これがテストの抜粋です：

import static org.junit.Assert.*;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.text.ParseException;
import java.text.SimpleDateFormat;

import org.apache.avro.mapred.AvroKey;
import org.apache.avro.hadoop.io.AvroSerialization;
import org.apache.avro.mapred.AvroValue;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.types.Pair;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.lab41.project.domain.DataRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * MRUnit test for {@code ParseMetadataAsTextIntoAvroMapper}.
 *
 * <p>The mapper under test takes {@code LongWritable}/{@code Text} input and
 * emits {@code AvroKey<Long>}/{@code AvroValue<DataRecord>} output, as fixed
 * by the type parameters of {@link #mapDriver}.
 */
public class ParseMetadataAsTextIntoAvroTest {

    Logger logger = LoggerFactory
            .getLogger(ParseMetadataAsTextIntoAvroTest.class);
    // Driver that feeds input to the mapper and checks its output.
    private MapDriver<LongWritable, Text, AvroKey<Long>, AvroValue<DataRecord>> mapDriver;

    /** Class-level setup hook; intentionally empty. */
    @BeforeClass
    public static void setUpClass() {
    }

    /** Class-level teardown hook; intentionally empty. */
    @AfterClass
    public static void tearDownClass() {
    }

    /**
     * Creates a fresh mapper and driver before each test and sets the
     * {@code io.serializations} property on the driver's configuration.
     *
     * @throws IOException declared by the method signature
     */
    @Before
    public void setUp() throws IOException {
        ParseMetadataAsTextIntoAvroMapper mapper = new ParseMetadataAsTextIntoAvroMapper();

        mapDriver = new MapDriver<LongWritable, Text, AvroKey<Long>, AvroValue<DataRecord>>();
        mapDriver.setMapper(mapper);
        // NOTE(review): the property is set to a single-element array, so
        // AvroSerialization becomes the only configured serialization; any
        // previously configured values are not carried over.
        mapDriver.getConfiguration().setStrings("io.serializations", new String[]{  
            AvroSerialization.class.getName()
        });
    }

    /**
     * Feeds one (offset, text) pair to the mapper and expects a single
     * (key, record) pair as output.
     *
     * @throws ParseException declared by the method signature
     * @throws IOException declared by the method signature
     */
    @Test
    public void testMap() throws ParseException, IOException {
        // test0 is not defined in this excerpt; presumably a sample input line.
        Text testInputText = new Text(test0);

        DataRecord record = new DataRecord();
       ….

        AvroKey<Long> expectedPivot = new AvroKey<Long>(1L);
        AvroValue<DataRecord> expectedRecord = new AvroValue<DataRecord>(record);

        // The reported stack trace shows the NullPointerException being raised
        // from this withInput call, inside MRUnit's Serialization.copy.
        mapDriver.withInput(new Pair<LongWritable, Text>(new LongWritable(1), testInputText));
        mapDriver.withOutput(new Pair<AvroKey<Long>, AvroValue<DataRecord>>(expectedPivot, expectedRecord));
        mapDriver.runTest();

    }
}

score 10 · Accepted Answer

これを機能させるには、デフォルトのシリアル化（io.serializations）にAvroSerializationを追加する必要があります。また、AvroSerializationを構成する必要もあります。

 /**
  * Builds the map driver and registers Avro serialization alongside the
  * serializations that are already configured.
  *
  * @throws IOException declared by the method signature
  */
 @Before
public void setUp() throws IOException {
    ParseMetadataAsTextIntoAvroMapper mapper = new ParseMetadataAsTextIntoAvroMapper();
    mapDriver = new MapDriver<LongWritable, Text, AvroKey<Long>, AvroValue<NetworkRecord>>();
    mapDriver.setMapper(mapper);

    Configuration conf = mapDriver.getConfiguration();

    // Keep the serializations that are already configured and append
    // AvroSerialization as the last entry. Without the existing entries the
    // inputs to the mapper cannot be deserialized.
    String[] existing = conf.getStrings("io.serializations");
    String[] extended = new String[existing.length + 1];
    System.arraycopy(existing, 0, extended, 0, existing.length);
    extended[existing.length] = AvroSerialization.class.getName();
    conf.setStrings("io.serializations", extended);

    // AvroSerialization additionally needs the key writer schema and the
    // value writer schema.
    conf.setStrings("avro.serialization.key.writer.schema", Schema.create(Schema.Type.LONG).toString(true));
    conf.setStrings("avro.serialization.value.writer.schema", NetworkRecord.SCHEMA$.toString(true));
}

score 4 · Accepted Answer

これにより、コードが短く明確になるというメリットもあり、問題も解決します。

        // Sketch only: "your mapper", "your schema" and the "set..." lines are
        // placeholders to be replaced with project-specific code.

        // Create the driver for the mapper under test.
        MapDriver driver = MapDriver.newMapDriver(your mapper);

        // Configure Avro serialization on the driver's own configuration.
        Configuration conf = driver.getConfiguration();
        AvroSerialization.addToConfiguration(conf);
        AvroSerialization.setKeyWriterSchema(conf, your schema);
        AvroSerialization.setKeyReaderSchema(conf, your schema);
        // Build a Job from that configuration and apply the remaining job and
        // AvroJob settings to it.
        Job job = new Job(conf);
        job.set... your job settings;
        AvroJob.set... your avro job settings;

io.serializationsが正しく設定されないのは、mrunitのバグかもしれません。本来は、job.setInputFormatClass(AvroKeyInputFormat.class) によって設定されるはずです。

score 0 · Accepted Answer

AvroSerializationをデフォルトのシリアル化に追加し、AvroSerializationを構成する必要があります。

@Before
public void setUp() throws IOException {
    ParseMetadataAsTextIntoAvroMapper mapper = new ParseMetadataAsTextIntoAvroMapper();
    mapDriver = new MapDriver<LongWritable, Text, AvroKey<Long>, AvroValue<NetworkRecord>>();
    mapDriver.setMapper(mapper);
    Configuration configuration = mapDriver.getConfiguration();

    // Add AvroSerialization to the configuration
    // (copy over the default serializations for deserializing the mapper inputs)
    String[] serializations = configuration.getStrings(CommonConfigurationKeysPublic.IO_SERIALIZATIONS_KEY);
    String[] newSerializations = Arrays.copyOf(serializations, serializations.length + 1);
    newSerializations[serializations.length] = AvroSerialization.class.getName();
    configuration.setStrings(CommonConfigurationKeysPublic.IO_SERIALIZATIONS_KEY, newSerializations);

    //Configure AvroSerialization by specifying the key writer and value writer schemas
    AvroSerialization.setKeyWriterSchema(configuration, Schema.create(Schema.Type.LONG));
    AvroSerialization.setValueWriterSchema(configuration, NetworkRecord.SCHEMA$)
}

score -1 · Accepted Answer

ここで回答されています：https://issues.apache.org/jira/browse/MRUNIT-181 具体的には：https://cwiki.apache.org/confluence/display/MRUNIT/MRUnit+with+Avro

hadoop - MRUnitでAvroを使用するとシリアル化でNullPointerExceptionが発生する

回答 4 件

Related

Reference