0

「Spark with Machine learning」という本を参照して学習しています

groupId: org.apache.spark artifactId: spark-core_2.11 バージョン: 2.0.1

JavaApp.java

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.DoubleFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

/**
 * A simple Spark app in Java
 */
public class JavaApp {

    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext("local[2]", "First Spark App");
        // we take the raw data in CSV format and convert it into a set of records of the form (user, product, price)
        JavaRDD<String[]> data = sc.textFile("data/UserPurchaseHistory.csv")
                .map(new Function<String, String[]>() {
                    @Override
                    public String[] call(String s) throws Exception {
                        return s.split(",");
                    }
                });

        // let's count the number of purchases
        long numPurchases = data.count();

        // let's count how many unique users made purchases
        long uniqueUsers = data.map(new Function<String[], String>() {
            @Override
            public String call(String[] strings) throws Exception {
                return strings[0];
            }
        }).distinct().count();

        // let's sum up our total revenue
        double totalRevenue = data.map(new DoubleFunction<String[]>() {
            @Override
            public double call(String[] strings) throws Exception {
                //double ret=Double.parseDouble(strings[2]);
                //return ret;
                //return Double.parseDouble(strings[2]);
            }
        }).sum();

        // let's find our most popular product
        // first we map the data to records of (product, 1) using a PairFunction
        // and the Tuple2 class.
        // then we call a reduceByKey operation with a Function2, which is essentially the sum function
        List<Tuple2<String, Integer>> pairs = data.map(new PairFunction<String[], String, Integer>() {
            @Override
            public Tuple2<String, Integer> call(String[] strings) throws Exception {
                return new Tuple2(strings[1], 1);
            }
        }).reduceByKey(new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer integer, Integer integer2) throws Exception {
                return integer + integer2;
            }
        }).collect();

        // finally we sort the result. Note we need to create a Comparator function,
        // that reverses the sort order.
        Collections.sort(pairs, new Comparator<Tuple2<String, Integer>>() {
            @Override
            public int compare(Tuple2<String, Integer> o1, Tuple2<String, Integer> o2) {
                return -(o1._2() - o2._2());
            }
        });
        String mostPopular = pairs.get(0)._1();
        int purchases = pairs.get(0)._2();

        // print everything out
        System.out.println("Total purchases: " + numPurchases);
        System.out.println("Unique users: " + uniqueUsers);
        System.out.println("Total revenue: " + totalRevenue);
        System.out.println(String.format("Most popular product: %s with %d purchases",
                mostPopular, purchases));

    sc.stop();

    }

}

私のpom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>java-spark-app</groupId>
    <artifactId>java-spark-app</artifactId>
    <version>1.0</version>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>2.0.1</version>
        </dependency>
    </dependencies>

</project>

そのため、mavenを使用してコードをコンパイルしましたが、以下のエラーメッセージを解決できません

[INFO] 2 errors 
[INFO] -------------------------------------------------------------
[INFO] ------------------------------------------------------------------------
[INFO] BUILD FAILURE
[INFO] ------------------------------------------------------------------------
[INFO] Total time: 6.785 s
[INFO] Finished at: 2016-10-10T05:17:42+00:00
[INFO] Final Memory: 34M/777M
[INFO] ------------------------------------------------------------------------
[ERROR] Failed to execute goal org.apache.maven.plugins:maven-compiler-plugin:3.1:compile (default-compile) on project java-spark-app: Compilation failure: Compilation failure:
[ERROR] /home/ubuntu/workspace/practice/8519OS_Code/Chapter_01/java-spark-app/src/main/java/JavaApp.java:[40,35] method map in class org.apache.spark.api.java.AbstractJavaRDDLike<T,This> cannot be applied to given types;
[ERROR] required: org.apache.spark.api.java.function.Function<java.lang.String[],R>
[ERROR] found: <anonymous org.apache.spark.api.java.function.DoubleFunction<java.lang.String[]>>
[ERROR] reason: cannot infer type-variable(s) R
[ERROR] (argument mismatch; <anonymous org.apache.spark.api.java.function.DoubleFunction<java.lang.String[]>> cannot be converted to org.apache.spark.api.java.function.Function<java.lang.String[],R>)
[ERROR] /home/ubuntu/workspace/practice/8519OS_Code/Chapter_01/java-spark-app/src/main/java/JavaApp.java:[53,51] method map in class org.apache.spark.api.java.AbstractJavaRDDLike<T,This> cannot be applied to given types;
[ERROR] required: org.apache.spark.api.java.function.Function<java.lang.String[],R>
[ERROR] found: <anonymous org.apache.spark.api.java.function.PairFunction<java.lang.String[],java.lang.String,java.lang.Integer>>
[ERROR] reason: cannot infer type-variable(s) R
[ERROR] (argument mismatch; <anonymous org.apache.spark.api.java.function.PairFunction<java.lang.String[],java.lang.String,java.lang.Integer>> cannot be converted to org.apache.spark.api.java.function.Function<java.lang.String[],R>)
[ERROR] -> [Help 1]
[ERROR] 
[ERROR] To see the full stack trace of the errors, re-run Maven with the -e switch.
[ERROR] Re-run Maven using the -X switch to enable full debug logging.
[ERROR] 
[ERROR] For more information about the errors and possible solutions, please read the following articles:
[ERROR] [Help 1] http://cwiki.apache.org/confluence/display/MAVEN/MojoFailureException

この問題は4日間私を悩ませました。私を助けてください :(

4

1 に答える 1

0

参照しているコード サンプルは、古いバージョンの Spark を使用している可能性があります。投稿されたコードを修正しましたが、最新バージョンの Spark で問題なく動作しています。以下のコードをビルドして実行してみてください。

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.DoubleFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;

import scala.Tuple2;

public class JavaApp {
public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext("local[2]", "First Spark App");
        // we take the raw data in CSV format and convert it into a set of records of the form (user, product, price)
        JavaRDD<String[]> data = sc.textFile("data/UserPurchaseHistory.csv")
                .map(new Function<String, String[]>() {
                    @Override
                    public String[] call(String s) throws Exception {
                        return s.split(",");
                    }
                });

        // let's count the number of purchases
        long numPurchases = data.count();

        // let's count how many unique users made purchases
        long uniqueUsers = data.map(new Function<String[], String>() {
            @Override
            public String call(String[] strings) throws Exception {
                return strings[0];
            }
        }).distinct().count();

        // let's sum up our total revenue
        double totalRevenue = data.mapToDouble(new DoubleFunction<String[]>() {
            @Override
            public double call(String[] strings) throws Exception {
                //double ret=Double.parseDouble(strings[2]);
                //return ret;
                return Double.parseDouble(strings[2]);
            }
        }).sum();

        // let's find our most popular product
        // first we map the data to records of (product, 1) using a PairFunction
        // and the Tuple2 class.
        // then we call a reduceByKey operation with a Function2, which is essentially the sum function
        List<Tuple2<String, Integer>> pairs = data.mapToPair(new PairFunction<String[], String, Integer>() {
            @Override
            public Tuple2<String, Integer> call(String[] strings) throws Exception {
                return new Tuple2(strings[1], 1);
            }
        }).reduceByKey(new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer integer, Integer integer2) throws Exception {
                return integer + integer2;
            }
        }).collect();

        // finally we sort the result. Note we need to create a Comparator function,
        // that reverses the sort order.
        Collections.sort(new ArrayList(pairs), new Comparator<Tuple2<String, Integer>>() {
            @Override
            public int compare(Tuple2<String, Integer> o1, Tuple2<String, Integer> o2) {
                return -(o1._2() - o2._2());
            }
        });
        String mostPopular = pairs.get(0)._1();
        int purchases = pairs.get(0)._2();

        // print everything out
        System.out.println("Total purchases: " + numPurchases);
        System.out.println("Unique users: " + uniqueUsers);
        System.out.println("Total revenue: " + totalRevenue);
        System.out.println(String.format("Most popular product: %s with %d purchases",
                mostPopular, purchases));

    sc.stop();

    }
}
于 2016-10-10T10:35:23.417 に答える