Unable to find encoder for type stored in a Dataset.--万峥，MangoCool|芒果酷，永不止步！

Unable to find encoder for type stored in a Dataset.

2016-10-28 09:28:54 作者：MangoCool 来源：MangoCool

在参考spark官网编写ALS例子时报错：

Error:(36, 11) Unable to find encoder for type stored in a Dataset.  Primitive types (Int, String, etc) and Product types (case classes) are supported by importing spark.implicits._  Support for serializing other types will be added in future releases.
      .map(parseRating)
Error:(36, 11) not enough arguments for method map: (implicit evidence$7: org.apache.spark.sql.Encoder[com.dtxy.xbdp.test.ALSTest.Rating])org.apache.spark.sql.Dataset[com.dtxy.xbdp.test.ALSTest.Rating].
Unspecified value parameter evidence$7.
      .map(parseRating)

代码：

package com.dtxy.xbdp.test

import org.apache.spark.SparkConf
import org.apache.spark.ml.evaluation.RegressionEvaluator
import org.apache.spark.ml.recommendation.ALS
import org.apache.spark.sql.SparkSession

/**
  * Created by MANGOCOOL on 2016/10/27.
  *
  * ALS recommendation example: reads "::"-separated rating lines, trains an
  * ALS model on 80% of the data and prints the RMSE measured on the rest.
  *
  * NOTE: this is the FAILING version of the listing. As quoted above, the
  * compiler rejects the `.map(parseRating)` call with
  * "Unable to find encoder for type stored in a Dataset", because no implicit
  * `Encoder[Rating]` is in scope at that call.
  */
object ALSTest {

  // Runs when the object is initialised, i.e. before main's body executes.
  // NOTE(review): presumably needed so Hadoop's native helpers are found on a
  // Windows development machine -- confirm for your environment.
  System.setProperty("hadoop.home.dir", "E:\\Program Files\\hadoop-2.7.0")

  def main(args: Array[String]): Unit = {

    val sparkConf = new SparkConf().setAppName("ALS with ML Pipeline")
    // Local-mode session; the explicit .master/.config calls are applied on
    // top of the SparkConf built above.
    val spark = SparkSession
      .builder()
      .config(sparkConf)
      .master("local")
      .config("spark.sql.warehouse.dir","/")
      .getOrCreate()

    // Problem 1: Rating is declared locally inside main.
    // Problem 2: `import spark.implicits._` is missing.
    // Together these leave the `.map(parseRating)` call below without an
    // implicit Encoder[Rating], which is the reported compile error.
    case class Rating(userId: Int, movieId: Int, rating: Float)
    // Parses one "userId::movieId::rating" line; asserts exactly three fields.
    def parseRating(str: String): Rating = {
      val fields = str.split("::")
      assert(fields.size == 3)
      Rating(fields(0).toInt, fields(1).toInt, fields(2).toFloat)
    }

    // textFile yields a Dataset[String]; mapping it to Rating is the step that
    // requires the missing encoder.
    val ratings = spark.read.textFile("hdfs://masters/test/movielens.txt")
      .map(parseRating)
      .toDF()
    // Random split using weights 0.8 (training) and 0.2 (test).
    val Array(training, test) = ratings.randomSplit(Array(0.8, 0.2))

    // Build the recommendation model using ALS on the training data
    val als = new ALS()
      .setMaxIter(5)
      .setRegParam(0.01)
      .setUserCol("userId")
      .setItemCol("movieId")
      .setRatingCol("rating")
    val model = als.fit(training)

    // Evaluate the model by computing the RMSE on the test data
    val predictions = model.transform(test)

    val evaluator = new RegressionEvaluator()
      .setMetricName("rmse")
      .setLabelCol("rating")
      .setPredictionCol("prediction")
    val rmse = evaluator.evaluate(predictions)
    println(s"Root-mean-square error = $rmse")
  }

}

解决办法：

在报错位置之前（即创建 SparkSession 之后、调用 .map 之前）增加：import spark.implicits._

并将 case class Rating 从 main 方法内部移到方法外部（例如作为 object ALSTest 的成员），因为定义在方法内部的 case class 无法被 Spark 推导出 Encoder，代码如下：

package com.dtxy.xbdp.test

import org.apache.spark.SparkConf
import org.apache.spark.ml.evaluation.RegressionEvaluator
import org.apache.spark.ml.recommendation.ALS
import org.apache.spark.sql.SparkSession

/**
  * Created by MANGOCOOL on 2016/10/27.
  *
  * ALS recommendation example: reads "::"-separated rating lines, trains an
  * ALS model on 80% of the data and prints the RMSE measured on the rest.
  *
  * This is the working version of the listing. Compared with the failing one:
  *  - `Rating` is declared at object level instead of inside `main`;
  *  - `import spark.implicits._` is in scope before `.map(parseRating)`,
  *    which supplies the implicit `Encoder[Rating]` the call needs.
  */
object ALSTest {

  // Runs when the object is initialised, i.e. before main's body executes.
  // NOTE(review): presumably needed so Hadoop's native helpers are found on a
  // Windows development machine -- confirm for your environment.
  System.setProperty("hadoop.home.dir", "E:\\Program Files\\hadoop-2.7.0")

  /**
    * One parsed rating record.
    *
    * Kept at object level on purpose: when this class was declared inside
    * `main`, the compiler could not find an encoder for it.
    *
    * @param userId  user identifier (first field of an input line)
    * @param movieId item identifier (second field of an input line)
    * @param rating  rating value (third field of an input line)
    */
  case class Rating(userId: Int, movieId: Int, rating: Float)

  /**
    * Program entry point: builds a local SparkSession, loads and parses the
    * ratings file, fits ALS on a random 80% split and prints the RMSE computed
    * on the remaining 20%.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {

    val sparkConf = new SparkConf().setAppName("ALS with ML Pipeline")
    // Local-mode session; the explicit .master/.config calls are applied on
    // top of the SparkConf built above.
    val spark = SparkSession
      .builder()
      .config(sparkConf)
      .master("local")
      .config("spark.sql.warehouse.dir","/")
      .getOrCreate()

    try {
      // Must come after `spark` is defined (the import goes through that
      // value) and before the `.map` call that needs the implicit encoder.
      import spark.implicits._

      // Parses one "userId::movieId::rating" line.
      // `require` is used instead of `assert` so the check cannot be elided
      // at compile time and a malformed line is reported with its content.
      def parseRating(str: String): Rating = {
        val fields = str.split("::")
        require(
          fields.length == 3,
          s"Expected 3 '::'-separated fields but found ${fields.length} in line: $str")
        Rating(fields(0).toInt, fields(1).toInt, fields(2).toFloat)
      }

      val ratings = spark.read.textFile("hdfs://masters/test/movielens.txt")
        .map(parseRating)
        .toDF()
      // Random split using weights 0.8 (training) and 0.2 (test).
      val Array(training, test) = ratings.randomSplit(Array(0.8, 0.2))

      // Build the recommendation model using ALS on the training data
      val als = new ALS()
        .setMaxIter(5)
        .setRegParam(0.01)
        .setUserCol("userId")
        .setItemCol("movieId")
        .setRatingCol("rating")
      val model = als.fit(training)

      // Evaluate the model by computing the RMSE on the test data
      val predictions = model.transform(test)

      val evaluator = new RegressionEvaluator()
        .setMetricName("rmse")
        .setLabelCol("rating")
        .setPredictionCol("prediction")
      val rmse = evaluator.evaluate(predictions)
      println(s"Root-mean-square error = $rmse")
    } finally {
      // Always release the session, even when loading or training fails.
      spark.stop()
    }
  }

}

参考来源：http://stackoverflow.com/questions/38664972/why-is-unable-to-find-encoder-for-type-stored-in-a-dataset-when-creating-a-dat

标签： scala error ALS

上一篇Error:(39, 37) No implicit Ordering defined for org.apache.spark.sql.Row.

下一篇java.lang.IllegalArgumentException: requirement failed: Column features must be of type org.apache.spark.mllib.linalg.VectorUDT@f71b0bce but was actually org.apache.spark.ml.linalg.VectorUDT@3bfc3ba7.

Unable to find encoder for type stored in a Dataset.

2016-10-28 09:28:54 作者：MangoCool 来源：MangoCool

关于我

联系方式

座右铭