package Spark_MLlib

import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.{BinaryLogisticRegressionSummary, LogisticRegression, LogisticRegressionModel}
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.ml.feature.{IndexToString, StringIndexer, VectorIndexer}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.sql.SparkSession

/**
 * Multinomial (multi-class) logistic regression example built on a Spark ML Pipeline.
 *
 * Reads a comma-separated text file whose rows hold four numeric feature values followed by a
 * string class label, trains a multinomial logistic regression model on a random 70% split,
 * prints the predictions for the remaining 30%, and reports accuracy plus the fitted model's
 * coefficients, intercepts, class count and feature count.
 */
object 多项式逻辑回归__多分类 {

  /**
   * One parsed input row; the field names become the DataFrame column names
   * ("features" and "label") that the pipeline stages below refer to.
   *
   * NOTE(review): no definition of `data_schema` is visible in this file, so it is declared here.
   * If the package already provides an identical case class, this nested one simply shadows it.
   */
  case class data_schema(features: org.apache.spark.ml.linalg.Vector, label: String)

  val spark = SparkSession.builder().master("local").getOrCreate()
  import spark.implicits._ // enables the implicit RDD -> DataFrame conversion (`toDF`)

  /**
   * Runs the whole train / predict / evaluate flow and prints the results to stdout.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    try {
      val df = spark.sparkContext
        .textFile("file:///home/soyo/桌面/spark编程测试数据/soyo.txt")
        .filter(_.trim.nonEmpty) // a blank (e.g. trailing) line would otherwise fail in toDouble
        .map(_.split(","))
        .map { x =>
          // columns 0-3 are the numeric features, column 4 is the class label
          data_schema(
            Vectors.dense(x(0).toDouble, x(1).toDouble, x(2).toDouble, x(3).toDouble),
            x(4)
          )
        }
        .toDF()
      // df.show(150)

      val labelIndexer = new StringIndexer()
        .setInputCol("label")
        .setOutputCol("indexedLabel")
        .fit(df)

      // Builds category indices for the feature vector column.
      val featureIndexer = new VectorIndexer()
        .setInputCol("features")
        .setOutputCol("indexedFeatures")
        .fit(df)

      val Array(trainData, testData) = df.randomSplit(Array(0.7, 0.3))

      // Elastic-net mixing parameter is 0.8. setFamily("multinomial") selects multinomial
      // logistic regression explicitly.
      // NOTE(review): the setMaxIter argument was missing in the original text; 10 is assumed
      // here because the Spark ML guide pairs it with the same regParam/elasticNetParam values.
      val lr = new LogisticRegression()
        .setLabelCol("indexedLabel")
        .setFeaturesCol("indexedFeatures")
        .setMaxIter(10)
        .setRegParam(0.3)
        .setElasticNetParam(0.8)
        .setFamily("multinomial")

      // Maps the numeric prediction back to the original string label.
      val labelConverter = new IndexToString()
        .setInputCol("prediction")
        .setOutputCol("predictionLabel")
        .setLabels(labelIndexer.labels)

      val lrPipeline = new Pipeline()
        .setStages(Array(labelIndexer, featureIndexer, lr, labelConverter))

      val lrPipeline_Model = lrPipeline.fit(trainData)
      val lrPrediction = lrPipeline_Model.transform(testData)
      lrPrediction.show()
      // lrPrediction.take(100).foreach(println)

      // Model evaluation. The metric is set explicitly because the evaluator's default metric
      // is F1, while the value printed below is labelled as accuracy.
      val evaluator = new MulticlassClassificationEvaluator()
        .setLabelCol("indexedLabel")
        .setPredictionCol("prediction")
        .setMetricName("accuracy")
      val lrAccuracy = evaluator.evaluate(lrPrediction)
      println(s"准确率为: ${lrAccuracy}")
      val lrError = 1 - lrAccuracy
      println(s"错误率为: ${lrError}")

      // Pick the fitted logistic regression stage by type instead of by position + cast.
      val LRmodel = lrPipeline_Model.stages
        .collectFirst { case model: LogisticRegressionModel => model }
        .getOrElse(
          throw new IllegalStateException("fitted pipeline contains no LogisticRegressionModel stage")
        )

      println(s"二项逻辑回归模型系数矩阵: ${LRmodel.coefficientMatrix}")
      println(s"二项逻辑回归模型的截距向量: ${LRmodel.interceptVector}")
      println(s"类的数量(标签可以使用的值): ${LRmodel.numClasses}")
      println(s"模型所接受的特征的数量: ${LRmodel.numFeatures}")

      // Original author's note: multinomial logistic regression carries no model summary.
      println(LRmodel.hasSummary)
    } finally {
      spark.stop() // release the local Spark context even if a step above fails
    }
  }
}

结果:

+-----------------+-----+------------+-----------------+--------------------+--------------------+----------+---------------+
|         features|label|indexedLabel|  indexedFeatures|       rawPrediction|         probability|prediction|predictionLabel|
+-----------------+-----+------------+-----------------+--------------------+--------------------+----------+---------------+
|[4.4,3.2,1.3,0.2]|soyo1|         1.0|[4.4,3.2,1.3,0.2]|[0.06313829278191...|[0.23858281707128...|       1.0|          soyo1|
|[4.6,3.4,1.4,0.3]|soyo1|         1.0|[4.6,3.4,1.4,0.3]|[0.06313829278191...|[0.23750012598226...|       1.0|          soyo1|
|[4.7,3.2,1.6,0.2]|soyo1|         1.0|[4.7,3.2,1.6,0.2]|[0.06313829278191...|[0.24710416166321...|       1.0|          soyo1|
|[4.8,3.4,1.6,0.2]|soyo1|         1.0|[4.8,3.4,1.6,0.2]|[0.06313829278191...|[0.23716995683018...|       1.0|          soyo1|
|[4.8,3.4,1.9,0.2]|soyo1|         1.0|[4.8,3.4,1.9,0.2]|[0.06313829278191...|[0.24567798276462...|       1.0|          soyo1|
|[4.9,2.4,3.3,1.0]|soyo2|         0.0|[4.9,2.4,3.3,1.0]|[0.06313829278191...|[0.38071131817453...|       0.0|          soyo2|
|[5.0,3.2,1.2,0.2]|soyo1|         1.0|[5.0,3.2,1.2,0.2]|[0.06313829278191...|[0.23576075216827...|       1.0|          soyo1|
|[5.0,3.5,1.3,0.3]|soyo1|         1.0|[5.0,3.5,1.3,0.3]|[0.06313829278191...|[0.22978111243935...|       1.0|          soyo1|
|[5.2,4.1,1.5,0.1]|soyo1|         1.0|[5.2,4.1,1.5,0.1]|[0.06313829278191...|[0.19523110424215...|       1.0|          soyo1|
|[5.4,3.9,1.3,0.4]|soyo1|         1.0|[5.4,3.9,1.3,0.4]|[0.06313829278191...|[0.21630436073381...|       1.0|          soyo1|
|[5.5,2.4,3.8,1.1]|soyo2|         0.0|[5.5,2.4,3.8,1.1]|[0.06313829278191...|[0.39807479409636...|       0.0|          soyo2|
|[5.5,2.5,4.0,1.3]|soyo2|         0.0|[5.5,2.5,4.0,1.3]|[0.06313829278191...|[0.40810357240132...|       0.0|          soyo2|
|[5.6,2.8,4.9,2.0]|soyo3|         2.0|[5.6,2.8,4.9,2.0]|[0.06313829278191...|[0.44454733071968...|       0.0|          soyo2|
|[5.7,2.9,4.2,1.3]|soyo2|         0.0|[5.7,2.9,4.2,1.3]|[0.06313829278191...|[0.39634982244233...|       0.0|          soyo2|
|[5.8,2.6,4.0,1.2]|soyo2|         0.0|[5.8,2.6,4.0,1.2]|[0.06313829278191...|[0.39930520027794...|       0.0|          soyo2|
|[5.8,2.7,4.1,1.0]|soyo2|         0.0|[5.8,2.7,4.1,1.0]|[0.06313829278191...|[0.38762610877473...|       0.0|          soyo2|
|[5.8,2.7,5.1,1.9]|soyo3|         2.0|[5.8,2.7,5.1,1.9]|[0.06313829278191...|[0.44792417666537...|       0.0|          soyo2|
|[5.9,3.0,5.1,1.8]|soyo3|         2.0|[5.9,3.0,5.1,1.8]|[0.06313829278191...|[0.43418725338764...|       0.0|          soyo2|
|[6.0,2.2,4.0,1.0]|soyo2|         0.0|[6.0,2.2,4.0,1.0]|[0.06313829278191...|[0.40634099537710...|       0.0|          soyo2|
|[6.0,2.7,5.1,1.6]|soyo2|         0.0|[6.0,2.7,5.1,1.6]|[0.06313829278191...|[0.43688076686419...|       0.0|          soyo2|
|[6.0,3.4,4.5,1.6]|soyo2|         0.0|[6.0,3.4,4.5,1.6]|[0.06313829278191...|[0.39704954911011...|       0.0|          soyo2|
|[6.2,2.2,4.5,1.5]|soyo2|         0.0|[6.2,2.2,4.5,1.5]|[0.06313829278191...|[0.43847273913421...|       0.0|          soyo2|
|[6.2,2.8,4.8,1.8]|soyo3|         2.0|[6.2,2.8,4.8,1.8]|[0.06313829278191...|[0.43518321759857...|       0.0|          soyo2|
|[6.3,2.7,4.9,1.8]|soyo3|         2.0|[6.3,2.7,4.9,1.8]|[0.06313829278191...|[0.44055947195014...|       0.0|          soyo2|
|[6.3,2.9,5.6,1.8]|soyo3|         2.0|[6.3,2.9,5.6,1.8]|[0.06313829278191...|[0.44715759200377...|       0.0|          soyo2|
|[6.3,3.4,5.6,2.4]|soyo3|         2.0|[6.3,3.4,5.6,2.4]|[0.06313829278191...|[0.45196576310313...|       0.0|          soyo2|
|[6.4,2.8,5.6,2.1]|soyo3|         2.0|[6.4,2.8,5.6,2.1]|[0.06313829278191...|[0.46017875340546...|       0.0|          soyo2|
|[6.4,2.8,5.6,2.2]|soyo3|         2.0|[6.4,2.8,5.6,2.2]|[0.06313829278191...|[0.46321910727428...|       0.0|          soyo2|
|[6.4,3.1,5.5,1.8]|soyo3|         2.0|[6.4,3.1,5.5,1.8]|[0.06313829278191...|[0.43862320280893...|       0.0|          soyo2|
|[6.4,3.2,4.5,1.5]|soyo2|         0.0|[6.4,3.2,4.5,1.5]|[0.06313829278191...|[0.40056786531830...|       0.0|          soyo2|
|[6.5,3.0,5.5,1.8]|soyo3|         2.0|[6.5,3.0,5.5,1.8]|[0.06313829278191...|[0.44199581778961...|       0.0|          soyo2|
|[6.6,2.9,4.6,1.3]|soyo2|         0.0|[6.6,2.9,4.6,1.3]|[0.06313829278191...|[0.40579282648595...|       0.0|          soyo2|
|[6.7,2.5,5.8,1.8]|soyo3|         2.0|[6.7,2.5,5.8,1.8]|[0.06313829278191...|[0.46287803722998...|       0.0|          soyo2|
|[6.7,3.0,5.2,2.3]|soyo3|         2.0|[6.7,3.0,5.2,2.3]|[0.06313829278191...|[0.45387841693477...|       0.0|          soyo2|
|[6.7,3.1,4.7,1.5]|soyo2|         0.0|[6.7,3.1,4.7,1.5]|[0.06313829278191...|[0.40924150360290...|       0.0|          soyo2|
|[6.7,3.3,5.7,2.5]|soyo3|         2.0|[6.7,3.3,5.7,2.5]|[0.06313829278191...|[0.45972648058424...|       0.0|          soyo2|
|[6.8,3.0,5.5,2.1]|soyo3|         2.0|[6.8,3.0,5.5,2.1]|[0.06313829278191...|[0.45251276088924...|       0.0|          soyo2|
|[6.8,3.2,5.9,2.3]|soyo3|         2.0|[6.8,3.2,5.9,2.3]|[0.06313829278191...|[0.45975331380088...|       0.0|          soyo2|
|[6.9,3.2,5.7,2.3]|soyo3|         2.0|[6.9,3.2,5.7,2.3]|[0.06313829278191...|[0.45642868507279...|       0.0|          soyo2|
|[7.2,3.0,5.8,1.6]|soyo3|         2.0|[7.2,3.0,5.8,1.6]|[0.06313829278191...|[0.44031726493318...|       0.0|          soyo2|
|[7.2,3.2,6.0,1.8]|soyo3|         2.0|[7.2,3.2,6.0,1.8]|[0.06313829278191...|[0.44483171938259...|       0.0|          soyo2|
|[7.6,3.0,6.6,2.1]|soyo3|         2.0|[7.6,3.0,6.6,2.1]|[0.06313829278191...|[0.47047723863543...|       0.0|          soyo2|
|[7.7,3.0,6.1,2.3]|soyo3|         2.0|[7.7,3.0,6.1,2.3]|[0.06313829278191...|[0.46845272424381...|       0.0|          soyo2|
|[7.7,3.8,6.7,2.2]|soyo3|         2.0|[7.7,3.8,6.7,2.2]|[0.06313829278191...|[0.45233124776236...|       0.0|          soyo2|
+-----------------+-----+------------+-----------------+--------------------+--------------------+----------+---------------+

准确率为: 0.36458333333333337
错误率为: 0.6354166666666666
二项逻辑回归模型系数矩阵: 3 x 4 CSCMatrix
(1,1) 0.35559564188466614
(1,2) -0.203185158868005
(1,3) -0.43876460704959996
(2,3) 0.0283914830858408
二项逻辑回归模型的截距向量: [0.06313829278191783,0.1708622138778958,-0.23400050665981365]
类的数量(标签可以使用的值): 3
模型所接受的特征的数量: 4
false

Spark 多项式逻辑回归__多分类的更多相关文章

  1. Spark 多项式逻辑回归__二分类

    package Spark_MLlib import org.apache.spark.ml.Pipeline import org.apache.spark.ml.classification.{L ...

  2. Spark 二项逻辑回归__二分类

    package Spark_MLlib import org.apache.spark.ml.Pipeline import org.apache.spark.ml.classification.{B ...

  3. stanford coursera 机器学习编程作业 exercise 3(逻辑回归实现多分类问题)

    本作业使用逻辑回归(logistic regression)和神经网络(neural networks)识别手写的阿拉伯数字(0-9) 关于逻辑回归的一个编程练习,可参考:http://www.cnb ...

  4. Spark Mllib逻辑回归算法分析

    原创文章,转载请注明: 转载自http://www.cnblogs.com/tovin/p/3816289.html 本文以spark 1.0.0版本MLlib算法为准进行分析 一.代码结构 逻辑回归 ...

  5. Spark LogisticRegression 逻辑回归之建模

    导入包 import org.apache.spark.sql.SparkSession import org.apache.spark.sql.Dataset import org.apache.s ...

  6. [Python]数据挖掘(1)、梯度下降求解逻辑回归——考核成绩分类

    ps:本博客内容根据唐宇迪的机器学习经典算法  学习视频复制总结而来 http://www.abcplus.com.cn/course/83/tasks 逻辑回归 问题描述:我们将建立一个逻辑回归模 ...

  7. Spark ML逻辑回归

    import org.apache.log4j.{Level, Logger} import org.apache.spark.ml.classification.LogisticRegression ...

  8. Spark 机器学习------逻辑回归

    package Spark_MLlib import javassist.bytecode.SignatureAttribute.ArrayType import org.apache.spark.s ...

  9. scikit-learn机器学习(二)逻辑回归进行二分类(垃圾邮件分类),二分类性能指标,画ROC曲线,计算acc,recall,precision,f1

    数据来自UCI机器学习仓库中的垃圾信息数据集 数据可从http://archive.ics.uci.edu/ml/datasets/sms+spam+collection下载 转成csv载入数据 im ...

随机推荐

  1. windows下mysql 5.7版本中修改编码为utf-8的方法

    方法如下 首先通过 show variables like 'character_set_%';查看mysql字符集情 默认编码为 latin1 然后关闭数据库 在mysql安装目录下找到my.ini ...

  2. LeetCode(53) Maximum Subarray

    题目 Find the contiguous subarray within an array (containing at least one number) which has the large ...

  3. 初学数位DP

    所谓数位dp,字面意思就是在数位上进行dp,数位的含义:一个数有个位.十位.百位.千位.等等,数的每一位就是数位. 数位DP一般应用于: 求出给定区间[A,B]内,符合条件P[i]的数 i 的个数. ...

  4. 【01】emmet系列之基础介绍

    [01]emmet系列之基础介绍 [02]emmet系列之HTML语法 [03]emmet系列之CSS语法 [04]emmet系列之编辑器 [05]emmet系列之各种缩写 相关网址 官网:http: ...

  5. 看板娘 & 二次元 & live2d

    live2d https://l2dwidget.js.org/dev.html https://github.com/xiazeyu/live2d-widget.js 看板娘 要切换看板娘吗? ht ...

  6. [NOIP2004] 提高组 洛谷P1090 合并果子

    题目描述 在一个果园里,多多已经将所有的果子打了下来,而且按果子的不同种类分成了不同的堆.多多决定把所有的果子合成一堆. 每一次合并,多多可以把两堆果子合并到一起,消耗的体力等于两堆果子的重量之和.可 ...

  7. vim状态栏的扩充

    将以下内容添加到~/.vimrc文件中: set statusline= set statusline+=%7*\[%n]                                  " ...

  8. FZU Problem 2132 LQX的作业 (数学题)

    http://acm.fzu.edu.cn/problem.php?pid=2132 N个数已经排成非递减顺序,那么每次可以取 前m->n个在x前面.取前m个在x前面的概率是 C(n,m)*x^ ...

  9. POJ 3469_Dual Core CPU

    题意: N个模块可以在A,B两个核上运行,分别需要A[i]和B[i],模块之间需要传递数据,若两个模块在同一核上,则不需要花费,否则需要花费w[i].问最少需要花费多少? 分析: 用最小的费用将两个对 ...

  10. 使用Post方法模拟登陆爬取网页(转)

    使用Post方法模拟登陆爬取网页   最近弄爬虫,遇到的一个问题就是如何使用post方法模拟登陆爬取网页.下面是极简版的代码: import java.io.BufferedReader; impor ...