Hadoop下大矩阵乘法Version2
1)使用本方法计算F*B,其中F是1000*1000的矩阵,B是1000*20000的矩阵。集群共三个节点,每个节点一个CPU核(集群装在虚拟机里,
宿主机只有4个CPU核),每个节点配置一个map槽和一个reduce槽,完成矩阵运算耗时5分钟。
2)源码如下:
1 /**
2 * Created with IntelliJ IDEA.
3 * User: hadoop
4 * Date: 16-3-14
5 * Time: 下午3:13
6 * To change this template use File | Settings | File Templates.
7 */
8 import org.apache.hadoop.conf.Configuration;
9 import org.apache.hadoop.fs.FileSystem;
10 import java.io.IOException;
11 import java.lang.reflect.Array;
12 import java.net.URI;
13 import org.apache.hadoop.fs.Path;
14 import org.apache.hadoop.io.*;
15 import org.apache.hadoop.io.DoubleWritable;
16 import org.apache.hadoop.io.Writable;
17 import org.apache.hadoop.mapreduce.InputSplit;
18 import org.apache.hadoop.mapreduce.Job;
19 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
20 import org.apache.hadoop.mapreduce.lib.input.FileSplit;
21 import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
22 import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
23 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
24 import org.apache.hadoop.mapreduce.Reducer;
25 import org.apache.hadoop.mapreduce.Mapper;
26 import org.apache.hadoop.filecache.DistributedCache;
27 import org.apache.hadoop.util.ReflectionUtils;
28
29 public class MutiDoubleInputMatrixProduct {
30
31 public static void initDoubleArrayWritable(int length,DoubleWritable[] doubleArrayWritable){
32 for (int i=0;i<length;++i){
33 doubleArrayWritable[i]=new DoubleWritable(0.0);
34 }
35 }
36
37 public static class MyMapper extends Mapper<IntWritable,DoubleArrayWritable,IntWritable,DoubleArrayWritable>{
38 public DoubleArrayWritable map_value=new DoubleArrayWritable();
39 public double[][] leftMatrix=null;/******************************************/
40 //public Object obValue=null;
41 public DoubleWritable[] arraySum=null;
42 public DoubleWritable[] tempColumnArrayDoubleWritable=null;
43 public DoubleWritable[] tempRowArrayDoubleWritable=null;
44 public double sum=0;
45 public double uValue;
46 public int leftMatrixRowNum;
47 public int leftMatrixColumnNum;
48 public void setup(Context context) throws IOException {
49 Configuration conf=context.getConfiguration();
50 leftMatrixRowNum=conf.getInt("leftMatrixRowNum",10);
51 leftMatrixColumnNum=conf.getInt("leftMatrixColumnNum",10);
52 leftMatrix=new double[leftMatrixRowNum][leftMatrixColumnNum];
53 uValue=(double)(context.getConfiguration().getFloat("u",1.0f));
54 tempRowArrayDoubleWritable=new DoubleWritable[leftMatrixColumnNum];
55 initDoubleArrayWritable(leftMatrixColumnNum,tempRowArrayDoubleWritable);
56 tempColumnArrayDoubleWritable=new DoubleWritable[leftMatrixRowNum];
57 initDoubleArrayWritable(leftMatrixRowNum,tempColumnArrayDoubleWritable);
58 System.out.println("map setup() start!");
59 //URI[] cacheFiles=DistributedCache.getCacheFiles(context.getConfiguration());
60 Path[] cacheFiles=DistributedCache.getLocalCacheFiles(conf);
61 String localCacheFile="file://"+cacheFiles[0].toString();
62 //URI[] cacheFiles=DistributedCache.getCacheFiles(conf);
63 //DistributedCache.
64 System.out.println("local path is:"+cacheFiles[0].toString());
65 // URI[] cacheFiles=DistributedCache.getCacheFiles(context.getConfiguration());
66 FileSystem fs =FileSystem.get(URI.create(localCacheFile), conf);
67 SequenceFile.Reader reader=null;
68 reader=new SequenceFile.Reader(fs,new Path(localCacheFile),conf);
69 IntWritable key= (IntWritable)ReflectionUtils.newInstance(reader.getKeyClass(),conf);
70 DoubleArrayWritable value= (DoubleArrayWritable)ReflectionUtils.newInstance(reader.getValueClass(),conf);
71 //int valueLength=0;
72 int rowIndex=0;
73 int index;
74 while (reader.next(key,value)){
75 index=-1;
76 for (Writable val:value.get()){
77 tempRowArrayDoubleWritable[++index].set(((DoubleWritable)val).get());
78 }
79 //obValue=value.toArray();
80 rowIndex=key.get();
81 leftMatrix[rowIndex]=new double[leftMatrixColumnNum];
82 //this.leftMatrix=new double[valueLength][Integer.parseInt(context.getConfiguration().get("leftMatrixColumnNum"))];
83 for (int i=0;i<leftMatrixColumnNum;++i){
84 //leftMatrix[rowIndex][i]=Double.parseDouble(Array.get(obValue, i).toString());
85 //leftMatrix[rowIndex][i]=Array.getDouble(obValue, i);
86 leftMatrix[rowIndex][i]= tempRowArrayDoubleWritable[i].get();
87 }
88
89 }
90 arraySum=new DoubleWritable[leftMatrix.length];
91 initDoubleArrayWritable(leftMatrix.length,arraySum);
92 }
93 public void map(IntWritable key,DoubleArrayWritable value,Context context) throws IOException, InterruptedException {
94 //obValue=value.toArray();
95 InputSplit inputSplit=context.getInputSplit();
96 String fileName=((FileSplit)inputSplit).getPath().getName();
97 if (fileName.startsWith("FB")) {
98 context.write(key,value);
99 }
100 else{
101 int ii=-1;
102 for(Writable val:value.get()){
103 tempColumnArrayDoubleWritable[++ii].set(((DoubleWritable)val).get());
104 }
105 //arraySum=new DoubleWritable[this.leftMatrix.length];
106 for (int i=0;i<this.leftMatrix.length;++i){
107 sum=0;
108 for (int j=0;j<this.leftMatrix[0].length;++j){
109 //sum+= this.leftMatrix[i][j]*Double.parseDouble(Array.get(obValue,j).toString())*(double)(context.getConfiguration().getFloat("u",1f));
110 //sum+= this.leftMatrix[i][j]*Array.getDouble(obValue,j)*uValue;
111 sum+= this.leftMatrix[i][j]*tempColumnArrayDoubleWritable[j].get()*uValue;
112 }
113 arraySum[i].set(sum);
114 //arraySum[i].set(sum);
115 }
116 map_value.set(arraySum);
117 context.write(key,map_value);
118 }
119 }
120 }
121 public static class MyReducer extends Reducer<IntWritable,DoubleArrayWritable,IntWritable,DoubleArrayWritable>{
122 public DoubleWritable[] sum=null;
123 // public Object obValue=null;
124 public DoubleArrayWritable valueArrayWritable=new DoubleArrayWritable();
125 public DoubleWritable[] tempColumnArrayDoubleWritable=null;
126 // public DoubleWritable[] tempRowArrayDoubleWritable=null;
127 //private int leftMatrixColumnNum;
128 private int leftMatrixRowNum;
129
130 public void setup(Context context){
131 //leftMatrixColumnNum=context.getConfiguration().getInt("leftMatrixColumnNum",100);
132 leftMatrixRowNum=context.getConfiguration().getInt("leftMatrixRowNum",100);
133 sum=new DoubleWritable[leftMatrixRowNum];
134 initDoubleArrayWritable(leftMatrixRowNum,sum);
135 //tempRowArrayDoubleWritable=new DoubleWritable[leftMatrixColumnNum];
136 tempColumnArrayDoubleWritable=new DoubleWritable[leftMatrixRowNum];
137 initDoubleArrayWritable(leftMatrixRowNum,tempColumnArrayDoubleWritable);
138 }
139
140 public void reduce(IntWritable key,Iterable<DoubleArrayWritable>value,Context context) throws IOException, InterruptedException {
141 //int valueLength=0;
142 for(DoubleArrayWritable doubleValue:value){
143 int index=-1;
144 for (Writable val:doubleValue.get()){
145 tempColumnArrayDoubleWritable[++index].set(((DoubleWritable)val).get());
146 }
147 //valueLength=Array.getLength(obValue);
148 for (int i=0;i<leftMatrixRowNum;++i){
149 //sum[i]=new DoubleWritable(Double.parseDouble(Array.get(obValue,i).toString())+sum[i].get());
150 //sum[i]=new DoubleWritable(Array.getDouble(obValue,i)+sum[i].get());
151 sum[i].set(tempColumnArrayDoubleWritable[i].get()+sum[i].get());
152 }
153 }
154 //valueArrayWritable.set(sum);
155 valueArrayWritable.set(tempColumnArrayDoubleWritable);
156 context.write(key,valueArrayWritable);
157 for (int i=0;i<sum.length;++i){
158 sum[i].set(0.0);
159 }
160
161 }
162 }
163
164 public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
165 String uri=args[3];
166 String outUri=args[4];
167 String cachePath=args[2];
168 HDFSOperator.deleteDir(outUri);
169 Configuration conf=new Configuration();
170 DistributedCache.addCacheFile(URI.create(cachePath),conf);//添加分布式缓存
171 /**************************************************/
172 //FileSystem fs=FileSystem.get(URI.create(uri),conf);
173 //fs.delete(new Path(outUri),true);
174 /*********************************************************/
175 conf.setInt("leftMatrixColumnNum",Integer.parseInt(args[0]));
176 conf.setInt("leftMatrixRowNum",Integer.parseInt(args[1]));
177 conf.setFloat("u",0.35f);
178 conf.set("mapred.jar","MutiDoubleInputMatrixProduct.jar");
179 Job job=new Job(conf,"MatrixProdcut");
180 job.setJarByClass(MutiDoubleInputMatrixProduct.class);
181 job.setInputFormatClass(SequenceFileInputFormat.class);
182 job.setOutputFormatClass(SequenceFileOutputFormat.class);
183 job.setMapperClass(MyMapper.class);
184 job.setReducerClass(MyReducer.class);
185 job.setMapOutputKeyClass(IntWritable.class);
186 job.setMapOutputValueClass(DoubleArrayWritable.class);
187 job.setOutputKeyClass(IntWritable.class);
188 job.setOutputValueClass(DoubleArrayWritable.class);
189 FileInputFormat.setInputPaths(job, new Path(uri));
190 FileOutputFormat.setOutputPath(job,new Path(outUri));
191 System.exit(job.waitForCompletion(true)?0:1);
192 }
193
194
195 }
196 class DoubleArrayWritable extends ArrayWritable {
197 public DoubleArrayWritable(){
198 super(DoubleWritable.class);
199 }
200 /*
201 public String toString(){
202 StringBuilder sb=new StringBuilder();
203 for (Writable val:get()){
204 DoubleWritable doubleWritable=(DoubleWritable)val;
205 sb.append(doubleWritable.get());
206 sb.append(",");
207 }
208 sb.deleteCharAt(sb.length()-1);
209 return sb.toString();
210 }
211 */
212 }
213
214 class HDFSOperator{
215 public static boolean deleteDir(String dir)throws IOException{
216 Configuration conf=new Configuration();
217 FileSystem fs =FileSystem.get(conf);
218 boolean result=fs.delete(new Path(dir),true);
219 System.out.println("sOutput delete");
220 fs.close();
221 return result;
222 }
223 }
Hadoop下大矩阵乘法Version2的更多相关文章
- MapReduce实现大矩阵乘法
来自:http://blog.csdn.net/xyilu/article/details/9066973 引言 何 为大矩阵?Excel.SPSS,甚至SAS处理不了或者处理起来非常困难,需要设计巧 ...
- [模板][题解][Luogu1939]矩阵乘法加速递推(详解)
题目传送门 题目大意:计算数列a的第n项,其中: \[a[1] = a[2] = a[3] = 1\] \[a[i] = a[i-3] + a[i - 1]\] \[(n ≤ 2 \times 10^ ...
- CNN卷积神经网络_深度残差网络 ResNet——解决神经网络过深反而引起误差增加的根本问题,Highway NetWork 则允许保留一定比例的原始输入 x。(这种思想在inception模型也有,例如卷积是concat并行,而不是串行)这样前面一层的信息,有一定比例可以不经过矩阵乘法和非线性变换,直接传输到下一层,仿佛一条信息高速公路,因此得名Highway Network
from:https://blog.csdn.net/diamonjoy_zone/article/details/70904212 环境:Win8.1 TensorFlow1.0.1 软件:Anac ...
- 矩阵乘法在hadoop的实现
先随机生成一个矩阵,矩阵的行数与列数由用户输入: #!/bin/bashfor i in `seq 1 $1`do for j in `seq 1 $2` do s=$((RANDOM%100)) e ...
- THUSCH 2017 大魔法师(矩阵乘法+线段树)
题意 https://loj.ac/problem/2980 思路 区间修改考虑用线段树维护.由于一段区间的 \(A,B,C\) 可以表示成由原来的 \(A,B,C\) 乘上带上系数再加上某一个某个常 ...
- MapReduce实现矩阵乘法
简单回想一下矩阵乘法: 矩阵乘法要求左矩阵的列数与右矩阵的行数相等.m×n的矩阵A,与n×p的矩阵B相乘,结果为m×p的矩阵C.具体内容能够查看:矩阵乘法. 为了方便描写叙述,先进行如果: 矩阵A的行 ...
- 矩阵乘法的MapReduce实现
对于任意矩阵M和N,若矩阵M的列数等于矩阵N的行数,则记M和N的乘积为P=M*N,其中mik 记做矩阵M的第i行和第k列,nkj记做矩阵N的第k行和第j列,则矩阵P中,第i行第j列的元素可表示为公式( ...
- OpenGL学习进程(12)第九课:矩阵乘法实现3D变换
本节是OpenGL学习的第九个课时,下面将详细介绍OpenGL的多种3D变换和如何操作矩阵堆栈. (1)3D变换: OpenGL中绘制3D世界的空间变换包括:模型变换.视图变换.投影变换和视口 ...
- 【模拟题(电子科大MaxKU)】解题报告【树形问题】【矩阵乘法】【快速幂】【数论】
目录: 1:一道简单题[树形问题](Bzoj 1827 奶牛大集会) 2:一道更简单题[矩阵乘法][快速幂] 3:最简单题[技巧] 话说这些题目的名字也是够了.... 题目: 1.一道简单题 时间1s ...
随机推荐
- var result = eval('(' + data + ')');的学习
$.post("url", function(data) { //这里的function(data)这里的data是前端页面获取的后台的返回的数据: var result = ev ...
- bzoj 3771 Triple FFT 生成函数+容斥
Triple Time Limit: 20 Sec Memory Limit: 64 MBSubmit: 847 Solved: 482[Submit][Status][Discuss] Desc ...
- js和jquery修改背景颜色的区别
html: <HTML> <head> <meta http-equiv="content-type" content="text/html ...
- c# vs2008报表
1. 做报表没做几次,第一次做的都忘记了,还好今天做一下就把报表弄成功了.报表中“参数字段”是可以变的,就是说需要自己赋值或者是要计算的.而在苏据库字段里面的是固定的值.不需要计算(注:有的字段查询出 ...
- AtCoder Regular Contest 075 D Widespread
题目传送门 这道题其实二分一下答案就okay了的 不过LL什么的有时候忘了加 被卡了下 #include<cstdio> #include<cstring> #include& ...
- [BZOJ1602&BZOJ1787&BZOJ2144]树上LCA的算法巩固练习
简述求LCA的倍增算法 对于树上的所有节点,我们可以很轻松地通过dfs求出其直接的父亲节点以及其深度 通过类似RMQ的原理我们可以处理出每个节点的第2^i个父亲 //这个过程既可以在dfs之后双重循环 ...
- HDU 1840 Equations (数学)
title: Equations 数学 杭电1840 tags: [数学] 题目链接 Problem Description All the problems in this contest tota ...
- LeetCode 151 reverse word in a string
Given an input string, reverse the string word by word. For example, Given s = "the sky is blue ...
- 【bzoj2242】计算器
#include<bits/stdc++.h> #define inf 1000000000 using namespace std; typedef long long ll; ?a:g ...
- python通过多进程实行多任务
#原创,转载请联系 在开始之前,我们要知道什么是进程.道理很简单,你平时电脑打开QQ客户端,就是一个进程.再打开一个QQ客户端,又是一个进程.那么,在python中如何用一篇代码就可以开启几个进程呢? ...