hadoop2.2编程：矩阵相乘简单实现

 /*
 matrix-matrix multiplication on Hadoop

 A x B = C
 constraint: A, B, C must be of the same size

 I use this to evaluate the efficiency of Hadoop for matrix multiplication,
 so I really don't care to handle non-square matrices.

 ===Data preparation====
 Matrix data must be stored in a file on Hadoop.
 Each line must start with its 0-based row number, followed by the row's values, separated by spaces.
 For example, the following represents a 4x4 matrix:

 0 18 20 16 14
 1 17 12 11 19
 2 10 17 11 19
 3 14 17 20 10

 Left (A in this example) matrix should be stored in file "left";
 Right (B in this example) matrix should be stored in file "right";
 I use filenames to distinguish input data.

 Place "left" and "right" in the same folder (let's call it "input")

 ====Run the program====
 > hadoop jar matrixmul.jar MatrixMul input output 8 2

 results will be placed in "output" folder on HDFS.
 8: all matrices are 8x8
 2: every partitioned block is of size 2x2

 ===Read the results===
 Given the above sample command, we multiply two 8x8 matrices
 partitioned into 2x2 blocks, so the resulting matrix C has 16 blocks.

 In the output folder, there will be 16 separate files:
 part-r-00000, part-r-00001, ... part-r-00015

 Every file stores one block in C. In this example, every block
 has 2 rows and 2 columns.

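 These files are in row-major block order: with the 4x4 grid of blocks above,
 part-r-000NN holds the block in block-row NN/4 and block-column NN%4.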

 ===Algorithm===
 Mappers read input data.
 Every reducer processes one block of the resulting matrix.

 */
 import java.io.IOException;
 import java.util.StringTokenizer;

 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
 import org.apache.hadoop.mapreduce.lib.input.FileSplit;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 import org.apache.hadoop.util.GenericOptionsParser;

 public class MatrixMul {

 public static class MyMapper extends Mapper<LongWritable, Text, IntWritable, Text>{

 private String filename=null;
 private boolean isLeftMatrix=false;
 private int totalSize, partSize, npart;

 private boolean isLeft(){return isLeftMatrix;}
 protected void setup(Context context) throws IOException, InterruptedException{
 //get filename
 FileSplit fileSplit = (FileSplit)context.getInputSplit();
 filename = fileSplit.getPath().getName();
 if("left".equalsIgnoreCase(filename))
 isLeftMatrix=true;
 else
 isLeftMatrix=false;

 //get how size and partition information
 Configuration conf=context.getConfiguration();
 totalSize=conf.getInt("matrix-mul-totalsize", -1);
 partSize=conf.getInt("matrix-mul-partsize", -1);
 npart=conf.getInt("matrix-mul-npart", -1);
 if(totalSize<0 || partSize<0 || npart<0){
 System.out.println("Error in setup of MyMapper.");
 System.exit(1);
 }
 }

 public void map(LongWritable key, Text value, Context context
 ) throws IOException, InterruptedException {
 String line=value.toString();
 String[] strs=line.split(" ");
 if(strs.length!=totalSize+1){
 System.out.println("Error in map of Mapper.");
 System.out.println(strs.length+"___"+totalSize);
 System.out.println("line is: "+line);
 System.exit(1);
 }
 int linenum=Integer.parseInt(strs[0]);
 int[] numbers=new int[totalSize];
 for(int i=0;i<totalSize;i++)
 numbers[i]=Integer.parseInt(strs[i+1]);
 int part_hor=linenum/partSize; //horizontal partitioned id
 int prev_part_ver=-1;
 String msg=null;
 for(int i=0;i<totalSize;i++){
 int part_ver=i/partSize; //vertical partition number
 if(part_ver!=prev_part_ver){
 if(msg!=null){
 int baselinenum = part_hor * partSize;
 int old=part_ver;
 part_ver=prev_part_ver;
 if(isLeft()){
 String toSend="l:"+(linenum - baselinenum)+":"+part_ver+"#"+msg;
 System.out.println("left "+linenum+","+part_ver+" "+msg);
 for(int k=0;k<npart;k++){
 int dest=part_hor * npart + k;
 context.write(new IntWritable(dest), new Text(toSend));
 }
 }else{
 String toSend="r:"+(linenum - baselinenum)+":"+part_hor+"#"+msg;
 System.out.println("right "+part_ver+":"+linenum+" "+msg);
 for(int k=0;k<npart;k++){
 int dest=k * npart + part_ver;
 context.write(new IntWritable(dest), new Text(toSend));
 }
 }
 part_ver=old;
 }
 msg=null;
 prev_part_ver=part_ver;
 }
 if(msg==null)
 msg=""+strs[i+1];
 else
 msg+=" "+strs[i+1];
 }
 if(msg!=null){ //almost the same code
 int part_ver=npart-1;
 int baselinenum = part_hor * partSize;
 if(isLeft()){
 String toSend="l:"+(linenum - baselinenum)+":"+part_ver+"#"+msg;
 System.out.println("left "+linenum+","+part_ver+" "+msg);
 for(int k=0;k<npart;k++){
 int dest=part_hor * npart + k;
 context.write(new IntWritable(dest), new Text(toSend));
 }
 }else{
 String toSend="r:"+(linenum - baselinenum)+":"+part_hor+"#"+msg;
 System.out.println("right "+part_ver+":"+linenum+" "+msg);
 for(int k=0;k<npart;k++){
 int dest=k * npart + part_ver; //has to be the last part
 context.write(new IntWritable(dest), new Text(toSend));
 }
 }
 }
 }
 }

 public static class MyReducer extends Reducer<IntWritable, Text, Text, Text> {

 private int totalSize, partSize, npart;
 int[][] left=null;
 int[][] right=null;
 protected void setup(Context context) throws IOException, InterruptedException{
 //get how # of partitions
 Configuration conf=context.getConfiguration();
 totalSize=conf.getInt("matrix-mul-totalsize", -1);
 partSize=conf.getInt("matrix-mul-partsize", -1);
 npart=conf.getInt("matrix-mul-npart", -1);
 if(totalSize<0 || partSize<0 || npart<0){
 System.out.println("Error in setup of MyReducer.");
 System.exit(1);
 }
 left=new int[partSize][totalSize];
 right=new int[totalSize][partSize];
 }
 public void reduce(IntWritable key, Iterable<Text> values, Context context
 ) throws IOException, InterruptedException {
 int sum = 0;
 for (Text val : values) {
 String line=val.toString();
 String[] meta_val=line.split("#");
 String[] metas=meta_val[0].split(":");
 String[] numbers=meta_val[1].split(" ");

 int baselinenum=Integer.parseInt(metas[1]);
 int blkindex=Integer.parseInt(metas[2]);
 if("l".equalsIgnoreCase(metas[0])){ //from left matrix
 int start=blkindex * partSize;
 for(int i=0;i<partSize; i++)
 left[baselinenum][start+i]=Integer.parseInt(numbers[i]);
 }else{
 int rowindex=blkindex*partSize + baselinenum;
 for(int i=0;i<partSize; i++)
 right[rowindex][i]=Integer.parseInt(numbers[i]);
 }
 }
 }
 protected void cleanup(Context context) throws IOException, InterruptedException {
 //now let's do the calculation
 int[][] res=new int[partSize][partSize];
 for(int i=0;i<partSize;i++)
 for(int j=0;j<partSize;j++)
 res[i][j]=0;
 for(int i=0;i<partSize;i++){
 for(int k=0;k<totalSize;k++){
 for(int j=0;j<partSize;j++){
 res[i][j]+=left[i][k]*right[k][j];
 }
 }
 }
 for(int i=0;i<partSize;i++){
 String output=null;
 for(int j=0;j<partSize;j++){
 if(output==null)
 output=""+res[i][j];
 else
 output+=" "+res[i][j];
 }
 context.write(new Text(output), null);
 }
 }
 }
 /**
  * Configures and runs the matrix-multiplication job.
  *
  * <p>Usage: {@code MatrixMul input-dir output-dir total-size part-size}. Generic Hadoop
  * options (for example {@code -D key=value}) may precede these arguments.
  * The process exits with 0 when the job succeeds, 1 on invalid sizes or job failure, and 2 on
  * a wrong argument count.
  *
  * @param args command-line arguments as described above
  * @throws Exception if job setup or submission fails
  */
 public static void main(String[] args) throws Exception {
     Configuration conf = new Configuration();
     // Strip generic Hadoop options first so they do not count as positional arguments.
     String[] rest = new GenericOptionsParser(conf, args).getRemainingArgs();
     if (rest.length != 4) {
         System.err.println("Usage: MatrixMul input-dir output-dir total-size part-size");
         System.exit(2);
     }

     int totalsize = 0;
     int partsize = 0;
     try {
         totalsize = Integer.parseInt(rest[2]);
         partsize = Integer.parseInt(rest[3]);
     } catch (NumberFormatException e) {
         System.err.println("Invalid total-size or part-size");
         System.exit(1);
     }
     if (totalsize <= 0 || partsize <= 0 || partsize > totalsize) {
         System.err.println("Invalid total-size or part-size");
         System.exit(1);
     }

     conf.setInt("matrix-mul-totalsize", totalsize); // the matrix is 'totalsize' by 'totalsize'
     conf.setInt("matrix-mul-partsize", partsize);   // every block is 'partsize' by 'partsize'
     int npart = totalsize / partsize;
     if (totalsize % partsize != 0) {
         npart++; // a partial block at the edge still needs its own partition
     }
     conf.setInt("matrix-mul-npart", npart);         // number of parts on one dimension

     Job job = Job.getInstance(conf, "matrix-mul");
     job.setJarByClass(MatrixMul.class);
     job.setMapperClass(MyMapper.class);
     job.setReducerClass(MyReducer.class);
     job.setNumReduceTasks(npart * npart); // one reduce task per block of the result

     // The mapper emits (IntWritable, Text) while the reducer emits Text keys,
     // so intermediate and final types are declared separately.
     job.setMapOutputKeyClass(IntWritable.class);
     job.setMapOutputValueClass(Text.class);
     job.setOutputKeyClass(Text.class);
     job.setOutputValueClass(Text.class);

     job.setInputFormatClass(TextInputFormat.class); // need to read a complete line
     FileInputFormat.addInputPath(job, new Path(rest[0]));
     FileOutputFormat.setOutputPath(job, new Path(rest[1]));

     // Propagate job failure to the caller through the exit status.
     System.exit(job.waitForCompletion(true) ? 0 : 1);
 }
 }

hadoop2.2编程：矩阵相乘简单实现的更多相关文章

CUDA编程－（2）其实写个矩阵相乘并不是那么难
程序代码及图解析: #include <iostream> #include "book.h" __global__ void add( int a, int b, i ...
编程计算2×3阶矩阵A和3×2阶矩阵B之积C。矩阵相乘的基本方法是：矩阵A的第i行的所有元素同矩阵B第j列的元素对应相乘，并把相乘的结果相加，最终得到的值就是矩阵C的第i行第j列的值。要求： (1)从键盘分别输入矩阵A和B，输出乘积矩阵C (2) **输入提示信息为：输入矩阵A之前提示："Input 2*3 matrix a:\n" 输入矩阵B之前提示
编程计算2×3阶矩阵A和3×2阶矩阵B之积C. 矩阵相乘的基本方法是: 矩阵A的第i行的所有元素同矩阵B第j列的元素对应相乘, 并把相乘的结果相加,最终得到的值就是矩阵C的第i行第j列的值. 要求: ...
利用Hadoop实现超大矩阵相乘之我见（二）
前文在<利用Hadoop实现超大矩阵相乘之我见(一)>中我们所介绍的方法有着“计算过程中文件占用存储空间大”这个缺陷,本文中我们着重解决这个问题. 矩阵相乘计算思想传统的矩阵相乘方法为 ...
利用Hadoop实现超大矩阵相乘之我见（一）
前记最近,公司一位挺优秀的总务离职,欢送宴上,她对我说“你是一位挺优秀的程序员”,刚说完,立马道歉说“对不起,我说你是程序员是不是侮辱你了?”我挺诧异,程序员现在是很低端,很被人瞧不起的工作吗?或许 ...
Strassen 矩阵相乘算法(转)
偶尔在算法课本上面看到矩阵相乘的算法,联想到自己曾经在蓝桥杯系统上曾经做过一道矩阵相乘的题目,当时用的是普通的矩阵相乘的方法,效率极低,勉强通过编译.所以决定研究一下Strassen矩阵相乘算法,由于 ...
dp方法论——由矩阵相乘问题学习dp解题思路
前篇戳:dp入门——由分杆问题认识动态规划导语刷过一些算法题,就会十分珍惜“方法论”这种东西.Leetcode上只有题目.讨论和答案,没有方法论.往往答案看起来十分切中要害,但是从看题目到得到思路 ...
Opencv中Mat矩阵相乘——点乘、dot、mul运算详解
Opencv中Mat矩阵相乘——点乘.dot.mul运算详解 2016年09月02日 00:00:36 -牧野- 阅读数:59593 标签: Opencv矩阵相乘点乘dotmul 更多个人分类: O ...
C++两个矩阵相乘
/*编程求两个矩阵相乘的结果.输入第一行是整数m,n,表示第一个矩阵式m行n列的:然后是一个m * n的矩阵.再下一行的输入时整数p,q,表示下一个矩阵p行,q列的(n=p);然后就是一个p行q列的矩 ...
使用cublas 矩阵库函数实现矩阵相乘
2014-08-10 cublas中执行矩阵乘法运算的函数首先要注意的是cublas使用的是以列为主的存储方式,和c/c++中的以行为主的方式是不一样的.处理方法可参考下面的注释代码 // SOME ...

随机推荐

使用SQL Server 2005 新的语法ROW_NUMBER()进行分页的两种不同方式的性能比较
相比在SQL Server 2000 中使用的分页方式,在SQL Server 2005中使用新的语法ROW_NUMBER()来分页效率要高出很多,但是很多人在使用ROW_NUMBER()这种分页方式 ...
hdu 1715 大菲波数(高精度数)
Problem Description Fibonacci数列,定义如下: f(1)=f(2)=1 f(n)=f(n-1)+f(n-2) n>=3. 计算第n项Fibonacci数值. Inpu ...
CentOS 开启GD库
在php.ini 中没有找到"extension=php_gd2.dll"这行代码,这是因为CentOS一般没有预装GD库. 解决办法: 1.在线安装GD库 yum -y inst ...
ajax提交数据Demo
$.ajax({ url: "url", type: "post", data: JSON.stringify(yourData), contentType: ...
建站服务器的最优选择之Windows Or Linux
转载于:http://www.0553114.com/news/detail-702287.html 不管是个人建站,还是中小型企业建站,选择一款合适的主机是站长朋友们共同的心愿.主机是选择Windo ...
iOS之多线程浅谈
1)并发和并行的区别在软件开发中不可避免的会遇到多线程的问题,在iOS客户端开发(或者.NET的winform或者wpf这样的cs程序)中就更不可避免的会用到多线程,在bs类型的web项目中要考虑一 ...
[HTML]img标签属性
<img src="image.png" width="" height="" align="absmiddle" ...
web2py--------------用web2py写 django的例子 --------开发环境
我们先从广为人知的例子说起xi 也就是官方的例子,我会在最后给出代码: ============================环境=================== 编译器使用vs code , ...
微信开发之开发环境搭建（ visual studio 2015we + IIS express + ngrok）
1. 申请个人测试使用的微信订阅号 https://mp.weixin.qq.com 可注册微信订阅号. 不会?请自行百度. 2. 安装 ngrok 微信开发首先要解决如何让微信链接到本地开发环境.有 ...
DB天气app冲刺第五天
今天上了软工的一节课,感觉自己前几天的方向错了,而且基本是在耗时间,因为虽然一直在努力的看书编代码,但效果不明显.所以今天要好好想一个新的方向重新来过. 明天送上计划.

hadoop2.2编程：矩阵相乘简单实现

hadoop2.2编程：矩阵相乘简单实现的更多相关文章

随机推荐

热门专题