使用blas做矩阵乘法
#include <stdio.h>
#include <stdlib.h>
// Not referenced by this listing; kept because the original listing includes it.
#include <cublas_v2.h>
#include <iostream>
#include <vector>
//extern "C"
//{
#include <cblas.h>
//}

// Defined after the headers so the function-like macro cannot rewrite
// declarations named `min` inside them. Not used by main() below.
#define min(x,y) (((x) < (y)) ? (x) : (y))

/**
 * Multiplies two small row-major matrices with cblas_sgemm,
 * C = alpha * A * B + beta * C, and prints every element of C.
 *
 * NOTE(review): the numeric literals were missing from the listing this code
 * was recovered from. The dimensions were chosen so that M*K and K*N match
 * the 12-slot and 6-slot initializers of A and B; the dimensions, scalars and
 * matrix contents are placeholders -- confirm against the original source.
 *
 * @return EXIT_SUCCESS.
 */
int main()
{
    const enum CBLAS_ORDER Order = CblasRowMajor;
    const enum CBLAS_TRANSPOSE TransA = CblasNoTrans;
    const enum CBLAS_TRANSPOSE TransB = CblasNoTrans;

    const int M = 4; // rows of A, rows of C
    const int N = 2; // columns of B, columns of C
    const int K = 3; // columns of A, rows of B

    const float alpha = 1;
    const float beta = 0;

    // Row-major, non-transposed operands: each leading dimension is the
    // number of columns of the corresponding matrix.
    const int lda = K; // columns of A
    const int ldb = N; // columns of B
    const int ldc = N; // columns of C

    const float A[M * K] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 7, 6};
    const float B[K * N] = {5, 4, 3, 2, 1, 0};
    // Value-initialized so the output buffer never holds indeterminate data.
    float C[M * N] = {};

    cblas_sgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);

    // One element per line, with an extra blank line after each row of C.
    for (int i = 0; i < M; i++)
    {
        for (int j = 0; j < N; j++)
        {
            std::cout << C[i * N + j] << "\n";
        }
        std::cout << std::endl;
    }

    return EXIT_SUCCESS;
}
g++ testblas.c++ -lopenblas -o testout
g++ testblas.c++ -lopenblas_piledriverp-r0.2.9 -o testout 本地编译openblas版本
注意:链接库(-l 选项)要写在引用该库函数的源文件/目标文件之后,否则可能出现未定义引用的链接错误。
cblas_sgemm
Multiplies two matrices (single-precision).
void cblas_sgemm (
const enum CBLAS_ORDER Order, // Specifies row-major (C) or column-major (Fortran) data ordering.
//typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
const enum CBLAS_TRANSPOSE TransA,//Specifies whether to transpose matrix A.
const enum CBLAS_TRANSPOSE TransB,
const int M, //Number of rows in matrices A and C.
const int N,//Number of columns in matrices B and C.
const int K, //Number of columns in matrix A; number of rows in matrix B
const float alpha, //Scaling factor for the product of matrices A and B
const float *A,
const int lda, //The size of the first dimension of matrix A; if you are passing a matrix A[m][n], the value should be m.
(注:按行存储 CblasRowMajor 且不转置时,lda 实际取 A 的列数,即上文示例中的 lda=K,与下面关于 stride 的说明一致。)
stride
lda, ldb and ldc (the strides) are not relevant to my problem after all, but here's an explanation of them:
The elements of a matrix (i.e. a 2D array) are stored contiguously in memory. However, they may be stored in either column-major or row-major fashion. The stride represents the distance in memory between elements in adjacent rows (if row-major) or in adjacent columns (if column-major). This means that the stride is usually equal to the number of rows/columns in the matrix.
Matrix A =
[1 2 3]
[4 5 6]
Row-major stores values as {1,2,3,4,5,6}
Stride here is 3
Col-major stores values as {1, 4, 2, 5, 3, 6}
Stride here is 2
Matrix B =
[1 2 3]
[4 5 6]
[7 8 9]
Col-major storage is {1, 4, 7, 2, 5, 8, 3, 6, 9}
Stride here is 3
Read more: http://www.physicsforums.com
const float *B,
const int ldb, //The size of the first dimension of matrix B; if you are passing a matrix B[m][n], the value should be m.
const float beta, //Scaling factor for matrix C.
float *C,
const int ldc //The size of the first dimension of matrix C; if you are passing a matrix C[m][n], the value should be m.
);
Thus, it calculates either
C←αAB + βC
or
C←αBA + βC
with optional use of transposed forms of A, B, or both.
typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE;
$C=A*B$
$C^T=(A*B)^T=B^T*A^T$
把A和B的顺序颠倒,可以直接得到转置矩阵乘法的结果,不用作其他变换(结果C也是转置)。
Y←αAX + βY
cblas_sgemv
Multiplies a matrix by a vector (single precision).
void cblas_sgemv (
const enum CBLAS_ORDER Order,
const enum CBLAS_TRANSPOSE TransA,
const int M,
const int N,
const float alpha,
const float *A,
const int lda,
const float *X,
const int incX,
const float beta,
float *Y,
const int incY
);
STL版本
cblas_daxpy
Computes a constant times a vector plus a vector (double-precision).
On return, the contents of vector Y are replaced with the result. The value computed is (alpha * X[i]) +
Y[i].
#include <OpenBlas/cblas.h>
#include <OpenBlas/common.h>
#include <iostream>
#include <vector>

/**
 * Demonstrates cblas_daxpy on std::vector storage: y <- alpha * x + y,
 * then prints the updated y separated by tabs.
 *
 * NOTE(review): the numeric literals were missing from the listing this code
 * was recovered from. The vector length, alpha and the unit increments below
 * are placeholders -- confirm against the original source.
 */
int main()
{
    const blasint n = 10;
    const blasint in_x = 1; // stride between consecutive elements of x
    const blasint in_y = 1; // stride between consecutive elements of y

    // Fill values are the ones given in the listing: x is all 1.0, y is all 2.0.
    std::vector<double> x(n, 1.0);
    std::vector<double> y(n, 2.0);

    const double alpha = 10;

    // The vectors own contiguous buffers, so data() can be handed to BLAS.
    cblas_daxpy(n, alpha, x.data(), in_x, y.data(), in_y);

    //Print y
    for (blasint j = 0; j < n; j++)
        std::cout << y[j] << "\t";

    std::cout << std::endl;
}
cublas
cublasStatus_t
cublasCreate(cublasHandle_t *handle)
Return Value Meaning
CUBLAS_STATUS_SUCCESS the initialization succeeded
CUBLAS_STATUS_NOT_INITIALIZED the CUDA™ Runtime initialization failed
CUBLAS_STATUS_ALLOC_FAILED the resources could not be allocated
cublasStatus_t
cublasDestroy(cublasHandle_t handle)
Return Value Meaning
CUBLAS_STATUS_SUCCESS the shut down succeeded
CUBLAS_STATUS_NOT_INITIALIZED the library was not initialized
cublasStatus_t cublasSgemm(cublasHandle_t handle, // 唯一的不同:handle to the cuBLAS library context.
cublasOperation_t transa,
cublasOperation_t transb,
int m,
int n,
int k,
const float *alpha,
const float*A,
int lda,
const float*B,
int ldb,
const float*beta,
float*C,
int ldc
)
void cblas_sgemm (
const enum CBLAS_ORDER Order, // Specifies row-major (C) or column-major (Fortran) data ordering.
//typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
const enum CBLAS_TRANSPOSE TransA,//Specifies whether to transpose matrix A.
const enum CBLAS_TRANSPOSE TransB,
const int M, //Number of rows in matrices A and C.
const int N,//Number of columns in matrices B and C.
const int K, //Number of columns in matrix A; number of rows in matrix B
const float alpha, //Scaling factor for the product of matrices A and B
const float *A,
const int lda, //The size of the first dimension of matrix A; if you are passing a matrix A[m][n], the value should be m.
const float *B,
const int ldb, //The size of the first dimension of matrix B; if you are passing a matrix B[m][n], the value should be m.
const float beta, //Scaling factor for matrix C.
float *C,
const int ldc //The size of the first dimension of matrix C; if you are passing a matrix C[m][n], the value should be m.
);
使用blas做矩阵乘法的更多相关文章
- 【神经网络与深度学习】【C/C++】使用blas做矩阵乘法
使用blas做矩阵乘法 #define min(x,y) (((x) < (y)) ? (x) : (y)) #include <stdio.h> #include <st ...
- numpy.loadtxt() 出现codecError_____ Excel 做矩阵乘法
1) 用 numpy读入csv文件是报错 UnicodeDecodeError: 'gbk' codec can't decode byte 0xbf in position 2: illegal m ...
- cuda中用cublas库做矩阵乘法
这里矩阵C=A*B,原始文档给的公式是C=alpha*A*B+beta*C,所以这里alpha=1,beta=0. 主要使用cublasSgemm这个函数,这个函数的第二个参数有三种类型,这里CUBL ...
- POJ 2778 DNA Sequence (AC自动机,矩阵乘法)
题意:给定n个不能出现的模式串,给定一个长度m,要求长度为m的合法串有多少种. 思路:用AC自动机,利用AC自动机上的节点做矩阵乘法. #include<iostream> #includ ...
- poj3233之经典矩阵乘法
Matrix Power Series Time Limit: 3000MS Memory Limit: 131072K Total Submissions: 12346 Accepted: ...
- 51nod 1462 树据结构 | 树链剖分 矩阵乘法
题目链接 51nod 1462 题目描述 给一颗以1为根的树. 每个点有两个权值:vi, ti,一开始全部是零. Q次操作: 读入o, u, d o = 1 对u到根上所有点的vi += d o = ...
- 【BZOJ1706】[usaco2007 Nov]relays 奶牛接力跑 矩阵乘法
[BZOJ1706][usaco2007 Nov]relays 奶牛接力跑 Description FJ的N(2 <= N <= 1,000,000)头奶牛选择了接力跑作为她们的日常锻炼项 ...
- [ZJOI2005]沼泽鳄鱼 矩阵乘法
---题面--- 题解: 乍一看还是挺懵逼的.和HH去散步很像,思路也是类似的. 复制一段我在HH去散步的题解里面写的一段话吧: 考虑f[i][j]表示i和j是否右边相连,有为1,否则为0,那么f同时 ...
- BZOJ_3231_[Sdoi2008]递归数列_矩阵乘法
BZOJ_3231_[Sdoi2008]递归数列_矩阵乘法 Description 一个由自然数组成的数列按下式定义: 对于i <= k:ai = bi 对于i > k: ai = c1a ...
随机推荐
- ORACLE——count() 统计函数的使用
SQL中用于统计的函数时:COUNT(). 针对count函数的使用做一个记录,很简单. 首先我数据库中建个表TEST,数据如下: 表中ID和NAME都是不重复的数据,HOME.TEL.PATH中存在 ...
- SVN在update的时候报错Please execute the 'Cleanup' command.
需要右键clearn up 然后再update
- Gis数据处理
几何投影和解析投影几何投影是将椭球面上的经纬线网投影到几何平面上,然后将几何面展为平面.几何投影可以分为方位投影.圆柱投影和圆锥投影.这三种投影纬线的形状不同.方位投影纬线的形状是同心圆:圆柱投影纬线 ...
- 使用Sitecore的可视化路径分析器工具洞察客户360度行为
Path Analyzer背后的想法是观察,分析和理解您的潜在客户和客户在浏览您的网站时所遵循的网络旅程.他们最初来自哪里?他们的路径是“有机”(通过搜索引擎)还是来自付费广告?社交帖子?甚至是你发起 ...
- Nginx技术研究系列4-Nginx监控-Nginx+Telegraf+Influxb+Grafana
搭建了Nginx集群后,需要继续深入研究的就是日常Nginx监控. Nginx如何监控?相信百度就可以找到:nginx-status 通过Nginx-status,实时获取到Nginx监控数据后,如何 ...
- [openjudge-搜索]单词接龙
题目描述 描述 单词接龙是一个与我们经常玩的成语接龙相类似的游戏,现在我们已知一组单词,且给定一个开头的字母,要求出以这个字母开头的最长的"龙"(每个单词都最多在"龙&q ...
- flask_mail发送163邮件,报553错误的原因
最近在练习用flask_mail发送163邮件时报错: reply: '553 authentication is required,163 smtp9,DcCowAD3eEQZ561caRiaBA- ...
- qemu 对虚机的地址空间管理
转载:http://huchh.com/2015/06/22/qemu-%E5%AF%B9%E8%99%9A%E6%9C%BA%E7%9A%84%E7%BA%BF%E6%80%A7%E5%9C%B0% ...
- Python sqlalchemy orm 常用操作
增add # 创建表1 # 注:高级封装 import sqlalchemy # 调用链接数据库 from sqlalchemy import create_engine # 调用基类Base fro ...
- 01:golang开发环境
1.1 go环境安装 1.go下载安装 官方:https://golang.org/dl 国内: https://golang.google.cn/dl/ https://www.golangtc.c ...