[Heterogeneous Computing] OpenCL Matrix Transpose
Introduction
The key to an efficient matrix transpose is making good use of local memory: stage the data in a local tile so that reads and writes to global memory are coalesced as far as possible, while keeping accesses to local memory cheap.
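To make that idea concrete before the full samples, here is a minimal sketch (not taken from the code below; the kernel names naive_transpose / tiled_transpose, the square-matrix layout and the fixed 16x16 tile are illustrative assumptions). The naive version leaves one of its two global accesses strided, while the tiled version stages a block in local memory so that both the global read and the global write touch consecutive addresses:

// Minimal sketch (square N x N matrix, row-major). The naive version makes
// one of its two global accesses strided, so it cannot be fully coalesced.
__kernel void naive_transpose(__global float* out,
                              __global const float* in,
                              const uint n)
{
    uint x = get_global_id(0);
    uint y = get_global_id(1);
    out[y * n + x] = in[x * n + y];   // contiguous write, strided read
}

// Tiled version: a TILE x TILE block is staged in local memory so that both
// the global read and the global write are coalesced; the element swap
// happens inside the work-group's tile. Assumes the local work size is
// TILE x TILE and that n is a multiple of TILE.
#define TILE 16
__kernel void tiled_transpose(__global float* out,
                              __global const float* in,
                              const uint n)
{
    __local float tile[TILE][TILE];
    uint gx = get_global_id(0), gy = get_global_id(1);
    uint lx = get_local_id(0),  ly = get_local_id(1);

    tile[ly][lx] = in[gy * n + gx];            // coalesced read
    barrier(CLK_LOCAL_MEM_FENCE);

    uint ox = get_group_id(1) * TILE + lx;     // swapped block coordinates
    uint oy = get_group_id(0) * TILE + ly;
    out[oy * n + ox] = tile[lx][ly];           // coalesced write
}

Padding the tile to TILE x (TILE + 1) is a common further tweak to keep the tile[lx][ly] reads free of local-memory bank conflicts; the samples below use the unpadded layout.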
Complete code, version 1:
main.cpp:
#include <iostream>
#include <string>
#include <fstream>
#include <sstream>
#include <cstdlib>
#include <time.h>

#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif

// Comment this out to build the matrix-transpose path instead of the multiply path.
#define MATRIXMULLTIPLY
#define N 6
#define K 8
#define L 5

// Function to check and handle OpenCL errors
inline void checkErr(cl_int err, const char *name)
{
    if (err != CL_SUCCESS)
    {
        std::cerr << "ERROR: " << name << " (" << err << ")" << std::endl;
        exit(EXIT_FAILURE);
    }
}
cl_context CreateContext()
{
    cl_int errNum;
    cl_uint numPlatforms;
    cl_platform_id firstPlatformId;
    cl_context context = NULL;

    // First, select an OpenCL platform to run on. For this example, we simply
    // choose the first available platform. Normally, you would query for all
    // available platforms and select the most appropriate one.
    errNum = clGetPlatformIDs(1, &firstPlatformId, &numPlatforms);
    if (errNum != CL_SUCCESS || numPlatforms <= 0)
    {
        std::cerr << "Failed to find any OpenCL platforms." << std::endl;
        return NULL;
    }

    // Next, create an OpenCL context on the platform. Attempt to create a
    // GPU-based context, and if that fails, try to create a CPU-based context.
    cl_context_properties contextProperties[] =
    {
        CL_CONTEXT_PLATFORM, (cl_context_properties)firstPlatformId, 0
    };
    context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, NULL, NULL, &errNum);
    if (errNum != CL_SUCCESS)
    {
        std::cout << "Could not create GPU context, trying CPU..." << std::endl;
        context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_CPU, NULL, NULL, &errNum);
        if (errNum != CL_SUCCESS)
        {
            std::cerr << "Failed to create an OpenCL GPU or CPU context." << std::endl;
            return NULL;
        }
    }

    return context;
}

int main(int argc, char *argv[])
{
    // Use the first platform
    cl_int errNum;
    cl_platform_id platformID;
    cl_context context = NULL;
    cl_device_id device;

    errNum = clGetPlatformIDs(1, &platformID, NULL);
    checkErr(errNum, "clGetPlatformIDs");
    std::cout << "Platform ID: " << platformID << std::endl;

    context = CreateContext();
    if (context == NULL)
    {
        std::cerr << "Failed to create OpenCL context." << std::endl;
        return 1;
    }

    errNum = clGetDeviceIDs(platformID, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Could not get a CL_DEVICE_TYPE_GPU device, trying CL_DEVICE_TYPE_CPU..." << std::endl;
        errNum = clGetDeviceIDs(platformID, CL_DEVICE_TYPE_CPU, 1, &device, NULL);
        std::cout << "Device: " << device << std::endl;
        if (errNum != CL_SUCCESS)
        {
            checkErr(errNum, "clGetDeviceIDs(..CL_DEVICE_TYPE_CPU..)");
        }
    }

    // Enable profiling so the clGetEventProfilingInfo calls below are valid.
    cl_command_queue commandQueue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &errNum);
    checkErr(errNum, "clCreateCommandQueue()");

    cl_int Mat_A_width  = N;
    cl_int Mat_A_height = K;
    cl_int Mat_B_width  = K;
    cl_int Mat_B_height = L;

    float *MatA = (float*)malloc(sizeof(float) * Mat_A_width * Mat_A_height);
    if (MatA == NULL)
    {
        std::cerr << "Failed to allocate memory." << std::endl;
        return 1;
    }

#ifdef MATRIXMULLTIPLY
    float *MatB = (float*)malloc(sizeof(float) * Mat_B_width * Mat_B_height);
    float *MatC = (float*)malloc(sizeof(float) * Mat_A_width * Mat_B_height);
#else
    float *MatC = (float*)malloc(sizeof(float) * Mat_A_width * Mat_A_height);
#endif

    std::cout << "=====MatA: " << Mat_A_width << "X" << Mat_A_height;
    for (int i = 0; i < Mat_A_width * Mat_A_height; i++)
    {
        MatA[i] = std::rand() * 0.25;
        //MatA[i] = 4.5;
        if ((i % Mat_A_height == 0) || (i == 0))
        {
            std::cout << std::endl;
        }
        std::cout << MatA[i] << "\t";
    }
    std::cout << std::endl;

    // Allocate space for Matrix A on the device
    cl_mem bufferA = clCreateBuffer(context,
                                    CL_MEM_READ_ONLY, //|CL_MEM_COPY_HOST_PTR,
                                    Mat_A_width * Mat_A_height * sizeof(float),
                                    NULL,
                                    &errNum);
    checkErr(errNum, "clCreateBuffer(...bufferA..)");

    // Copy Matrix A to the device
    errNum = clEnqueueWriteBuffer(commandQueue, bufferA, CL_TRUE, 0,
                                  Mat_A_width * Mat_A_height * sizeof(float),
                                  (void*)MatA, 0, NULL, NULL);

#ifdef MATRIXMULLTIPLY
std::cout<<"MatB: "<<Mat_B_width <<"X"<<Mat_B_height<<std::endl;
for(int i = 0; i< Mat_B_width*Mat_B_height; i++)
{
MatB[i] = std::rand()*0.25;
//MatB[i] = 2.0;
if((i%Mat_B_height ==0)||(i == 0))
{
std::cout << std::endl;
}
std::cout<<MatA[i]<< " ";
}
std::cout<<std::endl;
//Allocate space for Matrix B on the device
cl_mem bufferB = clCreateBuffer(context,
CL_MEM_READ_ONLY,//|CL_MEM_COPY_HOST_PTR,
Mat_B_width*Mat_B_height*sizeof(float),
NULL,
&errNum);
checkErr(errNum,"clCreateBuffer(...bufferB..)"); //Copy Matrix B to the device
errNum = clEnqueueWriteBuffer(commandQueue,bufferB,CL_TRUE, 0,Mat_B_width*Mat_B_height*sizeof(float),(void*)MatB,0,NULL,NULL); //Allocate space for Matrix C on the device
cl_mem bufferC = clCreateBuffer(context,
CL_MEM_READ_ONLY,//|CL_MEM_COPY_HOST_PTR,
Mat_A_width*Mat_B_height*sizeof(float),
NULL,
&errNum);
checkErr(errNum,"clCreateBuffer(...bufferC..)");
#else
//Allocate space for Matrix C on the device
cl_mem bufferC = clCreateBuffer(context,
CL_MEM_READ_ONLY,//|CL_MEM_COPY_HOST_PTR,
Mat_A_width*Mat_A_height*sizeof(float),
NULL,
&errNum);
checkErr(errNum,"clCreateBuffer(...bufferC..)");
#endif

    // Read the program source from Matrixkernel.cl and build it
    cl_program program;
    const char *fileName = "Matrixkernel.cl";
    std::ifstream kernelFile(fileName, std::ios::in);
    if (!kernelFile.is_open())
    {
        std::cerr << "Failed to open file for reading: " << fileName << std::endl;
        return 1;
    }

    std::ostringstream oss;
    oss << kernelFile.rdbuf();
    std::string srcStdStr = oss.str();
    const char *srcStr = srcStdStr.c_str();

    program = clCreateProgramWithSource(context, 1, (const char**)&srcStr, NULL, NULL);
    if (program == NULL)
    {
        std::cerr << "Failed to create OpenCL program from source." << std::endl;
        return 1;
    }

    errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (errNum != CL_SUCCESS)
    {
        // Determine the reason for the error
        char buildLog[16384];
        clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(buildLog), buildLog, NULL);
        std::cerr << "Error in kernel: " << std::endl;
        std::cerr << buildLog;
        clReleaseProgram(program);
        return 1;
    }
#ifdef MATRIXMULLTIPLY
    // Create the matrix-multiplication kernel
    cl_kernel kernel = clCreateKernel(program, "MulltiplySample", NULL);
    if (kernel == NULL)
    {
        std::cerr << "Failed to create kernel." << std::endl;
        return 1;
    }

    // Set the kernel arguments
    clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&bufferC);
    clSetKernelArg(kernel, 1, sizeof(cl_int), (void*)&Mat_A_width);
    clSetKernelArg(kernel, 2, sizeof(cl_int), (void*)&Mat_A_height);
    clSetKernelArg(kernel, 3, sizeof(cl_int), (void*)&Mat_B_width);
    clSetKernelArg(kernel, 4, sizeof(cl_int), (void*)&Mat_B_height);
    clSetKernelArg(kernel, 5, sizeof(cl_mem), (void*)&bufferA);
    clSetKernelArg(kernel, 6, sizeof(cl_mem), (void*)&bufferB);

    // Set local and global work-group sizes
    size_t globalws[2] = { (size_t)Mat_A_width, (size_t)Mat_B_height };
    size_t localws[2]  = { (size_t)Mat_A_width, (size_t)Mat_B_height };

    // Execute the kernel
    errNum = clEnqueueNDRangeKernel(commandQueue, kernel, 2, NULL, globalws, localws, 0, NULL, NULL);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Failed to execute the kernel." << std::endl;
        return 1;
    }

    // Read the result back to the host
    errNum = clEnqueueReadBuffer(commandQueue, bufferC, CL_TRUE, 0,
                                 Mat_B_height * Mat_A_width * sizeof(float),
                                 (void*)MatC, 0, NULL, NULL);

    std::cout << "MatrixC: " << Mat_A_width << "X" << Mat_B_height << std::endl;
    for (int i = 0; i < Mat_A_width * Mat_B_height; i++)
    {
        if ((i != 0) && (i % Mat_B_height == 0))
        {
            std::cout << std::endl;
        }
        std::cout << MatC[i] << "\t";
    }
    std::cout << std::endl;
    clReleaseKernel(kernel);
#else
    cl_kernel Trapsposekernel;
    cl_int blockSize = 16;

    if (Mat_A_width * Mat_A_height > 1000)
    {
        // Large matrices: use the tiled, local-memory version
        Trapsposekernel = clCreateKernel(program, "MatrixTranspose", NULL);
        std::cout << "CreateKernel: MatrixTranspose" << std::endl;
        if (Trapsposekernel == NULL)
        {
            std::cerr << "Failed to create transpose kernel." << std::endl;
            return 1;
        }
        clSetKernelArg(Trapsposekernel, 0, sizeof(cl_mem), (void*)&bufferC);
        clSetKernelArg(Trapsposekernel, 1, sizeof(cl_mem), (void*)&bufferA);
        clSetKernelArg(Trapsposekernel, 2, sizeof(cl_float) * blockSize * blockSize, NULL); // local memory tile
        clSetKernelArg(Trapsposekernel, 3, sizeof(cl_int), (void*)&Mat_A_width);
        clSetKernelArg(Trapsposekernel, 4, sizeof(cl_int), (void*)&Mat_A_height);
        clSetKernelArg(Trapsposekernel, 5, sizeof(cl_int), (void*)&blockSize);
    }
    else
    {
        // Small matrices: use the naive one-element-per-work-item version
        Trapsposekernel = clCreateKernel(program, "TrapsposeMatrixSample", NULL);
        std::cout << "CreateKernel: TrapsposeMatrixSample" << std::endl;
        if (Trapsposekernel == NULL)
        {
            std::cerr << "Failed to create transpose kernel." << std::endl;
            return 1;
        }
        clSetKernelArg(Trapsposekernel, 0, sizeof(cl_mem), (void*)&bufferC);
        clSetKernelArg(Trapsposekernel, 1, sizeof(cl_int), (void*)&Mat_A_width);
        clSetKernelArg(Trapsposekernel, 2, sizeof(cl_int), (void*)&Mat_A_height);
        clSetKernelArg(Trapsposekernel, 3, sizeof(cl_mem), (void*)&bufferA);
    }

    // Note: for the MatrixTranspose path the local work size must be
    // {blockSize, blockSize}; the sizes below only suit the small, naive path.
    size_t localtr[2]  = { (size_t)Mat_A_height, (size_t)Mat_A_width };
    size_t globaltr[2] = { (size_t)Mat_A_height, (size_t)Mat_A_width };

    cl_event dev;
    // Queue the kernel up for execution across the whole matrix
    errNum = clEnqueueNDRangeKernel(commandQueue, Trapsposekernel, 2, NULL, globaltr, localtr, 0, NULL, &dev);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Failed to execute the kernel." << std::endl;
        return 1;
    }
    std::cout << "CommandQueue: " << commandQueue << std::endl;
    clFinish(commandQueue);

    cl_ulong startTime, endTime;
    clGetEventProfilingInfo(dev, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &startTime, NULL);
    clGetEventProfilingInfo(dev, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &endTime, NULL);
    cl_ulong kernelExecTimeNs = endTime - startTime;
    printf("transpose kernel exec time: %8.6f ms\n", kernelExecTimeNs * 1e-6);

    errNum = clEnqueueReadBuffer(commandQueue, bufferC, CL_TRUE, 0,
                                 Mat_A_width * Mat_A_height * sizeof(float),
                                 (void*)MatC, 0, NULL, NULL);

    std::cout << "====Transpose of MatrixA: " << Mat_A_height << "X" << Mat_A_width << std::endl;
    for (int i = 0; i < Mat_A_width * Mat_A_height; i++)
    {
        if ((i != 0) && (i % Mat_A_width == 0))
        {
            std::cout << std::endl;
        }
        std::cout << MatC[i] << "\t";
    }
    std::cout << std::endl;
    clReleaseKernel(Trapsposekernel);
#endif

    clReleaseProgram(program);
    clReleaseCommandQueue(commandQueue);
    clReleaseContext(context);

    // The matrices were allocated with malloc, so release them with free
    free(MatA);
#ifdef MATRIXMULLTIPLY
    free(MatB);
#endif
    free(MatC);
    return 0;
}
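The sample fixes blockSize at 16; whether a 16x16 work-group and a 16x16 float tile actually fit the device is hardware-dependent. Here is a hedged host-side sketch of the kind of check one might add (it reuses the device and Trapsposekernel handles from main() above; the fallback loop is illustrative, not part of the original code):

// Sketch: validate a candidate blockSize against the device and kernel limits.
size_t maxWorkGroupSize = 0;
cl_ulong localMemSize = 0;
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
                sizeof(maxWorkGroupSize), &maxWorkGroupSize, NULL);
clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE,
                sizeof(localMemSize), &localMemSize, NULL);

size_t kernelWorkGroupSize = 0;
clGetKernelWorkGroupInfo(Trapsposekernel, device, CL_KERNEL_WORK_GROUP_SIZE,
                         sizeof(kernelWorkGroupSize), &kernelWorkGroupSize, NULL);

cl_int blockSize = 16;
while (blockSize > 1 &&
       ((size_t)(blockSize * blockSize) > maxWorkGroupSize ||
        (size_t)(blockSize * blockSize) > kernelWorkGroupSize ||
        (cl_ulong)(blockSize * blockSize) * sizeof(cl_float) > localMemSize))
{
    blockSize /= 2;   // fall back to a smaller tile if the device is too small
}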
Kernel code (Matrixkernel.cl):
/*
 * @param outputC output matrix C
 * @param widthA  width  of matrix A
 * @param heightA height of matrix A
 * @param widthB  width  of matrix B
 * @param heightB height of matrix B
 * @param inputA  input matrix A
 * @param inputB  input matrix B
 */
__kernel void MulltiplySample(__global float* outputC,
                              const int widthA,
                              const int heightA,
                              const int widthB,
                              const int heightB,
                              __global float* inputA,
                              __global float* inputB)
{
    int row = get_global_id(1);  // global position in Y direction
    int col = get_global_id(0);  // global position in X direction

    float sum = 0.0f;
    // Calculate one element of matrix C
    for (int i = 0; i < widthA; i++)
    {
        sum += inputA[row * widthA + i] * inputB[i * widthB + col];
    }
    outputC[row * widthB + col] = sum;
}

/*
 * @param TrapsposeMatrix output (transposed) matrix
 * @param width  width  of the input matrix
 * @param height height of the input matrix
 * @param InputMatrix input matrix
 */
__kernel void TrapsposeMatrixSample(__global float* TrapsposeMatrix,
                                    const uint width,
                                    const uint height,
                                    __global float* InputMatrix)
{
    int row = get_global_id(0);
    int col = get_global_id(1);

    TrapsposeMatrix[row * width + col] = InputMatrix[col * height + row];
}

/*
 * Copies a block to local memory and copies back the transpose
 * from local memory to the output.
 * @param output output matrix
 * @param input  input matrix
 * @param block  local memory of size blockSize x blockSize
 * @param width  width  of the input matrix
 * @param height height of the input matrix
 * @param blockSize size of the block
 */
__kernel void MatrixTranspose(__global float * output,
                              __global float * input,
                              __local  float * block,
                              const uint width,
                              const uint height,
                              const uint blockSize)
{
    uint globalIdx = get_global_id(0);
    uint globalIdy = get_global_id(1);

    uint localIdx = get_local_id(0);
    uint localIdy = get_local_id(1);

    /* copy from input to local memory */
    block[localIdy * blockSize + localIdx] = input[globalIdy * width + globalIdx];

    /* wait until the whole block is filled */
    barrier(CLK_LOCAL_MEM_FENCE);

    uint groupIdx = get_group_id(0);
    uint groupIdy = get_group_id(1);

    /* calculate the corresponding target location for the transpose
       by swapping the x and y block coordinates */
    uint targetGlobalIdx = groupIdy * blockSize + localIdy;
    uint targetGlobalIdy = groupIdx * blockSize + localIdx;

    /* calculate the corresponding raster indices of target and source */
    uint targetIndex = targetGlobalIdy * height + targetGlobalIdx;
    uint sourceIndex = localIdy * blockSize + localIdx;

    output[targetIndex] = block[sourceIndex];
}
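One subtlety when launching MatrixTranspose: the kernel indexes its local tile directly with the local IDs, so it only works if each work-group is exactly blockSize x blockSize work-items and the matrix dimensions are multiples of blockSize. A hedged sketch of a launch configuration consistent with that assumption (width, height and the surrounding handles are illustrative; the listing above uses different work sizes for its small-matrix path):

// Sketch: launch configuration for the tiled MatrixTranspose kernel.
// Assumes width and height are multiples of blockSize (pad the matrix otherwise).
size_t blockSize = 16;
size_t localws[2]  = { blockSize, blockSize };
size_t globalws[2] = { (size_t)width, (size_t)height };   // one work-item per element

cl_event evt;
cl_int err = clEnqueueNDRangeKernel(commandQueue, Trapsposekernel,
                                    2,        // work dimensions
                                    NULL,     // no global offset
                                    globalws, localws,
                                    0, NULL, &evt);
checkErr(err, "clEnqueueNDRangeKernel(MatrixTranspose)");
clWaitForEvents(1, &evt);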
Test output:
Complete code, version 2:
main.cpp:
// Matrix.cpp : Defines the entry point for the console application.
#include "stdafx.h"
#include <CL/cl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include <iostream>
#include <fstream>

using namespace std;

#pragma comment (lib, "OpenCL.lib")

#define M 2048

// Read the whole kernel file into a string; returns 0 on success, 1 on failure.
int convertToString(const char *filename, std::string& s)
{
    size_t size;
    char* str;
    std::fstream f(filename, (std::fstream::in | std::fstream::binary));
    if (f.is_open())
    {
        size_t fileSize;
        f.seekg(0, std::fstream::end);
        size = fileSize = (size_t)f.tellg();
        f.seekg(0, std::fstream::beg);

        str = new char[size + 1];
        if (!str)
        {
            f.close();
            return 1;
        }
        f.read(str, fileSize);
        f.close();
        str[size] = '\0';

        s = str;
        delete[] str;
        return 0;
    }
    printf("Error: Failed to open file %s\n", filename);
    return 1;
}

int main(int argc, char* argv[])
{
    float *src1 = 0;
    float *src2 = 0;
    src1 = (float*)malloc(M * M * sizeof(float));
    src2 = (float*)malloc(M * M * sizeof(float));

    // Fill src1 with random data and compute the reference transpose src2 on the host
    int i, j;
    srand((unsigned)time(NULL));
    for (i = 0; i < M * M; i++)
        src1[i] = (float)(rand() % 50);
    for (i = 0; i < M; i++)
    {
        for (j = 0; j < M; j++)
        {
            src2[i * M + j] = src1[j * M + i];
        }
    }

    // Set up platform, device, context and a profiling-enabled command queue
    cl_int status;
    cl_platform_id platform;
    status = clGetPlatformIDs(1, &platform, NULL);

    cl_device_id device;
    clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL);

    cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
    cl_command_queue queue = clCreateCommandQueue(context, device,
                                                  CL_QUEUE_PROFILING_ENABLE, NULL);

    // Input buffer (initialized from src1) and output buffer
    cl_mem clsrc1 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                   M * M * sizeof(cl_float), src1, NULL);
    cl_mem clsrc2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
                                   M * M * sizeof(cl_float), NULL, NULL);

    // Build the program from transpose.cl
    const char *filename = "transpose.cl";
    std::string sourceStr;
    status = convertToString(filename, sourceStr);
    const char *source = sourceStr.c_str();
    size_t sourceSize[] = { strlen(source) };
    cl_program program = clCreateProgramWithSource(context, 1, &source, sourceSize, NULL);

    status = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
    if (status != 0)
    {
        printf("clBuildProgram failed: %d\n", status);
        char tbuf[0x10000];
        clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0x10000, tbuf, NULL);
        printf("\n%s\n", tbuf);
        return -1;
    }

    // --- Simple (naive) transpose kernel ---
    cl_kernel kernel = clCreateKernel(program, "matrixTransposeSimple", NULL);
    cl_int dimx = M;
    cl_int dimy = M;
    clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&clsrc2);
    clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&clsrc1);
    clSetKernelArg(kernel, 2, sizeof(cl_int), (void *)&dimx);
    clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&dimy);

    // Set local and global work-group sizes
    size_t localws[2] = { 16, 16 };
    size_t globalws[2] = { M, M };

    cl_event ev;
    clEnqueueNDRangeKernel(queue, kernel, 2, 0, globalws, localws, 0, NULL, &ev);
    clFinish(queue);

    cl_ulong startTime, endTime;
    clGetEventProfilingInfo(ev, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &startTime, NULL);
    clGetEventProfilingInfo(ev, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &endTime, NULL);
    cl_ulong kernelExecTimeNs = endTime - startTime;
    printf("simple kernel exec time: %8.6f ms\n", kernelExecTimeNs * 1e-6);

    // Copy results from device back to host and compare against the reference
    float *op_data = 0;
    op_data = (cl_float *)clEnqueueMapBuffer(queue, clsrc2, CL_TRUE, CL_MAP_READ, 0,
                                             M * M * sizeof(cl_float), 0, NULL, NULL, NULL);
    for (i = 0; i < M * M; i++)
    {
        if (fabs(src2[i] - op_data[i]) > 0.0001)
        {
            printf("check failed\n");
            break;
        }
    }
    if (i == M * M)
        printf("check passed\n");
    clEnqueueUnmapMemObject(queue, clsrc2, op_data, 0, NULL, NULL);

    // --- Tiled transpose kernel using local memory ---
    cl_uint blockSize = 16;
    kernel = clCreateKernel(program, "matrixTranspose", NULL);
    clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&clsrc2);
    clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&clsrc1);
    clSetKernelArg(kernel, 2, sizeof(cl_float) * blockSize * blockSize, NULL); // local memory tile
    clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&dimx);
    clSetKernelArg(kernel, 4, sizeof(cl_int), (void *)&dimy);
    clSetKernelArg(kernel, 5, sizeof(cl_int), (void *)&blockSize);

    clEnqueueNDRangeKernel(queue, kernel, 2, 0, globalws, localws, 0, NULL, &ev);
    clFinish(queue);

    clGetEventProfilingInfo(ev, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &startTime, NULL);
    clGetEventProfilingInfo(ev, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &endTime, NULL);
    kernelExecTimeNs = endTime - startTime;
    printf("tiled kernel exec time: %8.6f ms\n", kernelExecTimeNs * 1e-6);

    // Copy results from device back to host and compare against the reference
    op_data = (cl_float *)clEnqueueMapBuffer(queue, clsrc2, CL_TRUE, CL_MAP_READ, 0,
                                             M * M * sizeof(cl_float), 0, NULL, NULL, NULL);
    for (i = 0; i < M * M; i++)
    {
        if (fabs(src2[i] - op_data[i]) > 0.0001)
        {
            printf("check failed\n");
            break;
        }
    }
    if (i == M * M)
        printf("check passed\n");
    clEnqueueUnmapMemObject(queue, clsrc2, op_data, 0, NULL, NULL);

    // Clean up
    if (src1)
        free(src1);
    if (src2)
        free(src2);
    clReleaseKernel(kernel);
    clReleaseMemObject(clsrc1);
    clReleaseMemObject(clsrc2);
    clReleaseProgram(program);
    clReleaseCommandQueue(queue);
    clReleaseContext(context);
    return 0;
}
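Note that this version reads the result back with clEnqueueMapBuffer rather than the clEnqueueReadBuffer used in the first example; a mapped pointer should eventually be released with clEnqueueUnmapMemObject. For comparison, the explicit blocking copy would look roughly like this (a hedged sketch reusing queue, clsrc2 and M from the listing above; host_result is an illustrative name):

// Alternative to map/unmap: an explicit blocking copy of the result into host memory.
float *host_result = (float*)malloc(M * M * sizeof(float));
cl_int err = clEnqueueReadBuffer(queue, clsrc2, CL_TRUE /* blocking */, 0,
                                 M * M * sizeof(cl_float), host_result,
                                 0, NULL, NULL);
if (err != CL_SUCCESS)
    printf("clEnqueueReadBuffer failed: %d\n", err);
/* ... use host_result ... */
free(host_result);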
Kernel code (transpose.cl):
/*
 * Copies a block to local memory and copies back the transpose
 * from local memory to the output.
 * @param output output matrix
 * @param input  input matrix
 * @param block  local memory of size blockSize x blockSize
 * @param width  width  of the input matrix
 * @param height height of the input matrix
 * @param blockSize size of the block
 */
__kernel
void matrixTranspose(__global float * output,
                     __global float * input,
                     __local  float * block,
                     const uint width,
                     const uint height,
                     const uint blockSize)
{
    uint globalIdx = get_global_id(0);
    uint globalIdy = get_global_id(1);

    uint localIdx = get_local_id(0);
    uint localIdy = get_local_id(1);

    /* copy from input to local memory */
    block[localIdy * blockSize + localIdx] = input[globalIdy * width + globalIdx];

    /* wait until the whole block is filled */
    barrier(CLK_LOCAL_MEM_FENCE);

    uint groupIdx = get_group_id(0);
    uint groupIdy = get_group_id(1);

    /* calculate the corresponding target location for the transpose
       by swapping the x and y block coordinates */
    uint targetGlobalIdx = groupIdy * blockSize + localIdy;
    uint targetGlobalIdy = groupIdx * blockSize + localIdx;

    /* calculate the corresponding raster indices of target and source */
    uint targetIndex = targetGlobalIdy * height + targetGlobalIdx;
    uint sourceIndex = localIdy * blockSize + localIdx;

    output[targetIndex] = block[sourceIndex];
}

__kernel
void matrixTransposeSimple(__global float * output,
                           __global float * input,
                           const uint width,
                           const uint height)
{
    uint gdx = get_global_id(0);
    uint gdy = get_global_id(1);

    /* the read below is strided, so it cannot be coalesced */
    output[gdy * width + gdx] = input[gdx * height + gdy];
}
Test output:
========================================================
Please credit the source when reposting: http://blog.csdn.net/songzitea/article/details/12178619
========================================================