Here is the code:

#include <stdio.h>
#include <xmmintrin.h>
#include <windows.h>

// Pixel type: one SSE register (four 32-bit lanes, 16 bytes) per image element.
typedef __m128 Vec;
// Raw tick count as reported by QueryPerformanceCounter.
typedef unsigned long long value_t;

// NOTE(review): every numeric literal was missing from the recovered source.
// Loop start values (0) and the lane indices (0 and 1) are unambiguous; the
// sizes below are placeholders chosen to make the program runnable and should
// be confirmed against the original author's values.
enum
{
    BLOCK_SIZE = 16,        // tile edge length used by img_transpose_block
    BENCH_WIDTH = 1024,     // benchmark image width, in elements
    BENCH_HEIGHT = 1024,    // benchmark image height, in elements
    BENCH_ITERATIONS = 100, // transpose round-trips per timed run
    CHECK_WIDTH = 100,      // validation image width (not a multiple of BLOCK_SIZE,
    CHECK_HEIGHT = 50       // so the clamped edge tiles are exercised too)
};

// Returns the current value of the high-resolution performance counter, in ticks.
__forceinline value_t now()
{
    LARGE_INTEGER n;
    QueryPerformanceCounter(&n);
    return n.QuadPart;
}

// Straightforward transpose: writes the src_w x src_h row-major image src_img
// into dst_img as a src_h x src_w row-major image.
//
// dst_img  destination buffer, at least src_w * src_h elements
// src_img  source buffer, src_w * src_h elements, row stride src_w
// src_w    source width in elements
// src_h    source height in elements
inline void img_transpose(
    Vec *dst_img,
    Vec *src_img,
    const int src_w,
    const int src_h)
{
    // Each iteration of the outer loop fills one destination row, so the
    // iterations are independent and can be distributed across threads.
#pragma omp parallel for
    for (int j = 0; j < src_w; ++j)
    {
        for (int i = 0; i < src_h; ++i)
        {
            dst_img[j * src_h + i] = src_img[i * src_w + j];
        }
    }
}

// Tiled transpose: same result as img_transpose, but the image is walked in
// BLOCK_SIZE x BLOCK_SIZE tiles and each tile is transposed in turn.
//
// dst_img  destination buffer, at least src_w * src_h elements
// src_img  source buffer, src_w * src_h elements, row stride src_w
// src_w    source width in elements
// src_h    source height in elements
inline void img_transpose_block(
    Vec *dst_img,
    Vec *src_img,
    const int src_w,
    const int src_h)
{
#pragma omp parallel for
    for (int j = 0; j < src_w; j += BLOCK_SIZE)
    {
        for (int i = 0; i < src_h; i += BLOCK_SIZE)
        {
            // Clamp the tile to the image bounds so partial tiles on the
            // right and bottom edges are handled.
            const int nsize = min(j + BLOCK_SIZE, src_w);
            const int msize = min(i + BLOCK_SIZE, src_h);

            for (int n = j; n < nsize; ++n)
            {
                for (int m = i; m < msize; ++m)
                {
                    dst_img[n * src_h + m] = src_img[m * src_w + n];
                }
            }
        }
    }
}

// Times both transpose variants, then checks the tiled variant against the
// expected coordinates. Prints the timings and the validation outcome.
int main(int argc, char *argv[])
{
    //// performance benchmark ////
    const int w = BENCH_WIDTH;
    const int h = BENCH_HEIGHT;
    // Value-initialise the buffers so the benchmark does not copy
    // indeterminate data.
    Vec *a = new Vec [w * h]();
    Vec *b = new Vec [w * h]();
    value_t start_time, end_time;

    // Counter frequency is in ticks per second; convert to milliseconds per tick.
    LARGE_INTEGER freq;
    QueryPerformanceFrequency(&freq);
    double ms_per_tick = 1000.0 / (double)freq.QuadPart;

    start_time = now();
    for (int t = 0; t < BENCH_ITERATIONS; ++t)
    {
        // Transpose there and back again so `a` keeps its original layout.
        img_transpose(b, a, w, h);
        img_transpose(a, b, h, w);
    }
    end_time = now();
    printf("img_transpose: %f ms\n", (double)(end_time - start_time) * ms_per_tick);

    start_time = now();
    for (int t = 0; t < BENCH_ITERATIONS; ++t)
    {
        img_transpose_block(b, a, w, h);
        img_transpose_block(a, b, h, w);
    }
    end_time = now();
    printf("img_transpose_block: %f ms\n", (double)(end_time - start_time) * ms_per_tick);

    delete [] a;
    delete [] b;

    //// algorithm validation ////
    const int width = CHECK_WIDTH;
    const int height = CHECK_HEIGHT;
    Vec *src_img = new Vec [width * height];
    Vec *dst_img = new Vec [height * width];

    // Tag every source element with its own (column, row) coordinates.
    for (int j = 0; j < height; ++j)
    {
        for (int i = 0; i < width; ++i)
        {
            src_img[j * width + i].m128_i32[0] = i;
            src_img[j * width + i].m128_i32[1] = j;
        }
    }

    img_transpose_block(dst_img, src_img, width, height);

    // After the transpose, destination element (row j, column i) must carry
    // the tag of source element (row i, column j).
    for (int j = 0; j < width; ++j)
    {
        for (int i = 0; i < height; ++i)
        {
            int pi = dst_img[j * height + i].m128_i32[0];
            int pj = dst_img[j * height + i].m128_i32[1];

            if (pi != j || pj != i)
            {
                printf("Algorithm is wrong!!!\n");
                goto END_OF_PROGRAM;
            }
        }
    }

END_OF_PROGRAM:
    // Release the validation buffers on both the success and failure paths.
    delete [] src_img;
    delete [] dst_img;

    printf("All done\n");
    return 0;
}

A tiny program to benchmark image transpose algorithms的更多相关文章

  1. hey is a tiny program that sends some load to a web application.

    hey is a tiny program that sends some load to a web application. DOS attack DOS攻击生成 https://github.c ...

  2. 自己动手写一个编译器Tiny语言解析器实现

    然后,上一篇文章简介Tiny词法分析,实现语言.本文将介绍Tiny的语法分析器的实现. 1 Tiny语言的语法 下图是Tiny在BNF中的文法. 文法的定义能够看出.INNY语言有以下特点: 1 程序 ...

  3. Reading List on Automated Program Repair

    Some resources: https://www.monperrus.net/martin/automatic-software-repair 2017 [ ] DeepFix: Fixing ...

  4. [io benchmark]常用磁盘基准/压力测试工具

    Unix Disk I/O Benchmarks fio - NEW! fio is an I/O tool meant to be used both for benchmark and stres ...

  5. UVA - 10895 Matrix Transpose

    UVA - 10895 Matrix Transpose Time Limit:3000MS   Memory Limit:Unknown   64bit IO Format:%lld & % ...

  6. Awesome Go

    A curated list of awesome Go frameworks, libraries and software. Inspired by awesome-python. Contrib ...

  7. Go 语言相关的优秀框架,库及软件列表

    If you see a package or project here that is no longer maintained or is not a good fit, please submi ...

  8. Awesome Go (http://awesome-go.com/)

    A curated list of awesome Go frameworks, libraries and software. Inspired by awesome-python. Contrib ...

  9. Awesome Go精选的Go框架,库和软件的精选清单.A curated list of awesome Go frameworks, libraries and software

    Awesome Go      financial support to Awesome Go A curated list of awesome Go frameworks, libraries a ...

随机推荐

  1. Eclipse中代码使用快捷键无法格式化,使用其他方法将代码格式化的步骤

    选中需要进行格式化的代码--->右键--->source--->format,就可以将代码格式化了.

  2. db2 查看表空间使用率

    1. 统计所有节点表空间使用率 select substr(TABLESPACE_NAME,1,20) as TBSPC_NAME,bigint(TOTAL_PAGES * PAGE_SIZE)/10 ...

  3. boost 学习(1)

    智能指针的学习 中文教程网站 http://zh.highscore.de/cpp/boost/ 不过代码可能 由于BOOST 版本不同需要稍作修改 scoped_ptr 离开作用域则自动调用类析构函 ...

  4. form 表单添加 enctype ="multipart/form-data" 属性后后台接收中文乱码

    解决办法: new String( request.getParameter("title").getBytes("ISO-8859-1"),"utf ...

  5. 如何使用css来让图片居中不变形 微信小程序和web端适用

    图片变形很多人祭奠出了妖魔鬼怪般的各种大法,比如使用jq来写,或者使用css表达式来写.今天我总结的是使用css3来写,唯一最大缺点就是对一些浏览器版本不够兼容.下面就是关于如何使用css来让图片居中 ...

  6. spring mvc 用cookie和拦截器实现自动登录(/免登录)

    Cookie/Session机制详解:http://blog.csdn.net/fangaoxin/article/details/6952954 SpringMVC记住密码功能:http://blo ...

  7. 2018.09.29 bzoj3039: 玉蟾宫(悬线法)

    传送门 悬线法的板子题. 悬线法只需要保存当期点向下最多多少个,把这个当成一条线,再处理出线绷直之后最多能向左右延展多少就行了. 代码: #include<bits/stdc++.h> # ...

  8. 解决yum安装时 Cannot retrieve repository metadata (repomd.xml) for repository

    打开/etc/yum.repos.d/CentOS6-Base-163.repo 将下面的baseUrl的地址换成网上最新 # CentOS-Base.repo## The mirror system ...

  9. memory

    reg [7:0] moma [255:0] ;//定义一个位宽为8,深度为256的memory. parameter wordsize = 8; parameter memsize = 256; ...

  10. 不用快捷键就能使用Eclipse的自动完成功能

    偶然间看到了这个,或许有和我一样不喜欢按 alt-/ 兄弟用得上.不用老去按那个 alt-/ 了,还是方便不少.         打开 Eclipse -> Window -> Perfe ...