对 GCC 的 __uint128_t 类型求平方根并向下取整(floor)。

在 i7-7700K @ 4.35 GHz 上约 45.5 ns/op,即通常 <200 cycles/op。

Includes a random u128 generator, timing, and validation code.

#include <cmath>
#include <cstdio>
#include <random>
#include <chrono>
typedef __uint128_t u128;
typedef unsigned long long u64;
const int count=10000000;
// Estimate of the integer square root of x.
// Seeds with the truncated double-precision square root, then refines it with
// one integer Newton step, (a + x/a) / 2. The result is an estimate only:
// it may differ from the exact floor root, so callers apply their own final
// correction.
// Returns 0 for x == 0; without this guard the seed would be 0 and the
// Newton step would divide by zero.
u64 sqrt_approx(u64 x){
    if(x==0)return 0;
    u64 approx=sqrt(double(x));
    return (approx+x/approx)>>1;
}
// Floor of the square root of a 64-bit unsigned integer.
// Seeds with the truncated double-precision square root, refines it with one
// integer Newton step, then corrects the estimate by at most one in either
// direction so that the result r satisfies r*r <= x < (r+1)*(r+1).
// Returns 0 for x == 0 (the Newton step would otherwise divide by zero).
u64 sqrt(u64 x){
    if(x==0)return 0;
    u64 approx=sqrt(double(x));
    u64 apt=(approx+x/approx)>>1;
    u64 square=apt*apt;
    // Estimate one too large: step down.
    if(square>x)return apt-1;
    // Estimate too small exactly when x >= (apt+1)^2 = apt^2 + 2*apt + 1,
    // i.e. when the remainder exceeds 2*apt. (The previous threshold of
    // 2*apt-1 wrongly bumped inputs such as 2, 7, 14 and 2^64-1.)
    if(x-square>2*apt)return apt+1;
    return apt;
}
// Floor of the square root of a 128-bit unsigned integer.
// Inputs whose upper 64 bits are zero are forwarded to the u64 overload.
// Otherwise the top bits of r are reduced to a 64-bit value, sqrt_approx()
// supplies a first guess, and one 128-bit Newton step plus a single downward
// correction produce the result.
u128 sqrt(u128 r){
    const u64 high=u64(r>>64);
    if(high==0)return sqrt(u64(r));
    // Bit length of the upper word (1..64), rounded up to an even number so
    // that r>>shift fits in 64 bits and shift/2 is an exact half.
    const int high_bits=64-__builtin_clzll(high);
    const int shift=(high_bits+1)&~1;
    // First guess: estimated root of the leading bits, scaled back up.
    u128 root=u128(sqrt_approx(u64(r>>shift)))<<(shift/2);
    // One full-width Newton step.
    root=(root+r/root)/2;
    // Square only the low 64 bits of the guess (as a 64x64->128 product).
    const u64 low=u64(root);
    const u128 square=u128(low)*u128(low);
    // No upward correction is applied (the original author left one
    // commented out). r-square wraps around when square>r, which sets
    // bit 127; subtracting that bit steps the guess down by one.
    return root-((r-square)>>127);
}
u128 rand_arr[count],root_arr[count]; typedef void(*func)(); void Time(const char*str,func fn,int multi=count){
using hrc=std::chrono::high_resolution_clock;
hrc::time_point start=hrc::now();
fn();
hrc::time_point stop=hrc::now();
hrc::duration dur=stop-start;
printf("%s Finished in %llu us . \n",str,std::chrono::duration_cast<std::chrono::microseconds>(dur).count());
if(multi)
printf("Average %.3lfns per op.\n",double(std::chrono::duration_cast<std::chrono::nanoseconds>(dur).count())/multi);
} void Root(){
for(int i=0;i<count;++i)
root_arr[i]=sqrt(rand_arr[i]);
} std::mt19937_64 rng; void Gen(){
for(int i=0;i<count;++i)
rand_arr[i]=(u128(rng())<<64)|rng();
} int Validate(){
for(int i=0;i<count;++i){
u128 ax=root_arr[i];
u128 bx=(ax+1)*(ax+1);
ax=ax*ax;
if(ax>rand_arr[i])
return i+1;
if(bx<=rand_arr[i])
return i+1;
}return 0;
} char pp[300];
#define spp(...) (sprintf(pp,##__VA_ARGS__),pp)
#define hexo(x) (spp("0x%llx%016llx",u64(x>>64),u64(x))) int main(){
printf("Count=%d\n",count);
Time("Generation",Gen);
Time("Square root",Root);
int val=Validate();
printf("Validation %s\n",val?spp("Fail at %d",val):"Passed");
if(val){
--val;
printf("Rand %s\n",hexo(rand_arr[val]));
printf("Root %s\n",hexo(root_arr[val]));
}
return 0;
}

sqrti128的更多相关文章

随机推荐

  1. 使用sqoop将mysql中表导入hive中报错

    [hdfs@node1 root]$ sqoop import --connect jdbc:mysql://node2:3306/cm?charset-utf8 --username root -- ...

  2. Myeclipse报错-Java compiler level does not match 完美解决方法

    从别的地方导入一个项目的时候,经常会遇到eclipse/Myeclipse报Description  Resource Path Location Type Java compiler level d ...

  3. request.getparameter() 获取中文出现乱码 问题

    http请求是以ISO-8859-1的编码来传送url的 如果页面的content-type为utf-8,那么在发送请求时,会将字符转成utf-8后进行传送 如: 中 的UTF-8编码为:E4 B8 ...

  4. 「日常训练」Common Subexpression Elimination(UVa-12219)

    今天做的题目就是抱佛脚2333 懂的都懂. 这条题目干了好几天,最后还是参考别人的代码敲出来了,但是自己独立思考了两天多,还是有收获的. 思路分析 做这条题我是先按照之前的那条题目(The SetSt ...

  5. 问题 C: Goldbach's Conjecture

    题目描述 Goldbach's Conjecture: For any even number n greater than or equal to 4, there exists at least ...

  6. LeetCode - 67. Add Binary(4ms)

    Given two binary strings, return their sum (also a binary string). The input strings are both non-em ...

  7. ASP.NET MVC5.0 OutputCache不起效果

    按照官网文档(https://docs.microsoft.com/en-us/aspnet/mvc/overview/older-versions-1/controllers-and-routing ...

  8. redis-Windows下安装与操作

    Redis windows下安装 1.安装 (1)windows把redisbin_x32安装包放在电脑任意的盘里 (2)通过cmd找到对应目录:  D\redisbin_x32 (3)开始安装 D\ ...

  9. URAL 1736 Chinese Hockey(网络最大流)

    Description Sergey and Denis closely followed the Chinese Football Championship, which has just come ...

  10. 剑指offer:斐波那契数列

    目录 题目 解题思路 具体代码 题目 题目链接 剑指offer:斐波那契数列 题目描述 大家都知道斐波那契数列,现在要求输入一个整数n,请你输出斐波那契数列的第n项(从0开始,第0项为0). n< ...