OpenMP---第一篇

OpenMP 处理 for 循环

//////////////////////////////////////////////////////////////////////////////////////////////

/** \brief Run the MLS projection for every index in indices_ and append the results to \a output.
  *
  * For each query point the neighborhood is looked up with searchForNeighbors (); when at least
  * three neighbors are found, computeMLSPointNormal () produces the projected point(s), their
  * normals and the corresponding input indices. With OpenMP enabled, every thread accumulates into
  * its own temporary clouds, which are concatenated after the parallel loop; without OpenMP the
  * results are appended to \a output directly inside the loop. performUpsampling () is called last.
  *
  * \param[out] output cloud that receives the projected points
  */
template <typename PointInT, typename PointOutT> void
pcl::MovingLeastSquares<PointInT, PointOutT>::performProcessing (PointCloudOut &output)
{
  // Compute the number of coefficients
  nr_coeff_ = (order_ + 1) * (order_ + 2) / 2;

#ifdef _OPENMP
  // (Maximum) number of threads; a setting of 0 falls back to a single thread
  const unsigned int threads = threads_ == 0 ? 1 : threads_;
  // Create temporaries for each thread in order to avoid synchronization
  typename PointCloudOut::CloudVectorType projected_points (threads);
  typename NormalCloud::CloudVectorType projected_points_normals (threads);
  std::vector<PointIndices> corresponding_input_indices (threads);
#endif

  // For all points
#ifdef _OPENMP
#pragma omp parallel for schedule (dynamic,1000) num_threads (threads)
#endif
  for (int cp = 0; cp < static_cast<int> (indices_->size ()); ++cp)
  {
    // Search results are declared inside the loop so that every iteration (and thus every
    // thread) works on its own vectors.
    // \note resize is irrelevant for a radiusSearch ().
    std::vector<int> nn_indices;
    std::vector<float> nn_sqr_dists;

    // Get the initial estimates of point positions and their neighborhoods
    if (searchForNeighbors ((*indices_)[cp], nn_indices, nn_sqr_dists))
    {
      // Check the number of nearest neighbors for normal estimation (and later for polynomial fit as well)
      if (nn_indices.size () >= 3)
      {
#ifdef _OPENMP
        // This thread's ID (range 0 to threads-1)
        const int tn = omp_get_thread_num ();
        // Size of projected points before computeMLSPointNormal () adds points
        const size_t pp_size = projected_points[tn].size ();
#else
        PointCloudOut projected_points;
        NormalCloud projected_points_normals;
#endif

        // Get a plane approximating the local surface's tangent and project point onto it
        const int index = (*indices_)[cp];

        // Slot of mls_results_ handed to computeMLSPointNormal (): the point's own index when
        // results are cached, otherwise the dummy location 0. This is loop-local on purpose:
        // a variable shared across the parallel loop would be written concurrently by all threads.
        // NOTE(review): with caching disabled every thread is still handed mls_results_[0];
        // presumably computeMLSPointNormal () writes to it - confirm whether that needs a
        // per-thread dummy.
        const size_t mls_result_index = cache_mls_results_ ? static_cast<size_t> (index) : 0;

#ifdef _OPENMP
        computeMLSPointNormal (index, nn_indices, projected_points[tn], projected_points_normals[tn], corresponding_input_indices[tn], mls_results_[mls_result_index]);

        // Copy all information from the input cloud to the newly added output points (not doing any interpolation)
        for (size_t pp = pp_size; pp < projected_points[tn].size (); ++pp)
          copyMissingFields (input_->points[index], projected_points[tn][pp]);
#else
        computeMLSPointNormal (index, nn_indices, projected_points, projected_points_normals, *corresponding_input_indices_, mls_results_[mls_result_index]);

        // NOTE(review): unlike the OpenMP branch, this branch does not call copyMissingFields ()
        // on the new points - confirm whether that difference is intended.

        // Append projected points to output
        output.insert (output.end (), projected_points.begin (), projected_points.end ());
        if (compute_normals_)
          normals_->insert (normals_->end (), projected_points_normals.begin (), projected_points_normals.end ());
#endif
      }
    }
  }

#ifdef _OPENMP
  // Combine all threads' results into the output vectors
  for (unsigned int tn = 0; tn < threads; ++tn)
  {
    output.insert (output.end (), projected_points[tn].begin (), projected_points[tn].end ());
    corresponding_input_indices_->indices.insert (corresponding_input_indices_->indices.end (),
                                                  corresponding_input_indices[tn].indices.begin (), corresponding_input_indices[tn].indices.end ());
    if (compute_normals_)
      normals_->insert (normals_->end (), projected_points_normals[tn].begin (), projected_points_normals[tn].end ());
  }
#endif

  // Perform the distinct-cloud or voxel-grid upsampling
  performUpsampling (output);
}

/** \brief Filter the z channel of the organized input cloud and store the result in \a output.
  *
  * Steps, as implemented below:
  *  1. copy the input to \a output and find the finite z range [base_min, base_max];
  *  2. replace every non-finite z with base_max;
  *  3. accumulate (sum of z, sample count) pairs into a coarse 3D grid whose cell size is
  *     sigma_s_ in x/y and sigma_r_ in z, padded on every side;
  *  4. smooth the grid along each of its three axes with the weights (1, 2, 1) / 4, twice per axis;
  *  5. read the grid back at every pixel with trilinear interpolation and write the new z.
  *
  * Prints an error and returns if the input is not organized; prints a warning and returns if
  * the cloud contains no finite z value.
  *
  * \param[out] output the filtered cloud
  */
template <typename PointT> void
pcl::FastBilateralFilterOMP<PointT>::applyFilter (PointCloud &output)
{
  if (!input_->isOrganized ())
  {
    PCL_ERROR ("[pcl::FastBilateralFilterOMP] Input cloud needs to be organized.\n");
    return;
  }

  copyPointCloud (*input_, output);

  // Range of the finite z values
  float base_max = -std::numeric_limits<float>::max (),
        base_min = std::numeric_limits<float>::max ();
  bool found_finite = false;
  for (size_t x = 0; x < output.width; ++x)
  {
    for (size_t y = 0; y < output.height; ++y)
    {
      if (pcl_isfinite (output (x, y).z))
      {
        if (base_max < output (x, y).z)
          base_max = output (x, y).z;
        if (base_min > output (x, y).z)
          base_min = output (x, y).z;
        found_finite = true;
      }
    }
  }
  if (!found_finite)
  {
    PCL_WARN ("[pcl::FastBilateralFilterOMP] Given an empty cloud. Doing nothing.\n");
    return;
  }

  // Give every non-finite point the largest finite depth so it can be binned like the others
#ifdef _OPENMP
#pragma omp parallel for num_threads (threads_)
#endif
  for (long int i = 0; i < static_cast<long int> (output.size ()); ++i)
    if (!pcl_isfinite (output.at(i).z))
      output.at(i).z = base_max;

  const float base_delta = base_max - base_min;

  // Number of extra grid cells added on each side of every axis
  const size_t padding_xy = 2;
  const size_t padding_z  = 2;

  const size_t small_width  = static_cast<size_t> (static_cast<float> (input_->width  - 1) / sigma_s_) + 1 + 2 * padding_xy;
  const size_t small_height = static_cast<size_t> (static_cast<float> (input_->height - 1) / sigma_s_) + 1 + 2 * padding_xy;
  const size_t small_depth  = static_cast<size_t> (base_delta / sigma_r_)   + 1 + 2 * padding_z;

  // Each cell holds (sum of z, number of samples)
  Array3D data (small_width, small_height, small_depth);

  // One iteration per (small_x, small_y) column: a column is only ever written by the
  // iteration that owns it, so the iterations do not write to the same cells.
#ifdef _OPENMP
#pragma omp parallel for num_threads (threads_)
#endif
  for (long int i = 0; i < static_cast<long int> (small_width * small_height); ++i)
  {
    size_t small_x = static_cast<size_t> (i % small_width);
    size_t small_y = static_cast<size_t> (i / small_width);

    // Pixel range [start, end) of the input that falls into this column, clamped at 0
    size_t start_x = static_cast<size_t>(
        std::max ((static_cast<float> (small_x) - static_cast<float> (padding_xy) - 0.5f) * sigma_s_ + 1, 0.f));
    size_t end_x = static_cast<size_t>(
      std::max ((static_cast<float> (small_x) - static_cast<float> (padding_xy) + 0.5f) * sigma_s_ + 1, 0.f));
    size_t start_y = static_cast<size_t>(
      std::max ((static_cast<float> (small_y) - static_cast<float> (padding_xy) - 0.5f) * sigma_s_ + 1, 0.f));
    size_t end_y = static_cast<size_t>(
      std::max ((static_cast<float> (small_y) - static_cast<float> (padding_xy) + 0.5f) * sigma_s_ + 1, 0.f));

    for (size_t x = start_x; x < end_x && x < input_->width; ++x)
    {
      for (size_t y = start_y; y < end_y && y < input_->height; ++y)
      {
        const float z = output (x,y).z - base_min;
        const size_t small_z = static_cast<size_t> (static_cast<float> (z) / sigma_r_ + 0.5f) + padding_z;
        Eigen::Vector2f& d = data (small_x, small_y, small_z);
        d[0] += output (x,y).z;
        d[1] += 1.0f;
      }
    }
  }

  // Element distance between neighboring cells along each of the three grid axes
  std::vector<long int> offset (3);
  offset[0] = &(data (1,0,0)) - &(data (0,0,0));
  offset[1] = &(data (0,1,0)) - &(data (0,0,0));
  offset[2] = &(data (0,0,1)) - &(data (0,0,0));

  Array3D buffer (small_width, small_height, small_depth);

  // Smooth along every axis, two passes per axis. The first pass of a pair reads `data`
  // and writes `buffer`, the second reads `buffer` and writes `data`.
  for (size_t dim = 0; dim < 3; ++dim)
  {
    for (size_t n_iter = 0; n_iter < 2; ++n_iter)
    {
      Array3D* current_buffer = (n_iter % 2 == 1 ? &buffer : &data);
      Array3D* current_data =(n_iter % 2 == 1 ? &data : &buffer);
#ifdef _OPENMP
#pragma omp parallel for num_threads (threads_)
#endif
      // Interior columns only: the outermost layer of cells is never written
      for(long int i = 0; i < static_cast<long int> ((small_width - 2)*(small_height - 2)); ++i)
      {
        size_t x = static_cast<size_t> (i % (small_width - 2) + 1);
        size_t y = static_cast<size_t> (i / (small_width - 2) + 1);
        const long int off = offset[dim];
        Eigen::Vector2f* d_ptr = &(current_data->operator() (x,y,1));
        Eigen::Vector2f* b_ptr = &(current_buffer->operator() (x,y,1));

        for(size_t z = 1; z < small_depth - 1; ++z, ++d_ptr, ++b_ptr)
          *d_ptr = (*(b_ptr - off) + *(b_ptr + off) + 2.0 * (*b_ptr)) / 4.0;
      }
    }
  }
  // Note: this works because there are an even number of iterations.
  // If there were an odd number, we would need to end with a:
  // std::swap (data, buffer);

  if (early_division_)
  {
    // Normalize every cell before interpolating; cells whose sum is 0 are divided by 1
    for (std::vector<Eigen::Vector2f, Eigen::aligned_allocator<Eigen::Vector2f> >::iterator d = data.begin (); d != data.end (); ++d)
      *d /= ((*d)[0] != 0) ? (*d)[1] : 1;

#ifdef _OPENMP
#pragma omp parallel for num_threads (threads_)
#endif
    for (long int i = 0; i < static_cast<long int> (input_->size ()); ++i)
    {
      size_t x = static_cast<size_t> (i % input_->width);
      size_t y = static_cast<size_t> (i / input_->width);
      const float z = output (x,y).z - base_min;
      const Eigen::Vector2f D = data.trilinear_interpolation (static_cast<float> (x) / sigma_s_ + padding_xy,
                                                              static_cast<float> (y) / sigma_s_ + padding_xy,
                                                              z / sigma_r_ + padding_z);
      output(x,y).z = D[0];
    }
  }
  else
  {
    // Interpolate the raw (sum, count) pair first and divide afterwards
#ifdef _OPENMP
#pragma omp parallel for num_threads (threads_)
#endif
    for (long i = 0; i < static_cast<long int> (input_->size ()); ++i)
    {
      size_t x = static_cast<size_t> (i % input_->width);
      size_t y = static_cast<size_t> (i / input_->width);
      const float z = output (x,y).z - base_min;
      const Eigen::Vector2f D = data.trilinear_interpolation (static_cast<float> (x) / sigma_s_ + padding_xy,
                                                              static_cast<float> (y) / sigma_s_ + padding_xy,
                                                              z / sigma_r_ + padding_z);
      output (x,y).z = D[0] / D[1];
    }
  }
}

/** \brief Estimate a normal and curvature for every index in indices_ and write them to \a output.
  *
  * For each query point the neighbors are looked up with searchForNeighbors () and passed to
  * computePointNormal (). If the search returns 0 or the normal computation fails (and, for
  * non-dense input, if the query point itself is not finite), the normal and curvature of that
  * output point are set to NaN and output.is_dense is cleared. Otherwise the normal is stored and
  * flipped towards the viewpoint (vpx_, vpy_, vpz_).
  *
  * \param[out] output cloud receiving one normal/curvature entry per index
  */
template <typename PointInT, typename PointOutT> void
pcl::NormalEstimationOMP<PointInT, PointOutT>::computeFeature (PointCloudOut &output)
{
  // Allocate enough space to hold the results
  // \note This resize is irrelevant for a radiusSearch ().
  // \note Both vectors are listed in the private () clause below, so each thread works on
  //       its own copy inside the parallel loops.
  std::vector<int> nn_indices (k_);
  std::vector<float> nn_dists (k_);

  output.is_dense = true;
  // Save a few cycles by not checking every point for NaN/Inf values if the cloud is set to dense
  if (input_->is_dense)
  {
#ifdef _OPENMP
#pragma omp parallel for shared (output) private (nn_indices, nn_dists) num_threads(threads_)
#endif
    // Iterating over the entire index vector
    for (int idx = 0; idx < static_cast<int> (indices_->size ()); ++idx)
    {
      Eigen::Vector4f n;
      if (this->searchForNeighbors ((*indices_)[idx], search_parameter_, nn_indices, nn_dists) == 0 ||
          !computePointNormal (*surface_, nn_indices, n, output.points[idx].curvature))
      {
        output.points[idx].normal[0] = output.points[idx].normal[1] = output.points[idx].normal[2] = output.points[idx].curvature = std::numeric_limits<float>::quiet_NaN ();

        // NOTE(review): written from several threads without synchronization; every writer
        // stores the same value.
        output.is_dense = false;
        continue;
      }

      output.points[idx].normal_x = n[0];
      output.points[idx].normal_y = n[1];
      output.points[idx].normal_z = n[2];

      flipNormalTowardsViewpoint (input_->points[(*indices_)[idx]], vpx_, vpy_, vpz_,
                                  output.points[idx].normal[0], output.points[idx].normal[1], output.points[idx].normal[2]);
    }
  }
  else
  {
#ifdef _OPENMP
#pragma omp parallel for shared (output) private (nn_indices, nn_dists) num_threads(threads_)
#endif
    // Iterating over the entire index vector
    for (int idx = 0; idx < static_cast<int> (indices_->size ()); ++idx)
    {
      Eigen::Vector4f n;
      // Same as the dense case, but non-finite query points are rejected before searching
      if (!isFinite ((*input_)[(*indices_)[idx]]) ||
          this->searchForNeighbors ((*indices_)[idx], search_parameter_, nn_indices, nn_dists) == 0 ||
          !computePointNormal (*surface_, nn_indices, n, output.points[idx].curvature))
      {
        output.points[idx].normal[0] = output.points[idx].normal[1] = output.points[idx].normal[2] = output.points[idx].curvature = std::numeric_limits<float>::quiet_NaN ();

        // NOTE(review): written from several threads without synchronization; every writer
        // stores the same value.
        output.is_dense = false;
        continue;
      }

      output.points[idx].normal_x = n[0];
      output.points[idx].normal_y = n[1];
      output.points[idx].normal_z = n[2];

      flipNormalTowardsViewpoint (input_->points[(*indices_)[idx]], vpx_, vpy_, vpz_,
                                  output.points[idx].normal[0], output.points[idx].normal[1], output.points[idx].normal[2]);
    }
  }
}

openMP---第一篇的更多相关文章

从0开始搭建SQL Server AlwaysOn 第一篇（配置域控）
从0开始搭建SQL Server AlwaysOn 第一篇(配置域控) 第一篇http://www.cnblogs.com/lyhabc/p/4678330.html第二篇http://www.cnb ...
Python爬虫小白入门（四）PhatomJS+Selenium第一篇
一.前言在上一篇博文中,我们的爬虫面临着一个问题,在爬取Unsplash网站的时候,由于网站是下拉刷新,并没有分页.所以不能够通过页码获取页面的url来分别发送网络请求.我也尝试了其他方式,比如下拉 ...
Three.js 第一篇：绘制一个静态的3D球体
第一篇就画一个球体吧首先我们知道Three.js其实是一个3D的JS引擎,其中的强大之处就在于这个JS框架并不是依托于JQUERY来写的.那么,我们在写这一篇绘制3D球体的文章的时候,应该注意哪些地 ...
深入学习jQuery选择器系列第一篇——基础选择器和层级选择器
× 目录 [1]id选择器 [2]元素选择器 [3]类选择器[4]通配选择器[5]群组选择器[6]后代选择器[7]兄弟选择器前面的话选择器是jQuery的根基,在jQuery中,对事件处理.遍历D ...
【第一篇】ASP.NET MVC快速入门之数据库操作（MVC5+EF6）
目录 [第一篇]ASP.NET MVC快速入门之数据库操作(MVC5+EF6) [第二篇]ASP.NET MVC快速入门之数据注解(MVC5+EF6) [第三篇]ASP.NET MVC快速入门之安全策 ...
Android基础学习第一篇—Project目录结构
写在前面的话: 1. 最近在自学Android,也是边看书边写一些Demo,由于知识点越来越多,脑子越来越记不清楚,所以打算写成读书笔记,供以后查看,也算是把自己学到所理解的东西写出来,献丑,如有不对 ...
深入理解ajax系列第一篇——XHR对象
× 目录 [1]创建对象 [2]发送请求 [3]接收响应[4]异步处理[5]实例演示前面的话 ajax是asynchronous javascript and XML的简写,中文翻译是异步的java ...
深入理解javascript对象系列第一篇——初识对象
× 目录 [1]定义 [2]创建 [3]组成[4]引用[5]方法前面的话 javascript中的难点是函数.对象和继承,前面已经介绍过函数系列.从本系列开始介绍对象部分,本文是该系列的第一篇——初 ...
深入理解this机制系列第一篇——this的4种绑定规则
× 目录 [1]默认绑定 [2]隐式绑定 [3]隐式丢失[4]显式绑定[5]new绑定[6]严格模式前面的话如果要问javascript中哪两个知识点容易混淆,作用域查询和this机制绝对名列前茅 ...
前端工程师技能之photoshop巧用系列第一篇——准备篇
× 目录 [1]作用 [2]初始化 [3]常用工具[4]快捷键前面的话 photoshop是前端工程师无法回避的一个软件,这个软件本身很强大,但我们仅仅需要通过这个工具来完成基本的切图工作即可.本文 ...

随机推荐

postgres 索引
索引是一种特殊的查询表,可以使用搜索引擎的数据库以加快数据检索.简单地说,索引是表中的数据的一个指针,在一个数据库中的索引是非常相似,如:一本书的目录. 例如,如果想在一本书中引用的所有页面讨论某个话 ...
高性能SQLServer分页语句
第一种方法:效率最高 SELECT TOP 页大小 * FROM( SELECT ROW_NUMBER() OVER (ORDER BY id) AS RowNumber,* FROM table1 ...
辨析 const指针和指向常量的指针
辨析以下几种指针p的定义. ; int *p = &tmp; const int *p = &tmp; int const* p = &tmp; int * const p = ...
mybatis-03
mybatis-03 1.mybatis的别名[两种]在MyBatis中可以为变量类型定义别名.简化映射文件的定义,在核心配置文件中定义的别名.别名应用:MyBatis框架先将resultType定义 ...
deep_learning_Function_ lambda函数详解
这里总结了关于 Python 中的 lambda 函数的“一个语法,三个特性,四个用法”. 一个语法: 在 Python 中,lambda 函数的语法是唯一的.其形式如下: lambda argume ...
mysql数据库：mysql初识
1.什么是数据库 ***** 存放数据的仓库已学习的文件操作的缺陷 1.IO操作效率问题 2.多用户竞争数据 3.网络访问 ...
Linux用户组管理及用户权限4
权限管理: ls -l rwxrwxrwx: 左三位:定义user(owner)的权限中三位:定义group的权限 ...
CentOS7 xrdp 安装和设置
1) 安装 $ sudo yum install xrdp $ sudo yum install tigervnc $ sudo yum install tigervnc-server 2) 设置密码 ...
腾讯数据安全专家谈联邦学习开源项目FATE：通往隐私保护理想未来的桥梁
数据孤岛.数据隐私以及数据安全,是目前人工智能和云计算在大规模产业化应用过程中绕不开的“三座大山”. “联邦学习”作为新一代的人工智能算法,能在数据不出本地的情况下,实现共同建模,提升AI模型的效果, ...
实操 | 内存占用减少高达90%，还不用升级硬件？没错，这篇文章教你妙用Pandas轻松处理大规模数据
注:Pandas(Python Data Analysis Library) 是基于 NumPy 的一种工具,该工具是为了解决数据分析任务而创建的.此外,Pandas 纳入了大量库和一些标准的数据模型 ...

openMP---第一篇

openMP---第一篇的更多相关文章

随机推荐

热门专题