【C++】Strassen算法代码

本文仅代码，无理论解释

实话实说，我觉得这个算法在C系列的语言下，简直垃圾到爆炸……毕竟是一群完全不懂程序的数学家对着纸弄出来的，看起来好像非常有用，实际上耗时是非常爆炸的。

但是《算法导论》里有啊……然后上课又要求手写一个

于是我就手写了一个……我尽可能地减少了空间占用，同时加快了速度：当 n = 512 时，内存使用量峰值没有超过 10 MB，而且是通过递归实现的 Strassen 算法。

其中，in.txt 已经预先准备了 3000000 个范围在 0-100 的随机数，避免程序在运算过程中爆 int（虽然完全可以取 1000）。

/**

 * Created by Mauve on 3/29/2020.

 * Copyright © 2020 Mauve, All Rights Reserved

 */

#include <bits/stdc++.h>

using namespace std;

/**

 * 矩阵相乘

 * 最终的耗时结果保存至

 * https://www.desmos.com/calculator/gl4tm5i1zu

 */

/**
 * Abstract view of a row-by-col matrix of ints.
 *
 * Concrete views decide where the elements come from; this base only fixes
 * the dimensions and the element-access interface.
 */
struct mat {
    unsigned row, col;  // number of rows / columns exposed by this view

    mat(unsigned r, unsigned c) : row(r), col(c) {}

    // Derived views are heap-allocated; without a virtual destructor, deleting
    // one through a `mat *` would be undefined behaviour.
    virtual ~mat() = default;

    // Writable reference to element (i, j).
    virtual int &pos_ref(unsigned i, unsigned j) = 0;

    // Value of element (i, j).
    virtual int pos(unsigned i, unsigned j) const = 0;
};

// Forward declarations of the two concrete matrix types defined below.
struct base_mat;

struct sub_mat;

// Registry of live sub_mat objects: every sub_mat constructor pushes the new
// object here, and the multiplication code and solve() pop and delete entries
// to release them.
stack<sub_mat *> sub_data;

/**
 * Matrix that owns a dense, row-major buffer of row * col ints.
 *
 * The constructor does NOT initialize the elements; callers must fill (or
 * memset) the buffer before reading from it.
 */
struct base_mat : mat {
    int *data;  // owned buffer, released in the destructor

    // Element count is computed in size_t so large dimensions cannot wrap the
    // 32-bit product.
    base_mat(unsigned r, unsigned c)
            : mat(r, c), data(new int[static_cast<size_t>(r) * c]) {}

    // The buffer has exactly one owner. The implicit member-wise copy would
    // leave two objects pointing at the same array and free it twice.
    base_mat(const base_mat &) = delete;

    base_mat &operator=(const base_mat &) = delete;

    ~base_mat() {
        delete[] data;
    }

    // Writable reference to element (i, j) of the row-major buffer.
    inline int &pos_ref(unsigned i, unsigned j) override {
        return data[static_cast<size_t>(i) * col + j];
    }

    // Value of element (i, j) of the row-major buffer.
    inline int pos(unsigned i, unsigned j) const override {
        return data[static_cast<size_t>(i) * col + j];
    }
};

// Recursion cutoff read by sub_mat::operator*: once any of the three dimensions
// of a product is <= min_mul, the plain triple loop is used instead of recursing.
unsigned min_mul;

struct sub_mat : mat {

    mat *a, *b;

    bool is_add;

    unsigned offset_ai, offset_aj, offset_bi, offset_bj;

    explicit sub_mat(mat *data) : mat(data->row, data->col), a(data), b(nullptr),

                                  is_add(false), offset_ai(0), offset_aj(0),

                                  offset_bi(0), offset_bj(0) { sub_data.push(this); }

    sub_mat(mat *data, bool of_i, bool of_j) : mat(data->row >> 1u, data->col >> 1u), a(data), b(nullptr),

                                               is_add(false), offset_ai(of_i ? data->row >> 1u : 0),

                                               offset_aj(of_j ? data->col >> 1u : 0),

                                               offset_bi(0), offset_bj(0) { sub_data.push(this); }

    inline int &pos_ref(unsigned i, unsigned j) override {

        assert(b == nullptr);

        return a->pos_ref(i + offset_ai, j + offset_aj);

    }

    inline int pos(unsigned i, unsigned j) const override {

        if (b == nullptr)

            return a->pos(i + offset_ai, j + offset_aj);

        return a->pos(i + offset_ai, j + offset_aj) + (is_add ? 1 : -1) * b->pos(i + offset_bi, j + offset_bj);

    }

    inline sub_mat *operator+(sub_mat &other) {

        auto res = new sub_mat(this);

        res->b = &other;

        res->is_add = true;

        return res;

    }

    inline sub_mat *operator-(sub_mat &other) {

        auto res = new sub_mat(this);

        res->b = &other;

        res->is_add = false;

        return res;

    }

    mat *operator*(sub_mat &other) {

        assert(col == other.row);

        auto res = new base_mat(row, other.col);

        if (col & 1u || row & 1u || col <= min_mul || row <= min_mul || other.col <= min_mul) {

            memset(res->data, 0, sizeof(int) * res->row * res->col);

            for (int k = 0; k < col; k++)

                for (int i = 0; i < row; ++i)

                    for (int j = 0; j < other.col; ++j)

                        res->pos_ref(i, j) += pos(i, k) * other.pos(k, j);

        } else {

            size_t sub_data_size = sub_data.size();

#define a(i, j) (*new sub_mat(this, i == 2 , j == 2))

#define b(i, j) (*new sub_mat(&other, i == 2 , j == 2))

            auto m1 = *(a(1, 1) + a(2, 2)) * *(b(1, 1) + b (2, 2));

            auto m2 = *(a(2, 1) + a(2, 2)) * b(1, 1);

            auto m3 = a(1, 1) * *(b(1, 2) - b(2, 2));

            auto m4 = a(2, 2) * *(b(2, 1) - b(1, 1));

            auto m5 = *(a(1, 1) + a(1, 2)) * b(2, 2);

            auto m6 = *(a(2, 1) - a(1, 1)) * *(b(1, 1) + b(1, 2));

            auto m7 = *(a(1, 2) - a(2, 2)) * *(b(2, 1) + b(2, 2));

#undef a

#undef b

            unsigned half_row = row >> 1u, half_col = col >> 1u;

#define m(t) (m##t->pos(i, j))

            // C11

            for (unsigned i = 0; i < half_row; ++i)

                for (unsigned j = 0; j < half_col; ++j)

                    res->pos_ref(i, j) = m(1) + m(4) - m(5) + m(7);

            // C12

            for (unsigned i = 0; i < half_row; ++i)

                for (unsigned j = 0; j < half_col; ++j)

                    res->pos_ref(i, j + half_col) = m(3) + m(5);

            // C21

            for (unsigned i = 0; i < half_row; ++i)

                for (unsigned j = 0; j < half_col; ++j)

                    res->pos_ref(i + half_row, j) = m(2) + m(4);

            // C22

            for (unsigned i = 0; i < half_row; ++i)

                for (unsigned j = 0; j < half_col; ++j)

                    res->pos_ref(i + half_row, j + half_col) = m(1) - m(2) + m(3) + m(6);

#undef m

            delete dynamic_cast<base_mat *>(m1);

            delete dynamic_cast<base_mat *>(m2);

            delete dynamic_cast<base_mat *>(m3);

            delete dynamic_cast<base_mat *>(m4);

            delete dynamic_cast<base_mat *>(m5);

            delete dynamic_cast<base_mat *>(m6);

            delete dynamic_cast<base_mat *>(m7);

            while (sub_data.size() > sub_data_size) {

                delete sub_data.top();

                sub_data.pop();

            }

        }

        return res;

    }

};

// Side length of the square matrices read by the next solve() call; solve()
// doubles it after each run and exits the process once it reaches 1000 or more.
unsigned N = 2;

void solve() {

    cerr << "N = " << N << endl;

    base_mat a(N, N), b(N, N);

    for (int i = 0; i < N; ++i)

        for (int j = 0; j < N; ++j)

            cin >> a.pos_ref(i, j);

    for (int i = 0; i < N; ++i)

        for (int j = 0; j < N; ++j)

            cin >> b.pos_ref(i, j);

    for (int t = 1; t < min(10u, N); t += 3) {

        auto x = new sub_mat(&a), y = new sub_mat(&b);

        min_mul = t;

        auto time_1 = clock();

        auto z = *x * *y;

        auto time_2 = clock();

        cerr << "t = " << t << " time: " << double(time_2 - time_1) / CLOCKS_PER_SEC << endl;

        delete dynamic_cast<base_mat *>(z);

        while (!sub_data.empty()) {

            delete sub_data.top();

            sub_data.pop();

        }

    }

    auto x = new sub_mat(&a), y = new sub_mat(&b);

    min_mul = 10000;

    auto time_1 = clock();

    auto z = *x * *y;

    auto time_2 = clock();

    cerr << "tradition: " << double(time_2 - time_1) / CLOCKS_PER_SEC << endl;

    delete dynamic_cast<base_mat *>(z);

    while (!sub_data.empty()) {

        delete sub_data.top();

        sub_data.pop();

    }

    N *= 2;

    if (N >= 1000) exit(0);

}

/**
 * Entry point.
 *
 * Without ACM_LOCAL: runs solve() once on stdin.
 * With ACM_LOCAL: redirects stdin/stdout to in.txt/out.txt and keeps calling
 * solve() while input remains and the next non-blank character is not '~',
 * reporting each case's run time on stderr. More than 20 cases is treated as
 * a malformed input file.
 */
signed main() {
    ios_base::sync_with_stdio(false);
    cin.tie(nullptr);
    cout.tie(nullptr);

#ifdef ACM_LOCAL
    freopen("in.txt", "r", stdin);
    freopen("out.txt", "w", stdout);

    long long case_no = 1;
    for (char peeked; cin >> peeked && peeked != '~'; ++case_no) {
        // The character was only read to detect end of input / the '~'
        // terminator; give it back so solve() sees the full stream.
        cin.putback(peeked);

        if (case_no > 20) {
            throw runtime_error("Check the stdin!!!");
        }

        const auto began = clock();
        solve();
        const auto ended = clock();

        cout << "Test " << case_no << " successful" << endl;
        cerr << "Test " << case_no << " Run Time: "
             << double(ended - began) / CLOCKS_PER_SEC << "s" << endl;
        cout << "--------------------------------------------------" << endl;
    }
#else
    solve();
#endif

    return 0;
}

【C++】Strassen算法代码的更多相关文章

【算法导论C++代码】Strassen算法
简单方阵矩乘法 SQUARE-MATRIX-MULTIPLY(A,B) n = A.rows let C be a new n*n natrix to n to n cij = to n cij=ci ...
Conquer and Divide经典例子之Strassen算法解决大型矩阵的相乘
在通过汉诺塔问题理解递归的精髓中我讲解了怎么把一个复杂的问题一步步recursively划分了成简单显而易见的小问题.其实这个解决问题的思路就是算法中常用的divide and conquer, 这篇 ...
Strassen算法
如题,该算法是来自德国的牛逼的数学家strassen搞出来的,因为把n*n矩阵之间的乘法复杂度降低到n^(lg7)(lg的底是2),一开始想当然地认为朴素的做法是n^3,哪里还能有复杂度更低的做法,但 ...
4-2.矩阵乘法的Strassen算法详解
题目描述请编程实现矩阵乘法,并考虑当矩阵规模较大时的优化方法. 思路分析根据wikipedia上的介绍:两个矩阵的乘法仅当第一个矩阵B的列数和另一个矩阵A的行数相等时才能定义.如A是m×n矩阵和B ...
算法导论-矩阵乘法-strassen算法
目录 1.矩阵相乘的朴素算法 2.矩阵相乘的strassen算法 3.完整测试代码c++ 4.性能分析 5.参考资料内容 1.矩阵相乘的朴素算法 T(n) = Θ(n3) 朴素矩阵相乘算法,思想明了 ...
整数快速乘法/快速幂+矩阵快速幂+Strassen算法
快速幂算法可以说是ACM一类竞赛中必不可少,并且也是非常基础的一类算法,鉴于我一直学的比较零散,所以今天用这个帖子总结一下快速乘法通常有两类应用:一.整数的运算,计算(a*b) mod c 二.矩 ...
第四章分治策略 4.2 矩阵乘法的Strassen算法
package chap04_Divide_And_Conquer; import static org.junit.Assert.*; import java.util.Arrays; import ...
LaTeX 算法代码排版 --latex2e范例总结
LaTeX 写作: 算法代码排版 --latex2e范例总结 latex2e 宏包的使用范例: \usepackage[ruled]{algorithm2e} ...
KMP算法代码
以下是本人根据上一篇博客随笔http://www.cnblogs.com/jiayouwyhit/p/3251832.html,所写的KMP算法代码(暂未优化),个人认为在基于上一篇博客的基础上,代码 ...

随机推荐

事务以及Spring的事务管理
一.什么是事务? 事务是逻辑上的一组操作,要么都执行,要么都不执行二.事务的特性(ACID) 原子性: 事务是最小的执行单位,不允许分割.事务的原子性确保动作要么全部完成,要么完全不起作用: 一致性 ...
sycCMS PHP V1.0---呵呵呵呵呵
闲的无聊,随便找了份代码看了看. //search.php 第17行第49行 ...... $keyword=SafeRequest("keyword","post&q ...
Python计算给定日期的周内的某一天
先理一下思路:1.weekday会根据某个日期返回0到6的一个数字来表示星期几对吧,0==星期一我们来列一个表: [0,1,2,3,4,5,6] 2.知道了星期几之后,你可以计算出那一周相对于这个0到 ...
Reids(4)——神奇的HyperLoglog解决统计问题
一.HyperLogLog 简介 HyperLogLog 是最早由 Flajolet 及其同事在 2007 年提出的一种估算基数的近似最优算法.但跟原版论文不同的是,好像很多书包括 Redis 作者 ...
利用动态资源分配优化Spark应用资源利用率
背景在某地市开展项目的时候,发现数据采集,数据探索,预处理,数据统计,训练预测都需要很多资源,现场资源不够用. 目前该项目的资源3台旧的服务器,每台的资源内存为128G,cores 为24 (co ...
一次js自定义播放器，canvas绘制弹幕的尝试
不多bb,就直接说实现了什么功能: 1. 视频播放进度调整 2. 视频小窗口实时预览 3. 声音调整 4. 画中画模式 5. 网页全屏 6. 视频全屏 7. canvas绘制弹幕 8. 选中弹幕悬停 ...
并查集（不相交集）的Remove操作
给并查集(不相交集)的添加一个$Remove(X)$操作,该操作把$X$从当前的集合中除去并把它放到自己的集合中. 实现思想英文原句 We assume that the tree is i ...
AWS EC2+Docker+JMeter构建分布式负载测试基础架构
目录概述及范围前提条件 Part 1: Local setup-本地配置 Part 2: Cloud端基础架构--Infrastructure 总结: 原文链接 @ 概述及范围本文介绍有关如何使 ...
小白自学机器学习----3.令人头秃的pytorch安装（No module named 'tools.nnwrap' 错误）
tensorflow 刚刚会写基础的模块了,今天找到研究方向的代码是pytorch实现的总是看到这句话,人生苦短,我用pytorch 看来pytorch应该比tensorflow好学,但是!! py ...
Safari配置WebApp----添加启动图和桌面图标让你的WebApp在ios设备上体验如原生一样
现在很多开发者的Web应用程序的设计样式和交互类似本机应用程序,例如,它的缩放比例适合iOS上的整个屏幕.当用户将其添加到主屏幕时,您可以通过使其看起来像本机应用程序一样,在iOS上为您的Web应用程 ...

【C++】Strassen算法代码

【C++】Strassen算法代码的更多相关文章

随机推荐

热门专题