C++ 利用 OpenMP 库 `<omp.h>` 进行并行加速的实例,但实际测得的效果却更慢,找不出原因。希望各路大神帮忙。
基本功能是计算 $\pi$:$\pi = 4\int_0^1 \frac{1}{1+x^2}\,dx$
// Build: g++ avx_omp.cpp -fopenmp -O2
//
// Approximates pi = 4 * integral_0^1 1/(1+x^2) dx with a left Riemann sum,
// once serially and once with an OpenMP parallel reduction, and reports the
// wall-clock time of each run.
#include <chrono>
#include <cstddef>
#include <ctime>
#include <iostream>
#ifdef _OPENMP
#include <omp.h>
#endif

// Serial version: accumulates the dt rectangle areas one after another.
// dt is the number of sub-intervals of [0, 1]; returns the approximation of pi.
double compute_pi_naive(std::size_t dt) {
    double pi = 0.0;
    const double delta = 1.0 / dt;
    for (std::size_t i = 0; i < dt; i++) {
        const double x = static_cast<double>(i) / dt;
        pi += delta / (1 + x * x);
    }
    return pi * 4.0;
}

// Parallel version: same sum, split across threads with an OpenMP reduction.
// dt is the number of sub-intervals of [0, 1]; returns the approximation of pi.
// When compiled without OpenMP the pragma is ignored and the loop runs serially.
double compute_pi_omp(std::size_t dt) {
    double pi = 0.0;
    const double delta = 1.0 / dt;
#ifdef _OPENMP
    omp_set_num_threads(4);  // use four threads
#endif
    // Each thread sums into a private copy of pi; the copies are added at the end.
    #pragma omp parallel for reduction(+:pi)
    for (std::size_t i = 0; i < dt; i++) {
        const double x = static_cast<double>(i) / dt;
        pi += delta / (1 + x * x);
    }
    return pi * 4.0;
}

int main() {
    // Wall-clock timer. clock() must not be used here: it reports CPU time
    // consumed by the whole process, i.e. summed over all threads, so a
    // parallel run shows the same or a larger value even though it finishes
    // sooner in real time.
    using Clock = std::chrono::steady_clock;
    using Millis = std::chrono::duration<double, std::milli>;

    const std::size_t dt = 134217728;

    // Time the serial version.
    auto start = Clock::now();
    const double result1 = compute_pi_naive(dt);
    auto end = Clock::now();
    std::cout << "naive:\n" << result1 << std::endl
              << Millis(end - start).count() << " ms" << std::endl;

    // Time the OpenMP version.
    start = Clock::now();
    const double result2 = compute_pi_omp(dt);
    end = Clock::now();
    std::cout << "omp:\n" << result2 << std::endl
              << Millis(end - start).count() << " ms" << std::endl;

    return 0;
}
// Author's note: with four threads the original measurement looked even slower.
// That was an artifact of timing with clock() (CPU time across all threads)
// rather than wall-clock time.