为了使事情更笼统,以gsl_integration_qag
为例(因为它需要一个工作区来管理时间间隔)。
这是一个经过精心设计的简单example(problem.pyx
):
from libc.math cimport cos
from cython.parallel import prange
from cython_gsl cimport *
cdef double mean(double[:] arr):
cdef Py_ssize_t i
cdef double sum_ = 0.0
for i in range(arr.shape[0]):
sum_ += arr[i]
return sum_ / arr.shape[0]
cdef double integrand(double x, void *params) nogil:
cdef double a = (<double*>params)[0]
cdef double b = (<double*>params)[1]
return a*x + b
def use_gsl_integration_sequential():
cdef double result, error
cdef Py_ssize_t i, j
cdef double result_sum=0.0
cdef double results[3]
cdef double a=0.0, b=0.0
cdef double da = 0.1
cdef size_t space_size = 1000
cdef gsl_function gsl_func
cdef double params[2]
gsl_func.function = &integrand
cdef gsl_integration_workspace *ws
ws= gsl_integration_workspace_alloc(space_size)
for i in range(3):
a += da
result_sum = 0.0 # reset the sum(thank J.J.Hakala for pointing out this)
for j in range(4): # here may be range(100000) in practice!
b = a + j
params[0] = a; params[1] = b
gsl_func.params = params
gsl_integration_qag(&gsl_func, 0.0, 1.0, 1e-8, 1e-8, space_size, GSL_INTEG_GAUSS15, ws, &result, &error)
result_sum += result
results[i] = result_sum
gsl_integration_workspace_free(ws)
# do some other stuff with the 'results', for example:
return mean(results)
# Now, I want to parallel the inner loop
def use_gsl_integration_parallel():
cdef double result, error
cdef Py_ssize_t i, j
cdef double result_sum=0.0
cdef double results[3]
cdef double a, b
cdef double da = 0.1
cdef size_t space_size = 1000
cdef gsl_function gsl_func
cdef double params[2]
gsl_func.function = &integrand
cdef gsl_integration_workspace *ws
ws= gsl_integration_workspace_alloc(space_size)
for i in range(3):
# a should be read-only in the follow prange block.
a += da
# here may be prange(100000,...) in practice!
for j in prange(4, nogil=True):
# b should be local private.
b = a + j
# params also should be local private
params[0] = a; params[1] = b
# gsl_func is shared, only its params differ.
# According to the gsl manual(see follow link), workspaces should be allocated on a per-thread basis, but how?
gsl_func.params = params
gsl_integration_qag(&gsl_func, 0.0, 1.0, 1e-8, 1e-8, space_size, GSL_INTEG_GAUSS15, ws, &result, &error)
result_sum += result
results[i] = result_sum
gsl_integration_workspace_free(ws)
return mean(results)
有点长的代码,仅用于完整(用于复制),但是我认为很简单(可读)(๑•ᴗ•๑)
然后编译并链接:
cython -a -2 problem.pyx
gcc -O3 -fopenmp -c problem.c -o problem.o -IC:\gsl2.1x86\include -IC:\python-2.7.10\include
gcc -shared -fPIC -fopenmp -o problem.pyd -LC:\gsl2.1x86\libs -LC:\python-2.7.10\libs problem.o -l:libgsl.a -l:libgslcblas.dll.a -l:libpython27.dll.a
在IPython中:
In [1]: from problem import *
In [2]: use_gsl_integration_sequential()
Out[2]: 7.2
结果use_gsl_integration_parallel()
是不确定的,我尝试了几次,最多一次,它会崩溃,有些幸运,得到了不确定的值,所以一定有问题!我只是找不到这样一个平行的例子。有人可以帮我吗?
环境:
win10-64位,gcc(tdm-1)5.1.0 32位,python-2.7.10 32位,cython0.24,gsl-2.1
一些有用的参考资料?:
https://github.com/twiecki/CythonGSL
https://www.gnu.org/software/gsl/manual/html_node/Thread_002dsafety.html#Thread_002dsafety
(对不起,无法发布更多信息,两个链接是我的限制...
当我测试该代码时,出现了段错误(Linux,gcc 4.9.2)。
我认为原始代码存在以下问题
以下版本给出了7.2。是连续的版本实际上是正确的,因为它不设置result_sum = 0.0
在
for i in range(3):
a += da
result_sum = 0.0
from openmp cimport omp_get_max_threads, omp_get_thread_num
def use_gsl_integration_parallel():
cdef double result
cdef Py_ssize_t i, j
cdef double result_sum
cdef double results[3]
cdef double results2[4]
cdef double a
cdef double b[4]
cdef double error[4]
cdef double da = 0.1
cdef size_t space_size = 1000
cdef gsl_function gsl_func[4]
cdef double params[4][2]
cdef int tid
cdef gsl_integration_workspace ** ws = <gsl_integration_workspace **>malloc(omp_get_max_threads() * sizeof(gsl_integration_workspace *))
for j in range(omp_get_max_threads()):
ws[j] = gsl_integration_workspace_alloc(space_size)
for i in range(3):
a += da
for j in prange(4, nogil=True):
tid = omp_get_thread_num()
b[tid] = a + j
params[tid][0] = a;
params[tid][1] = b[tid]
gsl_func[tid].function = &integrand
gsl_func[tid].params = params[tid]
gsl_integration_qag(&gsl_func[tid], 0.0, 1.0, 1e-8, 1e-8,
space_size, GSL_INTEG_GAUSS15, ws[tid],
&results2[j], &error[j])
# printf("tid %d\n", tid)
results[i] = sum(results2)
for j in range(omp_get_max_threads()):
gsl_integration_workspace_free(ws[j])
free(ws)
return mean(results)
本文收集自互联网,转载请注明来源。
如有侵权,请联系[email protected] 删除。
我来说两句