I want to compute the numerical gradient of a function in parallel. However, I noticed that evaluating my function in a multiprocessing pool gives a different result than evaluating it directly in the main process. The matrix multiplication itself seems to produce different values. I reproduced the problem in the code below. For a small matrix size such as (5, 2) the results are identical. I am using Python 3.7.4 and NumPy 1.18.5. Is there a way to get identical results for larger matrices as well?
import numpy as np
import multiprocessing as mp
import os
def setNumSubLibraryThreads(n):
    """Set the thread-count environment variables of several numeric backends.

    Writes ``str(n)`` into ``os.environ`` under the OpenMP, OpenBLAS, MKL,
    vecLib and NumExpr thread-limit variable names.

    NOTE(review): these variables are presumably only read when the
    corresponding native library is loaded, so changing them after NumPy has
    been imported may not affect the current process -- confirm against the
    BLAS backend in use.

    Args:
        n: Desired number of threads; converted with ``str`` before storing.
    """
    thread_limit = str(n)
    for variable in (
        "OMP_NUM_THREADS",
        "OPENBLAS_NUM_THREADS",
        "MKL_NUM_THREADS",
        "VECLIB_MAXIMUM_THREADS",
        "NUMEXPR_NUM_THREADS",
    ):
        os.environ[variable] = thread_limit
def solveInParallel(f, args):
    """Evaluate ``f`` on every element of ``args`` in a process pool.

    The thread-count environment variables are set to 1 before the pool is
    created, so that each worker is asked to run its numeric backend
    single-threaded, and are set to the CPU count afterwards.

    NOTE(review): the variables are set to ``cpu_count`` afterwards rather
    than restored to their previous values; this matches the original
    behaviour -- confirm that callers do not rely on a custom setting.

    Args:
        f: Picklable callable taking one element of ``args``.
        args: Iterable of work items, one per call of ``f``.

    Returns:
        ``np.ndarray`` built from the list of results, in input order.

    Raises:
        Whatever ``f`` raises in a worker is re-raised here by ``Pool.map``.
    """
    cpu_count = mp.cpu_count()
    setNumSubLibraryThreads(1)
    try:
        # The context manager tears the pool down even when map() raises,
        # so worker processes are never leaked.
        with mp.Pool(cpu_count) as pool:
            sol = np.array(pool.map(f, args))
    finally:
        # Always undo the single-thread setting, including on failure.
        setNumSubLibraryThreads(cpu_count)
    return sol
def g(args):
    """Return the product of the transposed first matrix with the second.

    Args:
        args: Two-element sequence ``(mat1, mat2)``; packed into one
            argument so the function can be used with ``Pool.map``.

    Returns:
        The result of ``mat1.T.dot(mat2)``.
    """
    left, right = args
    left_transposed = left.T
    return left_transposed.dot(right)
if __name__ == '__main__':
    # Fixed seed so every run multiplies the same two matrices.
    np.random.seed(42)
    mat1 = np.random.random((1000, 20))
    mat2 = np.random.random((1000, 20))

    # Reference product computed directly in the parent process.
    ref = mat1.T.dot(mat2)

    # Same product computed by a pool worker; a single task was submitted,
    # so the result of interest is element 0.
    res_par_mat = solveInParallel(g, [(mat1, mat2)])
    par = res_par_mat[0]

    # Exact element-wise comparison, kept as a diagnostic of bitwise equality.
    print(par == ref)

    # Floating-point results should be compared with a tolerance: exact
    # equality fails on differences in the last bits. Presumably the two
    # runs accumulate the sums in a different order -- confirm for the BLAS
    # backend in use.
    print(np.allclose(par, ref))
    # Size of the largest discrepancy, to judge whether it is rounding noise.
    print(np.abs(par - ref).max())
Calling `np.__config__.show()` gives the following output:
blas_mkl_info:
NOT AVAILABLE
blis_info:
NOT AVAILABLE
openblas_info:
libraries = ['openblas', 'openblas']
library_dirs = ['...']
language = c
define_macros = [('HAVE_CBLAS', None)]
blas_opt_info:
libraries = ['openblas', 'openblas']
library_dirs = [...]
language = c
define_macros = [('HAVE_CBLAS', None)]
lapack_mkl_info:
NOT AVAILABLE
openblas_lapack_info:
libraries = ['openblas', 'openblas']
library_dirs = [...]
language = c
define_macros = [('HAVE_CBLAS', None)]
lapack_opt_info:
libraries = ['openblas', 'openblas']
library_dirs = [...]
language = c
define_macros = [('HAVE_CBLAS', None)]