1. 去除不必要的显式for循环,使用向量化计算。
- 1 import time
- 2 import numpy as np
- 3
- 4
def for_time(size=10000000):
    """Time adding one to every array element with an explicit Python loop.

    Builds ``np.arange(0, size, 1)``, increments each element by index in a
    ``for`` loop, prints the elapsed wall-clock time and returns it.

    Args:
        size: Number of elements to create and to increment. Defaults to
            10000000, the array length ``vector_time`` works on, so that the
            two timings measure the same amount of work.

    Returns:
        Elapsed wall-clock time in seconds (float).
    """
    start = time.time()
    data = np.arange(0, size, 1)
    # Iterate over the whole array. The previous version allocated 10**7
    # elements but only touched the first 10**6, so the loop timing was not
    # comparable with the vectorized one.
    for i in range(size):
        data[i] += 1
    elapsed = time.time() - start
    # %-formatting is valid on both Python 2 and 3 (the bare ``print``
    # statement is a SyntaxError on Python 3); '%s %s' reproduces the
    # separator the print statement inserted between its two arguments.
    print('%s %s' % ('for loop used time: ', elapsed))
    return elapsed
- 12
- 13
def vector_time(size=10000000):
    """Time adding one to every array element with a vectorized operation.

    Builds ``np.arange(0, size, 1)``, increments the whole array with a single
    in-place ``+= 1``, prints the elapsed wall-clock time and returns it.

    Args:
        size: Number of elements in the array. Defaults to 10000000.

    Returns:
        Elapsed wall-clock time in seconds (float).
    """
    start = time.time()
    data = np.arange(0, size, 1)
    # One in-place NumPy operation replaces the element-by-element loop.
    data += 1
    elapsed = time.time() - start
    # %-formatting is valid on both Python 2 and 3 (the bare ``print``
    # statement is a SyntaxError on Python 3); '%s %s' reproduces the
    # separator the print statement inserted between its two arguments.
    print('%s %s' % ('vector calculation used time: ', elapsed))
    return elapsed
- 20
- 21
if __name__ == '__main__':
    # Script entry point: run the loop benchmark first, then the vectorized
    # one, so their printed timings can be compared.
    for benchmark in (for_time, vector_time):
        benchmark()
for loop used time: 0.359999895096
vector calculation used time: 0.0160000324249
2. 使用多进程,充分利用多核CPU。
- 1 import multiprocessing
- 2
- 3
def use_pool(func, args, processes=2):
    """Apply ``func`` to every item of ``args`` using a pool of processes.

    Args:
        func: Callable taking one argument. It is sent to worker processes,
            so it must be picklable (e.g. a module-level function).
        args: Iterable of inputs; each item is passed to ``func`` once.
        processes: Number of worker processes to start. Defaults to 2.

    Returns:
        List of results, in the same order as ``args``.

    Raises:
        Any exception raised by ``func`` in a worker is re-raised here by
        ``pool.map``.
    """
    pool = multiprocessing.Pool(processes=processes)
    try:
        return pool.map(func, args)
    finally:
        # Shut the workers down even when ``pool.map`` raises; otherwise the
        # worker processes would be left running.
        pool.close()
        pool.join()
3. 使用sklearn.externals.joblib扩展库
- 1 from sklearn.externals.joblib import Parallel, delayed
- 2
- 3
def parallel(func, arg):
    """Apply ``func`` to every item of ``arg`` in parallel via joblib.

    Args:
        func: Callable taking one argument.
        arg: Iterable of inputs; each item is passed to ``func`` once.

    Returns:
        List with one result per input item. The previous version computed
        this list but discarded it and returned ``None``.
    """
    # n_jobs=-1 asks joblib to use all available CPUs.
    # NOTE(review): newer scikit-learn releases no longer ship
    # ``sklearn.externals.joblib``; there ``Parallel`` and ``delayed`` must be
    # imported from the standalone ``joblib`` package - confirm against the
    # installed version.
    return Parallel(n_jobs=-1)(delayed(func)(item) for item in arg)
4. 使用bottleneck库。
该库使用C语言(早期版本使用Cython)实现,着眼于高性能。