2015-12-04 15:47:52 +01:00
|
|
|
/**
 * Author......: See docs/credits.txt
 * License.....: MIT
 */
|
|
|
|
|
2016-05-09 21:32:12 +02:00
|
|
|
/**
 * device type
 *
 * DEVICE_TYPE is not defined in this file; it has to be supplied before this
 * point (NOTE(review): presumably through a -D build option - confirm).
 * It is compared against the constants below and, on a match, exactly one of
 * IS_CPU / IS_GPU / IS_ACCEL is defined. When DEVICE_TYPE is undefined (the
 * preprocessor then evaluates it as 0) or holds any other value, none of the
 * three markers is defined.
 */

#define DEVICE_TYPE_CPU   2
#define DEVICE_TYPE_GPU   4
#define DEVICE_TYPE_ACCEL 8

#if   DEVICE_TYPE == DEVICE_TYPE_CPU
#define IS_CPU
#elif DEVICE_TYPE == DEVICE_TYPE_GPU
#define IS_GPU
#elif DEVICE_TYPE == DEVICE_TYPE_ACCEL
#define IS_ACCEL
#endif
|
|
|
|
|
2015-12-04 15:47:52 +01:00
|
|
|
/**
 * vendor specific
 *
 * VENDOR_ID is not defined in this file; it has to be supplied before this
 * point. Each known vendor is identified by a single bit. Exactly one vendor
 * marker is defined per known id.
 *
 * IS_GENERIC is additionally defined for every vendor except AMD (1 << 0)
 * and NV (1 << 5). An undefined or unrecognized VENDOR_ID falls through to
 * the final #else and yields IS_GENERIC alone.
 */

#if   VENDOR_ID == (1 << 0)
#define IS_AMD
#elif VENDOR_ID == (1 << 1)
#define IS_APPLE
#define IS_GENERIC
#elif VENDOR_ID == (1 << 2)
#define IS_INTEL_BEIGNET
#define IS_GENERIC
#elif VENDOR_ID == (1 << 3)
#define IS_INTEL_SDK
#define IS_GENERIC
#elif VENDOR_ID == (1 << 4)
#define IS_MESA
#define IS_GENERIC
#elif VENDOR_ID == (1 << 5)
#define IS_NV
#elif VENDOR_ID == (1 << 6)
#define IS_POCL
#define IS_GENERIC
#else
#define IS_GENERIC
#endif
|
|
|
|
|
2018-11-20 10:06:34 +01:00
|
|
|
// values LOCAL_MEM_TYPE is compared against; LOCAL_MEM_TYPE itself is not
// defined in this file and has to be supplied before this point

#define LOCAL_MEM_TYPE_LOCAL  1
#define LOCAL_MEM_TYPE_GLOBAL 2

// REAL_SHM is defined only when LOCAL_MEM_TYPE equals LOCAL_MEM_TYPE_LOCAL;
// for LOCAL_MEM_TYPE_GLOBAL, any other value, or an undefined LOCAL_MEM_TYPE
// it stays undefined

#if LOCAL_MEM_TYPE == LOCAL_MEM_TYPE_LOCAL
#define REAL_SHM
#endif
|
|
|
|
|
|
|
|
// SHM_TYPE expands to an OpenCL address space qualifier:
// __local when REAL_SHM is defined, __constant otherwise

#ifdef REAL_SHM
#define SHM_TYPE __local
#else
#define SHM_TYPE __constant
#endif
|
|
|
|
|
2018-02-06 19:12:24 +01:00
|
|
|
/**
 * function declarations can have a large influence depending on the opencl runtime
 *
 * DECLSPEC expands to `inline` when IS_CPU is defined, or when both IS_GPU
 * and IS_AMD are defined. In every other configuration (other GPU vendors,
 * or neither IS_CPU nor IS_GPU defined) it expands to nothing.
 */

#if defined IS_CPU
  #define DECLSPEC inline
#elif defined IS_GPU
  #if defined IS_AMD
    #define DECLSPEC inline
  #else
    #define DECLSPEC
  #endif
#else
  #define DECLSPEC
#endif
|
2018-02-06 19:12:24 +01:00
|
|
|
|
2015-12-04 15:47:52 +01:00
|
|
|
/**
 * AMD specific
 *
 * Only when IS_AMD is defined: enable the cl_amd_media_ops and
 * cl_amd_media_ops2 extensions. Each one is enabled individually, and only
 * if a macro of the same name is defined, so a missing extension is skipped
 * instead of being requested unconditionally.
 */

#ifdef IS_AMD
  #if defined(cl_amd_media_ops)
    #pragma OPENCL EXTENSION cl_amd_media_ops : enable
  #endif
  #if defined(cl_amd_media_ops2)
    #pragma OPENCL EXTENSION cl_amd_media_ops2 : enable
  #endif
#endif
|
2015-12-04 15:47:52 +01:00
|
|
|
|
2015-12-15 12:04:22 +01:00
|
|
|
/**
 * Unrolling is generally enabled, for all device types and hash modes
 * There are a few exceptions where it is better not to unroll
 * Some algorithms run into too much register pressure due to loop unrolling
 */

// generic vendors: those algos have shown that they produce better results on both amd and nv when not unrolled
// so we can assume they will produce better results on other vendors as well

// NO_UNROLL is the opt-out switch: when it is defined, the _unroll macro is removed.
// NOTE(review): neither NO_UNROLL nor _unroll is defined in the visible source;
// presumably both are supplied from outside and _unroll is tested elsewhere - confirm.

#ifdef NO_UNROLL
#undef _unroll
#endif
|