2015-12-04 15:47:52 +01:00
|
|
|
/**
|
2016-09-11 22:20:15 +02:00
|
|
|
* Author......: See docs/credits.txt
|
2015-12-04 15:47:52 +01:00
|
|
|
* License.....: MIT
|
|
|
|
*/
|
|
|
|
|
2019-03-23 22:15:38 +01:00
|
|
|
#ifndef _INC_VENDOR_H
|
|
|
|
#define _INC_VENDOR_H
|
|
|
|
|
2019-04-25 14:45:17 +02:00
|
|
|
#if defined _CPU_OPENCL_EMU_H
|
|
|
|
#define IS_NATIVE
|
|
|
|
#elif defined __CUDACC__
|
|
|
|
#define IS_CUDA
|
2021-07-09 05:50:40 +02:00
|
|
|
#elif defined __HIPCC__
|
|
|
|
#define IS_HIP
|
2022-11-16 14:26:54 +01:00
|
|
|
#elif defined __METAL__
|
2022-02-04 19:54:00 +01:00
|
|
|
#define IS_METAL
|
2019-04-25 14:45:17 +02:00
|
|
|
#else
|
|
|
|
#define IS_OPENCL
|
|
|
|
#endif
|
|
|
|
|
2022-02-04 19:54:00 +01:00
|
|
|
#if defined IS_METAL
|
|
|
|
#include <metal_stdlib>
|
|
|
|
|
|
|
|
using namespace metal;
|
|
|
|
#endif
|
|
|
|
|
2019-04-25 14:45:17 +02:00
|
|
|
#if defined IS_NATIVE
|
2019-05-06 14:34:16 +02:00
|
|
|
#define CONSTANT_VK
|
2019-03-22 22:27:58 +01:00
|
|
|
#define CONSTANT_AS
|
|
|
|
#define GLOBAL_AS
|
2019-05-07 09:01:32 +02:00
|
|
|
#define LOCAL_VK
|
2019-03-22 22:27:58 +01:00
|
|
|
#define LOCAL_AS
|
2022-02-04 19:54:00 +01:00
|
|
|
#define PRIVATE_AS
|
2019-03-22 22:27:58 +01:00
|
|
|
#define KERNEL_FQ
|
2021-07-11 12:38:59 +02:00
|
|
|
#elif defined IS_CUDA
|
|
|
|
#define CONSTANT_VK __constant__
|
|
|
|
#define CONSTANT_AS
|
|
|
|
#define GLOBAL_AS
|
|
|
|
#define LOCAL_VK __shared__
|
|
|
|
#define LOCAL_AS
|
2022-02-04 19:54:00 +01:00
|
|
|
#define PRIVATE_AS
|
2021-07-11 12:38:59 +02:00
|
|
|
#define KERNEL_FQ extern "C" __global__
|
|
|
|
#elif defined IS_HIP
|
2019-05-06 14:34:16 +02:00
|
|
|
#define CONSTANT_VK __constant__
|
2019-04-25 14:45:17 +02:00
|
|
|
#define CONSTANT_AS
|
|
|
|
#define GLOBAL_AS
|
2019-05-07 09:01:32 +02:00
|
|
|
#define LOCAL_VK __shared__
|
2019-04-25 14:45:17 +02:00
|
|
|
#define LOCAL_AS
|
2022-02-04 19:54:00 +01:00
|
|
|
#define PRIVATE_AS
|
2019-05-03 15:50:07 +02:00
|
|
|
#define KERNEL_FQ extern "C" __global__
|
2022-02-04 19:54:00 +01:00
|
|
|
#elif defined IS_METAL
|
|
|
|
#define CONSTANT_VK constant
|
|
|
|
#define CONSTANT_AS constant
|
|
|
|
#define GLOBAL_AS device
|
|
|
|
#define LOCAL_VK threadgroup
|
|
|
|
#define LOCAL_AS threadgroup
|
|
|
|
#define PRIVATE_AS thread
|
|
|
|
#define KERNEL_FQ kernel
|
2019-04-25 14:45:17 +02:00
|
|
|
#elif defined IS_OPENCL
|
2019-05-06 14:34:16 +02:00
|
|
|
#define CONSTANT_VK __constant
|
2019-03-22 22:27:58 +01:00
|
|
|
#define CONSTANT_AS __constant
|
|
|
|
#define GLOBAL_AS __global
|
2019-05-07 09:01:32 +02:00
|
|
|
#define LOCAL_VK __local
|
2019-03-22 22:27:58 +01:00
|
|
|
#define LOCAL_AS __local
|
2022-02-04 19:54:00 +01:00
|
|
|
#define PRIVATE_AS
|
2019-03-22 22:27:58 +01:00
|
|
|
#define KERNEL_FQ __kernel
|
|
|
|
#endif
|
|
|
|
|
2019-04-04 20:01:37 +02:00
|
|
|
#ifndef MAYBE_UNUSED
|
|
|
|
#define MAYBE_UNUSED
|
|
|
|
#endif
|
|
|
|
|
2016-05-09 21:32:12 +02:00
|
|
|
/**
|
|
|
|
* device type
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define DEVICE_TYPE_CPU 2
|
|
|
|
#define DEVICE_TYPE_GPU 4
|
|
|
|
#define DEVICE_TYPE_ACCEL 8
|
|
|
|
|
|
|
|
#if DEVICE_TYPE == DEVICE_TYPE_CPU
|
|
|
|
#define IS_CPU
|
|
|
|
#elif DEVICE_TYPE == DEVICE_TYPE_GPU
|
|
|
|
#define IS_GPU
|
|
|
|
#elif DEVICE_TYPE == DEVICE_TYPE_ACCEL
|
|
|
|
#define IS_ACCEL
|
2016-04-20 21:19:15 +02:00
|
|
|
#endif
|
|
|
|
|
2015-12-04 15:47:52 +01:00
|
|
|
/**
|
|
|
|
* vendor specific
|
|
|
|
*/
|
|
|
|
|
2016-05-09 21:32:12 +02:00
|
|
|
#if VENDOR_ID == (1 << 0)
|
2015-12-04 15:47:52 +01:00
|
|
|
#define IS_AMD
|
2016-05-14 19:45:51 +02:00
|
|
|
#elif VENDOR_ID == (1 << 1)
|
|
|
|
#define IS_APPLE
|
|
|
|
#define IS_GENERIC
|
|
|
|
#elif VENDOR_ID == (1 << 2)
|
|
|
|
#define IS_INTEL_BEIGNET
|
|
|
|
#define IS_GENERIC
|
|
|
|
#elif VENDOR_ID == (1 << 3)
|
|
|
|
#define IS_INTEL_SDK
|
|
|
|
#define IS_GENERIC
|
|
|
|
#elif VENDOR_ID == (1 << 4)
|
|
|
|
#define IS_MESA
|
|
|
|
#define IS_GENERIC
|
|
|
|
#elif VENDOR_ID == (1 << 5)
|
2021-07-11 12:38:59 +02:00
|
|
|
#define IS_NV
|
2016-05-14 19:45:51 +02:00
|
|
|
#elif VENDOR_ID == (1 << 6)
|
|
|
|
#define IS_POCL
|
|
|
|
#define IS_GENERIC
|
2021-07-11 12:38:59 +02:00
|
|
|
#elif VENDOR_ID == (1 << 8)
|
|
|
|
#define IS_AMD_USE_HIP
|
2016-01-13 17:10:40 +01:00
|
|
|
#else
|
2016-01-07 20:14:34 +01:00
|
|
|
#define IS_GENERIC
|
2016-01-04 13:17:20 +01:00
|
|
|
#endif
|
|
|
|
|
2020-01-25 12:09:39 +01:00
|
|
|
#if defined IS_AMD && HAS_VPERM == 1
|
|
|
|
#define IS_ROCM
|
|
|
|
#endif
|
|
|
|
|
2018-11-20 10:06:34 +01:00
|
|
|
#define LOCAL_MEM_TYPE_LOCAL 1
|
|
|
|
#define LOCAL_MEM_TYPE_GLOBAL 2
|
|
|
|
|
|
|
|
#if LOCAL_MEM_TYPE == LOCAL_MEM_TYPE_LOCAL
|
2018-08-13 12:10:03 +02:00
|
|
|
#define REAL_SHM
|
|
|
|
#endif
|
|
|
|
|
2020-03-20 16:20:22 +01:00
|
|
|
// So far, only used by -m 22100 and only affects NVIDIA on OpenCL. CUDA seems to work fine.
|
|
|
|
#ifdef FORCE_DISABLE_SHM
|
|
|
|
#undef REAL_SHM
|
|
|
|
#endif
|
|
|
|
|
2018-08-13 12:10:03 +02:00
|
|
|
#ifdef REAL_SHM
|
2019-03-22 22:27:58 +01:00
|
|
|
#define SHM_TYPE LOCAL_AS
|
2018-08-13 12:10:03 +02:00
|
|
|
#else
|
2019-03-22 22:27:58 +01:00
|
|
|
#define SHM_TYPE CONSTANT_AS
|
2018-08-13 12:10:03 +02:00
|
|
|
#endif
|
|
|
|
|
2018-02-06 19:12:24 +01:00
|
|
|
/**
|
|
|
|
* function declarations can have a large influence depending on the opencl runtime
|
2019-04-13 18:46:19 +02:00
|
|
|
* fast but pure kernels on rocm is a good example
|
2018-02-06 19:12:24 +01:00
|
|
|
*/
|
|
|
|
|
2022-11-07 15:35:46 +01:00
|
|
|
#ifdef NO_INLINE
|
|
|
|
#define HC_INLINE
|
|
|
|
#else
|
|
|
|
#define HC_INLINE inline static
|
|
|
|
#endif
|
|
|
|
|
2020-03-03 12:36:55 +01:00
|
|
|
#if defined IS_AMD && defined IS_GPU
|
2022-11-07 15:35:46 +01:00
|
|
|
#define DECLSPEC HC_INLINE
|
2021-07-11 12:38:59 +02:00
|
|
|
#elif defined IS_HIP
|
2022-11-07 15:35:46 +01:00
|
|
|
#define DECLSPEC __device__ HC_INLINE
|
2020-03-03 12:36:55 +01:00
|
|
|
#else
|
|
|
|
#define DECLSPEC
|
2018-02-06 22:05:15 +01:00
|
|
|
#endif
|
2018-02-06 19:12:24 +01:00
|
|
|
|
2015-12-04 15:47:52 +01:00
|
|
|
/**
|
|
|
|
* AMD specific
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifdef IS_AMD
|
2018-07-22 12:20:20 +02:00
|
|
|
#if defined(cl_amd_media_ops)
|
2016-01-04 13:17:20 +01:00
|
|
|
#pragma OPENCL EXTENSION cl_amd_media_ops : enable
|
2018-07-22 12:20:20 +02:00
|
|
|
#endif
|
|
|
|
#if defined(cl_amd_media_ops2)
|
2015-12-04 15:47:52 +01:00
|
|
|
#pragma OPENCL EXTENSION cl_amd_media_ops2 : enable
|
|
|
|
#endif
|
2018-07-22 12:20:20 +02:00
|
|
|
#endif
|
2015-12-04 15:47:52 +01:00
|
|
|
|
2020-01-12 13:32:02 +01:00
|
|
|
// Whitelist some OpenCL specific functions
|
|
|
|
// This could create more stable kernels on systems with bad OpenCL drivers
|
|
|
|
|
2021-07-11 12:38:59 +02:00
|
|
|
#ifdef IS_CUDA
|
2020-01-20 09:20:12 +01:00
|
|
|
#define USE_BITSELECT
|
|
|
|
#define USE_ROTATE
|
|
|
|
#endif
|
|
|
|
|
2021-07-11 12:38:59 +02:00
|
|
|
#ifdef IS_HIP
|
2021-07-15 23:34:27 +02:00
|
|
|
#define USE_BITSELECT
|
|
|
|
#define USE_ROTATE
|
2021-07-11 12:38:59 +02:00
|
|
|
#endif
|
|
|
|
|
2020-01-25 12:09:39 +01:00
|
|
|
#ifdef IS_ROCM
|
|
|
|
#define USE_BITSELECT
|
|
|
|
#define USE_ROTATE
|
|
|
|
#endif
|
|
|
|
|
2020-03-02 16:07:13 +01:00
|
|
|
#ifdef IS_INTEL_SDK
|
2020-03-03 08:52:26 +01:00
|
|
|
#ifdef IS_CPU
|
|
|
|
//#define USE_BITSELECT
|
|
|
|
//#define USE_ROTATE
|
|
|
|
#endif
|
2020-03-02 16:07:13 +01:00
|
|
|
#endif
|
|
|
|
|
2020-01-20 09:20:12 +01:00
|
|
|
#ifdef IS_OPENCL
|
2020-01-21 22:09:56 +01:00
|
|
|
//#define USE_BITSELECT
|
|
|
|
//#define USE_ROTATE
|
|
|
|
//#define USE_SWIZZLE
|
2020-01-12 13:32:02 +01:00
|
|
|
#endif
|
|
|
|
|
2022-02-10 21:53:08 +01:00
|
|
|
#ifdef IS_METAL
|
|
|
|
#define USE_ROTATE
|
|
|
|
|
|
|
|
// Metal support max VECT_SIZE = 4
|
|
|
|
#define s0 x
|
|
|
|
#define s1 y
|
|
|
|
#define s2 z
|
|
|
|
#define s3 w
|
2019-03-23 22:15:38 +01:00
|
|
|
#endif
|
2022-02-10 21:53:08 +01:00
|
|
|
|
|
|
|
#endif // _INC_VENDOR_H
|