diff --git a/Changelog b/Changelog index ee48876128..f44cec679f 100644 --- a/Changelog +++ b/Changelog @@ -27,6 +27,7 @@ version : - video setrange filter - nsp demuxer - support LibreSSL (via libtls) +- AVX-512/ZMM support added version 3.4: diff --git a/doc/APIchanges b/doc/APIchanges index da444ffb7c..df79758e86 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -15,6 +15,9 @@ libavutil: 2017-10-21 API changes, most recent first: +2017-12-xx - xxxxxxx - lavu 56.7.100 - cpu.h + AVX-512 flags added. + 2017-xx-xx - xxxxxxx - lavc 58.8.100 - avcodec.h The MediaCodec decoders now support AVCodecContext.hw_device_ctx. diff --git a/libavutil/cpu.c b/libavutil/cpu.c index c8401b8258..6548cc3042 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -80,7 +80,8 @@ void av_force_cpu_flags(int arg){ AV_CPU_FLAG_XOP | AV_CPU_FLAG_FMA3 | AV_CPU_FLAG_FMA4 | - AV_CPU_FLAG_AVX2 )) + AV_CPU_FLAG_AVX2 | + AV_CPU_FLAG_AVX512 )) && !(arg & AV_CPU_FLAG_MMX)) { av_log(NULL, AV_LOG_WARNING, "MMX implied by specified flags\n"); arg |= AV_CPU_FLAG_MMX; @@ -126,6 +127,7 @@ int av_parse_cpu_flags(const char *s) #define CPUFLAG_AVX2 (AV_CPU_FLAG_AVX2 | CPUFLAG_AVX) #define CPUFLAG_BMI2 (AV_CPU_FLAG_BMI2 | AV_CPU_FLAG_BMI1) #define CPUFLAG_AESNI (AV_CPU_FLAG_AESNI | CPUFLAG_SSE42) +#define CPUFLAG_AVX512 (AV_CPU_FLAG_AVX512 | CPUFLAG_AVX2) static const AVOption cpuflags_opts[] = { { "flags" , NULL, 0, AV_OPT_TYPE_FLAGS, { .i64 = 0 }, INT64_MIN, INT64_MAX, .unit = "flags" }, #if ARCH_PPC @@ -154,6 +156,7 @@ int av_parse_cpu_flags(const char *s) { "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_3DNOWEXT }, .unit = "flags" }, { "cmov", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV }, .unit = "flags" }, { "aesni" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AESNI }, .unit = "flags" }, + { "avx512" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX512 }, .unit = "flags" }, #elif ARCH_ARM { "armv5te", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV5TE }, .unit = "flags" }, { "armv6", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV6 }, .unit = "flags" }, @@ -216,6 +219,7 @@ int av_parse_cpu_caps(unsigned *flags, const char *s) { "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_3DNOWEXT }, .unit = "flags" }, { "cmov", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV }, .unit = "flags" }, { "aesni", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AESNI }, .unit = "flags" }, + { "avx512" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX512 }, .unit = "flags" }, #define CPU_FLAG_P2 AV_CPU_FLAG_CMOV | AV_CPU_FLAG_MMX #define CPU_FLAG_P3 CPU_FLAG_P2 | AV_CPU_FLAG_MMX2 | AV_CPU_FLAG_SSE diff --git a/libavutil/cpu.h b/libavutil/cpu.h index 9e5d40affe..8bb9eb606b 100644 --- a/libavutil/cpu.h +++ b/libavutil/cpu.h @@ -55,6 +55,7 @@ #define AV_CPU_FLAG_FMA3 0x10000 ///< Haswell FMA3 functions #define AV_CPU_FLAG_BMI1 0x20000 ///< Bit Manipulation Instruction Set 1 #define AV_CPU_FLAG_BMI2 0x40000 ///< Bit Manipulation Instruction Set 2 +#define AV_CPU_FLAG_AVX512 0x100000 ///< AVX-512 functions: requires OS support even if YMM/ZMM registers aren't used #define AV_CPU_FLAG_ALTIVEC 0x0001 ///< standard #define AV_CPU_FLAG_VSX 0x0002 ///< ISA 2.06 diff --git a/libavutil/tests/cpu.c b/libavutil/tests/cpu.c index f02a54cbbb..ce45b715a0 100644 --- a/libavutil/tests/cpu.c +++ b/libavutil/tests/cpu.c @@ -73,6 +73,7 @@ static const struct { { AV_CPU_FLAG_BMI1, "bmi1" }, { AV_CPU_FLAG_BMI2, "bmi2" }, { AV_CPU_FLAG_AESNI, "aesni" }, + { AV_CPU_FLAG_AVX512, "avx512" }, #endif { 0 } }; diff --git a/libavutil/version.h b/libavutil/version.h index 9ae9768d4d..d81ec6fa7b 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -79,7 +79,7 @@ */ #define LIBAVUTIL_VERSION_MAJOR 56 -#define LIBAVUTIL_VERSION_MINOR 6 +#define LIBAVUTIL_VERSION_MINOR 7 #define LIBAVUTIL_VERSION_MICRO 100 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h index 309b8e746c..7f4e5d08bb 100644 --- a/libavutil/x86/cpu.h +++ b/libavutil/x86/cpu.h @@ -50,6 +50,7 @@ #define X86_FMA4(flags) CPUEXT(flags, FMA4) #define X86_AVX2(flags) CPUEXT(flags, AVX2) #define X86_AESNI(flags) CPUEXT(flags, AESNI) +#define X86_AVX512(flags) CPUEXT(flags, AVX512) #define EXTERNAL_AMD3DNOW(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AMD3DNOW) #define EXTERNAL_AMD3DNOWEXT(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AMD3DNOWEXT) @@ -79,6 +80,7 @@ #define EXTERNAL_AVX2_FAST(flags) CPUEXT_SUFFIX_FAST2(flags, _EXTERNAL, AVX2, AVX) #define EXTERNAL_AVX2_SLOW(flags) CPUEXT_SUFFIX_SLOW2(flags, _EXTERNAL, AVX2, AVX) #define EXTERNAL_AESNI(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AESNI) +#define EXTERNAL_AVX512(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX512) #define INLINE_AMD3DNOW(flags) CPUEXT_SUFFIX(flags, _INLINE, AMD3DNOW) #define INLINE_AMD3DNOWEXT(flags) CPUEXT_SUFFIX(flags, _INLINE, AMD3DNOWEXT)