vk: Workgroup tuning for different vendors

This commit is contained in:
kd-11 2019-08-30 14:46:48 +03:00 committed by kd-11
commit 61af2b7dfc

View file

@ -89,13 +89,21 @@ namespace vk
{ {
case vk::driver_vendor::unknown: case vk::driver_vendor::unknown:
case vk::driver_vendor::INTEL: case vk::driver_vendor::INTEL:
// Intel hardware has 8 threads, but LDS allocation behavior puts the optimal group size between 64 and 256
// Based on Intel's own recommended OpenCL settings
unroll_loops = true;
optimal_kernel_size = 1;
optimal_group_size = 128;
break;
case vk::driver_vendor::NVIDIA: case vk::driver_vendor::NVIDIA:
// Warps are 32 threads wide, so group sizes should be multiples of 32. Increasing kernel depth seems to hurt performance (Nier, Big Duck sample)
unroll_loops = true; unroll_loops = true;
optimal_group_size = 32; optimal_group_size = 32;
optimal_kernel_size = 16; optimal_kernel_size = 1;
break; break;
case vk::driver_vendor::AMD: case vk::driver_vendor::AMD:
case vk::driver_vendor::RADV: case vk::driver_vendor::RADV:
// Wavefronts are 64 threads wide, so group sizes should be multiples of 64
unroll_loops = false; unroll_loops = false;
optimal_kernel_size = 1; optimal_kernel_size = 1;
optimal_group_size = 64; optimal_group_size = 64;