mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-08-08 01:00:11 +00:00
vk: Workgroup tuning for different vendors
This commit is contained in:
parent
99fb6d6a5d
commit
61af2b7dfc
1 changed files with 9 additions and 1 deletions
|
@ -89,13 +89,21 @@ namespace vk
|
||||||
{
|
{
|
||||||
case vk::driver_vendor::unknown:
|
case vk::driver_vendor::unknown:
|
||||||
case vk::driver_vendor::INTEL:
|
case vk::driver_vendor::INTEL:
|
||||||
|
// Intel hardware has 8 threads, but LDS allocation behavior puts the optimal group size between 64 and 256
|
||||||
|
// Based on Intel's own OpenCL recommended settings
|
||||||
|
unroll_loops = true;
|
||||||
|
optimal_kernel_size = 1;
|
||||||
|
optimal_group_size = 128;
|
||||||
|
break;
|
||||||
case vk::driver_vendor::NVIDIA:
|
case vk::driver_vendor::NVIDIA:
|
||||||
|
// Warps are multiples of 32. Increasing kernel depth seems to hurt performance (Nier, Big Duck sample)
|
||||||
unroll_loops = true;
|
unroll_loops = true;
|
||||||
optimal_group_size = 32;
|
optimal_group_size = 32;
|
||||||
optimal_kernel_size = 16;
|
optimal_kernel_size = 1;
|
||||||
break;
|
break;
|
||||||
case vk::driver_vendor::AMD:
|
case vk::driver_vendor::AMD:
|
||||||
case vk::driver_vendor::RADV:
|
case vk::driver_vendor::RADV:
|
||||||
|
// Wavefronts are multiples of 64
|
||||||
unroll_loops = false;
|
unroll_loops = false;
|
||||||
optimal_kernel_size = 1;
|
optimal_kernel_size = 1;
|
||||||
optimal_group_size = 64;
|
optimal_group_size = 64;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue