mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-08-10 18:18:49 +00:00
meh
This commit is contained in:
parent
2c393d35f0
commit
98c174edc4
520 changed files with 74815 additions and 58942 deletions
|
@ -83,22 +83,32 @@ void VertexLoaderX64::ReadVertex(OpArg data, VertexComponentFormat attribute,
|
|||
bool dequantize, u8 scaling_exponent,
|
||||
AttributeFormat* native_format)
|
||||
{
|
||||
static const __m128i shuffle_lut[5][3] = {
|
||||
{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFF00L), // 1x u8
|
||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFF01L, 0xFFFFFF00L), // 2x u8
|
||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFF02L, 0xFFFFFF01L, 0xFFFFFF00L)}, // 3x u8
|
||||
{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x00FFFFFFL), // 1x s8
|
||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x01FFFFFFL, 0x00FFFFFFL), // 2x s8
|
||||
_mm_set_epi32(0xFFFFFFFFL, 0x02FFFFFFL, 0x01FFFFFFL, 0x00FFFFFFL)}, // 3x s8
|
||||
{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFF0001L), // 1x u16
|
||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFF0203L, 0xFFFF0001L), // 2x u16
|
||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFF0405L, 0xFFFF0203L, 0xFFFF0001L)}, // 3x u16
|
||||
{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x0001FFFFL), // 1x s16
|
||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x0203FFFFL, 0x0001FFFFL), // 2x s16
|
||||
_mm_set_epi32(0xFFFFFFFFL, 0x0405FFFFL, 0x0203FFFFL, 0x0001FFFFL)}, // 3x s16
|
||||
{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x00010203L), // 1x float
|
||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L), // 2x float
|
||||
_mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x04050607L, 0x00010203L)}, // 3x float
|
||||
using ShuffleRow = std::array<__m128i, 3>;
|
||||
static const Common::EnumMap<ShuffleRow, ComponentFormat::InvalidFloat7> shuffle_lut = {
|
||||
ShuffleRow{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFF00L), // 1x u8
|
||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFF01L, 0xFFFFFF00L), // 2x u8
|
||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFF02L, 0xFFFFFF01L, 0xFFFFFF00L)}, // 3x u8
|
||||
ShuffleRow{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x00FFFFFFL), // 1x s8
|
||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x01FFFFFFL, 0x00FFFFFFL), // 2x s8
|
||||
_mm_set_epi32(0xFFFFFFFFL, 0x02FFFFFFL, 0x01FFFFFFL, 0x00FFFFFFL)}, // 3x s8
|
||||
ShuffleRow{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFF0001L), // 1x u16
|
||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFF0203L, 0xFFFF0001L), // 2x u16
|
||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFF0405L, 0xFFFF0203L, 0xFFFF0001L)}, // 3x u16
|
||||
ShuffleRow{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x0001FFFFL), // 1x s16
|
||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x0203FFFFL, 0x0001FFFFL), // 2x s16
|
||||
_mm_set_epi32(0xFFFFFFFFL, 0x0405FFFFL, 0x0203FFFFL, 0x0001FFFFL)}, // 3x s16
|
||||
ShuffleRow{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x00010203L), // 1x float
|
||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L), // 2x float
|
||||
_mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x04050607L, 0x00010203L)}, // 3x float
|
||||
ShuffleRow{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x00010203L), // 1x invalid
|
||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L), // 2x invalid
|
||||
_mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x04050607L, 0x00010203L)}, // 3x invalid
|
||||
ShuffleRow{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x00010203L), // 1x invalid
|
||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L), // 2x invalid
|
||||
_mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x04050607L, 0x00010203L)}, // 3x invalid
|
||||
ShuffleRow{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x00010203L), // 1x invalid
|
||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L), // 2x invalid
|
||||
_mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x04050607L, 0x00010203L)}, // 3x invalid
|
||||
};
|
||||
static const __m128 scale_factors[32] = {
|
||||
_mm_set_ps1(1. / (1u << 0)), _mm_set_ps1(1. / (1u << 1)), _mm_set_ps1(1. / (1u << 2)),
|
||||
|
@ -169,7 +179,7 @@ void VertexLoaderX64::ReadVertex(OpArg data, VertexComponentFormat attribute,
|
|||
else
|
||||
MOVD_xmm(coords, data);
|
||||
|
||||
PSHUFB(coords, MPIC(&shuffle_lut[u32(format)][count_in - 1]));
|
||||
PSHUFB(coords, MPIC(&shuffle_lut[format][count_in - 1]));
|
||||
|
||||
// Sign-extend.
|
||||
if (format == ComponentFormat::Byte)
|
||||
|
@ -221,6 +231,9 @@ void VertexLoaderX64::ReadVertex(OpArg data, VertexComponentFormat attribute,
|
|||
PSRLD(coords, 16);
|
||||
break;
|
||||
case ComponentFormat::Float:
|
||||
case ComponentFormat::InvalidFloat5:
|
||||
case ComponentFormat::InvalidFloat6:
|
||||
case ComponentFormat::InvalidFloat7:
|
||||
// Floats don't need to be scaled or converted,
|
||||
// so we can just load/swap/store them directly
|
||||
// and return early.
|
||||
|
@ -254,7 +267,7 @@ void VertexLoaderX64::ReadVertex(OpArg data, VertexComponentFormat attribute,
|
|||
}
|
||||
}
|
||||
|
||||
if (format != ComponentFormat::Float)
|
||||
if (format < ComponentFormat::Float)
|
||||
{
|
||||
CVTDQ2PS(coords, R(coords));
|
||||
|
||||
|
@ -458,8 +471,8 @@ void VertexLoaderX64::GenerateVertexLoader()
|
|||
|
||||
if (m_VtxDesc.low.Normal != VertexComponentFormat::NotPresent)
|
||||
{
|
||||
static constexpr Common::EnumMap<u8, static_cast<ComponentFormat>(7)> SCALE_MAP = {7, 6, 15, 14,
|
||||
0, 0, 0, 0};
|
||||
static constexpr Common::EnumMap<u8, ComponentFormat::InvalidFloat7> SCALE_MAP = {7, 6, 15, 14,
|
||||
0, 0, 0, 0};
|
||||
const u8 scaling_exponent = SCALE_MAP[m_VtxAttr.g0.NormalFormat];
|
||||
|
||||
// Normal
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue