This commit is contained in:
Lander Gallastegi 2025-04-19 17:54:14 +02:00 committed by GitHub
commit d33b7ed04d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
72 changed files with 9315 additions and 161 deletions

View file

@ -635,6 +635,7 @@ set(COMMON src/common/logging/backend.cpp
src/common/assert.h
src/common/bit_field.h
src/common/bounded_threadsafe_queue.h
src/common/cartesian_invoke.h
src/common/concepts.h
src/common/config.cpp
src/common/config.h
@ -783,6 +784,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/recompiler.cpp
src/shader_recompiler/recompiler.h
src/shader_recompiler/info.h
src/shader_recompiler/pools.h
src/shader_recompiler/params.h
src/shader_recompiler/runtime_info.h
src/shader_recompiler/specialization.h
@ -847,15 +849,39 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp
src/shader_recompiler/ir/passes/shared_memory_to_storage_pass.cpp
src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
src/shader_recompiler/ir/compute_value/compute.cpp
src/shader_recompiler/ir/compute_value/compute.h
src/shader_recompiler/ir/compute_value/do_bitcast.cpp
src/shader_recompiler/ir/compute_value/do_bitcast.h
src/shader_recompiler/ir/compute_value/do_composite.cpp
src/shader_recompiler/ir/compute_value/do_composite.h
src/shader_recompiler/ir/compute_value/do_convert.cpp
src/shader_recompiler/ir/compute_value/do_convert.h
src/shader_recompiler/ir/compute_value/do_float_operations.cpp
src/shader_recompiler/ir/compute_value/do_float_operations.h
src/shader_recompiler/ir/compute_value/do_integer_operations.cpp
src/shader_recompiler/ir/compute_value/do_integer_operations.h
src/shader_recompiler/ir/compute_value/do_logical_operations.cpp
src/shader_recompiler/ir/compute_value/do_logical_operations.h
src/shader_recompiler/ir/compute_value/do_nop_functions.h
src/shader_recompiler/ir/compute_value/do_packing.cpp
src/shader_recompiler/ir/compute_value/do_packing.h
src/shader_recompiler/ir/compute_value/imm_value.cpp
src/shader_recompiler/ir/compute_value/imm_value.h
src/shader_recompiler/ir/abstract_syntax_list.cpp
src/shader_recompiler/ir/abstract_syntax_list.h
src/shader_recompiler/ir/attribute.cpp
src/shader_recompiler/ir/attribute.h
src/shader_recompiler/ir/basic_block.cpp
src/shader_recompiler/ir/basic_block.h
src/shader_recompiler/ir/condition.h
src/shader_recompiler/ir/conditional_tree.cpp
src/shader_recompiler/ir/conditional_tree.h
src/shader_recompiler/ir/ir_emitter.cpp
src/shader_recompiler/ir/ir_emitter.h
src/shader_recompiler/ir/microinstruction.cpp
src/shader_recompiler/ir/num_executions.cpp
src/shader_recompiler/ir/num_executions.h
src/shader_recompiler/ir/opcodes.cpp
src/shader_recompiler/ir/opcodes.h
src/shader_recompiler/ir/opcodes.inc
@ -865,14 +891,43 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/ir/post_order.h
src/shader_recompiler/ir/program.cpp
src/shader_recompiler/ir/program.h
src/shader_recompiler/ir/reg.h
src/shader_recompiler/ir/reinterpret.h
src/shader_recompiler/ir/srt_gvn_table.h
src/shader_recompiler/ir/subprogram.cpp
src/shader_recompiler/ir/subprogram.h
src/shader_recompiler/ir/type.cpp
src/shader_recompiler/ir/type.h
src/shader_recompiler/ir/value.cpp
src/shader_recompiler/ir/value.h
)
if (ARCHITECTURE STREQUAL "x86_64")
set(SHADER_RECOMPILER ${SHADER_RECOMPILER}
src/shader_recompiler/backend/asm_x64/emit_x64_atomic.cpp
src/shader_recompiler/backend/asm_x64/emit_x64_barrier.cpp
src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp
src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp
src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp
src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp
src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp
src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp
src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
src/shader_recompiler/backend/asm_x64/emit_x64_integer.cpp
src/shader_recompiler/backend/asm_x64/emit_x64_logical.cpp
src/shader_recompiler/backend/asm_x64/emit_x64_select.cpp
src/shader_recompiler/backend/asm_x64/emit_x64_shared_memory.cpp
src/shader_recompiler/backend/asm_x64/emit_x64_special.cpp
src/shader_recompiler/backend/asm_x64/emit_x64_undefined.cpp
src/shader_recompiler/backend/asm_x64/emit_x64_warp.cpp
src/shader_recompiler/backend/asm_x64/emit_x64.cpp
src/shader_recompiler/backend/asm_x64/emit_x64.h
src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp
src/shader_recompiler/backend/asm_x64/x64_emit_context.h
src/shader_recompiler/backend/asm_x64/x64_utils.cpp
src/shader_recompiler/backend/asm_x64/x64_utils.h)
endif()
set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
src/video_core/amdgpu/liverpool.h
src/video_core/amdgpu/pixel_format.cpp

View file

@ -0,0 +1,43 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <tuple>
namespace Common {
namespace Detail {
/// Recursion helper for CartesianInvoke.
///
/// `Level` is the argument list currently being iterated. Once Level == N,
/// every iterator in `arglists_its` points at one chosen element, so `func`
/// is invoked on that combination and the result is written through the
/// output iterator.
///
/// `out_it` is taken by reference so advancing it inside the recursion is
/// visible at every level. Combined with the canonical `*out_it++ = value`
/// form below, this works for plain output iterators (e.g. raw pointers)
/// as well as insert iterators; the previous `out_it = value` form only
/// behaved correctly for insert iterators.
template <typename Func, typename OutputIt, std::size_t N, std::size_t Level, typename... ArgLists>
void CartesianInvokeImpl(Func func, OutputIt& out_it,
                         std::tuple<typename ArgLists::const_iterator...>& arglists_its,
                         const std::tuple<const ArgLists&...>& arglists_tuple) {
    if constexpr (Level == N) {
        // Dereference every argument iterator, invoke func on the resulting
        // element pack, then write the result and advance the iterator.
        *out_it++ = std::apply(
            [&func](const auto&... its) { return func(*its...); }, arglists_its);
        return;
    } else {
        const auto& arglist = std::get<Level>(arglists_tuple);
        for (auto it = arglist.begin(); it != arglist.end(); ++it) {
            std::get<Level>(arglists_its) = it;
            CartesianInvokeImpl<Func, OutputIt, N, Level + 1, ArgLists...>(
                func, out_it, arglists_its, arglists_tuple);
        }
    }
}
} // namespace Detail
/// Invokes `func` once for every combination of elements from `arg_lists`
/// (their cartesian product), writing each result through `out_it`.
/// Iteration order: the first list is the outermost loop, the last list the
/// innermost.
template <typename Func, typename OutputIt, typename... ArgLists>
void CartesianInvoke(Func func, OutputIt out_it, const ArgLists&... arg_lists) {
    constexpr std::size_t N = sizeof...(ArgLists);
    const std::tuple<const ArgLists&...> arglists_tuple = std::forward_as_tuple(arg_lists...);
    std::tuple<typename ArgLists::const_iterator...> arglists_it;
    Detail::CartesianInvokeImpl<Func, OutputIt, N, 0, ArgLists...>(func, out_it, arglists_it,
                                                                   arglists_tuple);
}
} // namespace Common

View file

@ -3,6 +3,7 @@
#pragma once
#include <cstddef>
#include <tuple>
namespace Common {

View file

@ -0,0 +1,268 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/func_traits.h"
#include "shader_recompiler/backend/asm_x64/emit_x64.h"
#include "shader_recompiler/backend/asm_x64/emit_x64_instructions.h"
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/backend/asm_x64/x64_utils.h"
namespace Shader::Backend::X64 {
using namespace Xbyak;
using namespace Xbyak::util;
namespace {
// Emits a conditional jump to `label` based on the first argument of `ref`.
// With `invert` set, the jump is taken when the condition is FALSE.
static void EmitCondition(EmitContext& ctx, const IR::Inst* ref, Label& label, bool invert) {
    CodeGenerator& c = ctx.Code();
    IR::Value cond = ref->Arg(0);
    if (cond.IsImmediate()) {
        // If immediate, we evaluate at compile time: emit either an
        // unconditional jump or no code at all.
        if (cond.U1() != invert) {
            c.jmp(label, CodeGenerator::LabelType::T_NEAR);
        }
    } else {
        // Runtime condition: test bit 0 of the defining instruction's result.
        const OperandHolder& op = ctx.Def(cond.InstRecursive())[0];
        c.test(op.Op(), 0x1);
        if (invert) {
            c.jz(label, CodeGenerator::LabelType::T_NEAR);
        } else {
            c.jnz(label, CodeGenerator::LabelType::T_NEAR);
        }
    }
}
// Converts an IR::Value into the concrete argument type expected by an
// Emit* function, dispatched at compile time on ArgType.
template <typename ArgType>
std::remove_reference_t<ArgType> Arg(EmitContext& ctx, const IR::Value& arg) {
    if constexpr (std::is_same_v<ArgType, const Operands&>) {
        // Operand list of the instruction that defines this value.
        return ctx.Def(arg);
    } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) {
        return arg;
    } else if constexpr (std::is_same_v<ArgType, u32>) {
        return arg.U32();
    } else if constexpr (std::is_same_v<ArgType, u64>) {
        return arg.U64();
    } else if constexpr (std::is_same_v<ArgType, bool>) {
        return arg.U1();
    } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
        return arg.Attribute();
    } else if constexpr (std::is_same_v<ArgType, IR::ScalarReg>) {
        return arg.ScalarReg();
    } else if constexpr (std::is_same_v<ArgType, IR::VectorReg>) {
        return arg.VectorReg();
    } else if constexpr (std::is_same_v<ArgType, const char*>) {
        return arg.StringLiteral();
    } else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
        return arg.Patch();
    }
    UNREACHABLE();
}
// Expands the instruction's IR arguments into the Emit* function call.
// The index offset into Traits::ArgType accounts for the leading
// EmitContext&, optional IR::Inst* and optional destination parameters.
template <auto func, bool is_first_arg_inst, bool has_dest, size_t... I>
void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
    using Traits = Common::FuncTraits<decltype(func)>;
    if constexpr (has_dest) {
        if constexpr (is_first_arg_inst) {
            func(ctx, inst, ctx.Def(inst),
                 Arg<typename Traits::template ArgType<I + 3>>(ctx, inst->Arg(I))...);
        } else {
            func(ctx, ctx.Def(inst),
                 Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
        }
    } else {
        if constexpr (is_first_arg_inst) {
            func(ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
        } else {
            func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
        }
    }
}
// Inspects the Emit* function signature to decide how many IR arguments it
// consumes and whether it takes the raw IR::Inst* and/or a destination.
template <auto func, bool has_dest>
void Invoke(EmitContext& ctx, IR::Inst* inst) {
    using Traits = Common::FuncTraits<decltype(func)>;
    static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments");
    if constexpr (Traits::NUM_ARGS == 1) {
        // Only takes EmitContext&.
        Invoke<func, false, false>(ctx, inst, std::make_index_sequence<0>{});
    } else {
        using FirstArgType = typename Traits::template ArgType<1>;
        static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst*>;
        static constexpr size_t num_inst_args = Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1);
        if constexpr (num_inst_args > 0 && has_dest) {
            // One of the remaining parameters is the destination operand list.
            Invoke<func, is_first_arg_inst, true>(ctx, inst,
                                                  std::make_index_sequence<num_inst_args - 1>{});
        } else {
            Invoke<func, is_first_arg_inst, false>(ctx, inst,
                                                   std::make_index_sequence<num_inst_args>{});
        }
    }
}
// Dispatches one IR instruction to its Emit* handler via the opcode table.
void EmitInst(EmitContext& ctx, IR::Inst* inst) {
    switch (inst->GetOpcode()) {
#define OPCODE(name, result_type, ...)                                                             \
    case IR::Opcode::name:                                                                         \
        Invoke<&Emit##name, IR::Type::result_type != IR::Type::Void>(ctx, inst);                   \
        return;
#include "shader_recompiler/ir/opcodes.inc"
#undef OPCODE
    }
    UNREACHABLE_MSG("Invalid opcode {}", inst->GetOpcode());
}
// Returns true when every node from `it` to the end of the syntax list is
// pure control-flow bookkeeping (Return / Loop / EndIf), i.e. no further
// real code will be emitted after this point.
static bool IsLastInst(const IR::AbstractSyntaxList& list, IR::AbstractSyntaxList::const_iterator it) {
    using NodeType = IR::AbstractSyntaxNode::Type;
    while (it != list.end()) {
        const NodeType type = it->type;
        const bool is_trivial =
            type == NodeType::Return || type == NodeType::Loop || type == NodeType::EndIf;
        if (!is_trivial) {
            return false;
        }
        ++it;
    }
    return true;
}
// Walks the program's structured syntax list in order and emits code for
// each node: block bodies, structured-control-flow jumps, and phi moves.
void Traverse(EmitContext& ctx, const IR::Program& program) {
    CodeGenerator& c = ctx.Code();
    for (auto it = program.syntax_list.begin(); it != program.syntax_list.end(); ++it) {
        const IR::AbstractSyntaxNode& node = *it;
        // Temp registers are scratch per-node; release them between nodes.
        ctx.ResetTempRegs();
        switch (node.type) {
        case IR::AbstractSyntaxNode::Type::Block: {
            IR::Block* block = node.data.block;
            c.L(ctx.BlockLabel(block));
            for (IR::Inst& inst : *block) {
                ctx.ResetTempRegs();
                EmitInst(ctx, &inst);
            }
            // Materialize pending phi values for successors of this block.
            const auto& phi_assignments = ctx.PhiAssignments(block);
            if (phi_assignments) {
                for (const auto& [phi, value] : phi_assignments->get()) {
                    MovValue(ctx, ctx.Def(phi), value);
                }
            }
            // Jump to the epilogue unless we fall through to it anyway.
            if (ctx.EndFlag() && IsLastInst(program.syntax_list, it)) {
                c.jmp(ctx.EndLabel());
            }
            break;
        }
        case IR::AbstractSyntaxNode::Type::If: {
            // Skip the body when the condition is false.
            IR::Inst* ref = node.data.if_node.cond.InstRecursive();
            Label& merge = ctx.BlockLabel(node.data.if_node.merge);
            EmitCondition(ctx, ref, merge, true);
            break;
        }
        case IR::AbstractSyntaxNode::Type::Repeat: {
            // Loop back to the header while the condition holds.
            IR::Inst* ref = node.data.repeat.cond.InstRecursive();
            Label& loop_header = ctx.BlockLabel(node.data.repeat.loop_header);
            EmitCondition(ctx, ref, loop_header, false);
            break;
        }
        case IR::AbstractSyntaxNode::Type::Break: {
            IR::Inst* ref = node.data.break_node.cond.InstRecursive();
            Label& merge = ctx.BlockLabel(node.data.break_node.merge);
            EmitCondition(ctx, ref, merge, true);
            // NOTE(review): EmitCondition(..., true) already emitted the jump
            // to `merge`; this extra jz relies on the flags it set and, when
            // the condition is an immediate, reads stale flags — confirm it
            // is intentional (the If/Repeat cases emit no such extra jump).
            c.jz(merge);
            break;
        }
        case IR::AbstractSyntaxNode::Type::Unreachable: {
            // Trap if control flow ever reaches a supposedly dead path.
            c.int3();
            break;
        }
        case IR::AbstractSyntaxNode::Type::Return:
        case IR::AbstractSyntaxNode::Type::Loop:
        case IR::AbstractSyntaxNode::Type::EndIf:
            break;
        }
    }
}
} // Anonymous namespace
// Entry point: emits x64 machine code for an entire IR program into `c`,
// then places the shared end label and the epilogue.
void EmitX64(const IR::Program& program, Xbyak::CodeGenerator& c) {
    EmitContext context(program, c);
    Traverse(context, program);
    context.Code().L(context.EndLabel());
    context.Epilogue();
}
// Phi nodes are resolved by MovValue at block ends; nothing to emit here.
void EmitPhi(EmitContext& ctx) {
}
void EmitVoid(EmitContext&) {}
// Identities should have been eliminated by earlier passes.
void EmitIdentity(EmitContext& ctx) {
    throw NotImplementedException("Forward identity declaration");
}
// Condition refs are consumed directly by EmitCondition; no code emitted.
void EmitConditionRef(EmitContext& ctx) {
}
void EmitReference(EmitContext&) {}
void EmitPhiMove(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}
// The Get/Set opcodes below operate on GCN hardware flags/registers that are
// lowered away before reaching this backend; hitting any of them is a bug.
void EmitGetScc(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}
void EmitGetExec(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}
void EmitGetVcc(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}
void EmitGetSccLo(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}
void EmitGetVccLo(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}
void EmitGetVccHi(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}
void EmitGetM0(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}
void EmitSetScc(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}
void EmitSetExec(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}
void EmitSetVcc(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}
void EmitSetSccLo(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}
void EmitSetVccLo(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}
void EmitSetVccHi(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}
void EmitSetM0(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}
} // namespace Shader::Backend::X64

View file

@ -0,0 +1,15 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <vector>
#include <xbyak/xbyak.h>
#include <xbyak/xbyak_util.h>
#include "shader_recompiler/ir/program.h"
namespace Shader::Backend::X64 {
void EmitX64(const IR::Program& program, Xbyak::CodeGenerator& c);
} // namespace Shader::Backend::X64

View file

@ -0,0 +1,138 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/exception.h"
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
namespace Shader::Backend::X64 {
// Atomic operations are not yet implemented in the x64 backend; every
// handler below throws so unsupported shaders fail loudly instead of
// producing wrong code.
void EmitSharedAtomicIAdd32(EmitContext& ctx) {
    throw NotImplementedException("SharedAtomicIAdd32");
}
void EmitSharedAtomicUMax32(EmitContext& ctx) {
    throw NotImplementedException("SharedAtomicUMax32");
}
void EmitSharedAtomicSMax32(EmitContext& ctx) {
    throw NotImplementedException("SharedAtomicSMax32");
}
void EmitSharedAtomicUMin32(EmitContext& ctx) {
    throw NotImplementedException("SharedAtomicUMin32");
}
void EmitSharedAtomicSMin32(EmitContext& ctx) {
    throw NotImplementedException("SharedAtomicSMin32");
}
void EmitSharedAtomicAnd32(EmitContext& ctx) {
    throw NotImplementedException("SharedAtomicAnd32");
}
void EmitSharedAtomicOr32(EmitContext& ctx) {
    throw NotImplementedException("SharedAtomicOr32");
}
void EmitSharedAtomicXor32(EmitContext& ctx) {
    throw NotImplementedException("SharedAtomicXor32");
}
void EmitBufferAtomicIAdd32(EmitContext& ctx) {
    throw NotImplementedException("BufferAtomicIAdd32");
}
void EmitBufferAtomicSMin32(EmitContext& ctx) {
    throw NotImplementedException("BufferAtomicSMin32");
}
void EmitBufferAtomicUMin32(EmitContext& ctx) {
    throw NotImplementedException("BufferAtomicUMin32");
}
void EmitBufferAtomicSMax32(EmitContext& ctx) {
    throw NotImplementedException("BufferAtomicSMax32");
}
void EmitBufferAtomicUMax32(EmitContext& ctx) {
    throw NotImplementedException("BufferAtomicUMax32");
}
void EmitBufferAtomicInc32(EmitContext& ctx) {
    throw NotImplementedException("BufferAtomicInc32");
}
void EmitBufferAtomicDec32(EmitContext& ctx) {
    throw NotImplementedException("BufferAtomicDec32");
}
void EmitBufferAtomicAnd32(EmitContext& ctx) {
    throw NotImplementedException("BufferAtomicAnd32");
}
void EmitBufferAtomicOr32(EmitContext& ctx) {
    throw NotImplementedException("BufferAtomicOr32");
}
void EmitBufferAtomicXor32(EmitContext& ctx) {
    throw NotImplementedException("BufferAtomicXor32");
}
void EmitBufferAtomicSwap32(EmitContext& ctx) {
    throw NotImplementedException("BufferAtomicSwap32");
}
void EmitImageAtomicIAdd32(EmitContext& ctx) {
    throw NotImplementedException("ImageAtomicIAdd32");
}
void EmitImageAtomicSMin32(EmitContext& ctx) {
    throw NotImplementedException("ImageAtomicSMin32");
}
void EmitImageAtomicUMin32(EmitContext& ctx) {
    throw NotImplementedException("ImageAtomicUMin32");
}
void EmitImageAtomicSMax32(EmitContext& ctx) {
    throw NotImplementedException("ImageAtomicSMax32");
}
void EmitImageAtomicUMax32(EmitContext& ctx) {
    throw NotImplementedException("ImageAtomicUMax32");
}
void EmitImageAtomicInc32(EmitContext& ctx) {
    throw NotImplementedException("ImageAtomicInc32");
}
void EmitImageAtomicDec32(EmitContext& ctx) {
    throw NotImplementedException("ImageAtomicDec32");
}
void EmitImageAtomicAnd32(EmitContext& ctx) {
    throw NotImplementedException("ImageAtomicAnd32");
}
void EmitImageAtomicOr32(EmitContext& ctx) {
    throw NotImplementedException("ImageAtomicOr32");
}
void EmitImageAtomicXor32(EmitContext& ctx) {
    throw NotImplementedException("ImageAtomicXor32");
}
void EmitImageAtomicExchange32(EmitContext& ctx) {
    throw NotImplementedException("ImageAtomicExchange32");
}
void EmitDataAppend(EmitContext& ctx) {
    throw NotImplementedException("DataAppend");
}
void EmitDataConsume(EmitContext& ctx) {
    throw NotImplementedException("DataConsume");
}
} // namespace Shader::Backend::X64

View file

@ -0,0 +1,20 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
namespace Shader::Backend::X64 {
// Barriers are no-ops here — presumably because the x64 backend executes a
// shader invocation sequentially on the CPU, so there is nothing to
// synchronize against. TODO(review): confirm against the execution model.
void EmitBarrier(EmitContext& ctx) {
}
void EmitWorkgroupMemoryBarrier(EmitContext& ctx) {
}
void EmitDeviceMemoryBarrier(EmitContext& ctx) {
}
} // namespace Shader::Backend::X64

View file

@ -0,0 +1,204 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/exception.h"
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/backend/asm_x64/x64_utils.h"
using namespace Xbyak;
using namespace Xbyak::util;
namespace Shader::Backend::X64 {
// Bitcast between 16-bit int and float: both live in GP registers, so a
// plain move suffices.
void EmitBitCastU16F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // We handle 16-bit floats in general purpose registers
    MovGP(ctx, dest[0], src[0]);
}
// f32 -> u32 raw-bit move. When both operands are registers the bits are
// bounced through the stack.
// NOTE(review): writes to [rsp - 4]/[rsp - 8] below rsp rely on the SysV
// red zone; Windows x64 provides no red zone — confirm this is safe on all
// supported targets.
void EmitBitCastU32F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    if (src[0].IsMem()) {
        MovGP(ctx, dest[0], src[0]);
    } else if (dest[0].IsMem()) {
        ctx.Code().movd(dest[0].Mem(), src[0].Xmm());
    } else {
        // XMM -> stack -> GP register.
        ctx.Code().movd(dword[rsp - 4], src[0].Xmm());
        MovGP(ctx, dest[0], dword[rsp - 4]);
    }
}
// f64 -> u64 raw-bit move; same structure as the 32-bit variant but with
// movq/qword.
void EmitBitCastU64F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    if (src[0].IsMem()) {
        MovGP(ctx, dest[0], src[0]);
    } else if (dest[0].IsMem()) {
        ctx.Code().movq(dest[0].Mem(), src[0].Xmm());
    } else {
        ctx.Code().movq(qword[rsp - 8], src[0].Xmm());
        MovGP(ctx, dest[0], qword[rsp - 8]);
    }
}
// u16 -> f16: both sides are GP registers, plain move.
void EmitBitCastF16U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    MovGP(ctx, dest[0], src[0]);
}
// u32 -> f32 raw-bit move into an XMM register (or memory destination).
void EmitBitCastF32U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    if (dest[0].IsMem()) {
        MovGP(ctx, dest[0], src[0]);
    } else if (src[0].IsMem()) {
        ctx.Code().movd(dest[0].Xmm(), src[0].Mem());
    } else {
        // GP register -> stack -> XMM.
        MovGP(ctx, dword[rsp - 4], src[0]);
        ctx.Code().movd(dest[0].Xmm(), dword[rsp - 4]);
    }
}
// u64 -> f64 raw-bit move into an XMM register (or memory destination).
void EmitBitCastF64U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    if (dest[0].IsMem()) {
        MovGP(ctx, dest[0], src[0]);
    } else if (src[0].IsMem()) {
        ctx.Code().movq(dest[0].Xmm(), src[0].Mem());
    } else {
        // GP register -> stack -> XMM.
        MovGP(ctx, qword[rsp - 8], src[0]);
        // Fix: use movq for the memory -> XMM bit move; plain `mov` has no
        // XMM form. Mirrors EmitBitCastU64F64 / EmitBitCastF32U32 above.
        ctx.Code().movq(dest[0].Xmm(), qword[rsp - 8]);
    }
}
// Packs two u32 values into one u64: dest = (src1 << 32) | src0.
// NOTE(review): when dest[0] is memory but both sources are registers,
// is_mem is false and dest[0].Reg() is taken on a memory operand — should
// the condition be just dest[0].IsMem()? Also assumes the destination
// register is 64-bit wide (shl by 32). Confirm both against OperandHolder.
void EmitPackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    const bool is_mem = dest[0].IsMem() && (src[0].IsMem() || src[1].IsMem());
    Reg tmp = is_mem ? ctx.TempGPReg() : dest[0].Reg();
    MovGP(ctx, tmp, src[1]);
    ctx.Code().shl(tmp, 32);
    ctx.Code().or_(tmp, src[0].Op());
    MovGP(ctx, dest[0], tmp);
}
// Splits one u64 into two u32 halves: dest[0] = low 32 bits,
// dest[1] = high 32 bits.
void EmitUnpackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg src0 = src[0].IsMem() ? ctx.TempGPReg() : src[0].Reg();
    MovGP(ctx, src0, src[0]);
    Reg dest1 = dest[1].IsMem() ? ctx.TempGPReg() : dest[1].Reg().changeBit(64);
    MovGP(ctx, dest1, src0);
    ctx.Code().shr(dest1, 32);
    MovGP(ctx, dest[1], dest1);
    MovGP(ctx, dest[0], src0.cvt32());
}
// Packs two f32 values into one XMM register: lane 0 = src[0],
// lane 1 = src[1] (via pinsrd).
void EmitPackFloat2x32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovFloat(ctx, tmp, src[0]);
    ctx.Code().pinsrd(tmp, src[1].Op(), 1);
    MovFloat(ctx, dest[0], tmp);
}
// The remaining pack/unpack formats are not yet implemented in the x64
// backend; each handler throws so unsupported shaders fail loudly.
void EmitPackUnorm2x16(EmitContext& ctx) {
    throw NotImplementedException("PackUnorm2x16");
}
void EmitUnpackUnorm2x16(EmitContext& ctx) {
    throw NotImplementedException("UnpackUnorm2x16");
}
void EmitPackSnorm2x16(EmitContext& ctx) {
    throw NotImplementedException("PackSnorm2x16");
}
void EmitUnpackSnorm2x16(EmitContext& ctx) {
    throw NotImplementedException("UnpackSnorm2x16");
}
void EmitPackUint2x16(EmitContext& ctx) {
    throw NotImplementedException("PackUint2x16");
}
void EmitUnpackUint2x16(EmitContext& ctx) {
    throw NotImplementedException("UnpackUint2x16");
}
void EmitPackSint2x16(EmitContext& ctx) {
    throw NotImplementedException("PackSint2x16");
}
void EmitUnpackSint2x16(EmitContext& ctx) {
    throw NotImplementedException("UnpackSint2x16");
}
void EmitPackHalf2x16(EmitContext& ctx) {
    throw NotImplementedException("PackHalf2x16");
}
void EmitUnpackHalf2x16(EmitContext& ctx) {
    throw NotImplementedException("UnpackHalf2x16");
}
void EmitPackUnorm4x8(EmitContext& ctx) {
    throw NotImplementedException("PackUnorm4x8");
}
void EmitUnpackUnorm4x8(EmitContext& ctx) {
    throw NotImplementedException("UnpackUnorm4x8");
}
void EmitPackSnorm4x8(EmitContext& ctx) {
    throw NotImplementedException("PackSnorm4x8");
}
void EmitUnpackSnorm4x8(EmitContext& ctx) {
    throw NotImplementedException("UnpackSnorm4x8");
}
void EmitPackUint4x8(EmitContext& ctx) {
    throw NotImplementedException("PackUint4x8");
}
void EmitUnpackUint4x8(EmitContext& ctx) {
    throw NotImplementedException("UnpackUint4x8");
}
void EmitPackSint4x8(EmitContext& ctx) {
    throw NotImplementedException("PackSint4x8");
}
void EmitUnpackSint4x8(EmitContext& ctx) {
    throw NotImplementedException("UnpackSint4x8");
}
void EmitPackUfloat10_11_11(EmitContext& ctx) {
    throw NotImplementedException("PackUfloat10_11_11");
}
void EmitUnpackUfloat10_11_11(EmitContext& ctx) {
    throw NotImplementedException("UnpackUfloat10_11_11");
}
void EmitPackUnorm2_10_10_10(EmitContext& ctx) {
    throw NotImplementedException("PackUnorm2_10_10_10");
}
void EmitUnpackUnorm2_10_10_10(EmitContext& ctx) {
    throw NotImplementedException("UnpackUnorm2_10_10_10");
}
void EmitPackSnorm2_10_10_10(EmitContext& ctx) {
    throw NotImplementedException("PackSnorm2_10_10_10");
}
void EmitUnpackSnorm2_10_10_10(EmitContext& ctx) {
    throw NotImplementedException("UnpackSnorm2_10_10_10");
}
void EmitPackUint2_10_10_10(EmitContext& ctx) {
    throw NotImplementedException("PackUint2_10_10_10");
}
void EmitUnpackUint2_10_10_10(EmitContext& ctx) {
    throw NotImplementedException("UnpackUint2_10_10_10");
}
void EmitPackSint2_10_10_10(EmitContext& ctx) {
    throw NotImplementedException("PackSint2_10_10_10");
}
void EmitUnpackSint2_10_10_10(EmitContext& ctx) {
    throw NotImplementedException("UnpackSint2_10_10_10");
}
} // namespace Shader::Backend::X64

View file

@ -0,0 +1,350 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/backend/asm_x64/x64_utils.h"
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
namespace Shader::Backend::X64 {
using namespace Xbyak;
using namespace Xbyak::util;
namespace {
// Selects the source operand for a shuffle: indices [0, N) pick from comp1,
// indices [N, 2N) pick from comp2. (The "Suffle" spelling is a pre-existing
// typo kept as-is so the call sites stay in sync.)
template <u32 N>
static const OperandHolder& GetSuffleOperand(const Operands& comp1, const Operands& comp2, u32 index) {
    if (index < N) {
        return comp1[index];
    } else {
        return comp2[index - N];
    }
}
} // Anonymous namespace
// U32 composite construct: copies each scalar source into the matching
// destination element with GP moves.
void EmitCompositeConstructU32x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2) {
    MovGP(ctx, dest[0], src1[0]);
    MovGP(ctx, dest[1], src2[0]);
}
void EmitCompositeConstructU32x3(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3) {
    MovGP(ctx, dest[0], src1[0]);
    MovGP(ctx, dest[1], src2[0]);
    MovGP(ctx, dest[2], src3[0]);
}
void EmitCompositeConstructU32x4(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3, const Operands& src4) {
    MovGP(ctx, dest[0], src1[0]);
    MovGP(ctx, dest[1], src2[0]);
    MovGP(ctx, dest[2], src3[0]);
    MovGP(ctx, dest[3], src4[0]);
}
// Interleaves two 2-component sources into a 2x2 composite.
void EmitCompositeConstructU32x2x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2) {
    MovGP(ctx, dest[0], src1[0]);
    MovGP(ctx, dest[1], src2[0]);
    MovGP(ctx, dest[2], src1[1]);
    MovGP(ctx, dest[3], src2[1]);
}
// Composite extract: copies element `index` of the composite.
void EmitCompositeExtractU32x2(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index) {
    MovGP(ctx, dest[0], composite[index]);
}
void EmitCompositeExtractU32x3(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index) {
    MovGP(ctx, dest[0], composite[index]);
}
void EmitCompositeExtractU32x4(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index) {
    MovGP(ctx, dest[0], composite[index]);
}
// Composite insert: copies the composite, replacing element `index` with
// `object`.
void EmitCompositeInsertU32x2(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index) {
    if (index == 0) {
        MovGP(ctx, dest[0], object[0]);
        MovGP(ctx, dest[1], composite[1]);
    } else {
        MovGP(ctx, dest[0], composite[0]);
        MovGP(ctx, dest[1], object[0]);
    }
}
void EmitCompositeInsertU32x3(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index) {
    for (u32 i = 0; i < 3; ++i) {
        if (i == index) {
            MovGP(ctx, dest[i], object[0]);
        } else {
            MovGP(ctx, dest[i], composite[i]);
        }
    }
}
// Composite insert: copies the 4-element composite, replacing element
// `index` with `object`.
void EmitCompositeInsertU32x4(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index) {
    // Fix: iterate all 4 elements (was `i < 3`, leaving dest[3] unwritten),
    // mirroring EmitCompositeInsertF16x4 and EmitCompositeInsertF32x4.
    for (u32 i = 0; i < 4; ++i) {
        if (i == index) {
            MovGP(ctx, dest[i], object[0]);
        } else {
            MovGP(ctx, dest[i], composite[i]);
        }
    }
}
// Composite shuffle: each destination element is picked from one of the two
// source composites by index (see GetSuffleOperand).
void EmitCompositeShuffleU32x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2) {
    MovGP(ctx, dest[0], GetSuffleOperand<2>(composite1, composite2, idx1));
    MovGP(ctx, dest[1], GetSuffleOperand<2>(composite1, composite2, idx2));
}
void EmitCompositeShuffleU32x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3) {
    MovGP(ctx, dest[0], GetSuffleOperand<3>(composite1, composite2, idx1));
    MovGP(ctx, dest[1], GetSuffleOperand<3>(composite1, composite2, idx2));
    MovGP(ctx, dest[2], GetSuffleOperand<3>(composite1, composite2, idx3));
}
void EmitCompositeShuffleU32x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4) {
    MovGP(ctx, dest[0], GetSuffleOperand<4>(composite1, composite2, idx1));
    MovGP(ctx, dest[1], GetSuffleOperand<4>(composite1, composite2, idx2));
    MovGP(ctx, dest[2], GetSuffleOperand<4>(composite1, composite2, idx3));
    MovGP(ctx, dest[3], GetSuffleOperand<4>(composite1, composite2, idx4));
}
// F16 composite ops: 16-bit floats live in GP registers (see the bitcast
// file), so these mirror the U32 variants with GP moves.
void EmitCompositeConstructF16x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2) {
    MovGP(ctx, dest[0], src1[0]);
    MovGP(ctx, dest[1], src2[0]);
}
void EmitCompositeConstructF16x3(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3) {
    MovGP(ctx, dest[0], src1[0]);
    MovGP(ctx, dest[1], src2[0]);
    MovGP(ctx, dest[2], src3[0]);
}
void EmitCompositeConstructF16x4(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3, const Operands& src4) {
    MovGP(ctx, dest[0], src1[0]);
    MovGP(ctx, dest[1], src2[0]);
    MovGP(ctx, dest[2], src3[0]);
    MovGP(ctx, dest[3], src4[0]);
}
void EmitCompositeExtractF16x2(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index) {
    MovGP(ctx, dest[0], composite[index]);
}
void EmitCompositeExtractF16x3(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index) {
    MovGP(ctx, dest[0], composite[index]);
}
void EmitCompositeExtractF16x4(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index) {
    MovGP(ctx, dest[0], composite[index]);
}
void EmitCompositeInsertF16x2(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index) {
    if (index == 0) {
        MovGP(ctx, dest[0], object[0]);
        MovGP(ctx, dest[1], composite[1]);
    } else {
        MovGP(ctx, dest[0], composite[0]);
        MovGP(ctx, dest[1], object[0]);
    }
}
void EmitCompositeInsertF16x3(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index) {
    for (u32 i = 0; i < 3; ++i) {
        if (i == index) {
            MovGP(ctx, dest[i], object[0]);
        } else {
            MovGP(ctx, dest[i], composite[i]);
        }
    }
}
void EmitCompositeInsertF16x4(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index) {
    for (u32 i = 0; i < 4; ++i) {
        if (i == index) {
            MovGP(ctx, dest[i], object[0]);
        } else {
            MovGP(ctx, dest[i], composite[i]);
        }
    }
}
void EmitCompositeShuffleF16x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2) {
    MovGP(ctx, dest[0], GetSuffleOperand<2>(composite1, composite2, idx1));
    MovGP(ctx, dest[1], GetSuffleOperand<2>(composite1, composite2, idx2));
}
void EmitCompositeShuffleF16x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3) {
    MovGP(ctx, dest[0], GetSuffleOperand<3>(composite1, composite2, idx1));
    MovGP(ctx, dest[1], GetSuffleOperand<3>(composite1, composite2, idx2));
    MovGP(ctx, dest[2], GetSuffleOperand<3>(composite1, composite2, idx3));
}
void EmitCompositeShuffleF16x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4) {
    MovGP(ctx, dest[0], GetSuffleOperand<4>(composite1, composite2, idx1));
    MovGP(ctx, dest[1], GetSuffleOperand<4>(composite1, composite2, idx2));
    MovGP(ctx, dest[2], GetSuffleOperand<4>(composite1, composite2, idx3));
    MovGP(ctx, dest[3], GetSuffleOperand<4>(composite1, composite2, idx4));
}
void EmitCompositeConstructF32x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2) {
MovFloat(ctx, dest[0], src1[0]);
MovFloat(ctx, dest[1], src2[0]);
}
void EmitCompositeConstructF32x3(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3) {
MovFloat(ctx, dest[0], src1[0]);
MovFloat(ctx, dest[1], src2[0]);
MovFloat(ctx, dest[2], src3[0]);
}
void EmitCompositeConstructF32x4(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3, const Operands& src4) {
MovFloat(ctx, dest[0], src1[0]);
MovFloat(ctx, dest[1], src2[0]);
MovFloat(ctx, dest[2], src3[0]);
MovFloat(ctx, dest[3], src4[0]);
}
void EmitCompositeConstructF32x2x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2) {
MovFloat(ctx, dest[0], src1[0]);
MovFloat(ctx, dest[1], src2[0]);
MovFloat(ctx, dest[2], src1[1]);
MovFloat(ctx, dest[3], src2[1]);
}
// Extract a single f32 lane from a composite; the lane index is an immediate.
void EmitCompositeExtractF32x2(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index) {
    MovFloat(ctx, dest[0], composite[index]);
}
void EmitCompositeExtractF32x3(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index) {
    MovFloat(ctx, dest[0], composite[index]);
}
void EmitCompositeExtractF32x4(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index) {
    MovFloat(ctx, dest[0], composite[index]);
}
void EmitCompositeInsertF32x2(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index) {
    // Copy the composite through, replacing the lane selected by index.
    if (index != 0) {
        MovFloat(ctx, dest[0], composite[0]);
        MovFloat(ctx, dest[1], object[0]);
    } else {
        MovFloat(ctx, dest[0], object[0]);
        MovFloat(ctx, dest[1], composite[1]);
    }
}
void EmitCompositeInsertF32x3(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index) {
    // Copy the composite through, replacing the lane selected by index.
    for (u32 i = 0; i < 3; ++i) {
        if (i != index) {
            MovFloat(ctx, dest[i], composite[i]);
        } else {
            MovFloat(ctx, dest[i], object[0]);
        }
    }
}
void EmitCompositeInsertF32x4(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index) {
    // Copy the composite through, replacing the lane selected by index.
    for (u32 i = 0; i < 4; ++i) {
        if (i != index) {
            MovFloat(ctx, dest[i], composite[i]);
        } else {
            MovFloat(ctx, dest[i], object[0]);
        }
    }
}
void EmitCompositeShuffleF32x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2) {
    // Pick each f32 lane from one of the two source composites.
    const u32 lanes[] = {idx1, idx2};
    for (u32 i = 0; i < 2; ++i) {
        MovFloat(ctx, dest[i], GetSuffleOperand<2>(composite1, composite2, lanes[i]));
    }
}
void EmitCompositeShuffleF32x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3) {
    // Pick each f32 lane from one of the two source composites.
    const u32 lanes[] = {idx1, idx2, idx3};
    for (u32 i = 0; i < 3; ++i) {
        MovFloat(ctx, dest[i], GetSuffleOperand<3>(composite1, composite2, lanes[i]));
    }
}
void EmitCompositeShuffleF32x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4) {
    // Pick each f32 lane from one of the two source composites.
    const u32 lanes[] = {idx1, idx2, idx3, idx4};
    for (u32 i = 0; i < 4; ++i) {
        MovFloat(ctx, dest[i], GetSuffleOperand<4>(composite1, composite2, lanes[i]));
    }
}
void EmitCompositeConstructF64x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2) {
    // Gather the first lane of each source into consecutive destination lanes.
    const Operands* srcs[] = {&src1, &src2};
    for (u32 i = 0; i < 2; ++i) {
        MovDouble(ctx, dest[i], (*srcs[i])[0]);
    }
}
void EmitCompositeConstructF64x3(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3) {
    // Gather the first lane of each source into consecutive destination lanes.
    const Operands* srcs[] = {&src1, &src2, &src3};
    for (u32 i = 0; i < 3; ++i) {
        MovDouble(ctx, dest[i], (*srcs[i])[0]);
    }
}
void EmitCompositeConstructF64x4(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3, const Operands& src4) {
    // Gather the first lane of each source into consecutive destination lanes.
    const Operands* srcs[] = {&src1, &src2, &src3, &src4};
    for (u32 i = 0; i < 4; ++i) {
        MovDouble(ctx, dest[i], (*srcs[i])[0]);
    }
}
// Extract a single f64 lane from a composite; the lane index is an immediate.
void EmitCompositeExtractF64x2(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index) {
    MovDouble(ctx, dest[0], composite[index]);
}
void EmitCompositeExtractF64x3(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index) {
    MovDouble(ctx, dest[0], composite[index]);
}
void EmitCompositeExtractF64x4(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index) {
    MovDouble(ctx, dest[0], composite[index]);
}
void EmitCompositeInsertF64x2(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index) {
    // Copy the composite through, replacing the lane selected by index.
    if (index != 0) {
        MovDouble(ctx, dest[0], composite[0]);
        MovDouble(ctx, dest[1], object[0]);
    } else {
        MovDouble(ctx, dest[0], object[0]);
        MovDouble(ctx, dest[1], composite[1]);
    }
}
void EmitCompositeInsertF64x3(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index) {
    // Copy the composite through, replacing the lane selected by index.
    for (u32 i = 0; i < 3; ++i) {
        if (i != index) {
            MovDouble(ctx, dest[i], composite[i]);
        } else {
            MovDouble(ctx, dest[i], object[0]);
        }
    }
}
void EmitCompositeInsertF64x4(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index) {
    // Copy the composite through, replacing the lane selected by index.
    for (u32 i = 0; i < 4; ++i) {
        if (i != index) {
            MovDouble(ctx, dest[i], composite[i]);
        } else {
            MovDouble(ctx, dest[i], object[0]);
        }
    }
}
void EmitCompositeShuffleF64x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2) {
    // Pick each f64 lane from one of the two source composites.
    const u32 lanes[] = {idx1, idx2};
    for (u32 i = 0; i < 2; ++i) {
        MovDouble(ctx, dest[i], GetSuffleOperand<2>(composite1, composite2, lanes[i]));
    }
}
void EmitCompositeShuffleF64x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3) {
    // Pick each f64 lane from one of the two source composites.
    const u32 lanes[] = {idx1, idx2, idx3};
    for (u32 i = 0; i < 3; ++i) {
        MovDouble(ctx, dest[i], GetSuffleOperand<3>(composite1, composite2, lanes[i]));
    }
}
void EmitCompositeShuffleF64x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4) {
    // Pick each f64 lane from one of the two source composites.
    const u32 lanes[] = {idx1, idx2, idx3, idx4};
    for (u32 i = 0; i < 4; ++i) {
        MovDouble(ctx, dest[i], GetSuffleOperand<4>(composite1, composite2, lanes[i]));
    }
}
} // namespace Shader::Backend::X64

View file

@ -0,0 +1,221 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/exception.h"
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/backend/asm_x64/x64_utils.h"
namespace Shader::Backend::X64 {
using namespace Xbyak;
using namespace Xbyak::util;
void EmitGetUserData(EmitContext& ctx, const Operands& dest, IR::ScalarReg reg) {
    // Each user-data slot is one dword; compute its byte offset from the slot index.
    const u32 offset = static_cast<u32>(reg) << 2;
    const Reg addr = ctx.TempGPReg();
    ctx.Code().lea(addr, ptr[ctx.UserData() + offset]);
    MovGP(ctx, dest[0], dword[addr]);
}
void EmitSetUserData(EmitContext& ctx, const Operands& offset, const Operands& value) {
    // Address the selected dword slot: user_data + offset * sizeof(u32).
    const Reg addr = ctx.TempGPReg();
    MovGP(ctx, addr, offset[0]);
    ctx.Code().lea(addr, ptr[ctx.UserData() + addr * 4]);
    MovGP(ctx, dword[addr], value[0]);
}
// The register/goto-variable accessors below are resolved by earlier IR passes
// (SSA rewrite / structurization) and must never reach the backend; reaching
// one of these is a recompiler bug, not a missing feature.
void EmitGetThreadBitScalarReg(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}
void EmitSetThreadBitScalarReg(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}
void EmitGetScalarRegister(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}
void EmitSetScalarRegister(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}
void EmitGetVectorRegister(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}
void EmitSetVectorRegister(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}
void EmitSetGotoVariable(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}
void EmitGetGotoVariable(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}
// Loads a dword through a 64-bit pointer given as two 32-bit halves:
// dest = *(u32*)((base[1] << 32 | base[0]) + offset * 4).
// Destination/offset registers are reused as scratch when they are not in memory.
void EmitReadConst(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset) {
    Reg tmp = dest[0].IsMem() ? ctx.TempGPReg() : dest[0].Reg().changeBit(64);
    Reg off_tmp = offset[0].IsMem() ? ctx.TempGPReg() : offset[0].Reg().changeBit(64);
    MovGP(ctx, tmp, base[1]); // high half of the pointer
    // NOTE(review): if dest and offset were ever allocated to the same register,
    // the write above would clobber the offset before it is read below --
    // presumably the register allocator prevents this; verify.
    MovGP(ctx, off_tmp, offset[0]);
    ctx.Code().shl(tmp, 32);
    ctx.Code().or_(tmp, base[0].Op()); // merge in the low half
    ctx.Code().lea(tmp, ptr[tmp + off_tmp * 4]);
    MovGP(ctx, dest[0], dword[tmp]);
}
// Loads a dword from a buffer resource: dest = *(base + offset * 4).
// The base address is split across the handle's two dwords: the low 32 bits in
// handle[0] and the upper bits in the low 12 bits of handle[1].
void EmitReadConstBuffer(EmitContext& ctx, const Operands& dest, const Operands& handle, const Operands& offset) {
    Reg tmp = dest[0].IsMem() ? ctx.TempGPReg() : dest[0].Reg().changeBit(64);
    Reg off_tmp = ctx.TempGPReg();
    // Reconstruct base address
    MovGP(ctx, tmp, handle[1]);
    ctx.Code().and_(tmp, 0xFFF);
    ctx.Code().shl(tmp, 32);
    // A 32-bit mov implicitly zero-extends into the full 64-bit register, so the
    // previous `and_(off_tmp.cvt32(), 0xFFFFFFFF)` was a no-op and is removed.
    MovGP(ctx, off_tmp.cvt32(), handle[0]);
    ctx.Code().or_(tmp, off_tmp);
    // TODO: we should correctly clamp the offset
    MovGP(ctx, off_tmp, offset[0]);
    ctx.Code().lea(tmp, ptr[tmp + off_tmp * 4]);
    MovGP(ctx, dest[0], dword[tmp]);
}
// Instance step-rate reads are not implemented in the x64 backend yet.
void EmitReadStepRate(EmitContext& ctx) {
    throw NotImplementedException("ReadStepRate");
}
void EmitGetAttribute(EmitContext& ctx, const Operands& dest, const Operands& index) {
    LOG_WARNING(Render_Recompiler, "GetAttribute stubbed, setting to 0.0");
    // Attributes are not wired up yet; materialize a zero of the right class.
    if (!dest[0].IsMem()) {
        ctx.Code().pxor(dest[0].Xmm(), dest[0].Xmm());
    } else {
        ctx.Code().mov(dest[0].Mem(), 0);
    }
}
void EmitGetAttributeU32(EmitContext& ctx, const Operands& dest) {
    LOG_WARNING(Render_Recompiler, "GetAttributeU32 stubbed, setting to 0");
    // Attributes are not wired up yet; materialize a zero of the right class.
    if (!dest[0].IsMem()) {
        ctx.Code().xor_(dest[0].Reg(), dest[0].Reg());
    } else {
        ctx.Code().mov(dest[0].Mem(), 0);
    }
}
// Attribute, tessellation and patch accessors below are not implemented in the
// x64 backend yet; they throw so unsupported shaders fail loudly at recompile
// time instead of miscompiling.
void EmitSetAttribute(EmitContext& ctx) {
    throw NotImplementedException("SetAttribute");
}
void EmitGetTessGenericAttribute(EmitContext& ctx) {
    throw NotImplementedException("GetTessGenericAttribute");
}
void EmitReadTcsGenericOuputAttribute(EmitContext& ctx) {
    throw NotImplementedException("ReadTcsGenericOuputAttribute");
}
void EmitSetTcsGenericAttribute(EmitContext& ctx) {
    throw NotImplementedException("SetTcsGenericAttribute");
}
void EmitGetPatch(EmitContext& ctx) {
    throw NotImplementedException("GetPatch");
}
void EmitSetPatch(EmitContext& ctx) {
    throw NotImplementedException("SetPatch");
}
// Buffer loads/stores are not implemented in the x64 backend yet; each stub
// throws so unsupported shaders fail loudly at recompile time.
void EmitLoadBufferU8(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferU8");
}
void EmitLoadBufferU16(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferU16");
}
void EmitLoadBufferU32(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferU32");
}
void EmitLoadBufferU32x2(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferU32x2");
}
void EmitLoadBufferU32x3(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferU32x3");
}
void EmitLoadBufferU32x4(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferU32x4");
}
void EmitLoadBufferF32(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferF32");
}
void EmitLoadBufferF32x2(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferF32x2");
}
void EmitLoadBufferF32x3(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferF32x3");
}
void EmitLoadBufferF32x4(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferF32x4");
}
void EmitLoadBufferFormatF32(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferFormatF32");
}
void EmitStoreBufferU8(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferU8");
}
void EmitStoreBufferU16(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferU16");
}
void EmitStoreBufferU32(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferU32");
}
void EmitStoreBufferU32x2(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferU32x2");
}
void EmitStoreBufferU32x3(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferU32x3");
}
void EmitStoreBufferU32x4(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferU32x4");
}
void EmitStoreBufferF32(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferF32");
}
void EmitStoreBufferF32x2(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferF32x2");
}
void EmitStoreBufferF32x3(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferF32x3");
}
void EmitStoreBufferF32x4(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferF32x4");
}
void EmitStoreBufferFormatF32(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferFormatF32");
}
} // namespace Shader::Backend::X64

View file

@ -0,0 +1,279 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/backend/asm_x64/x64_utils.h"
namespace Shader::Backend::X64 {
using namespace Xbyak;
using namespace Xbyak::util;
void EmitConvertS16F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // Widen half to single, truncate to integer, keep the low 16 bits.
    Xmm widened = ctx.TempXmmReg();
    Reg result = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg().cvt32();
    EmitInlineF16ToF32(ctx, widened, src[0].Op());
    ctx.Code().cvttss2si(result, widened);
    ctx.Code().and_(result, 0xFFFF);
    MovGP(ctx, dest[0], result);
}
void EmitConvertS16F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // Truncating f32 -> integer conversion, masked to 16 bits.
    Reg result = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg().cvt32();
    ctx.Code().cvttss2si(result, src[0].Op());
    ctx.Code().and_(result, 0xFFFF);
    MovGP(ctx, dest[0], result);
}
void EmitConvertS16F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // Truncating f64 -> integer conversion, masked to 16 bits.
    Reg result = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg().cvt32();
    ctx.Code().cvttsd2si(result, src[0].Op());
    ctx.Code().and_(result, 0xFFFF);
    MovGP(ctx, dest[0], result);
}
// Truncating float -> signed integer conversions (round toward zero).
void EmitConvertS32F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // f16 has no direct conversion; widen to f32 first.
    Xmm tmp_xmm = ctx.TempXmmReg();
    Reg tmp_reg = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
    EmitInlineF16ToF32(ctx, tmp_xmm, src[0].Op());
    ctx.Code().cvttss2si(tmp_reg, tmp_xmm);
    MovGP(ctx, dest[0], tmp_reg);
}
void EmitConvertS32F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
    ctx.Code().cvttss2si(tmp, src[0].Op());
    MovGP(ctx, dest[0], tmp);
}
void EmitConvertS32F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
    ctx.Code().cvttsd2si(tmp, src[0].Op());
    MovGP(ctx, dest[0], tmp);
}
// 64-bit destinations: the temporary stays at its full 64-bit width.
void EmitConvertS64F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm tmp_xmm = ctx.TempXmmReg();
    Reg tmp_reg = dest[0].IsMem() ? ctx.TempGPReg() : dest[0].Reg();
    EmitInlineF16ToF32(ctx, tmp_xmm, src[0].Op());
    ctx.Code().cvttss2si(tmp_reg, tmp_xmm);
    MovGP(ctx, dest[0], tmp_reg);
}
void EmitConvertS64F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg tmp = dest[0].IsMem() ? ctx.TempGPReg() : dest[0].Reg();
    ctx.Code().cvttss2si(tmp, src[0].Op());
    MovGP(ctx, dest[0], tmp);
}
void EmitConvertS64F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg tmp = dest[0].IsMem() ? ctx.TempGPReg() : dest[0].Reg();
    ctx.Code().cvttsd2si(tmp, src[0].Op());
    MovGP(ctx, dest[0], tmp);
}
// Unsigned conversions currently reuse the signed truncating paths.
// NOTE(review): cvttss2si/cvttsd2si are signed conversions; inputs at or above
// the signed maximum presumably yield the integer-indefinite value rather than
// the correct unsigned result -- confirm the expected input range.
void EmitConvertU16F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertS16F16(ctx, dest, src);
}
void EmitConvertU16F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertS16F32(ctx, dest, src);
}
void EmitConvertU16F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertS16F64(ctx, dest, src);
}
void EmitConvertU32F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertS32F16(ctx, dest, src);
}
void EmitConvertU32F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertS32F32(ctx, dest, src);
}
void EmitConvertU32F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertS32F64(ctx, dest, src);
}
void EmitConvertU64F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertS64F16(ctx, dest, src);
}
void EmitConvertU64F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertS64F32(ctx, dest, src);
}
void EmitConvertU64F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertS64F64(ctx, dest, src);
}
// Integer width changes are plain moves; MovGP handles the width difference.
void EmitConvertU64U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    MovGP(ctx, dest[0], src[0]);
}
void EmitConvertU32U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    MovGP(ctx, dest[0], src[0]);
}
// Float <-> float precision conversions. Half conversions go through the
// inline helpers; f32 <-> f64 use the SSE scalar conversion instructions.
void EmitConvertF16F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitInlineF32ToF16(ctx, dest[0].Op(), src[0].Op());
}
void EmitConvertF32F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitInlineF16ToF32(ctx, dest[0].Op(), src[0].Op());
}
void EmitConvertF32F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().cvtsd2ss(tmp, src[0].Op());
    MovFloat(ctx, dest[0], tmp);
}
void EmitConvertF64F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().cvtss2sd(tmp, src[0].Op());
    MovDouble(ctx, dest[0], tmp);
}
void EmitConvertF16S8(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // Narrow integer -> f32 -> f16; the integer is staged in a 32-bit GP register.
    Reg staged = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg().cvt32();
    Xmm as_float = ctx.TempXmmReg();
    MovGP(ctx, staged, src[0]);
    ctx.Code().cvtsi2ss(as_float, staged);
    EmitInlineF32ToF16(ctx, dest[0].Op(), as_float);
}
void EmitConvertF16S16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // Narrow integer -> f32 -> f16; the integer is staged in a 32-bit GP register.
    Reg staged = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg().cvt32();
    Xmm as_float = ctx.TempXmmReg();
    MovGP(ctx, staged, src[0]);
    ctx.Code().cvtsi2ss(as_float, staged);
    EmitInlineF32ToF16(ctx, dest[0].Op(), as_float);
}
void EmitConvertF16S32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // 32-bit integer converts directly; then narrow to f16.
    Xmm as_float = ctx.TempXmmReg();
    ctx.Code().cvtsi2ss(as_float, src[0].Op());
    EmitInlineF32ToF16(ctx, dest[0].Op(), as_float);
}
void EmitConvertF16S64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // 64-bit integer converts directly; then narrow to f16.
    Xmm as_float = ctx.TempXmmReg();
    ctx.Code().cvtsi2ss(as_float, src[0].Op());
    EmitInlineF32ToF16(ctx, dest[0].Op(), as_float);
}
// Unsigned integer -> f16 reuses the signed paths.
// NOTE(review): correct only while inputs fit in the signed range of the staged
// register -- confirm the expected input range.
void EmitConvertF16U8(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF16S8(ctx, dest, src);
}
void EmitConvertF16U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF16S16(ctx, dest, src);
}
void EmitConvertF16U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF16S32(ctx, dest, src);
}
void EmitConvertF16U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF16S64(ctx, dest, src);
}
void EmitConvertF32S8(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // Stage the narrow integer in a 32-bit GP register, then convert to f32.
    Reg staged = ctx.TempGPReg().cvt32();
    Xmm result = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovGP(ctx, staged, src[0]);
    ctx.Code().cvtsi2ss(result, staged);
    MovFloat(ctx, dest[0], result);
}
void EmitConvertF32S16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // Stage the narrow integer in a 32-bit GP register, then convert to f32.
    Reg staged = ctx.TempGPReg().cvt32();
    Xmm result = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovGP(ctx, staged, src[0]);
    ctx.Code().cvtsi2ss(result, staged);
    MovFloat(ctx, dest[0], result);
}
void EmitConvertF32S32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // 32-bit integers convert directly.
    Xmm result = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().cvtsi2ss(result, src[0].Op());
    MovFloat(ctx, dest[0], result);
}
void EmitConvertF32S64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // 64-bit integers convert directly.
    Xmm result = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().cvtsi2ss(result, src[0].Op());
    MovFloat(ctx, dest[0], result);
}
// Unsigned integer -> f32 reuses the signed paths.
// NOTE(review): correct only while inputs fit in the signed range of the staged
// register -- confirm the expected input range.
void EmitConvertF32U8(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF32S8(ctx, dest, src);
}
void EmitConvertF32U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF32S16(ctx, dest, src);
}
void EmitConvertF32U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF32S32(ctx, dest, src);
}
void EmitConvertF32U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF32S64(ctx, dest, src);
}
void EmitConvertF64S8(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // Stage the narrow integer in a 32-bit GP register, then convert to f64.
    Reg staged = ctx.TempGPReg().cvt32();
    Xmm result = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovGP(ctx, staged, src[0]);
    ctx.Code().cvtsi2sd(result, staged);
    MovDouble(ctx, dest[0], result);
}
void EmitConvertF64S16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // Stage the narrow integer in a 32-bit GP register, then convert to f64.
    Reg staged = ctx.TempGPReg().cvt32();
    Xmm result = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovGP(ctx, staged, src[0]);
    ctx.Code().cvtsi2sd(result, staged);
    MovDouble(ctx, dest[0], result);
}
void EmitConvertF64S32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // 32-bit integers convert directly.
    Xmm result = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().cvtsi2sd(result, src[0].Op());
    MovDouble(ctx, dest[0], result);
}
void EmitConvertF64S64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // 64-bit integers convert directly.
    Xmm result = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().cvtsi2sd(result, src[0].Op());
    MovDouble(ctx, dest[0], result);
}
// Unsigned integer -> f64 reuses the signed paths.
// NOTE(review): correct only while inputs fit in the signed range of the staged
// register -- confirm the expected input range.
void EmitConvertF64U8(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF64S8(ctx, dest, src);
}
void EmitConvertF64U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF64S16(ctx, dest, src);
}
void EmitConvertF64U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF64S32(ctx, dest, src);
}
void EmitConvertF64U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF64S64(ctx, dest, src);
}
// Integer width changes are plain moves; MovGP handles the width difference.
void EmitConvertU16U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    MovGP(ctx, dest[0], src[0]);
}
void EmitConvertU32U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    MovGP(ctx, dest[0], src[0]);
}
} // namespace Shader::Backend::X64

View file

@ -0,0 +1,766 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/exception.h"
#include "shader_recompiler/backend/asm_x64/emit_x64_instructions.h"
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/backend/asm_x64/x64_utils.h"
namespace Shader::Backend::X64 {
using namespace Xbyak;
using namespace Xbyak::util;
// |x| for f16 stored in a GP operand: clear the sign bit (bit 15).
void EmitFPAbs16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    MovGP(ctx, dest[0], src[0]);
    ctx.Code().and_(dest[0].Op(), 0x7FFF);
}
// |x| for f32: mask off the sign bit with an andps.
void EmitFPAbs32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // BUG FIX: the mask constant must be built in a 32-bit GP register; this
    // previously requested an XMM temporary (TempXmmReg) for reg_tmp, which
    // cannot be the destination of `mov reg, imm` (cf. EmitFPNeg32).
    Reg reg_tmp = ctx.TempGPReg().cvt32();
    Xmm xmm_tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().mov(reg_tmp, 0x7FFFFFFF);
    ctx.Code().movd(xmm_tmp, reg_tmp);
    ctx.Code().andps(xmm_tmp, src[0].Op());
    MovFloat(ctx, dest[0], xmm_tmp);
}
// |x| for f64: mask off the sign bit with an andpd.
void EmitFPAbs64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg reg_tmp = ctx.TempGPReg();
    Xmm xmm_tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().mov(reg_tmp, 0x7FFFFFFFFFFFFFFF);
    ctx.Code().movq(xmm_tmp, reg_tmp);
    ctx.Code().andpd(xmm_tmp, src[0].Op());
    // BUG FIX: the result is a 64-bit double, so it must be stored with
    // MovDouble; the previous MovFloat would only move 32 bits (cf. EmitFPNeg64).
    MovDouble(ctx, dest[0], xmm_tmp);
}
void EmitFPAdd16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    // Half adds are done in single precision: widen both, add, narrow back.
    Xmm lhs = ctx.TempXmmReg();
    Xmm rhs = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, lhs, op1[0].Op());
    EmitInlineF16ToF32(ctx, rhs, op2[0].Op());
    ctx.Code().addss(lhs, rhs);
    EmitInlineF32ToF16(ctx, dest[0].Op(), lhs);
}
void EmitFPAdd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    // Accumulate in the destination register when possible.
    Xmm acc = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovFloat(ctx, acc, op1[0]);
    ctx.Code().addss(acc, op2[0].Op());
    MovFloat(ctx, dest[0], acc);
}
void EmitFPAdd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    // Accumulate in the destination register when possible.
    Xmm acc = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovDouble(ctx, acc, op1[0]);
    ctx.Code().addsd(acc, op2[0].Op());
    MovDouble(ctx, dest[0], acc);
}
void EmitFPSub32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    // dest = op1 - op2, accumulating in the destination register when possible.
    Xmm acc = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovFloat(ctx, acc, op1[0]);
    ctx.Code().subss(acc, op2[0].Op());
    MovFloat(ctx, dest[0], acc);
}
// dest = fma(op1, op2, op3) = op1 * op2 + op3, computed in single precision.
void EmitFPFma16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
    Xmm tmp1 = ctx.TempXmmReg();
    Xmm tmp2 = ctx.TempXmmReg();
    Xmm tmp3 = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, tmp1, op1[0].Op());
    EmitInlineF16ToF32(ctx, tmp2, op2[0].Op());
    EmitInlineF16ToF32(ctx, tmp3, op3[0].Op());
    // vfmadd132ss computes dst = dst * src3 + src2, so this yields
    // tmp1 = op1 * op2 + op3. BUG FIX: the previous operand order
    // (tmp3, tmp1, tmp2) computed op3 * op2 + op1 instead (cf. EmitFPFma32).
    ctx.Code().vfmadd132ss(tmp1, tmp3, tmp2);
    EmitInlineF32ToF16(ctx, dest[0].Op(), tmp1);
}
// dest = op1 * op2 + op3 (vfmadd132ss: tmp2 = tmp2 * op1 + tmp1).
void EmitFPFma32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
    Xmm tmp1 = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    Xmm tmp2 = op2[0].IsMem() ? ctx.TempXmmReg() : op2[0].Xmm();
    MovFloat(ctx, tmp1, op3[0]);
    MovFloat(ctx, tmp2, op2[0]);
    // NOTE(review): when op2 lives in a register, tmp2 aliases it and the FMA
    // overwrites that register -- presumably source operands are dead after
    // their use; verify against the register allocator's guarantees.
    ctx.Code().vfmadd132ss(tmp2, tmp1, op1[0].Op());
    MovFloat(ctx, dest[0], tmp2);
}
// dest = op1 * op2 + op3 (vfmadd132sd: tmp2 = tmp2 * op1 + tmp1).
void EmitFPFma64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
    Xmm tmp1 = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    Xmm tmp2 = op2[0].IsMem() ? ctx.TempXmmReg() : op2[0].Xmm();
    MovDouble(ctx, tmp1, op3[0]);
    MovDouble(ctx, tmp2, op2[0]);
    // NOTE(review): same potential op2 register clobber as EmitFPFma32.
    ctx.Code().vfmadd132sd(tmp2, tmp1, op1[0].Op());
    MovDouble(ctx, dest[0], tmp2);
}
void EmitFPMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, bool is_legacy) {
    if (is_legacy) {
        // Legacy (pre-IEEE) max: when an operand is NaN the result follows op2
        // instead of propagating NaN like a plain maxss would for op1.
        Xmm tmp1 = ctx.TempXmmReg();
        Xmm tmp2 = ctx.TempXmmReg();
        MovFloat(ctx, tmp1, op1[0].Op());
        MovFloat(ctx, tmp2, op1[0].Op());
        ctx.Code().maxss(tmp2, op2[0].Op());   // maxss returns the source (op2) if either is NaN
        ctx.Code().cmpunordss(tmp1, tmp1);     // all-ones mask when op1 is NaN
        ctx.Code().andps(tmp1, op2[0].Op());   // op2 when op1 is NaN, else zero
        ctx.Code().orps(tmp2, tmp1);
        MovFloat(ctx, dest[0], tmp2);
    } else {
        Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
        MovFloat(ctx, tmp, op1[0]);
        ctx.Code().maxss(tmp, op2[0].Op());
        MovFloat(ctx, dest[0], tmp);
    }
}
void EmitFPMax64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovDouble(ctx, tmp, op1[0]);
    ctx.Code().maxsd(tmp, op2[0].Op());
    MovDouble(ctx, dest[0], tmp);
}
void EmitFPMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, bool is_legacy) {
    if (is_legacy) {
        // Legacy (pre-IEEE) min: mirror of the legacy max NaN handling above.
        Xmm tmp1 = ctx.TempXmmReg();
        Xmm tmp2 = ctx.TempXmmReg();
        MovFloat(ctx, tmp1, op1[0].Op());
        MovFloat(ctx, tmp2, op1[0].Op());
        ctx.Code().minss(tmp2, op2[0].Op());   // minss returns the source (op2) if either is NaN
        ctx.Code().cmpunordss(tmp1, tmp1);     // all-ones mask when op1 is NaN
        ctx.Code().andps(tmp1, op2[0].Op());   // op2 when op1 is NaN, else zero
        ctx.Code().orps(tmp2, tmp1);
        MovFloat(ctx, dest[0], tmp2);
    } else {
        Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
        MovFloat(ctx, tmp, op1[0]);
        ctx.Code().minss(tmp, op2[0].Op());
        MovFloat(ctx, dest[0], tmp);
    }
}
void EmitFPMin64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovDouble(ctx, tmp, op1[0]);
    ctx.Code().minsd(tmp, op2[0].Op());
    MovDouble(ctx, dest[0], tmp);
}
// dest = min(op1, op2, op3).
void EmitFPMinTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovFloat(ctx, tmp, op1[0]);
    ctx.Code().minss(tmp, op2[0].Op());
    ctx.Code().minss(tmp, op3[0].Op());
    MovFloat(ctx, dest[0], tmp);
}
// dest = max(op1, op2, op3).
void EmitFPMaxTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovFloat(ctx, tmp, op1[0]);
    ctx.Code().maxss(tmp, op2[0].Op());
    ctx.Code().maxss(tmp, op3[0].Op());
    MovFloat(ctx, dest[0], tmp);
}
// dest = median(op1, op2, op3) = max(min(op1, op2), min(max(op1, op2), op3)).
void EmitFPMedTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    Xmm tmp2 = ctx.TempXmmReg();
    MovFloat(ctx, tmp2, op1[0]);
    ctx.Code().maxss(tmp2, op2[0].Op());   // tmp2 = max(op1, op2)
    ctx.Code().minss(tmp2, op3[0].Op());   // tmp2 = min(max(op1, op2), op3)
    MovFloat(ctx, tmp, op1[0]);
    ctx.Code().minss(tmp, op2[0].Op());    // tmp = min(op1, op2)
    ctx.Code().maxss(tmp, tmp2);           // tmp = median
    MovFloat(ctx, dest[0], tmp);
}
void EmitFPMul16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    // Half multiplies are done in single precision: widen, multiply, narrow back.
    Xmm lhs = ctx.TempXmmReg();
    Xmm rhs = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, lhs, op1[0].Op());
    EmitInlineF16ToF32(ctx, rhs, op2[0].Op());
    ctx.Code().mulss(lhs, rhs);
    EmitInlineF32ToF16(ctx, dest[0].Op(), lhs);
}
void EmitFPMul32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    // Accumulate in the destination register when possible.
    Xmm acc = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovFloat(ctx, acc, op1[0]);
    ctx.Code().mulss(acc, op2[0].Op());
    MovFloat(ctx, dest[0], acc);
}
void EmitFPMul64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    // Accumulate in the destination register when possible.
    Xmm acc = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovDouble(ctx, acc, op1[0]);
    ctx.Code().mulsd(acc, op2[0].Op());
    MovDouble(ctx, dest[0], acc);
}
void EmitFPDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    // dest = op1 / op2.
    Xmm acc = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovFloat(ctx, acc, op1[0]);
    ctx.Code().divss(acc, op2[0].Op());
    MovFloat(ctx, dest[0], acc);
}
void EmitFPDiv64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    // dest = op1 / op2.
    Xmm acc = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovDouble(ctx, acc, op1[0]);
    ctx.Code().divsd(acc, op2[0].Op());
    MovDouble(ctx, dest[0], acc);
}
// -x for f16 stored in a GP operand: flip the sign bit (bit 15).
void EmitFPNeg16(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    MovGP(ctx, dest[0], op1[0]);
    ctx.Code().xor_(dest[0].Op(), 0x8000);
}
void EmitFPNeg32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    // -x for f32: flip the sign bit with an xorps against 0x80000000.
    Xmm result = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    Reg sign_bit = ctx.TempGPReg().cvt32();
    ctx.Code().mov(sign_bit, 0x80000000);
    ctx.Code().movd(result, sign_bit);
    ctx.Code().xorps(result, op1[0].Op());
    MovFloat(ctx, dest[0], result);
}
// -x for f64: flip the sign bit with an xorpd against 0x8000000000000000.
void EmitFPNeg64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm tmp_xmm = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    // BUG FIX: the 64-bit immediate must be built in a GP register; this
    // previously requested an XMM temporary (TempXmmReg) for tmp_reg, which
    // cannot be the destination of `mov reg, imm64` (cf. EmitFPAbs64).
    Reg tmp_reg = ctx.TempGPReg();
    ctx.Code().mov(tmp_reg, 0x8000000000000000);
    ctx.Code().movq(tmp_xmm, tmp_reg);
    ctx.Code().xorpd(tmp_xmm, op1[0].Op());
    MovDouble(ctx, dest[0], tmp_xmm);
}
// Transcendental operations are not implemented in the x64 backend yet; each
// stub throws so unsupported shaders fail loudly at recompile time.
void EmitFPSin(EmitContext& ctx) {
    throw NotImplementedException("FPSin");
}
void EmitFPCos(EmitContext& ctx) {
    throw NotImplementedException("FPCos");
}
void EmitFPExp2(EmitContext& ctx) {
    throw NotImplementedException("FPExp2");
}
void EmitFPLdexp(EmitContext& ctx) {
    throw NotImplementedException("FPLdexp");
}
void EmitFPLog2(EmitContext& ctx) {
    throw NotImplementedException("FPLog2");
}
// Approximate 1/x using rcpss (limited precision, per the SSE spec).
void EmitFPRecip32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().rcpss(tmp, op1[0].Op());
    MovFloat(ctx, dest[0], tmp);
}
// Exact 1/x for doubles: there is no rcpsd, so compute 1.0 / x with divsd.
void EmitFPRecip64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm tmp_xmm = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    Reg tmp_reg = ctx.TempGPReg();
    ctx.Code().mov(tmp_reg, 1);
    ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
    ctx.Code().divsd(tmp_xmm, op1[0].Op());
    MovDouble(ctx, dest[0], tmp_xmm);
}
// Approximate 1/sqrt(x) using rsqrtss (limited precision, per the SSE spec).
void EmitFPRecipSqrt32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().rsqrtss(tmp, op1[0].Op());
    MovFloat(ctx, dest[0], tmp);
}
// 1/sqrt(x) for doubles, computed as sqrt(1.0 / x) (mathematically equivalent).
void EmitFPRecipSqrt64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm tmp_xmm = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    Reg tmp_reg = ctx.TempGPReg();
    ctx.Code().mov(tmp_reg, 1);
    ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
    ctx.Code().divsd(tmp_xmm, op1[0].Op());
    ctx.Code().sqrtsd(tmp_xmm, tmp_xmm);
    MovDouble(ctx, dest[0], tmp_xmm);
}
// Exact single-precision square root.
void EmitFPSqrt(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().sqrtss(tmp, op1[0].Op());
    MovFloat(ctx, dest[0], tmp);
}
// Saturation (clamp to [0, 1]) is not implemented in the x64 backend yet.
void EmitFPSaturate16(EmitContext& ctx) {
    throw NotImplementedException("FPSaturate16");
}
void EmitFPSaturate32(EmitContext& ctx) {
    throw NotImplementedException("FPSaturate32");
}
void EmitFPSaturate64(EmitContext& ctx) {
    throw NotImplementedException("FPSaturate64");
}
// Clamp f16: widen op/min/max to f32, clamp as max(min_v, min(x, max_v)),
// then narrow the result back to f16.
// NOTE(review): maxss/minss return the second operand when either input is
// NaN — confirm that NaN propagation here matches the IR's clamp semantics.
void EmitFPClamp16(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max) {
Xmm tmp1 = ctx.TempXmmReg();
Xmm tmp2 = ctx.TempXmmReg();
Xmm tmp3 = ctx.TempXmmReg();
EmitInlineF16ToF32(ctx, tmp1, op[0].Op());
EmitInlineF16ToF32(ctx, tmp2, min[0].Op());
EmitInlineF16ToF32(ctx, tmp3, max[0].Op());
ctx.Code().maxss(tmp1, tmp2);
ctx.Code().minss(tmp1, tmp3);
EmitInlineF32ToF16(ctx, dest[0].Op(), tmp1);
}
// Clamp f32: dest = min(max(op, min), max).
void EmitFPClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max) {
Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
MovFloat(ctx, tmp, op[0]);
ctx.Code().maxss(tmp, min[0].Op());
ctx.Code().minss(tmp, max[0].Op());
MovFloat(ctx, dest[0], tmp);
}
// Clamp f64: dest = min(max(op, min), max).
void EmitFPClamp64(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max) {
Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
MovDouble(ctx, tmp, op[0]);
ctx.Code().maxsd(tmp, min[0].Op());
ctx.Code().minsd(tmp, max[0].Op());
MovDouble(ctx, dest[0], tmp);
}
// Round-to-nearest-even family: roundss/roundsd with imm8 = 0x00 selects
// round-to-nearest-even. The f16 variant widens to f32 first.
void EmitFPRoundEven16(EmitContext& ctx, const Operands& dest, const Operands& op1) {
Xmm tmp = ctx.TempXmmReg();
EmitInlineF16ToF32(ctx, tmp, op1[0].Op());
ctx.Code().roundss(tmp, tmp, 0x00);
EmitInlineF32ToF16(ctx, dest[0].Op(), tmp);
}
void EmitFPRoundEven32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
ctx.Code().roundss(tmp, op1[0].Op(), 0x00);
MovFloat(ctx, dest[0], tmp);
}
void EmitFPRoundEven64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
ctx.Code().roundsd(tmp, op1[0].Op(), 0x00);
MovDouble(ctx, dest[0], tmp);
}
// Floor family: imm8 = 0x01 selects round-toward-negative-infinity.
void EmitFPFloor16(EmitContext& ctx, const Operands& dest, const Operands& op1) {
Xmm tmp = ctx.TempXmmReg();
EmitInlineF16ToF32(ctx, tmp, op1[0].Op());
ctx.Code().roundss(tmp, tmp, 0x01);
EmitInlineF32ToF16(ctx, dest[0].Op(), tmp);
}
void EmitFPFloor32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
ctx.Code().roundss(tmp, op1[0].Op(), 0x01);
MovFloat(ctx, dest[0], tmp);
}
void EmitFPFloor64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
ctx.Code().roundsd(tmp, op1[0].Op(), 0x01);
MovDouble(ctx, dest[0], tmp);
}
// Ceil family: imm8 = 0x02 selects round-toward-positive-infinity.
void EmitFPCeil16(EmitContext& ctx, const Operands& dest, const Operands& op1) {
Xmm tmp = ctx.TempXmmReg();
EmitInlineF16ToF32(ctx, tmp, op1[0].Op());
ctx.Code().roundss(tmp, tmp, 0x02);
EmitInlineF32ToF16(ctx, dest[0].Op(), tmp);
}
void EmitFPCeil32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
ctx.Code().roundss(tmp, op1[0].Op(), 0x02);
MovFloat(ctx, dest[0], tmp);
}
void EmitFPCeil64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
ctx.Code().roundsd(tmp, op1[0].Op(), 0x02);
MovDouble(ctx, dest[0], tmp);
}
// Round-toward-zero (truncate) family.
// Bug fix: the previous implementation round-tripped the value through a
// signed 32/64-bit integer (cvttss2si/cvttsd2si then back), which produces
// the INT_MIN "integer indefinite" sentinel — and thus a wrong float result —
// for NaN and for any magnitude outside the signed integer range.
// roundss/roundsd with imm8 = 0x03 (round-toward-zero) truncates directly,
// matching the Floor (0x01) and Ceil (0x02) emitters above.
void EmitFPTrunc16(EmitContext& ctx, const Operands& dest, const Operands& op) {
    // f16 is widened to f32, truncated, then narrowed back.
    Xmm tmp = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, tmp, op[0].Op());
    ctx.Code().roundss(tmp, tmp, 0x03);
    EmitInlineF32ToF16(ctx, dest[0].Op(), tmp);
}
void EmitFPTrunc32(EmitContext& ctx, const Operands& dest, const Operands& op) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().roundss(tmp, op[0].Op(), 0x03);
    MovFloat(ctx, dest[0], tmp);
}
void EmitFPTrunc64(EmitContext& ctx, const Operands& dest, const Operands& op) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().roundsd(tmp, op[0].Op(), 0x03);
    MovDouble(ctx, dest[0], tmp);
}
// Fractional part, f32: fract(x) = x - floor(x), using roundss imm8 = 0x01
// (round down) for the floor.
void EmitFPFract32(EmitContext& ctx, const Operands& dest, const Operands& op) {
Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
Xmm tmp2 = ctx.TempXmmReg();
MovFloat(ctx, tmp, op[0]);
ctx.Code().roundss(tmp2, tmp, 0x01);
ctx.Code().subss(tmp, tmp2);
MovFloat(ctx, dest[0], tmp);
}
// Fractional part, f64: fract(x) = x - floor(x).
void EmitFPFract64(EmitContext& ctx, const Operands& dest, const Operands& op) {
Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
Xmm tmp2 = ctx.TempXmmReg();
MovDouble(ctx, tmp, op[0]);
ctx.Code().roundsd(tmp2, tmp, 0x01);
ctx.Code().subsd(tmp, tmp2);
MovDouble(ctx, dest[0], tmp);
}
// frexp decomposition (significand/exponent) is not implemented yet.
void EmitFPFrexpSig32(EmitContext& ctx) {
throw NotImplementedException("FPFrexpSig32");
}
void EmitFPFrexpSig64(EmitContext& ctx) {
throw NotImplementedException("FPFrexpSig64");
}
void EmitFPFrexpExp32(EmitContext& ctx) {
throw NotImplementedException("FPFrexpExp32");
}
void EmitFPFrexpExp64(EmitContext& ctx) {
throw NotImplementedException("FPFrexpExp64");
}
// Ordered comparisons are built on the unordered variants: ucomiss/ucomisd
// set PF=1 when either operand is NaN (unordered), and the setcc emitted by
// the unordered variant does not modify flags, so jnp still sees ucomiss's
// parity flag here. On NaN the result byte is forced to 0.
void EmitFPOrdEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Label not_nan;
EmitFPUnordEqual16(ctx, dest, lhs, rhs);
ctx.Code().jnp(not_nan);
ctx.Code().mov(dest[0].Op(), 0);
ctx.Code().L(not_nan);
}
void EmitFPOrdEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Label not_nan;
EmitFPUnordEqual32(ctx, dest, lhs, rhs);
ctx.Code().jnp(not_nan);
ctx.Code().mov(dest[0].Op(), 0);
ctx.Code().L(not_nan);
}
void EmitFPOrdEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Label not_nan;
EmitFPUnordEqual64(ctx, dest, lhs, rhs);
ctx.Code().jnp(not_nan);
ctx.Code().mov(dest[0].Op(), 0);
ctx.Code().L(not_nan);
}
// Unordered equal: ucomiss sets ZF=1 when equal OR unordered, so sete also
// yields true when either operand is NaN. f16 operands are widened to f32.
void EmitFPUnordEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Xmm tmp1 = ctx.TempXmmReg();
Xmm tmp2 = ctx.TempXmmReg();
EmitInlineF16ToF32(ctx, tmp1, lhs[0].Op());
EmitInlineF16ToF32(ctx, tmp2, rhs[0].Op());
ctx.Code().ucomiss(tmp1, tmp2);
ctx.Code().sete(dest[0].Op());
}
void EmitFPUnordEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
MovFloat(ctx, tmp, lhs[0]);
ctx.Code().ucomiss(tmp, rhs[0].Op());
ctx.Code().sete(dest[0].Op());
}
void EmitFPUnordEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
MovDouble(ctx, tmp, lhs[0]);
ctx.Code().ucomisd(tmp, rhs[0].Op());
ctx.Code().sete(dest[0].Op());
}
// Ordered not-equal: unordered compare, then force 0 on NaN (PF=1).
void EmitFPOrdNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Label not_nan;
EmitFPUnordNotEqual16(ctx, dest, lhs, rhs);
ctx.Code().jnp(not_nan);
ctx.Code().mov(dest[0].Op(), 0);
ctx.Code().L(not_nan);
}
// Ordered not-equal, f32: unordered compare, then force 0 on NaN (PF=1).
// Bug fix: the unordered comparison was never emitted — the function
// allocated an unused scratch XMM and jnp tested whatever flags were left
// over from earlier code. Mirror the 16/64-bit variants exactly.
void EmitFPOrdNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label not_nan;
    EmitFPUnordNotEqual32(ctx, dest, lhs, rhs);
    ctx.Code().jnp(not_nan);
    ctx.Code().mov(dest[0].Op(), 0);
    ctx.Code().L(not_nan);
}
// Ordered not-equal, f64: unordered compare, then force 0 on NaN (PF=1).
void EmitFPOrdNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Label not_nan;
EmitFPUnordNotEqual64(ctx, dest, lhs, rhs);
ctx.Code().jnp(not_nan);
ctx.Code().mov(dest[0].Op(), 0);
ctx.Code().L(not_nan);
}
// Unordered not-equal via ucomiss/ucomisd + setne.
// NOTE(review): ucomiss sets ZF=1 on unordered inputs, so setne yields
// false for NaN — confirm this matches the IR's "unordered" semantics.
void EmitFPUnordNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Xmm tmp1 = ctx.TempXmmReg();
Xmm tmp2 = ctx.TempXmmReg();
EmitInlineF16ToF32(ctx, tmp1, lhs[0].Op());
EmitInlineF16ToF32(ctx, tmp2, rhs[0].Op());
ctx.Code().ucomiss(tmp1, tmp2);
ctx.Code().setne(dest[0].Op());
}
void EmitFPUnordNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
MovFloat(ctx, tmp, lhs[0]);
ctx.Code().ucomiss(tmp, rhs[0].Op());
ctx.Code().setne(dest[0].Op());
}
void EmitFPUnordNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
MovDouble(ctx, tmp, lhs[0]);
ctx.Code().ucomisd(tmp, rhs[0].Op());
ctx.Code().setne(dest[0].Op());
}
// Ordered less-than: unordered compare, then force 0 on NaN (PF=1).
void EmitFPOrdLessThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Label not_nan;
EmitFPUnordLessThan16(ctx, dest, lhs, rhs);
ctx.Code().jnp(not_nan);
ctx.Code().mov(dest[0].Op(), 0);
ctx.Code().L(not_nan);
}
void EmitFPOrdLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Label not_nan;
EmitFPUnordLessThan32(ctx, dest, lhs, rhs);
ctx.Code().jnp(not_nan);
ctx.Code().mov(dest[0].Op(), 0);
ctx.Code().L(not_nan);
}
void EmitFPOrdLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Label not_nan;
EmitFPUnordLessThan64(ctx, dest, lhs, rhs);
ctx.Code().jnp(not_nan);
ctx.Code().mov(dest[0].Op(), 0);
ctx.Code().L(not_nan);
}
// Unordered less-than: ucomiss sets CF on "below", so setb reads lhs < rhs.
void EmitFPUnordLessThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Xmm tmp1 = ctx.TempXmmReg();
Xmm tmp2 = ctx.TempXmmReg();
EmitInlineF16ToF32(ctx, tmp1, lhs[0].Op());
EmitInlineF16ToF32(ctx, tmp2, rhs[0].Op());
ctx.Code().ucomiss(tmp1, tmp2);
ctx.Code().setb(dest[0].Op());
}
void EmitFPUnordLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
MovFloat(ctx, tmp, lhs[0]);
ctx.Code().ucomiss(tmp, rhs[0].Op());
ctx.Code().setb(dest[0].Op());
}
void EmitFPUnordLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
MovDouble(ctx, tmp, lhs[0]);
ctx.Code().ucomisd(tmp, rhs[0].Op());
ctx.Code().setb(dest[0].Op());
}
// Ordered greater-than: unordered compare, then force 0 on NaN (PF=1).
void EmitFPOrdGreaterThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Label not_nan;
EmitFPUnordGreaterThan16(ctx, dest, lhs, rhs);
ctx.Code().jnp(not_nan);
ctx.Code().mov(dest[0].Op(), 0);
ctx.Code().L(not_nan);
}
void EmitFPOrdGreaterThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Label not_nan;
EmitFPUnordGreaterThan32(ctx, dest, lhs, rhs);
ctx.Code().jnp(not_nan);
ctx.Code().mov(dest[0].Op(), 0);
ctx.Code().L(not_nan);
}
void EmitFPOrdGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Label not_nan;
EmitFPUnordGreaterThan64(ctx, dest, lhs, rhs);
ctx.Code().jnp(not_nan);
ctx.Code().mov(dest[0].Op(), 0);
ctx.Code().L(not_nan);
}
// Unordered greater-than: seta reads the "above" condition from ucomiss.
void EmitFPUnordGreaterThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Xmm tmp1 = ctx.TempXmmReg();
Xmm tmp2 = ctx.TempXmmReg();
EmitInlineF16ToF32(ctx, tmp1, lhs[0].Op());
EmitInlineF16ToF32(ctx, tmp2, rhs[0].Op());
ctx.Code().ucomiss(tmp1, tmp2);
ctx.Code().seta(dest[0].Op());
}
void EmitFPUnordGreaterThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
MovFloat(ctx, tmp, lhs[0]);
ctx.Code().ucomiss(tmp, rhs[0].Op());
ctx.Code().seta(dest[0].Op());
}
void EmitFPUnordGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
MovDouble(ctx, tmp, lhs[0]);
ctx.Code().ucomisd(tmp, rhs[0].Op());
ctx.Code().seta(dest[0].Op());
}
// Ordered less-than-or-equal: unordered compare, then force 0 on NaN (PF=1).
void EmitFPOrdLessThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Label not_nan;
EmitFPUnordLessThanEqual16(ctx, dest, lhs, rhs);
ctx.Code().jnp(not_nan);
ctx.Code().mov(dest[0].Op(), 0);
ctx.Code().L(not_nan);
}
void EmitFPOrdLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Label not_nan;
EmitFPUnordLessThanEqual32(ctx, dest, lhs, rhs);
ctx.Code().jnp(not_nan);
ctx.Code().mov(dest[0].Op(), 0);
ctx.Code().L(not_nan);
}
void EmitFPOrdLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Label not_nan;
EmitFPUnordLessThanEqual64(ctx, dest, lhs, rhs);
ctx.Code().jnp(not_nan);
ctx.Code().mov(dest[0].Op(), 0);
ctx.Code().L(not_nan);
}
// Unordered less-than-or-equal: setbe reads "below or equal" from ucomiss.
void EmitFPUnordLessThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Xmm tmp1 = ctx.TempXmmReg();
Xmm tmp2 = ctx.TempXmmReg();
EmitInlineF16ToF32(ctx, tmp1, lhs[0].Op());
EmitInlineF16ToF32(ctx, tmp2, rhs[0].Op());
ctx.Code().ucomiss(tmp1, tmp2);
ctx.Code().setbe(dest[0].Op());
}
void EmitFPUnordLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
MovFloat(ctx, tmp, lhs[0]);
ctx.Code().ucomiss(tmp, rhs[0].Op());
ctx.Code().setbe(dest[0].Op());
}
void EmitFPUnordLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
MovDouble(ctx, tmp, lhs[0]);
ctx.Code().ucomisd(tmp, rhs[0].Op());
ctx.Code().setbe(dest[0].Op());
}
// Ordered greater-than-or-equal: unordered compare, then force 0 on NaN.
void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Label not_nan;
EmitFPUnordGreaterThanEqual16(ctx, dest, lhs, rhs);
ctx.Code().jnp(not_nan);
ctx.Code().mov(dest[0].Op(), 0);
ctx.Code().L(not_nan);
}
void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Label not_nan;
EmitFPUnordGreaterThanEqual32(ctx, dest, lhs, rhs);
ctx.Code().jnp(not_nan);
ctx.Code().mov(dest[0].Op(), 0);
ctx.Code().L(not_nan);
}
void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Label not_nan;
EmitFPUnordGreaterThanEqual64(ctx, dest, lhs, rhs);
ctx.Code().jnp(not_nan);
ctx.Code().mov(dest[0].Op(), 0);
ctx.Code().L(not_nan);
}
// Unordered greater-than-or-equal: setae reads "above or equal" from ucomiss.
void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Xmm tmp1 = ctx.TempXmmReg();
Xmm tmp2 = ctx.TempXmmReg();
EmitInlineF16ToF32(ctx, tmp1, lhs[0].Op());
EmitInlineF16ToF32(ctx, tmp2, rhs[0].Op());
ctx.Code().ucomiss(tmp1, tmp2);
ctx.Code().setae(dest[0].Op());
}
void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
MovFloat(ctx, tmp, lhs[0]);
ctx.Code().ucomiss(tmp, rhs[0].Op());
ctx.Code().setae(dest[0].Op());
}
void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
MovDouble(ctx, tmp, lhs[0]);
ctx.Code().ucomisd(tmp, rhs[0].Op());
ctx.Code().setae(dest[0].Op());
}
// NaN test, f16: ucomiss of a value with itself sets PF iff it is NaN.
void EmitFPIsNan16(EmitContext& ctx, const Operands& dest, const Operands& op) {
Xmm tmp = ctx.TempXmmReg();
EmitInlineF16ToF32(ctx, tmp, op[0].Op());
ctx.Code().ucomiss(tmp, tmp);
ctx.Code().setp(dest[0].Op());
}
// NaN test, f32: ucomiss of a value with itself sets PF iff it is NaN.
// Bug fix: the scratch XMM was chosen from dest, but dest holds the boolean
// result (written via setp) and is not a float/XMM value; select the scratch
// from the float source operand instead, exactly as the comparison emitters
// above do with lhs.
void EmitFPIsNan32(EmitContext& ctx, const Operands& dest, const Operands& op) {
    Xmm tmp = op[0].IsMem() ? ctx.TempXmmReg() : op[0].Xmm();
    MovFloat(ctx, tmp, op[0]);
    ctx.Code().ucomiss(tmp, tmp);
    ctx.Code().setp(dest[0].Op());
}
// NaN test, f64: same as above with ucomisd.
void EmitFPIsNan64(EmitContext& ctx, const Operands& dest, const Operands& op) {
    Xmm tmp = op[0].IsMem() ? ctx.TempXmmReg() : op[0].Xmm();
    MovDouble(ctx, tmp, op[0]);
    ctx.Code().ucomisd(tmp, tmp);
    ctx.Code().setp(dest[0].Op());
}
// Infinity tests are not implemented in the x64 backend yet.
void EmitFPIsInf32(EmitContext& ctx) {
throw NotImplementedException("FPIsInf32");
}
void EmitFPIsInf64(EmitContext& ctx) {
throw NotImplementedException("FPIsInf64");
}
// FPCmpClass32 is expected to be lowered before reaching the backend.
void EmitFPCmpClass32(EmitContext&) {
UNREACHABLE();
}
} // namespace Shader::Backend::X64

View file

@ -0,0 +1,62 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/exception.h"
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
namespace Shader::Backend::X64 {
// Image operations are not implemented in the x64 backend yet; each emitter
// aborts recompilation with NotImplementedException.
void EmitImageSampleRaw(EmitContext& ctx) {
// Still reachable: the resource tracking pass has not been run yet at this
// point, so raw sample ops may survive into the backend.
throw NotImplementedException("ImageSampleRaw");
}
void EmitImageSampleImplicitLod(EmitContext& ctx) {
throw NotImplementedException("ImageSampleImplicitLod");
}
void EmitImageSampleExplicitLod(EmitContext& ctx) {
throw NotImplementedException("ImageSampleExplicitLod");
}
void EmitImageSampleDrefImplicitLod(EmitContext& ctx) {
throw NotImplementedException("ImageSampleDrefImplicitLod");
}
void EmitImageSampleDrefExplicitLod(EmitContext& ctx) {
throw NotImplementedException("ImageSampleDrefExplicitLod");
}
void EmitImageGather(EmitContext& ctx) {
throw NotImplementedException("ImageGather");
}
void EmitImageGatherDref(EmitContext& ctx) {
throw NotImplementedException("ImageGatherDref");
}
void EmitImageQueryDimensions(EmitContext& ctx) {
throw NotImplementedException("ImageQueryDimensions");
}
void EmitImageQueryLod(EmitContext& ctx) {
throw NotImplementedException("ImageQueryLod");
}
void EmitImageGradient(EmitContext& ctx) {
throw NotImplementedException("ImageGradient");
}
void EmitImageRead(EmitContext& ctx) {
throw NotImplementedException("ImageRead");
}
void EmitImageWrite(EmitContext& ctx) {
throw NotImplementedException("ImageWrite");
}
void EmitCubeFaceIndex(EmitContext& ctx) {
throw NotImplementedException("CubeFaceIndex");
}
} // namespace Shader::Backend::X64

View file

@ -0,0 +1,482 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <boost/container/static_vector.hpp>
#include <xbyak/xbyak.h>
#include "common/types.h"
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
namespace Shader::IR {
enum class Attribute : u64;
enum class ScalarReg : u32;
enum class Patch : u64;
class Inst;
class Value;
} // namespace Shader::IR
namespace Shader::Backend::X64 {
// Microinstruction emitters
void EmitPhi(EmitContext& ctx);
void EmitVoid(EmitContext& ctx);
void EmitIdentity(EmitContext& ctx);
void EmitConditionRef(EmitContext& ctx);
void EmitReference(EmitContext&);
void EmitPhiMove(EmitContext&);
void EmitJoin(EmitContext& ctx);
void EmitGetScc(EmitContext& ctx);
void EmitGetExec(EmitContext& ctx);
void EmitGetVcc(EmitContext& ctx);
void EmitGetSccLo(EmitContext& ctx);
void EmitGetVccLo(EmitContext& ctx);
void EmitGetVccHi(EmitContext& ctx);
void EmitGetM0(EmitContext& ctx);
void EmitSetScc(EmitContext& ctx);
void EmitSetExec(EmitContext& ctx);
void EmitSetVcc(EmitContext& ctx);
void EmitSetSccLo(EmitContext& ctx);
void EmitSetVccLo(EmitContext& ctx);
void EmitSetVccHi(EmitContext& ctx);
void EmitSetM0(EmitContext& ctx);
void EmitFPCmpClass32(EmitContext& ctx);
void EmitPrologue(EmitContext& ctx);
void EmitEpilogue(EmitContext& ctx);
void EmitDiscard(EmitContext& ctx);
void EmitDiscardCond(EmitContext& ctx, const Operands& condition);
void EmitDebugPrint(EmitContext& ctx);
void EmitBarrier(EmitContext& ctx);
void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
void EmitDeviceMemoryBarrier(EmitContext& ctx);
void EmitGetUserData(EmitContext& ctx, const Operands& dest, IR::ScalarReg reg);
void EmitSetUserData(EmitContext& ctx, const Operands& offset, const Operands& value);
void EmitGetThreadBitScalarReg(EmitContext& ctx);
void EmitSetThreadBitScalarReg(EmitContext& ctx);
void EmitGetScalarRegister(EmitContext& ctx);
void EmitSetScalarRegister(EmitContext& ctx);
void EmitGetVectorRegister(EmitContext& ctx);
void EmitSetVectorRegister(EmitContext& ctx);
void EmitSetGotoVariable(EmitContext& ctx);
void EmitGetGotoVariable(EmitContext& ctx);
void EmitSetScc(EmitContext& ctx);
void EmitReadConst(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset);
void EmitReadConstBuffer(EmitContext& ctx, const Operands& dest, const Operands& handle, const Operands& offset);
void EmitLoadBufferU8(EmitContext& ctx);
void EmitLoadBufferU16(EmitContext& ctx);
void EmitLoadBufferU32(EmitContext& ctx);
void EmitLoadBufferU32x2(EmitContext& ctx);
void EmitLoadBufferU32x3(EmitContext& ctx);
void EmitLoadBufferU32x4(EmitContext& ctx);
void EmitLoadBufferF32(EmitContext& ctx);
void EmitLoadBufferF32x2(EmitContext& ctx);
void EmitLoadBufferF32x3(EmitContext& ctx);
void EmitLoadBufferF32x4(EmitContext& ctx);
void EmitLoadBufferFormatF32(EmitContext& ctx);
void EmitStoreBufferU8(EmitContext& ctx);
void EmitStoreBufferU16(EmitContext& ctx);
void EmitStoreBufferU32(EmitContext& ctx);
void EmitStoreBufferU32x2(EmitContext& ctx);
void EmitStoreBufferU32x3(EmitContext& ctx);
void EmitStoreBufferU32x4(EmitContext& ctx);
void EmitStoreBufferF32(EmitContext& ctx);
void EmitStoreBufferF32x2(EmitContext& ctx);
void EmitStoreBufferF32x3(EmitContext& ctx);
void EmitStoreBufferF32x4(EmitContext& ctx);
void EmitStoreBufferFormatF32(EmitContext& ctx);
void EmitBufferAtomicIAdd32(EmitContext& ctx);
void EmitBufferAtomicSMin32(EmitContext& ctx);
void EmitBufferAtomicUMin32(EmitContext& ctx);
void EmitBufferAtomicSMax32(EmitContext& ctx);
void EmitBufferAtomicUMax32(EmitContext& ctx);
void EmitBufferAtomicInc32(EmitContext& ctx);
void EmitBufferAtomicDec32(EmitContext& ctx);
void EmitBufferAtomicAnd32(EmitContext& ctx);
void EmitBufferAtomicOr32(EmitContext& ctx);
void EmitBufferAtomicXor32(EmitContext& ctx);
void EmitBufferAtomicSwap32(EmitContext& ctx);
void EmitGetAttribute(EmitContext& ctx, const Operands& dest);
void EmitGetAttributeU32(EmitContext& ctx, const Operands& dest);
void EmitSetAttribute(EmitContext& ctx);
void EmitGetTessGenericAttribute(EmitContext& ctx);
void EmitSetTcsGenericAttribute(EmitContext& ctx);
void EmitReadTcsGenericOuputAttribute(EmitContext& ctx);
void EmitGetPatch(EmitContext& ctx);
void EmitSetPatch(EmitContext& ctx);
void EmitSetFragColor(EmitContext& ctx);
void EmitSetSampleMask(EmitContext& ctx);
void EmitSetFragDepth(EmitContext& ctx);
void EmitWorkgroupId(EmitContext& ctx);
void EmitLocalInvocationId(EmitContext& ctx);
void EmitInvocationId(EmitContext& ctx);
void EmitInvocationInfo(EmitContext& ctx);
void EmitSampleId(EmitContext& ctx);
void EmitUndefU1(EmitContext& ctx);
void EmitUndefU8(EmitContext& ctx);
void EmitUndefU16(EmitContext& ctx);
void EmitUndefU32(EmitContext& ctx);
void EmitUndefU64(EmitContext& ctx);
void EmitLoadSharedU32(EmitContext& ctx, const Operands& dest, const Operands& offset);
void EmitLoadSharedU64(EmitContext& ctx, const Operands& dest, const Operands& offset);
void EmitWriteSharedU32(EmitContext& ctx);
void EmitWriteSharedU64(EmitContext& ctx);
void EmitSharedAtomicIAdd32(EmitContext& ctx);
void EmitSharedAtomicUMax32(EmitContext& ctx);
void EmitSharedAtomicSMax32(EmitContext& ctx);
void EmitSharedAtomicUMin32(EmitContext& ctx);
void EmitSharedAtomicSMin32(EmitContext& ctx);
void EmitSharedAtomicAnd32(EmitContext& ctx);
void EmitSharedAtomicOr32(EmitContext& ctx);
void EmitSharedAtomicXor32(EmitContext& ctx);
void EmitCompositeConstructU32x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2);
void EmitCompositeConstructU32x3(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3);
void EmitCompositeConstructU32x4(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3, const Operands& src4);
void EmitCompositeConstructU32x2x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2);
void EmitCompositeExtractU32x2(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
void EmitCompositeExtractU32x3(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
void EmitCompositeExtractU32x4(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
void EmitCompositeInsertU32x2(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
void EmitCompositeInsertU32x3(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
void EmitCompositeInsertU32x4(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
void EmitCompositeShuffleU32x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2);
void EmitCompositeShuffleU32x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3);
void EmitCompositeShuffleU32x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4);
void EmitCompositeConstructF16x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2);
void EmitCompositeConstructF16x3(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3);
void EmitCompositeConstructF16x4(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3, const Operands& src4);
void EmitCompositeExtractF16x2(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
void EmitCompositeExtractF16x3(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
void EmitCompositeExtractF16x4(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
void EmitCompositeInsertF16x2(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
void EmitCompositeInsertF16x3(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
void EmitCompositeInsertF16x4(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
void EmitCompositeShuffleF16x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2);
void EmitCompositeShuffleF16x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3);
void EmitCompositeShuffleF16x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4);
void EmitCompositeConstructF32x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2);
void EmitCompositeConstructF32x3(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3);
void EmitCompositeConstructF32x4(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3, const Operands& src4);
void EmitCompositeConstructF32x2x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2);
void EmitCompositeExtractF32x2(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
void EmitCompositeExtractF32x3(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
void EmitCompositeExtractF32x4(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
void EmitCompositeInsertF32x2(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
void EmitCompositeInsertF32x3(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
void EmitCompositeInsertF32x4(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
void EmitCompositeShuffleF32x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2);
void EmitCompositeShuffleF32x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3);
void EmitCompositeShuffleF32x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4);
void EmitCompositeConstructF64x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2);
void EmitCompositeConstructF64x3(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3);
void EmitCompositeConstructF64x4(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3, const Operands& src4);
void EmitCompositeExtractF64x2(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
void EmitCompositeExtractF64x3(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
void EmitCompositeExtractF64x4(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
void EmitCompositeInsertF64x2(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
void EmitCompositeInsertF64x3(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
void EmitCompositeInsertF64x4(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
void EmitCompositeShuffleF64x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2);
void EmitCompositeShuffleF64x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3);
void EmitCompositeShuffleF64x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4);
void EmitSelectU1(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value);
void EmitSelectU8(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value);
void EmitSelectU16(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value);
void EmitSelectU32(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value);
void EmitSelectU64(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value);
void EmitSelectF16(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value);
void EmitSelectF32(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value);
void EmitSelectF64(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value);
void EmitBitCastU16F16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitBitCastU32F32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitBitCastU64F64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitBitCastF16U16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitBitCastF32U32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitBitCastF64U64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitPackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitUnpackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitPackFloat2x32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitPackUnorm2x16(EmitContext& ctx);
void EmitUnpackUnorm2x16(EmitContext& ctx);
void EmitPackSnorm2x16(EmitContext& ctx);
void EmitUnpackSnorm2x16(EmitContext& ctx);
void EmitPackUint2x16(EmitContext& ctx);
void EmitUnpackUint2x16(EmitContext& ctx);
void EmitPackSint2x16(EmitContext& ctx);
void EmitUnpackSint2x16(EmitContext& ctx);
void EmitPackHalf2x16(EmitContext& ctx);
void EmitUnpackHalf2x16(EmitContext& ctx);
void EmitPackUnorm4x8(EmitContext& ctx);
void EmitUnpackUnorm4x8(EmitContext& ctx);
void EmitPackSnorm4x8(EmitContext& ctx);
void EmitUnpackSnorm4x8(EmitContext& ctx);
void EmitPackUint4x8(EmitContext& ctx);
void EmitUnpackUint4x8(EmitContext& ctx);
void EmitPackSint4x8(EmitContext& ctx);
void EmitUnpackSint4x8(EmitContext& ctx);
void EmitPackUfloat10_11_11(EmitContext& ctx);
void EmitUnpackUfloat10_11_11(EmitContext& ctx);
void EmitPackUnorm2_10_10_10(EmitContext& ctx);
void EmitUnpackUnorm2_10_10_10(EmitContext& ctx);
void EmitPackSnorm2_10_10_10(EmitContext& ctx);
void EmitUnpackSnorm2_10_10_10(EmitContext& ctx);
void EmitPackUint2_10_10_10(EmitContext& ctx);
void EmitUnpackUint2_10_10_10(EmitContext& ctx);
void EmitPackSint2_10_10_10(EmitContext& ctx);
void EmitUnpackSint2_10_10_10(EmitContext& ctx);
void EmitFPAbs16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPAbs32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPAbs64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPAdd16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPAdd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPAdd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPSub32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPFma16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
void EmitFPFma32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
void EmitFPFma64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
void EmitFPMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, bool is_legacy = false);
void EmitFPMax64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, bool is_legacy = false);
void EmitFPMin64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPMinTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
void EmitFPMaxTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
void EmitFPMedTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
void EmitFPMul16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPMul32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPMul64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPDiv64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPNeg16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPNeg32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPNeg64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPSin(EmitContext& ctx);
void EmitFPCos(EmitContext& ctx);
void EmitFPExp2(EmitContext& ctx);
void EmitFPLdexp(EmitContext& ctx);
void EmitFPLog2(EmitContext& ctx);
void EmitFPRecip32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPRecip64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPRecipSqrt32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPRecipSqrt64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPSqrt(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPSaturate16(EmitContext& ctx);
void EmitFPSaturate32(EmitContext& ctx);
void EmitFPSaturate64(EmitContext& ctx);
void EmitFPClamp16(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max);
void EmitFPClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max);
void EmitFPClamp64(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max);
void EmitFPRoundEven16(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPRoundEven32(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPRoundEven64(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPFloor16(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPFloor32(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPFloor64(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPCeil16(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPCeil32(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPCeil64(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPTrunc16(EmitContext& ctx, const Operands& dest, const Operands& op);
void EmitFPTrunc32(EmitContext& ctx, const Operands& dest, const Operands& op);
void EmitFPTrunc64(EmitContext& ctx, const Operands& dest, const Operands& op);
void EmitFPFract32(EmitContext& ctx, const Operands& dest, const Operands& op);
void EmitFPFract64(EmitContext& ctx, const Operands& dest, const Operands& op);
void EmitFPFrexpSig32(EmitContext& ctx);
void EmitFPFrexpSig64(EmitContext& ctx);
void EmitFPFrexpExp32(EmitContext& ctx);
void EmitFPFrexpExp64(EmitContext& ctx);
void EmitFPOrdEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdLessThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordLessThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdGreaterThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdGreaterThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordGreaterThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordGreaterThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdLessThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordLessThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPIsNan16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPIsNan32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPIsNan64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPIsInf32(EmitContext& ctx);
void EmitFPIsInf64(EmitContext& ctx);
void EmitIAdd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitIAdd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitIAddCary32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitISub32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitISub64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitSMulExt(EmitContext& ctx);
void EmitUMulExt(EmitContext& ctx);
void EmitIMul32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitIMul64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitSDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitUDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitSMod32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitUMod32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitINeg32(EmitContext& ctx, const Operands& dest, const Operands& op);
void EmitINeg64(EmitContext& ctx, const Operands& dest, const Operands& op);
void EmitIAbs32(EmitContext& ctx, const Operands& dest, const Operands& op);
void EmitShiftLeftLogical32(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift);
void EmitShiftLeftLogical64(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift);
// Fixed: both declarations ended in a stray double semicolon (";;").
void EmitShiftRightLogical32(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift);
void EmitShiftRightLogical64(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift);
void EmitShiftRightArithmetic32(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift);
void EmitShiftRightArithmetic64(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift);
void EmitBitwiseAnd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitBitwiseAnd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitBitwiseOr32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitBitwiseOr64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitBitwiseXor32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitBitFieldInsert(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& insert, const Operands& offset, const Operands& count);
void EmitBitFieldSExtract(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset, const Operands& count);
void EmitBitFieldUExtract(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset, const Operands& count);
void EmitBitReverse32(EmitContext& ctx);
void EmitBitCount32(EmitContext& ctx);
void EmitBitCount64(EmitContext& ctx);
void EmitBitwiseNot32(EmitContext& ctx, const Operands& dest, const Operands& op);
void EmitFindSMsb32(EmitContext& ctx);
void EmitFindUMsb32(EmitContext& ctx);
void EmitFindILsb32(EmitContext& ctx);
void EmitFindILsb64(EmitContext& ctx);
void EmitSMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitUMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitSMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitUMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitSMinTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
void EmitUMinTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
void EmitSMaxTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
void EmitUMaxTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
void EmitSMedTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
void EmitUMedTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
void EmitSClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max);
void EmitUClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max);
void EmitSLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitSLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitULessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitULessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitIEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitIEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitSLessThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitULessThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitSGreaterThan(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitUGreaterThan(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitINotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitINotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitSGreaterThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitUGreaterThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitLogicalOr(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitLogicalAnd(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitLogicalXor(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitLogicalNot(EmitContext& ctx, const Operands& dest, const Operands& op);
void EmitConvertS16F16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertS16F32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertS16F64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertS32F16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertS32F32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertS32F64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertS64F16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertS64F32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertS64F64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU16F16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU16F32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU16F64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU32F16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU32F32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU32F64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU64F16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU64F32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU64F64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU64U32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU32U64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16F32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32F16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32F64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64F32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16S8(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16S16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16S32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16S64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16U8(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16U16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16U32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16U64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32S8(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32S16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32S32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32S64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32U8(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32U16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32U32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32U64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64S8(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64S16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64S32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64S64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64U8(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64U16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64U32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64U64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU16U32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU32U16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitImageSampleRaw(EmitContext& ctx);
void EmitImageSampleImplicitLod(EmitContext& ctx);
void EmitImageSampleExplicitLod(EmitContext& ctx);
void EmitImageSampleDrefImplicitLod(EmitContext& ctx);
void EmitImageSampleDrefExplicitLod(EmitContext& ctx);
void EmitImageGather(EmitContext& ctx);
void EmitImageGatherDref(EmitContext& ctx);
void EmitImageQueryDimensions(EmitContext& ctx);
void EmitImageQueryLod(EmitContext& ctx);
void EmitImageGradient(EmitContext& ctx);
void EmitImageRead(EmitContext& ctx);
void EmitImageWrite(EmitContext& ctx);
void EmitImageAtomicIAdd32(EmitContext& ctx);
void EmitImageAtomicSMin32(EmitContext& ctx);
void EmitImageAtomicUMin32(EmitContext& ctx);
void EmitImageAtomicSMax32(EmitContext& ctx);
void EmitImageAtomicUMax32(EmitContext& ctx);
void EmitImageAtomicInc32(EmitContext& ctx);
void EmitImageAtomicDec32(EmitContext& ctx);
void EmitImageAtomicAnd32(EmitContext& ctx);
void EmitImageAtomicOr32(EmitContext& ctx);
void EmitImageAtomicXor32(EmitContext& ctx);
void EmitImageAtomicExchange32(EmitContext& ctx);
void EmitCubeFaceIndex(EmitContext& ctx);
void EmitLaneId(EmitContext& ctx);
void EmitWarpId(EmitContext& ctx);
void EmitQuadShuffle(EmitContext& ctx);
void EmitReadFirstLane(EmitContext& ctx);
void EmitReadLane(EmitContext& ctx);
void EmitWriteLane(EmitContext& ctx);
void EmitDataAppend(EmitContext& ctx);
void EmitDataConsume(EmitContext& ctx);
void EmitEmitVertex(EmitContext& ctx);
void EmitEmitPrimitive(EmitContext& ctx);
} // namespace Shader::Backend::X64

View file

@ -0,0 +1,624 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/exception.h"
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/backend/asm_x64/x64_utils.h"
namespace Shader::Backend::X64 {
using namespace Xbyak;
using namespace Xbyak::util;
namespace {
static bool IsReg(const OperandHolder& op, const Reg& reg) {
return op.IsReg() && op.Reg().getIdx() == reg.getIdx();
}
static bool EmitSaveRegTemp(EmitContext ctx, const Reg& save, const OperandHolder& dest) {
if (IsReg(dest, save)) {
// Destination is reg, no need to save
return false;
}
ctx.Code().push(save);
return true;
}
static void EmitRestoreRegTemp(EmitContext ctx, const Reg& save) {
ctx.Code().pop(save);
}
} // namespace
// 32-bit integer add: dest = op1 + op2.
void EmitIAdd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    const bool all_regs = dest[0].IsReg() && op1[0].IsReg() && op2[0].IsReg();
    if (all_regs) {
        // One lea computes the sum without clobbering either source register.
        ctx.Code().lea(dest[0].Reg(), ptr[op1[0].Reg() + op2[0].Reg()]);
        return;
    }
    // add cannot encode two memory operands; stage through a scratch register.
    OperandHolder acc = dest[0];
    if (dest[0].IsMem() && op2[0].IsMem()) {
        acc = ctx.TempGPReg().cvt32();
    }
    MovGP(ctx, acc, op1[0]);
    ctx.Code().add(acc.Op(), op2[0].Op());
    MovGP(ctx, dest[0], acc);
}
// 64-bit integer add: dest = op1 + op2.
void EmitIAdd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    const bool all_regs = dest[0].IsReg() && op1[0].IsReg() && op2[0].IsReg();
    if (all_regs) {
        // One lea computes the sum without clobbering either source register.
        ctx.Code().lea(dest[0].Reg(), ptr[op1[0].Reg() + op2[0].Reg()]);
        return;
    }
    // add cannot encode two memory operands; stage through a scratch register.
    OperandHolder acc = dest[0];
    if (dest[0].IsMem() && op2[0].IsMem()) {
        acc = ctx.TempGPReg();
    }
    MovGP(ctx, acc, op1[0]);
    ctx.Code().add(acc.Op(), op2[0].Op());
    MovGP(ctx, dest[0], acc);
}
// 32-bit add with carry-out: dest[0] = op1 + op2, dest[1] = carry flag.
void EmitIAddCary32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0];
    OperandHolder carry = dest[1];
    // setc writes an 8-bit operand; was setBit(1), which is not a valid x86
    // operand width.
    carry.Op().setBit(8);
    MovGP(ctx, tmp, op1[0]);
    ctx.Code().add(tmp.Op(), op2[0].Op());
    // Capture CF immediately after the add, before any other instruction.
    ctx.Code().setc(carry.Op());
    // Was missing: commit the sum to dest[0] (tmp may be a scratch register).
    MovGP(ctx, dest[0], tmp);
}
// 32-bit integer subtract: dest = op1 - op2.
void EmitISub32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    // sub cannot encode two memory operands; stage through a scratch register.
    OperandHolder acc = dest[0];
    if (dest[0].IsMem() && op2[0].IsMem()) {
        acc = ctx.TempGPReg().cvt32();
    }
    MovGP(ctx, acc, op1[0]);
    ctx.Code().sub(acc.Op(), op2[0].Op());
    MovGP(ctx, dest[0], acc);
}
// 64-bit integer subtract: dest = op1 - op2.
void EmitISub64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    // sub cannot encode two memory operands; stage through a scratch register.
    OperandHolder acc = dest[0];
    if (dest[0].IsMem() && op2[0].IsMem()) {
        acc = ctx.TempGPReg();
    }
    MovGP(ctx, acc, op1[0]);
    ctx.Code().sub(acc.Op(), op2[0].Op());
    MovGP(ctx, dest[0], acc);
}
void EmitSMulExt(EmitContext& ctx) {
    // Signed widening multiply is not implemented in the x64 backend yet.
    throw NotImplementedException("SMulExtended");
}
void EmitUMulExt(EmitContext& ctx) {
    // Unsigned widening multiply is not implemented in the x64 backend yet.
    throw NotImplementedException("UMulExtended");
}
// 32-bit integer multiply: dest = op1 * op2 (low 32 bits).
void EmitIMul32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    // Two-operand imul requires a register destination; borrow a scratch
    // register when dest lives in memory.
    Reg acc = !dest[0].IsMem() ? dest[0].Reg() : ctx.TempGPReg().cvt32();
    MovGP(ctx, acc, op1[0]);
    ctx.Code().imul(acc, op2[0].Op());
    MovGP(ctx, dest[0], acc);
}
// 64-bit integer multiply: dest = op1 * op2 (low 64 bits).
void EmitIMul64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    // Two-operand imul requires a register destination; borrow a scratch
    // register when dest lives in memory.
    Reg acc = !dest[0].IsMem() ? dest[0].Reg() : ctx.TempGPReg();
    MovGP(ctx, acc, op1[0]);
    ctx.Code().imul(acc, op2[0].Op());
    MovGP(ctx, dest[0], acc);
}
// 32-bit signed division: dest = op1 / op2 (quotient in eax).
void EmitSDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    // idiv consumes edx:eax; preserve both unless dest overwrites them.
    bool rax_saved = EmitSaveRegTemp(ctx, rax, dest[0]);
    bool rdx_saved = EmitSaveRegTemp(ctx, rdx, dest[0]);
    // The divisor must not alias rax or rdx (was only checking rax): both are
    // implicit inputs of idiv and are clobbered below.
    OperandHolder tmp = op2[0];
    while (IsReg(tmp, rax) || IsReg(tmp, rdx)) {
        tmp = ctx.TempGPReg().cvt32();
    }
    MovGP(ctx, tmp, op2[0]);
    MovGP(ctx, eax, op1[0]);
    // Was missing: idiv divides edx:eax, so eax must be sign-extended into edx
    // first; otherwise edx holds garbage (wrong result or #DE).
    ctx.Code().cdq();
    ctx.Code().idiv(tmp.Op());
    MovGP(ctx, dest[0], eax);
    if (rdx_saved) {
        EmitRestoreRegTemp(ctx, rdx);
    }
    if (rax_saved) {
        EmitRestoreRegTemp(ctx, rax);
    }
}
// 32-bit unsigned division: dest = op1 / op2 (quotient in eax).
void EmitUDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    // div consumes edx:eax; preserve both unless dest overwrites them.
    bool rax_saved = EmitSaveRegTemp(ctx, rax, dest[0]);
    bool rdx_saved = EmitSaveRegTemp(ctx, rdx, dest[0]);
    // The divisor must not alias rax or rdx (was only checking rax): both are
    // implicit inputs of div and are clobbered below.
    OperandHolder tmp = op2[0];
    while (IsReg(tmp, rax) || IsReg(tmp, rdx)) {
        tmp = ctx.TempGPReg().cvt32();
    }
    MovGP(ctx, tmp, op2[0]);
    MovGP(ctx, eax, op1[0]);
    // Was missing: div divides edx:eax, so edx must be zeroed first;
    // otherwise edx holds garbage (wrong result or #DE).
    ctx.Code().xor_(edx, edx);
    ctx.Code().div(tmp.Op());
    MovGP(ctx, dest[0], eax);
    if (rdx_saved) {
        EmitRestoreRegTemp(ctx, rdx);
    }
    if (rax_saved) {
        EmitRestoreRegTemp(ctx, rax);
    }
}
// 32-bit signed remainder: dest = op1 % op2 (remainder in edx).
void EmitSMod32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    // idiv consumes edx:eax; preserve both unless dest overwrites them.
    bool rax_saved = EmitSaveRegTemp(ctx, rax, dest[0]);
    bool rdx_saved = EmitSaveRegTemp(ctx, rdx, dest[0]);
    // The divisor must not alias rax or rdx (was only checking rax): both are
    // implicit inputs of idiv and are clobbered below.
    OperandHolder tmp = op2[0];
    while (IsReg(tmp, rax) || IsReg(tmp, rdx)) {
        tmp = ctx.TempGPReg().cvt32();
    }
    MovGP(ctx, tmp, op2[0]);
    MovGP(ctx, eax, op1[0]);
    // Was missing: idiv divides edx:eax, so eax must be sign-extended into edx
    // first; otherwise edx holds garbage (wrong result or #DE).
    ctx.Code().cdq();
    ctx.Code().idiv(tmp.Op());
    MovGP(ctx, dest[0], edx);
    if (rdx_saved) {
        EmitRestoreRegTemp(ctx, rdx);
    }
    if (rax_saved) {
        EmitRestoreRegTemp(ctx, rax);
    }
}
// 32-bit unsigned remainder: dest = op1 % op2 (remainder in edx).
void EmitUMod32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    // div consumes edx:eax; preserve both unless dest overwrites them.
    bool rax_saved = EmitSaveRegTemp(ctx, rax, dest[0]);
    bool rdx_saved = EmitSaveRegTemp(ctx, rdx, dest[0]);
    // The divisor must not alias rax or rdx (was only checking rax): both are
    // implicit inputs of div and are clobbered below.
    OperandHolder tmp = op2[0];
    while (IsReg(tmp, rax) || IsReg(tmp, rdx)) {
        tmp = ctx.TempGPReg().cvt32();
    }
    MovGP(ctx, tmp, op2[0]);
    MovGP(ctx, eax, op1[0]);
    // Was missing: div divides edx:eax, so edx must be zeroed first;
    // otherwise edx holds garbage (wrong result or #DE).
    ctx.Code().xor_(edx, edx);
    ctx.Code().div(tmp.Op());
    MovGP(ctx, dest[0], edx);
    if (rdx_saved) {
        EmitRestoreRegTemp(ctx, rdx);
    }
    if (rax_saved) {
        EmitRestoreRegTemp(ctx, rax);
    }
}
// 32-bit two's-complement negate: dest = -op (copy, then negate in place).
void EmitINeg32(EmitContext& ctx, const Operands& dest, const Operands& op) {
    MovGP(ctx, dest[0], op[0]);
    ctx.Code().neg(dest[0].Op());
}
// 64-bit two's-complement negate: dest = -op (copy, then negate in place).
void EmitINeg64(EmitContext& ctx, const Operands& dest, const Operands& op) {
    MovGP(ctx, dest[0], op[0]);
    ctx.Code().neg(dest[0].Op());
}
// 32-bit absolute value: dest = |op|.
void EmitIAbs32(EmitContext& ctx, const Operands& dest, const Operands& op) {
    Label non_negative;
    // Copy first, then negate in place only when the value is negative.
    MovGP(ctx, dest[0], op[0]);
    ctx.Code().cmp(dest[0].Op(), 0);
    ctx.Code().jns(non_negative);
    ctx.Code().neg(dest[0].Op());
    ctx.Code().L(non_negative);
}
// 32-bit logical shift left: dest = base << shift.
void EmitShiftLeftLogical32(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) {
    // Variable shifts take the count in cl; keep the caller's rcx intact.
    const bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
    OperandHolder acc = dest[0];
    if (IsReg(dest[0], rcx)) {
        // dest aliases the count register; accumulate somewhere else.
        acc = ctx.TempGPReg().cvt32();
    }
    MovGP(ctx, acc, base[0]);
    MovGP(ctx, cl, shift[0]);
    ctx.Code().shl(acc.Op(), cl);
    MovGP(ctx, dest[0], acc);
    if (rcx_saved) {
        EmitRestoreRegTemp(ctx, rcx);
    }
}
// 64-bit logical shift left: dest = base << shift.
void EmitShiftLeftLogical64(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) {
    // Variable shifts take the count in cl; keep the caller's rcx intact.
    const bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
    OperandHolder acc = dest[0];
    if (IsReg(dest[0], rcx)) {
        // dest aliases the count register; accumulate somewhere else.
        acc = ctx.TempGPReg();
    }
    MovGP(ctx, acc, base[0]);
    MovGP(ctx, cl, shift[0]);
    ctx.Code().shl(acc.Op(), cl);
    MovGP(ctx, dest[0], acc);
    if (rcx_saved) {
        EmitRestoreRegTemp(ctx, rcx);
    }
}
// 32-bit logical shift right: dest = base >> shift (zero-filling).
void EmitShiftRightLogical32(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) {
    // Variable shifts take the count in cl; keep the caller's rcx intact.
    const bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
    OperandHolder acc = dest[0];
    if (IsReg(dest[0], rcx)) {
        // dest aliases the count register; accumulate somewhere else.
        acc = ctx.TempGPReg().cvt32();
    }
    MovGP(ctx, acc, base[0]);
    MovGP(ctx, cl, shift[0]);
    ctx.Code().shr(acc.Op(), cl);
    MovGP(ctx, dest[0], acc);
    if (rcx_saved) {
        EmitRestoreRegTemp(ctx, rcx);
    }
}
// 64-bit logical shift right: dest = base >> shift (zero-filling).
void EmitShiftRightLogical64(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) {
    // Variable shifts take the count in cl; keep the caller's rcx intact.
    const bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
    OperandHolder acc = dest[0];
    if (IsReg(dest[0], rcx)) {
        // dest aliases the count register; accumulate somewhere else.
        acc = ctx.TempGPReg();
    }
    MovGP(ctx, acc, base[0]);
    MovGP(ctx, cl, shift[0]);
    ctx.Code().shr(acc.Op(), cl);
    MovGP(ctx, dest[0], acc);
    if (rcx_saved) {
        EmitRestoreRegTemp(ctx, rcx);
    }
}
// 32-bit arithmetic shift right: dest = base >> shift (sign-filling).
void EmitShiftRightArithmetic32(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) {
    // Variable shifts take the count in cl; keep the caller's rcx intact.
    const bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
    OperandHolder acc = dest[0];
    if (IsReg(dest[0], rcx)) {
        // dest aliases the count register; accumulate somewhere else.
        acc = ctx.TempGPReg().cvt32();
    }
    MovGP(ctx, acc, base[0]);
    MovGP(ctx, cl, shift[0]);
    ctx.Code().sar(acc.Op(), cl);
    MovGP(ctx, dest[0], acc);
    if (rcx_saved) {
        EmitRestoreRegTemp(ctx, rcx);
    }
}
// 64-bit arithmetic shift right: dest = base >> shift (sign-filling).
void EmitShiftRightArithmetic64(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) {
    // Variable shifts take the count in cl; keep the caller's rcx intact.
    const bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
    OperandHolder acc = dest[0];
    if (IsReg(dest[0], rcx)) {
        // dest aliases the count register; accumulate somewhere else.
        acc = ctx.TempGPReg();
    }
    MovGP(ctx, acc, base[0]);
    MovGP(ctx, cl, shift[0]);
    ctx.Code().sar(acc.Op(), cl);
    MovGP(ctx, dest[0], acc);
    if (rcx_saved) {
        EmitRestoreRegTemp(ctx, rcx);
    }
}
// 32-bit bitwise AND: dest = op1 & op2.
void EmitBitwiseAnd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    // and cannot encode two memory operands; stage through a scratch register.
    OperandHolder acc = dest[0];
    if (dest[0].IsMem() && op2[0].IsMem()) {
        acc = ctx.TempGPReg().cvt32();
    }
    MovGP(ctx, acc, op1[0]);
    ctx.Code().and_(acc.Op(), op2[0].Op());
    MovGP(ctx, dest[0], acc);
}
// 64-bit bitwise AND: dest = op1 & op2.
void EmitBitwiseAnd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    // and cannot encode two memory operands; stage through a scratch register.
    OperandHolder acc = dest[0];
    if (dest[0].IsMem() && op2[0].IsMem()) {
        acc = ctx.TempGPReg();
    }
    MovGP(ctx, acc, op1[0]);
    ctx.Code().and_(acc.Op(), op2[0].Op());
    MovGP(ctx, dest[0], acc);
}
// 32-bit bitwise OR: dest = op1 | op2.
void EmitBitwiseOr32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    // or cannot encode two memory operands; stage through a scratch register.
    OperandHolder acc = dest[0];
    if (dest[0].IsMem() && op2[0].IsMem()) {
        acc = ctx.TempGPReg().cvt32();
    }
    MovGP(ctx, acc, op1[0]);
    ctx.Code().or_(acc.Op(), op2[0].Op());
    MovGP(ctx, dest[0], acc);
}
// 64-bit bitwise OR: dest = op1 | op2.
void EmitBitwiseOr64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    // or cannot encode two memory operands; stage through a scratch register.
    OperandHolder acc = dest[0];
    if (dest[0].IsMem() && op2[0].IsMem()) {
        acc = ctx.TempGPReg();
    }
    MovGP(ctx, acc, op1[0]);
    ctx.Code().or_(acc.Op(), op2[0].Op());
    MovGP(ctx, dest[0], acc);
}
// 32-bit bitwise XOR: dest = op1 ^ op2.
void EmitBitwiseXor32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    // xor cannot encode two memory operands; stage through a scratch register.
    OperandHolder acc = dest[0];
    if (dest[0].IsMem() && op2[0].IsMem()) {
        acc = ctx.TempGPReg().cvt32();
    }
    MovGP(ctx, acc, op1[0]);
    ctx.Code().xor_(acc.Op(), op2[0].Op());
    MovGP(ctx, dest[0], acc);
}
// Inserts the low `count` bits of `insert` into `base` at bit `offset`:
// dest = (base & ~(mask << offset)) | ((insert << offset) & (mask << offset))
// with mask = (1 << count) - 1. Shift counts go through CL, so RCX is saved
// unless the destination already occupies it.
void EmitBitFieldInsert(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& insert, const Operands& offset, const Operands& count) {
bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
OperandHolder tmp = IsReg(dest[0], rcx) ? ctx.TempGPReg().cvt32() : dest[0];
Reg mask = ctx.TempGPReg().cvt32();
Reg tmp2 = ctx.TempGPReg().cvt32();
MovGP(ctx, tmp, base[0]);
MovGP(ctx, cl, count[0]);
MovGP(ctx, tmp2, insert[0]);
// mask = (1 << count) - 1
ctx.Code().mov(mask, 1);
ctx.Code().shl(mask, cl);
ctx.Code().sub(mask, 1);
// Shift both the mask and the insert value into position.
MovGP(ctx, cl, offset[0]);
ctx.Code().shl(mask, cl);
ctx.Code().shl(tmp2, cl);
// Keep only the field bits of the insert value, clear them in base, merge.
ctx.Code().and_(tmp2, mask);
ctx.Code().not_(mask);
ctx.Code().and_(tmp.Op(), mask);
ctx.Code().or_(tmp.Op(), tmp2);
MovGP(ctx, dest[0], tmp);
if (rcx_saved) {
EmitRestoreRegTemp(ctx, rcx);
}
}
// Signed bit-field extract: isolates `count` bits of `base` starting at
// `offset`, then sign-extends the field by shifting left/right by (32 - count).
// NOTE(review): count[0] is read again at the `sub(ecx, ...)` below after CL
// has been clobbered twice; this assumes the count operand never aliases
// RCX/ECX — verify against the register allocator's operand assignment.
void EmitBitFieldSExtract(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset, const Operands& count) {
bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
OperandHolder tmp = IsReg(dest[0], rcx) ? ctx.TempGPReg().cvt32() : dest[0];
Reg mask = ctx.TempGPReg().cvt32();
MovGP(ctx, tmp, base[0]);
MovGP(ctx, cl, count[0]);
// mask = (1 << count) - 1
ctx.Code().mov(mask, 1);
ctx.Code().shl(mask, cl);
ctx.Code().sub(mask, 1);
// Position the mask at the field, isolate it, and shift the field down.
MovGP(ctx, cl, offset[0]);
ctx.Code().shl(mask, cl);
ctx.Code().and_(tmp.Op(), mask);
ctx.Code().shr(tmp.Op(), cl);
// Sign-extend: shift the field to the top of the register and back down.
ctx.Code().mov(ecx, 0x20);
ctx.Code().sub(ecx, count[0].Op());
ctx.Code().shl(tmp.Op(), cl);
ctx.Code().sar(tmp.Op(), cl);
MovGP(ctx, dest[0], tmp);
if (rcx_saved) {
EmitRestoreRegTemp(ctx, rcx);
}
}
// Unsigned bit-field extract:
// dest = (base & (((1 << count) - 1) << offset)) >> offset.
// Shift counts go through CL, so RCX is saved unless dest occupies it.
void EmitBitFieldUExtract(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset, const Operands& count) {
bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
OperandHolder tmp = IsReg(dest[0], rcx) ? ctx.TempGPReg().cvt32() : dest[0];
Reg mask = ctx.TempGPReg().cvt32();
MovGP(ctx, tmp, base[0]);
MovGP(ctx, cl, count[0]);
// mask = (1 << count) - 1
ctx.Code().mov(mask, 1);
ctx.Code().shl(mask, cl);
ctx.Code().sub(mask, 1);
// Shift the mask to the field position, isolate, then shift the field down.
MovGP(ctx, cl, offset[0]);
ctx.Code().shl(mask, cl);
ctx.Code().and_(tmp.Op(), mask);
ctx.Code().shr(tmp.Op(), cl);
MovGP(ctx, dest[0], tmp);
if (rcx_saved) {
EmitRestoreRegTemp(ctx, rcx);
}
}
// The following ops are not yet implemented in the x64 backend; reaching them
// aborts shader translation with NotImplementedException.
void EmitBitReverse32(EmitContext& ctx) {
throw NotImplementedException("BitReverse32");
}
void EmitBitCount32(EmitContext& ctx) {
throw NotImplementedException("BitCount32");
}
void EmitBitCount64(EmitContext& ctx) {
throw NotImplementedException("BitCount64");
}
// dest = ~op (bitwise complement). NOT has a single-operand mem/reg form, so
// the complement is applied directly to the destination after the copy.
void EmitBitwiseNot32(EmitContext& ctx, const Operands& dest, const Operands& op) {
MovGP(ctx, dest[0], op[0]);
ctx.Code().not_(dest[0].Op());
}
// Bit-scan ops are not yet implemented in the x64 backend.
void EmitFindSMsb32(EmitContext& ctx) {
throw NotImplementedException("FindSMsb32");
}
void EmitFindUMsb32(EmitContext& ctx) {
throw NotImplementedException("FindUMsb32");
}
void EmitFindILsb32(EmitContext& ctx) {
throw NotImplementedException("FindILsb32");
}
void EmitFindILsb64(EmitContext& ctx) {
throw NotImplementedException("FindILsb64");
}
// dest = min(op1, op2), signed 32-bit, branchless via CMOVG.
void EmitSMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    Reg result = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
    MovGP(ctx, result, op1[0]);
    auto& code = ctx.Code();
    // If result > op2 (signed), take op2.
    code.cmp(result, op2[0].Op());
    code.cmovg(result, op2[0].Op());
    MovGP(ctx, dest[0], result);
}
// dest = min(op1, op2), unsigned 32-bit, branchless via CMOVA.
void EmitUMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    Reg result = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
    MovGP(ctx, result, op1[0]);
    auto& code = ctx.Code();
    // If result > op2 (unsigned), take op2.
    code.cmp(result, op2[0].Op());
    code.cmova(result, op2[0].Op());
    MovGP(ctx, dest[0], result);
}
// dest = max(op1, op2), signed 32-bit, branchless via CMOVL.
void EmitSMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    Reg result = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
    MovGP(ctx, result, op1[0]);
    auto& code = ctx.Code();
    // If result < op2 (signed), take op2.
    code.cmp(result, op2[0].Op());
    code.cmovl(result, op2[0].Op());
    MovGP(ctx, dest[0], result);
}
// dest = max(op1, op2), unsigned 32-bit, branchless via CMOVB.
void EmitUMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    Reg result = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
    MovGP(ctx, result, op1[0]);
    auto& code = ctx.Code();
    // If result < op2 (unsigned), take op2.
    code.cmp(result, op2[0].Op());
    code.cmovb(result, op2[0].Op());
    MovGP(ctx, dest[0], result);
}
// dest = min(op1, op2, op3), signed: fold the pairwise CMOV min twice.
void EmitSMinTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
MovGP(ctx, tmp, op1[0]);
ctx.Code().cmp(tmp, op2[0].Op());
ctx.Code().cmovg(tmp, op2[0].Op());
ctx.Code().cmp(tmp, op3[0].Op());
ctx.Code().cmovg(tmp, op3[0].Op());
MovGP(ctx, dest[0], tmp);
}
// dest = min(op1, op2, op3), unsigned.
void EmitUMinTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
MovGP(ctx, tmp, op1[0]);
ctx.Code().cmp(tmp, op2[0].Op());
ctx.Code().cmova(tmp, op2[0].Op());
ctx.Code().cmp(tmp, op3[0].Op());
ctx.Code().cmova(tmp, op3[0].Op());
MovGP(ctx, dest[0], tmp);
}
// dest = max(op1, op2, op3), signed.
void EmitSMaxTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
MovGP(ctx, tmp, op1[0]);
ctx.Code().cmp(tmp, op2[0].Op());
ctx.Code().cmovl(tmp, op2[0].Op());
ctx.Code().cmp(tmp, op3[0].Op());
ctx.Code().cmovl(tmp, op3[0].Op());
MovGP(ctx, dest[0], tmp);
}
// dest = max(op1, op2, op3), unsigned.
void EmitUMaxTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
MovGP(ctx, tmp, op1[0]);
ctx.Code().cmp(tmp, op2[0].Op());
ctx.Code().cmovb(tmp, op2[0].Op());
ctx.Code().cmp(tmp, op3[0].Op());
ctx.Code().cmovb(tmp, op3[0].Op());
MovGP(ctx, dest[0], tmp);
}
// dest = median(op1, op2, op3), signed 32-bit, using
// med(a, b, c) = max(min(a, b), min(max(a, b), c)).
void EmitSMedTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
    Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
    Reg tmp2 = ctx.TempGPReg().cvt32();
    // tmp2 = min(max(op1, op2), op3)
    MovGP(ctx, tmp2, op1[0]);
    ctx.Code().cmp(tmp2, op2[0].Op());
    ctx.Code().cmovl(tmp2, op2[0].Op());
    ctx.Code().cmp(tmp2, op3[0].Op());
    ctx.Code().cmovg(tmp2, op3[0].Op());
    // tmp = min(op1, op2)
    MovGP(ctx, tmp, op1[0]);
    ctx.Code().cmp(tmp, op2[0].Op());
    ctx.Code().cmovg(tmp, op2[0].Op());
    // tmp = max(tmp, tmp2). The previous code compared tmp against itself
    // (cmp(tmp, tmp)), which made the CMOVL dead and produced min(op1, op2)
    // instead of the median; compare against tmp2 as in EmitUMedTri32.
    ctx.Code().cmp(tmp, tmp2);
    ctx.Code().cmovl(tmp, tmp2);
    MovGP(ctx, dest[0], tmp);
}
// dest = median(op1, op2, op3), unsigned 32-bit, using
// med(a, b, c) = min(max(a, b), max(min(a, b), c)).
void EmitUMedTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
Reg tmp2 = ctx.TempGPReg().cvt32();
// tmp = max(min(op1, op2), op3)
MovGP(ctx, tmp, op1[0]);
ctx.Code().cmp(tmp, op2[0].Op());
ctx.Code().cmova(tmp, op2[0].Op());
ctx.Code().cmp(tmp, op3[0].Op());
ctx.Code().cmovb(tmp, op3[0].Op());
// tmp2 = min(max(op1, op2), tmp)
MovGP(ctx, tmp2, op1[0]);
ctx.Code().cmp(tmp2, op2[0].Op());
ctx.Code().cmovb(tmp2, op2[0].Op());
ctx.Code().cmp(tmp2, tmp);
ctx.Code().cmova(tmp2, tmp);
MovGP(ctx, dest[0], tmp2);
}
// dest = clamp(value, min, max), signed 32-bit: raise to min, then cap at max.
void EmitSClamp32(EmitContext& ctx, const Operands& dest, const Operands& value, const Operands& min, const Operands& max) {
    Reg result = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
    MovGP(ctx, result, value[0]);
    auto& code = ctx.Code();
    code.cmp(result, min[0].Op());
    code.cmovl(result, min[0].Op());
    code.cmp(result, max[0].Op());
    code.cmovg(result, max[0].Op());
    MovGP(ctx, dest[0], result);
}
// dest = clamp(value, min, max), unsigned 32-bit: raise to min, then cap at max.
void EmitUClamp32(EmitContext& ctx, const Operands& dest, const Operands& value, const Operands& min, const Operands& max) {
    Reg result = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
    MovGP(ctx, result, value[0]);
    auto& code = ctx.Code();
    code.cmp(result, min[0].Op());
    code.cmovb(result, min[0].Op());
    code.cmp(result, max[0].Op());
    code.cmova(result, max[0].Op());
    MovGP(ctx, dest[0], result);
}
// Integer comparisons: CMP lhs, rhs then SETcc into the (byte) destination.
// A temporary register is only needed when both operands are memory; otherwise
// lhs is compared in place (MovGP into itself is then a no-op copy).
void EmitSLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0];
MovGP(ctx, tmp, lhs[0]);
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
ctx.Code().setl(dest[0].Op());
}
// Signed less-than, 64-bit operands.
void EmitSLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg() : lhs[0];
MovGP(ctx, tmp, lhs[0]);
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
ctx.Code().setl(dest[0].Op());
}
// Unsigned less-than, 32-bit operands.
void EmitULessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0];
MovGP(ctx, tmp, lhs[0]);
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
ctx.Code().setb(dest[0].Op());
}
// Unsigned less-than, 64-bit operands.
void EmitULessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg() : lhs[0];
MovGP(ctx, tmp, lhs[0]);
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
ctx.Code().setb(dest[0].Op());
}
// Integer equality, 32-bit: CMP + SETE into the byte destination.
void EmitIEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0];
MovGP(ctx, tmp, lhs[0]);
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
ctx.Code().sete(dest[0].Op());
}
// Integer equality, 64-bit.
void EmitIEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg() : lhs[0];
MovGP(ctx, tmp, lhs[0]);
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
ctx.Code().sete(dest[0].Op());
}
// Signed <=, 32-bit: CMP + SETLE.
void EmitSLessThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0];
MovGP(ctx, tmp, lhs[0]);
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
ctx.Code().setle(dest[0].Op());
}
// Unsigned <=, 32-bit: CMP + SETBE.
void EmitULessThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0];
MovGP(ctx, tmp, lhs[0]);
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
ctx.Code().setbe(dest[0].Op());
}
// Signed >, 32-bit: CMP + SETG.
void EmitSGreaterThan(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0];
MovGP(ctx, tmp, lhs[0]);
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
ctx.Code().setg(dest[0].Op());
}
// Unsigned >, 32-bit: CMP + SETA.
void EmitUGreaterThan(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0];
MovGP(ctx, tmp, lhs[0]);
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
ctx.Code().seta(dest[0].Op());
}
// Integer inequality, 32-bit: CMP + SETNE.
void EmitINotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0];
MovGP(ctx, tmp, lhs[0]);
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
ctx.Code().setne(dest[0].Op());
}
// Integer inequality, 64-bit.
void EmitINotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg() : lhs[0];
MovGP(ctx, tmp, lhs[0]);
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
ctx.Code().setne(dest[0].Op());
}
// Signed >=, 32-bit: CMP + SETGE.
void EmitSGreaterThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0];
MovGP(ctx, tmp, lhs[0]);
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
ctx.Code().setge(dest[0].Op());
}
// Unsigned >=, 32-bit: CMP + SETAE.
void EmitUGreaterThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0];
MovGP(ctx, tmp, lhs[0]);
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
ctx.Code().setae(dest[0].Op());
}
} // namespace Shader::Backend::X64

View file

@ -0,0 +1,42 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/backend/asm_x64/x64_utils.h"
namespace Shader::Backend::X64 {
using namespace Xbyak;
using namespace Xbyak::util;
// Boolean OR: bitwise OR of the two operands, normalized to 0/1 by masking
// the low bit. A byte temporary is used only for the mem,mem case.
void EmitLogicalOr(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    OperandHolder acc = dest[0];
    if (op2[0].IsMem() && dest[0].IsMem()) {
        acc = ctx.TempGPReg().cvt8();
    }
    MovGP(ctx, acc, op1[0]);
    auto& code = ctx.Code();
    code.or_(acc.Op(), op2[0].Op());
    code.and_(acc.Op(), 1);
    MovGP(ctx, dest[0], acc);
}
// Boolean AND: bitwise AND, result normalized to 0/1 via the low bit.
void EmitLogicalAnd(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    OperandHolder acc = dest[0];
    if (op2[0].IsMem() && dest[0].IsMem()) {
        acc = ctx.TempGPReg().cvt8();
    }
    MovGP(ctx, acc, op1[0]);
    auto& code = ctx.Code();
    code.and_(acc.Op(), op2[0].Op());
    code.and_(acc.Op(), 1);
    MovGP(ctx, dest[0], acc);
}
// Boolean XOR: bitwise XOR, result normalized to 0/1 via the low bit.
void EmitLogicalXor(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    OperandHolder acc = dest[0];
    if (op2[0].IsMem() && dest[0].IsMem()) {
        acc = ctx.TempGPReg().cvt8();
    }
    MovGP(ctx, acc, op1[0]);
    auto& code = ctx.Code();
    code.xor_(acc.Op(), op2[0].Op());
    code.and_(acc.Op(), 1);
    MovGP(ctx, dest[0], acc);
}
// Boolean NOT: complement the operand, then mask to the low bit so a 0/1
// input yields 1/0.
void EmitLogicalNot(EmitContext& ctx, const Operands& dest, const Operands& op) {
MovGP(ctx, dest[0], op[0]);
ctx.Code().not_(dest[0].Op());
ctx.Code().and_(dest[0].Op(), 1);
}
} // namespace Shader::Backend::X64

View file

@ -0,0 +1,71 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/backend/asm_x64/x64_utils.h"
namespace Shader::Backend::X64 {
using namespace Xbyak;
using namespace Xbyak::util;
// dest = cond ? true_value : false_value for GP-register values.
// Implemented with a test/jz diamond rather than CMOV so it also works when
// the operands are memory or sub-32-bit.
void EmitSelectU1(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) {
Label false_label, end_label;
Reg tmp = cond[0].IsMem() ? ctx.TempGPReg().cvt8() : cond[0].Reg().cvt8();
MovGP(ctx, tmp, cond[0]);
ctx.Code().test(tmp, tmp);
ctx.Code().jz(false_label);
MovGP(ctx, dest[0], true_value[0]);
ctx.Code().jmp(end_label);
ctx.Code().L(false_label);
MovGP(ctx, dest[0], false_value[0]);
ctx.Code().L(end_label);
}
// All GP-sized selects share the U1 implementation; MovGP adapts to the
// operand widths recorded in the Operands.
void EmitSelectU8(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) {
EmitSelectU1(ctx, dest, cond, true_value, false_value);
}
void EmitSelectU16(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) {
EmitSelectU1(ctx, dest, cond, true_value, false_value);
}
void EmitSelectU32(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) {
EmitSelectU1(ctx, dest, cond, true_value, false_value);
}
void EmitSelectU64(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) {
EmitSelectU1(ctx, dest, cond, true_value, false_value);
}
// F16 values travel through GP moves here as well.
void EmitSelectF16(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) {
EmitSelectU1(ctx, dest, cond, true_value, false_value);
}
// Select for 32-bit floats: same test/jz diamond as EmitSelectU1 but the
// value moves go through MovFloat (XMM-aware).
void EmitSelectF32(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) {
Label false_label, end_label;
Reg tmp = cond[0].IsMem() ? ctx.TempGPReg().cvt8() : cond[0].Reg().cvt8();
MovGP(ctx, tmp, cond[0]);
ctx.Code().test(tmp, tmp);
ctx.Code().jz(false_label);
MovFloat(ctx, dest[0], true_value[0]);
ctx.Code().jmp(end_label);
ctx.Code().L(false_label);
MovFloat(ctx, dest[0], false_value[0]);
ctx.Code().L(end_label);
}
// Select for 64-bit floats; value moves go through MovDouble.
void EmitSelectF64(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) {
Label false_label, end_label;
Reg tmp = cond[0].IsMem() ? ctx.TempGPReg().cvt8() : cond[0].Reg().cvt8();
MovGP(ctx, tmp, cond[0]);
ctx.Code().test(tmp, tmp);
ctx.Code().jz(false_label);
MovDouble(ctx, dest[0], true_value[0]);
ctx.Code().jmp(end_label);
ctx.Code().L(false_label);
MovDouble(ctx, dest[0], false_value[0]);
ctx.Code().L(end_label);
}
} // namespace Shader::Backend::X64

View file

@ -0,0 +1,39 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/exception.h"
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
namespace Shader::Backend::X64 {
// Shared-memory loads are stubbed on the x64 backend: the destination is
// zeroed (mov 0 for memory, self-XOR for registers) and a warning is logged.
void EmitLoadSharedU32(EmitContext& ctx, const Operands& dest, const Operands& offset) {
LOG_WARNING(Render_Recompiler, "EmitLoadSharedU32 stubbed, setting to 0");
if (dest[0].IsMem()) {
ctx.Code().mov(dest[0].Mem(), 0);
} else {
ctx.Code().xor_(dest[0].Reg(), dest[0].Reg());
}
}
// Stubbed 64-bit shared-memory load: zeroes both 32-bit halves of the
// destination (mov 0 for memory operands, self-XOR for registers).
void EmitLoadSharedU64(EmitContext& ctx, const Operands& dest, const Operands& offset) {
    LOG_WARNING(Render_Recompiler, "EmitLoadSharedU64 stubbed, setting to 0");
    for (size_t i = 0; i < 2; ++i) {
        if (dest[i].IsMem()) {
            ctx.Code().mov(dest[i].Mem(), 0);
        } else {
            ctx.Code().xor_(dest[i].Reg(), dest[i].Reg());
        }
    }
}
// Shared-memory stores are not yet implemented on the x64 backend.
void EmitWriteSharedU32(EmitContext& ctx) {
throw NotImplementedException("WriteSharedU32");
}
void EmitWriteSharedU64(EmitContext& ctx) {
throw NotImplementedException("WriteSharedU64");
}
}

View file

@ -0,0 +1,55 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/backend/asm_x64/x64_utils.h"
namespace Shader::Backend::X64 {
using namespace Xbyak;
using namespace Xbyak::util;
// Emits the function prologue (delegates to the context, which reserves the
// instruction spill area).
void EmitPrologue(EmitContext& ctx) {
ctx.Prologue();
}
// No-op on x64: depth-mode conversion is not performed by this backend.
void ConvertDepthMode(EmitContext& ctx) {
}
// No-op on x64: clip-space conversion is not performed by this backend.
void ConvertPositionToClipSpace(EmitContext& ctx) {
}
// Marks the end of the shader; actual epilogue code is emitted at the end
// label by the context.
void EmitEpilogue(EmitContext& ctx) {
ctx.SetEndFlag();
}
// Unconditional discard: terminate by jumping to the end of the function.
void EmitDiscard(EmitContext& ctx) {
ctx.SetEndFlag();
}
// Conditional discard: jumps to the function's end label when the condition
// byte is non-zero (test/jnz), skipping the remainder of the shader.
void EmitDiscardCond(EmitContext& ctx, const Operands& condition) {
Reg tmp = condition[0].IsMem() ? ctx.TempGPReg().cvt8() : condition[0].Reg().cvt8();
MovGP(ctx, tmp, condition[0]);
ctx.Code().test(tmp, tmp);
ctx.Code().jnz(ctx.EndLabel());
}
// Geometry-stage and debug ops are intentionally no-ops on the x64 backend.
void EmitEmitVertex(EmitContext& ctx) {
}
void EmitEmitPrimitive(EmitContext& ctx) {
}
void EmitEndPrimitive(EmitContext& ctx) {
}
void EmitDebugPrint(EmitContext& ctx) {
}
} // namespace Shader::Backend::X64

View file

@ -0,0 +1,28 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
namespace Shader::Backend::X64 {
// Undefined-value opcodes must never survive to x64 code generation; hitting
// one is a translation bug. The parameter is deliberately unnamed everywhere
// (U1 and U32 previously named an unused `ctx`, triggering unused-parameter
// warnings and breaking the file's convention).
void EmitUndefU1(EmitContext&) {
    UNREACHABLE_MSG("x64 Instruction");
}
void EmitUndefU8(EmitContext&) {
    UNREACHABLE_MSG("x64 Instruction");
}
void EmitUndefU16(EmitContext&) {
    UNREACHABLE_MSG("x64 Instruction");
}
void EmitUndefU32(EmitContext&) {
    UNREACHABLE_MSG("x64 Instruction");
}
void EmitUndefU64(EmitContext&) {
    UNREACHABLE_MSG("x64 Instruction");
}
} // namespace Shader::Backend::X64

View file

@ -0,0 +1,32 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
namespace Shader::Backend::X64 {
// Subgroup/wave operations are currently no-op stubs on the x64 backend.
void EmitWarpId(EmitContext& ctx) {
}
void EmitLaneId(EmitContext& ctx) {
}
void EmitQuadShuffle(EmitContext& ctx) {
}
void EmitReadFirstLane(EmitContext& ctx) {
}
void EmitReadLane(EmitContext& ctx) {
}
void EmitWriteLane(EmitContext& ctx) {
}
} // namespace Shader::Backend::X64

View file

@ -0,0 +1,372 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/backend/asm_x64/x64_utils.h"
using namespace Xbyak;
using namespace Xbyak::util;
namespace Shader::Backend::X64 {
// Creates a label for every IR block (jump targets for control flow) and runs
// linear-scan register allocation over the whole program up front.
EmitContext::EmitContext(const IR::Program& program_, Xbyak::CodeGenerator& code_)
: program(program_), code(code_) {
for (IR::Block* block : program.blocks) {
block_labels[block] = {};
}
AllocateRegisters();
}
// Hands out the next temporary general-purpose register. With `reserve` the
// register stays claimed until PopTempGPReg/ResetTempRegs; otherwise the same
// slot is reused by the next caller. temp_gp_regs is laid out as
// [scratch regs (0 .. num_scratch_gp_regs-1)][callee-saved temporaries]; the
// first time a callee-saved temporary is handed out it is pushed on the stack
// so Epilogue can restore it.
Reg64& EmitContext::TempGPReg(bool reserve) {
    ASSERT(temp_gp_reg_index < temp_gp_regs.size());
    const u64 idx = temp_gp_reg_index;
    if (reserve) {
        temp_gp_reg_index++;
    }
    Reg64& reg = temp_gp_regs[idx];
    // Fix: the previous `idx > num_scratch_gp_regs` comparison skipped the
    // first callee-saved temporary (idx == num_scratch_gp_regs), clobbering
    // it without saving; `>=` preserves every non-scratch register.
    if (idx >= num_scratch_gp_regs &&
        std::ranges::find(preserved_regs, reg) == preserved_regs.end()) {
        preserved_regs.push_back(reg);
        code.sub(rsp, 8);
        code.mov(ptr[rsp], reg);
    }
    return reg;
}
// XMM counterpart of TempGPReg: hands out the next temporary XMM register and
// spills callee-saved temporaries (16 bytes via movups) on first use so
// Epilogue can restore them.
Xmm& EmitContext::TempXmmReg(bool reserve) {
    ASSERT(temp_xmm_reg_index < temp_xmm_regs.size());
    const u64 idx = temp_xmm_reg_index;
    if (reserve) {
        temp_xmm_reg_index++;
    }
    Xmm& reg = temp_xmm_regs[idx];
    // Fix: `idx > num_scratch_xmm_regs` missed the first non-scratch XMM
    // register (same off-by-one as TempGPReg); use `>=`.
    if (idx >= num_scratch_xmm_regs &&
        std::ranges::find(preserved_regs, reg) == preserved_regs.end()) {
        preserved_regs.push_back(reg);
        code.sub(rsp, 16);
        code.movups(ptr[rsp], reg);
    }
    return reg;
}
// Releases the most recently reserved temporary GP register.
void EmitContext::PopTempGPReg() {
ASSERT(temp_gp_reg_index > 0);
temp_gp_reg_index--;
}
// Releases the most recently reserved temporary XMM register.
void EmitContext::PopTempXmmReg() {
ASSERT(temp_xmm_reg_index > 0);
temp_xmm_reg_index--;
}
// Releases all reserved temporaries; called between IR instructions.
void EmitContext::ResetTempRegs() {
temp_gp_reg_index = 0;
temp_xmm_reg_index = 0;
}
// Returns the operands the register allocator assigned to an instruction.
const Operands& EmitContext::Def(IR::Inst* inst) {
return inst_to_operands.at(inst);
}
// Materializes an IR value as operands. Non-immediates resolve to the
// operands assigned by register allocation; immediates are loaded into
// temporary registers sized for their type (floats are staged through a GP
// register, then moved into an XMM register).
Operands EmitContext::Def(const IR::Value& value) {
    if (!value.IsImmediate()) {
        return Def(value.InstRecursive());
    }
    Operands operands;
    // Non-reserved scratch register used to stage float bit patterns.
    Reg64& tmp = TempGPReg(false);
    switch (value.Type()) {
    case IR::Type::U1:
        operands.push_back(TempGPReg().cvt8());
        code.mov(operands.back().Reg(), value.U1());
        break;
    case IR::Type::U8:
        operands.push_back(TempGPReg().cvt8());
        code.mov(operands.back().Reg(), value.U8());
        break;
    case IR::Type::U16:
        operands.push_back(TempGPReg().cvt16());
        code.mov(operands.back().Reg(), value.U16());
        break;
    case IR::Type::U32:
        operands.push_back(TempGPReg().cvt32());
        code.mov(operands.back().Reg(), value.U32());
        break;
    case IR::Type::F32: {
        code.mov(tmp.cvt32(), std::bit_cast<u32>(value.F32()));
        Xmm& xmm32 = TempXmmReg();
        code.movd(xmm32, tmp.cvt32());
        operands.push_back(xmm32);
        break;
    }
    case IR::Type::U64:
        operands.push_back(TempGPReg());
        code.mov(operands.back().Reg(), value.U64());
        break;
    case IR::Type::F64: {
        code.mov(tmp, std::bit_cast<u64>(value.F64()));
        Xmm& xmm64 = TempXmmReg();
        code.movq(xmm64, tmp);
        operands.push_back(xmm64);
        break;
    }
    case IR::Type::ScalarReg:
        operands.push_back(TempGPReg().cvt32());
        code.mov(operands.back().Reg(), std::bit_cast<u32>(value.ScalarReg()));
        break;
    case IR::Type::VectorReg:
        // A vector-register *index* is a plain integer immediate, so it must
        // be staged in a GP register like ScalarReg: `mov` cannot take an XMM
        // destination and OperandHolder::Reg() asserts on XMM operands.
        // Previously this used TempXmmReg().cvt32(), which could never hold
        // the immediate.
        operands.push_back(TempGPReg().cvt32());
        code.mov(operands.back().Reg(), std::bit_cast<u32>(value.VectorReg()));
        break;
    case IR::Type::Attribute:
        operands.push_back(TempGPReg());
        code.mov(operands.back().Reg(), std::bit_cast<u64>(value.Attribute()));
        break;
    case IR::Type::Patch:
        operands.push_back(TempGPReg());
        code.mov(operands.back().Reg(), std::bit_cast<u64>(value.Patch()));
        break;
    default:
        UNREACHABLE_MSG("Unsupported value type: {}", IR::NameOf(value.Type()));
        break;
    }
    return operands;
}
// Returns the phi assignments that must be performed at the end of `block`,
// or std::nullopt when the block feeds no phi nodes.
std::optional<std::reference_wrapper<const EmitContext::PhiAssignmentList>>
EmitContext::PhiAssignments(IR::Block* block) const {
    if (const auto it = phi_assignments.find(block); it != phi_assignments.end()) {
        return std::cref(it->second);
    }
    return std::nullopt;
}
// Reserves the stack area for spilled instruction values and points r11 at
// its base; spill slots are later addressed as [r11 + offset].
void EmitContext::Prologue() {
if (inst_stack_space > 0) {
code.sub(rsp, inst_stack_space);
code.mov(r11, rsp);
}
}
// Restores the registers preserved by TempGPReg/TempXmmReg in reverse order
// of saving, then releases the instruction spill area from Prologue.
void EmitContext::Epilogue() {
    for (auto it = preserved_regs.rbegin(); it != preserved_regs.rend(); ++it) {
        Reg& reg = *it;
        // XMM temporaries were saved with a 16-byte movups; GP registers with
        // an 8-byte mov. The previous check used isMMX(), which is never true
        // for XMM registers, so they fell into the GP path (invalid 8-byte
        // restore and a mismatched stack adjustment).
        if (reg.isXMM()) {
            code.movups(reg.cvt128(), ptr[rsp]);
            code.add(rsp, 16);
        } else {
            code.mov(reg, ptr[rsp]);
            code.add(rsp, 8);
        }
    }
    preserved_regs.clear();
    if (inst_stack_space > 0) {
        code.add(rsp, inst_stack_space);
    }
}
// Handles register exhaustion for one interval component: either assigns the
// new interval a stack slot, or (if some active interval ends later) steals
// that interval's register and spills the victim to the stack instead.
void EmitContext::SpillInst(RegAllocContext& ctx, const ActiveInstInterval& interval,
ActiveIntervalList& active_intervals) {
// Allocates a fresh 8-byte spill slot addressed off r11 and returns an
// Address sized to the instruction's register width.
const auto get_operand = [&](IR::Inst* inst) -> Address {
size_t current_sp = inst_stack_space;
inst_stack_space += 8;
switch (GetRegBytesOfType(IR::Value(inst))) {
case 1:
return byte[r11 + current_sp];
case 2:
return word[r11 + current_sp];
case 4:
return dword[r11 + current_sp];
case 8:
return qword[r11 + current_sp];
default:
// NOTE(review): this call passes `inst` directly while the switch above
// wraps it in IR::Value(inst) — presumably an overload exists; verify.
UNREACHABLE_MSG("Unsupported register size: {}", GetRegBytesOfType(inst));
return ptr[r11 + current_sp];
}
};
// Standard linear-scan heuristic: spill the interval that ends furthest away.
auto spill_candidate = std::max_element(
active_intervals.begin(), active_intervals.end(),
[](const ActiveInstInterval& a, const ActiveInstInterval& b) { return a.end < b.end; });
if (spill_candidate == active_intervals.end() || spill_candidate->end <= interval.start) {
// No active interval outlives the new one: spill the new interval itself.
inst_to_operands[interval.inst][interval.component] = get_operand(interval.inst);
} else {
// Steal the candidate's register (resized to the new inst's width for GP
// registers) and move the candidate to the stack.
Operands& operands = inst_to_operands[spill_candidate->inst];
OperandHolder op = operands[spill_candidate->component];
inst_to_operands[interval.inst][interval.component] =
op.IsXmm() ? op : ResizeRegToType(op.Reg(), interval.inst);
operands[spill_candidate->component] = get_operand(spill_candidate->inst);
*spill_candidate = interval;
}
}
// Computes the live range [start, end] of an instruction as positions in the
// flattened instruction list, widening it for phi users and loop-carried uses.
void EmitContext::AdjustInstInterval(InstInterval& interval, const FlatInstList& insts) {
IR::Inst* inst = interval.inst;
// Start with a degenerate interval at the defining instruction.
size_t dist = std::distance(insts.begin(), std::find(insts.begin(), insts.end(), inst));
interval.start = dist;
interval.end = dist;
// Widens the interval to also cover the given instruction's position.
const auto enlarge_interval = [&](IR::Inst* inst) {
size_t position = std::distance(insts.begin(), std::find(insts.begin(), insts.end(), inst));
interval.start = std::min(interval.start, position);
interval.end = std::max(interval.end, position);
};
for (const auto& use : inst->Uses()) {
IR::Inst* target_inst = use.user;
if (use.user->GetOpcode() == IR::Opcode::Phi) {
// We assign the value at the end of the phi block
target_inst = &use.user->PhiBlock(use.operand)->back();
}
// If the user is in a loop and the instruction is not, we need to extend the interval
// to the end of the loop
u32 target_depth = inst->GetParent()->CondData().depth;
const auto* cond_data = &target_inst->GetParent()->CondData();
// Walk up the control-flow nesting from the user towards the definition's
// depth, remembering the outermost loop crossed on the way.
const IR::AbstractSyntaxNode* target_loop = nullptr;
while (cond_data && cond_data->depth > target_depth) {
if (cond_data->asl_node->type == IR::AbstractSyntaxNode::Type::Loop) {
target_loop = cond_data->asl_node;
}
cond_data = cond_data->parent;
}
if (target_loop) {
// Keep the value live until the loop's continue block terminator.
IR::Block* cont_block = target_loop->data.loop.continue_block;
target_inst = &cont_block->back();
ASSERT(target_inst->GetOpcode() == IR::Opcode::ConditionRef);
}
enlarge_interval(target_inst);
}
if (inst->GetOpcode() == IR::Opcode::Phi) {
// A phi must be live at the end of every predecessor that feeds it, and
// the feeding values are recorded for assignment at those block ends.
for (size_t i = 0; i < inst->NumArgs(); i++) {
IR::Block* block = inst->PhiBlock(i);
enlarge_interval(&block->back());
phi_assignments[block].emplace_back(inst, inst->Arg(i));
}
// Extend to predecessors
// Phis in loop headers need to extend to the end of the loop
for (IR::Block* pred : inst->GetParent()->ImmPredecessors()) {
IR::Inst* last_inst = &pred->back();
if (last_inst->GetOpcode() == IR::Opcode::ConditionRef) {
enlarge_interval(last_inst);
}
}
}
}
// Register utilization:
// Instruction registers:
// General purpose registers: rcx, rdx, rsi, r8, r9, r10
// XMM registers: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6
//
// Value / temporary registers:
// General purpose registers: rax (scratch), rbx, r12, r13, r14, r15
// XMM registers: xmm7 (scratch), xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14,
// xmm15
//
// r11: Stack pointer for spilled instructions
// rdi: User data pointer
// rsp: Stack pointer
//
// If instruction registers are never used, will be used as temporary registers
// Linear-scan register allocation over the whole program: builds live
// intervals per instruction, assigns instruction registers (spilling to the
// r11-based stack area when exhausted), and finally builds the temporary
// register pools from whatever instruction registers were never used.
void EmitContext::AllocateRegisters() {
const std::array<Reg64, 6> initial_gp_inst_regs = {rcx, rdx, rsi, r8, r9, r10};
const std::array<Xmm, 7> initial_xmm_inst_regs = {xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6};
const std::array<Reg64, 6> initial_gp_temp_regs = {rax, rbx, r12, r13, r14, r15};
const std::array<Xmm, 9> initial_xmm_temp_regs = {xmm7, xmm8, xmm9, xmm10, xmm11,
xmm12, xmm13, xmm14, xmm15};
boost::container::small_vector<InstInterval, 64> intervals;
FlatInstList insts;
// We copy insts to the flat list for faster iteration
for (IR::Block* block : program.blocks) {
insts.reserve(insts.size() + block->size());
for (IR::Inst& inst : *block) {
insts.push_back(&inst);
}
}
// Build a live interval for every value-producing instruction.
for (IR::Inst* inst : insts) {
if (inst->GetOpcode() == IR::Opcode::ConditionRef || inst->Type() == IR::Type::Void) {
continue;
}
intervals.emplace_back(inst, 0, 0);
AdjustInstInterval(intervals.back(), insts);
}
// Linear scan requires intervals sorted by start position.
std::sort(intervals.begin(), intervals.end(),
[](const InstInterval& a, const InstInterval& b) { return a.start < b.start; });
RegAllocContext ctx;
ctx.free_gp_regs.insert(ctx.free_gp_regs.end(), initial_gp_inst_regs.begin(),
initial_gp_inst_regs.end());
ctx.free_xmm_regs.insert(ctx.free_xmm_regs.end(), initial_xmm_inst_regs.begin(),
initial_xmm_inst_regs.end());
// Tracks instruction registers that never get assigned; they are recycled
// into the temporary pools below.
boost::container::static_vector<Reg64, 6> unused_gp_inst_regs;
boost::container::static_vector<Xmm, 7> unused_xmm_inst_regs;
unused_gp_inst_regs.insert(unused_gp_inst_regs.end(), ctx.free_gp_regs.begin(),
ctx.free_gp_regs.end());
unused_xmm_inst_regs.insert(unused_xmm_inst_regs.end(), ctx.free_xmm_regs.begin(),
ctx.free_xmm_regs.end());
for (const InstInterval& interval : intervals) {
// Free old interval resources
for (auto it = ctx.active_gp_intervals.begin(); it != ctx.active_gp_intervals.end();) {
if (it->end < interval.start) {
Reg64 reg = inst_to_operands[it->inst][it->component].Reg().cvt64();
ctx.free_gp_regs.push_back(reg);
it = ctx.active_gp_intervals.erase(it);
} else {
++it;
}
}
for (auto it = ctx.active_xmm_intervals.begin(); it != ctx.active_xmm_intervals.end();) {
if (it->end < interval.start) {
Xmm reg = inst_to_operands[it->inst][it->component].Xmm();
ctx.free_xmm_regs.push_back(reg);
it = ctx.active_xmm_intervals.erase(it);
} else {
++it;
}
}
// Multi-component values (e.g. 64-bit pairs) get one operand per component.
u8 num_components = GetNumComponentsOfType(interval.inst);
bool is_floating = IsFloatingType(interval.inst);
auto& operands = inst_to_operands[interval.inst];
operands.resize(num_components);
if (is_floating) {
for (size_t i = 0; i < num_components; ++i) {
ActiveInstInterval active(interval, i);
if (!ctx.free_xmm_regs.empty()) {
Xmm& reg = ctx.free_xmm_regs.back();
ctx.free_xmm_regs.pop_back();
operands[active.component] = reg;
unused_xmm_inst_regs.erase(
std::remove(unused_xmm_inst_regs.begin(), unused_xmm_inst_regs.end(), reg),
unused_xmm_inst_regs.end());
ctx.active_xmm_intervals.push_back(active);
} else {
SpillInst(ctx, active, ctx.active_xmm_intervals);
}
}
} else {
for (size_t i = 0; i < num_components; ++i) {
ActiveInstInterval active(interval, i);
if (!ctx.free_gp_regs.empty()) {
Reg64& reg = ctx.free_gp_regs.back();
ctx.free_gp_regs.pop_back();
operands[active.component] = ResizeRegToType(reg, active.inst);
unused_gp_inst_regs.erase(
std::remove(unused_gp_inst_regs.begin(), unused_gp_inst_regs.end(), reg),
unused_gp_inst_regs.end());
ctx.active_gp_intervals.push_back(active);
} else {
SpillInst(ctx, active, ctx.active_gp_intervals);
}
}
}
}
// Temporary pools: never-used instruction registers first (caller-saved, no
// preservation needed), then the dedicated temporaries (rbx/r12-r15 and
// xmm8-xmm15 must be preserved once handed out; see TempGPReg/TempXmmReg).
temp_gp_regs.insert(temp_gp_regs.end(), unused_gp_inst_regs.begin(), unused_gp_inst_regs.end());
temp_xmm_regs.insert(temp_xmm_regs.end(), unused_xmm_inst_regs.begin(),
unused_xmm_inst_regs.end());
num_scratch_gp_regs = unused_gp_inst_regs.size() + 1; // rax is scratch
num_scratch_xmm_regs = unused_xmm_inst_regs.size() + 1; // xmm7 is scratch
temp_gp_regs.insert(temp_gp_regs.end(), initial_gp_temp_regs.begin(),
initial_gp_temp_regs.end());
temp_xmm_regs.insert(temp_xmm_regs.end(), initial_xmm_temp_regs.begin(),
initial_xmm_temp_regs.end());
}
} // namespace Shader::Backend::X64

View file

@ -0,0 +1,199 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <boost/container/flat_map.hpp>
#include <boost/container/small_vector.hpp>
#include <xbyak/xbyak.h>
#include <xbyak/xbyak_util.h>
#include "shader_recompiler/ir/program.h"
namespace Shader::Backend::X64 {
// Variant-like holder for a single Xbyak operand: a general purpose register,
// an XMM register, or a memory address, all inspectable generically through
// Op(). Implemented as a union; every member derives from Xbyak::Operand.
// NOTE(review): relies on Xbyak::Operand/Reg/Xmm/Address being layout-
// compatible and trivially copyable so the defaulted copy/move of the union
// is safe - confirm against the xbyak headers.
class OperandHolder {
public:
    OperandHolder() : op() {}
    OperandHolder(const OperandHolder&) = default;
    OperandHolder(OperandHolder&&) = default;
    OperandHolder& operator=(const OperandHolder&) = default;
    OperandHolder& operator=(OperandHolder&&) = default;
    // Implicit conversions from the concrete Xbyak operand kinds.
    OperandHolder(const Xbyak::Reg& reg_) : reg(reg_) {}
    OperandHolder(const Xbyak::Xmm& xmm_) : xmm(xmm_) {}
    OperandHolder(const Xbyak::Address& mem_) : mem(mem_) {}
    OperandHolder(const Xbyak::Operand& op_) : op(op_) {}
    // Generic view, valid whichever alternative is stored.
    [[nodiscard]] inline Xbyak::Operand& Op() {
        return op;
    }
    [[nodiscard]] inline const Xbyak::Operand& Op() const {
        return op;
    }
    // Typed accessors; each asserts the stored alternative matches.
    [[nodiscard]] inline Xbyak::Reg& Reg() {
        ASSERT(IsReg());
        return reg;
    }
    [[nodiscard]] inline const Xbyak::Reg& Reg() const {
        ASSERT(IsReg());
        return reg;
    }
    [[nodiscard]] inline Xbyak::Xmm& Xmm() {
        ASSERT(IsXmm());
        return xmm;
    }
    [[nodiscard]] inline const Xbyak::Xmm& Xmm() const {
        ASSERT(IsXmm());
        return xmm;
    }
    [[nodiscard]] inline Xbyak::Address& Mem() {
        ASSERT(IsMem());
        return mem;
    }
    [[nodiscard]] inline const Xbyak::Address& Mem() const {
        ASSERT(IsMem());
        return mem;
    }
    // Discriminators, answered by the shared Xbyak::Operand base.
    [[nodiscard]] inline bool IsReg() const {
        return op.isREG();
    }
    [[nodiscard]] inline bool IsXmm() const {
        return op.isXMM();
    }
    [[nodiscard]] inline bool IsMem() const {
        return op.isMEM();
    }

private:
    union {
        Xbyak::Operand op;
        Xbyak::Reg reg;
        Xbyak::Xmm xmm;
        Xbyak::Address mem;
    };
};
using Operands = boost::container::static_vector<OperandHolder, 4>;
// Emission context for the x64 backend: holds per-program state (operand
// assignments, deferred phi moves, block labels, temporary registers) used
// while lowering an IR program to machine code through Xbyak.
class EmitContext {
public:
    static constexpr size_t NumGPRegs = 16;
    static constexpr size_t NumXmmRegs = 16;
    // (phi instruction, incoming value) pairs to materialize when leaving a
    // predecessor block.
    using PhiAssignmentList = boost::container::small_vector<std::pair<IR::Inst*, IR::Value>, 4>;
    EmitContext(const IR::Program& program_, Xbyak::CodeGenerator& code_);
    [[nodiscard]] Xbyak::CodeGenerator& Code() const {
        return code;
    }
    [[nodiscard]] const IR::Program& Program() const {
        return program;
    }
    [[nodiscard]] Xbyak::Label& EndLabel() {
        return end_label;
    }
    // Label bound to the start of the given IR block.
    [[nodiscard]] Xbyak::Label& BlockLabel(IR::Block* block) {
        return block_labels.at(block);
    }
    void SetEndFlag() {
        end_flag = true;
    }
    // Reads and clears the end flag (used to defer the jump to end_label).
    [[nodiscard]] bool EndFlag() {
        bool flag = end_flag;
        end_flag = false;
        return flag;
    }
    // Temporary register stack. With reserve=true the register appears to
    // stay allocated until the matching PopTemp*Reg - TODO confirm against
    // the implementation.
    [[nodiscard]] Xbyak::Reg64& TempGPReg(bool reserve = true);
    [[nodiscard]] Xbyak::Xmm& TempXmmReg(bool reserve = true);
    void PopTempGPReg();
    void PopTempXmmReg();
    void ResetTempRegs();
    // Register holding the user data pointer (rdi).
    [[nodiscard]] const Xbyak::Reg64& UserData() const {return Xbyak::util::rdi;}
    // Operands assigned to an instruction's components by register allocation.
    [[nodiscard]] const Operands& Def(IR::Inst* inst);
    [[nodiscard]] Operands Def(const IR::Value& value);
    // Phi assignments that must be emitted when leaving `block`, if any.
    [[nodiscard]] std::optional<std::reference_wrapper<const EmitContext::PhiAssignmentList>>
    PhiAssignments(IR::Block* block) const;
    void Prologue();
    void Epilogue();

private:
    // Live range of one instruction, in flattened instruction indices.
    struct InstInterval {
        IR::Inst* inst;
        size_t start;
        size_t end;
    };
    // Live interval for a single component of a (possibly vector) value.
    struct ActiveInstInterval : InstInterval {
        size_t component;
        ActiveInstInterval(const InstInterval& interval, size_t component_)
            : InstInterval(interval), component(component_) {}
    };
    using ActiveIntervalList = boost::container::small_vector<ActiveInstInterval, 8>;
    // Working state for register allocation: free lists plus the intervals
    // currently occupying registers.
    struct RegAllocContext {
        boost::container::static_vector<Xbyak::Reg64, NumGPRegs> free_gp_regs;
        boost::container::static_vector<Xbyak::Xmm, NumXmmRegs> free_xmm_regs;
        ActiveIntervalList active_gp_intervals;
        ActiveIntervalList active_xmm_intervals;
    };
    using FlatInstList = boost::container::small_vector<IR::Inst*, 64>;
    const IR::Program& program;
    Xbyak::CodeGenerator& code;
    // Map of blocks to their phi assignments
    boost::container::small_flat_map<IR::Block*, PhiAssignmentList, 8> phi_assignments;
    // Map of instructions to their operands
    boost::container::small_flat_map<IR::Inst*, Operands, 64> inst_to_operands;
    // Space used for spilled instructions
    size_t inst_stack_space = 0;
    // Temporary register allocation
    boost::container::static_vector<Xbyak::Reg64, NumGPRegs> temp_gp_regs;
    boost::container::static_vector<Xbyak::Xmm, NumXmmRegs> temp_xmm_regs;
    size_t temp_gp_reg_index = 0;
    size_t temp_xmm_reg_index = 0;
    size_t num_scratch_gp_regs = 0;
    size_t num_scratch_xmm_regs = 0;
    // Preserved registers
    boost::container::static_vector<Xbyak::Reg, NumGPRegs + NumXmmRegs> preserved_regs;
    // Labels
    boost::container::small_flat_map<IR::Block*, Xbyak::Label, 8> block_labels;
    Xbyak::Label end_label;
    // End flag, used to defer jump to end label
    bool end_flag = false;
    void SpillInst(RegAllocContext& ctx, const ActiveInstInterval& interval,
                   ActiveIntervalList& active_intervals);
    void AdjustInstInterval(InstInterval& interval, const FlatInstList& insts);
    void AllocateRegisters();
};
} // namespace Shader::Backend::X64

View file

@ -0,0 +1,403 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/backend/asm_x64/x64_utils.h"
using namespace Xbyak;
using namespace Xbyak::util;
namespace Shader::Backend::X64 {
// Whether the value is kept in an XMM register. F16 deliberately does not
// count: it is stored in general purpose registers since no arithmetic is
// performed on it.
bool IsFloatingType(const IR::Value& value) {
    switch (value.Type()) {
    case IR::Type::F32:
    case IR::Type::F64:
        return true;
    default:
        return false;
    }
}
// Width in bytes of one component of `value` when held in a register.
size_t GetRegBytesOfType(const IR::Value& value) {
    const IR::Type type = value.Type();
    if (type == IR::Type::U1 || type == IR::Type::U8) {
        return 1;
    }
    if (type == IR::Type::U16 || type == IR::Type::F16 || type == IR::Type::F16x2 ||
        type == IR::Type::F16x3 || type == IR::Type::F16x4) {
        return 2;
    }
    if (type == IR::Type::U32 || type == IR::Type::U32x2 || type == IR::Type::U32x3 ||
        type == IR::Type::U32x4 || type == IR::Type::F32 || type == IR::Type::F32x2 ||
        type == IR::Type::F32x3 || type == IR::Type::F32x4 || type == IR::Type::ScalarReg ||
        type == IR::Type::VectorReg) {
        return 4;
    }
    if (type == IR::Type::U64 || type == IR::Type::F64 || type == IR::Type::F64x2 ||
        type == IR::Type::F64x3 || type == IR::Type::F64x4 || type == IR::Type::Attribute ||
        type == IR::Type::Patch) {
        return 8;
    }
    UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(value.Type()));
    return 0;
}
// Number of components of `value` (1 for scalars, 2-4 for vector types).
u8 GetNumComponentsOfType(const IR::Value& value) {
    const IR::Type type = value.Type();
    if (type == IR::Type::U1 || type == IR::Type::U8 || type == IR::Type::U16 ||
        type == IR::Type::F16 || type == IR::Type::U32 || type == IR::Type::F32 ||
        type == IR::Type::U64 || type == IR::Type::F64 || type == IR::Type::ScalarReg ||
        type == IR::Type::VectorReg || type == IR::Type::Attribute || type == IR::Type::Patch) {
        return 1;
    }
    if (type == IR::Type::U32x2 || type == IR::Type::F32x2 || type == IR::Type::F16x2 ||
        type == IR::Type::F64x2) {
        return 2;
    }
    if (type == IR::Type::U32x3 || type == IR::Type::F32x3 || type == IR::Type::F16x3 ||
        type == IR::Type::F64x3) {
        return 3;
    }
    if (type == IR::Type::U32x4 || type == IR::Type::F32x4 || type == IR::Type::F16x4 ||
        type == IR::Type::F64x4) {
        return 4;
    }
    UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(value.Type()));
    return 0;
}
// Returns a view of `reg` resized to the register width of `value`'s type.
Reg ResizeRegToType(const Reg& reg, const IR::Value& value) {
    ASSERT(reg.getKind() == Operand::Kind::REG);
    const size_t bytes = GetRegBytesOfType(value);
    if (bytes == 1) {
        return reg.cvt8();
    }
    if (bytes == 2) {
        return reg.cvt16();
    }
    if (bytes == 4) {
        return reg.cvt32();
    }
    if (bytes == 8) {
        return reg.cvt64();
    }
    UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(value.Type()));
    return reg;
}
// Moves a 32-bit float between XMM registers and/or memory.
void MovFloat(EmitContext& ctx, const OperandHolder& dst, const OperandHolder& src) {
    CodeGenerator& c = ctx.Code();
    if (src.Op() == dst.Op()) {
        return;
    }
    const bool src_mem = src.IsMem();
    const bool dst_mem = dst.IsMem();
    if (src_mem && dst_mem) {
        // Memory-to-memory moves are staged through a scratch GP register.
        Reg staging = ctx.TempGPReg(false).cvt32();
        c.mov(staging, src.Mem());
        c.mov(dst.Mem(), staging);
    } else if (dst.IsXmm() && src_mem) {
        c.movss(dst.Xmm(), src.Mem());
    } else if (dst_mem && src.IsXmm()) {
        c.movss(dst.Mem(), src.Xmm());
    } else if (dst.IsXmm() && src.IsXmm()) {
        c.movaps(dst.Xmm(), src.Xmm());
    } else {
        UNREACHABLE_MSG("Unsupported mov float {} {}", src.Op().toString(), dst.Op().toString());
    }
}
// Moves a 64-bit float between XMM registers and/or memory.
void MovDouble(EmitContext& ctx, const OperandHolder& dst, const OperandHolder& src) {
    CodeGenerator& c = ctx.Code();
    if (src.Op() == dst.Op()) {
        return;
    }
    const bool src_mem = src.IsMem();
    const bool dst_mem = dst.IsMem();
    if (src_mem && dst_mem) {
        // Memory-to-memory moves are staged through a scratch GP register.
        const Reg64& staging = ctx.TempGPReg(false);
        c.mov(staging, src.Mem());
        c.mov(dst.Mem(), staging);
    } else if (dst.IsXmm() && src_mem) {
        c.movsd(dst.Xmm(), src.Mem());
    } else if (dst_mem && src.IsXmm()) {
        c.movsd(dst.Mem(), src.Xmm());
    } else if (dst.IsXmm() && src.IsXmm()) {
        c.movapd(dst.Xmm(), src.Xmm());
    } else {
        UNREACHABLE_MSG("Unsupported mov double {} {}", src.Op().toString(), dst.Op().toString());
    }
}
// Moves general purpose data between registers and/or memory, zero-extending
// or narrowing as needed so the store matches the destination width.
void MovGP(EmitContext& ctx, const OperandHolder& dst, const OperandHolder& src) {
    CodeGenerator& c = ctx.Code();
    if (src.Op() == dst.Op()) {
        return;
    }
    const bool is_mem2mem = src.IsMem() && dst.IsMem();
    const u32 src_bit = src.Op().getBit();
    const u32 dst_bit = dst.Op().getBit();
    // Memory-to-memory moves are staged through a temporary GP register
    // sized to the destination.
    OperandHolder tmp = is_mem2mem ? ctx.TempGPReg(false).changeBit(dst_bit) : dst;
    if (src_bit < dst_bit) {
        // Widening move.
        if (!tmp.IsMem() && !src.Op().isBit(32)) {
            c.movzx(tmp.Reg(), src.Op());
        } else if (tmp.IsMem()) {
            // Destination is memory and the source must be a register here
            // (a memory source would have taken the staging path): clear the
            // full-width destination, then store the narrower source.
            Address addr = tmp.Mem();
            c.mov(addr, 0);
            addr.setBit(dst_bit);
            // NOTE(review): setBit(dst_bit) looks like it should be
            // setBit(src_bit) so the store width matches src.Reg() - confirm
            // against xbyak's operand size checking.
            c.mov(addr, src.Reg());
        } else {
            // 32-bit source into a wider register: a plain 32-bit mov
            // already zero-extends to 64 bits on x86-64.
            c.mov(tmp.Reg().cvt32(), src.Op());
        }
    } else if (src_bit > dst_bit) {
        // Narrowing move: shrink the source view to the destination width.
        OperandHolder src_tmp = src;
        src_tmp.Op().setBit(dst_bit);
        c.mov(tmp.Op(), src_tmp.Op());
    } else {
        c.mov(tmp.Op(), src.Op());
    }
    if (is_mem2mem) {
        // Flush the staged value to the real destination.
        c.mov(dst.Op(), tmp.Op());
    }
}
// Copies an IR value into `dst`. Non-immediate values are moved
// component-wise from the defining instruction's assigned operands;
// immediates are materialized directly, staging through a temporary GP
// register when the destination is memory.
void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src) {
    if (!src.IsImmediate()) {
        IR::Inst* src_inst = src.InstRecursive();
        const Operands& src_op = ctx.Def(src_inst);
        if (IsFloatingType(src)) {
            // GetRegBytesOfType returns the component size in BYTES (4 for
            // F32, 8 for F64); the previous 32/64 cases could never match and
            // every float move fell through to UNREACHABLE.
            switch (GetRegBytesOfType(src)) {
            case 4:
                for (size_t i = 0; i < src_op.size(); i++) {
                    MovFloat(ctx, dst[i], src_op[i]);
                }
                break;
            case 8:
                for (size_t i = 0; i < src_op.size(); i++) {
                    MovDouble(ctx, dst[i], src_op[i]);
                }
                break;
            default:
                UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(src.Type()));
                break;
            }
        } else {
            for (size_t i = 0; i < src_op.size(); i++) {
                MovGP(ctx, dst[i], src_op[i]);
            }
        }
    } else {
        CodeGenerator& c = ctx.Code();
        const bool is_mem = dst[0].IsMem();
        Reg64& tmp = ctx.TempGPReg(false);
        switch (src.Type()) {
        case IR::Type::U1:
            c.mov(is_mem ? tmp.cvt8() : dst[0].Reg(), src.U1());
            break;
        case IR::Type::U8:
            c.mov(is_mem ? tmp.cvt8() : dst[0].Reg(), src.U8());
            break;
        case IR::Type::U16:
            c.mov(is_mem ? tmp.cvt16() : dst[0].Reg(), src.U16());
            break;
        case IR::Type::U32:
            c.mov(is_mem ? tmp.cvt32() : dst[0].Reg(), src.U32());
            break;
        case IR::Type::F32:
            // Materialize the float's bit pattern, not its numeric value
            // (static_cast<u32>(1.5f) would yield 1, corrupting the payload).
            c.mov(tmp.cvt32(), std::bit_cast<u32>(src.F32()));
            if (!is_mem) {
                c.movd(dst[0].Xmm(), tmp.cvt32());
                return;
            }
            break;
        case IR::Type::U64:
            c.mov(is_mem ? tmp : dst[0].Reg(), src.U64());
            break;
        case IR::Type::F64:
            // Same bit-pattern requirement as the F32 case.
            c.mov(tmp, std::bit_cast<u64>(src.F64()));
            if (!is_mem) {
                c.movq(dst[0].Xmm(), tmp);
                return;
            }
            break;
        case IR::Type::ScalarReg:
            c.mov(is_mem ? tmp.cvt32() : dst[0].Reg(), static_cast<u32>(src.ScalarReg()));
            break;
        case IR::Type::VectorReg:
            c.mov(is_mem ? tmp.cvt32() : dst[0].Reg(), static_cast<u32>(src.VectorReg()));
            break;
        case IR::Type::Attribute:
            c.mov(is_mem ? tmp : dst[0].Reg(), std::bit_cast<u64>(src.Attribute()));
            break;
        case IR::Type::Patch:
            c.mov(is_mem ? tmp : dst[0].Reg(), std::bit_cast<u64>(src.Patch()));
            break;
        default:
            UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(src.Type()));
            break;
        }
        if (is_mem) {
            // NOTE(review): this stores the full 64-bit tmp regardless of the
            // immediate's width - confirm narrower stack slots tolerate it.
            c.mov(dst[0].Mem(), tmp);
        }
    }
}
// Emits an inline software conversion of a half-precision value (in `src`, a
// GP register or memory) to a single-precision float written to `dest`
// (memory or an XMM register). Handles subnormals, signed zero, infinity and
// NaN. Reserves and releases three GP temporaries.
void EmitInlineF16ToF32(EmitContext& ctx, const Operand& dest, const Operand& src) {
    CodeGenerator& c = ctx.Code();
    Label nonzero_exp, zero_mantissa, norm_loop, norm_done, normal, done;
    Reg sign = ctx.TempGPReg().cvt32();
    Reg exponent = ctx.TempGPReg().cvt32();
    Reg mantissa = ctx.TempGPReg().cvt32();
    c.movzx(mantissa, src);
    // Extract sign, exponent, and mantissa
    c.mov(sign, mantissa);
    c.and_(sign, 0x8000);
    c.shl(sign, 16); // Move the sign from bit 15 to bit 31
    c.mov(exponent, mantissa);
    c.and_(exponent, 0x7C00);
    c.shr(exponent, 10);
    c.and_(mantissa, 0x03FF);
    // Check for zero exponent and mantissa
    c.test(exponent, exponent);
    c.jnz(nonzero_exp);
    c.test(mantissa, mantissa);
    c.jz(zero_mantissa);
    // Normalize subnormal number
    c.mov(exponent, 1);
    c.L(norm_loop);
    c.test(mantissa, 0x400);
    c.jnz(norm_done);
    c.shl(mantissa, 1);
    c.dec(exponent);
    c.jmp(norm_loop);
    c.L(norm_done);
    c.and_(mantissa, 0x03FF); // Drop the now-implicit leading bit
    c.jmp(normal);
    // Zero mantissa and exponent: the result is a signed zero. The mantissa
    // register is known to be zero here, so copy the shifted sign in; the
    // previous and_(mantissa, sign) always produced +0 and lost the sign.
    c.L(zero_mantissa);
    c.mov(mantissa, sign);
    c.jmp(done);
    // Non-zero exponent
    c.L(nonzero_exp);
    c.cmp(exponent, 0x1F);
    c.jne(normal);
    // Infinity or NaN: all-ones exponent, mantissa payload preserved
    c.shl(mantissa, 13);
    c.or_(mantissa, sign);
    c.or_(mantissa, 0x7F800000);
    c.jmp(done);
    // Normal number: rebias the exponent (127 - 15 = 112) and widen
    c.L(normal);
    c.add(exponent, 112);
    c.shl(exponent, 23);
    c.shl(mantissa, 13);
    c.or_(mantissa, sign);
    c.or_(mantissa, exponent);
    c.L(done);
    // The assembled f32 bit pattern lives in `mantissa`
    if (dest.isMEM()) {
        c.mov(dest, mantissa);
    } else {
        c.movd(dest.getReg().cvt128(), mantissa);
    }
    ctx.PopTempGPReg();
    ctx.PopTempGPReg();
    ctx.PopTempGPReg();
}
// Emits an inline software conversion of a single-precision float (in `src`,
// memory or an XMM register) to a half-precision value written to `dest`
// (memory or a GP register). f32 subnormals are flushed to signed zero and
// out-of-range values saturate to signed infinity.
void EmitInlineF32ToF16(EmitContext& ctx, const Operand& dest, const Operand& src) {
    CodeGenerator& c = ctx.Code();
    Label zero_exp, underflow, overflow, done;
    Reg sign = ctx.TempGPReg().cvt32();
    Reg exponent = ctx.TempGPReg().cvt32();
    // When `dest` is a register the result is built directly in it; only a
    // memory destination needs a third temporary.
    Reg mantissa = dest.isMEM() ? ctx.TempGPReg().cvt32() : dest.getReg().cvt32();
    if (src.isMEM()) {
        c.mov(mantissa, src);
    } else {
        c.movd(mantissa, src.getReg().cvt128());
    }
    // Extract sign, exponent, and mantissa
    c.mov(exponent, mantissa);
    c.mov(sign, mantissa);
    c.and_(exponent, 0x7F800000);
    c.and_(mantissa, 0x007FFFFF);
    c.shr(exponent, 23);
    c.shl(mantissa, 3);
    c.shr(sign, 16);
    c.and_(sign, 0x8000); // Sign now sits at bit 15
    // Subnormal numbers will be zero
    c.test(exponent, exponent);
    c.jz(zero_exp);
    // Check for overflow and underflow after rebias (127 - 15 = 112)
    c.sub(exponent, 112);
    c.cmp(exponent, 0);
    c.jle(underflow);
    c.cmp(exponent, 0x1F);
    c.jge(overflow);
    // Normal number
    c.shl(exponent, 10);
    c.shr(mantissa, 13);
    c.or_(mantissa, exponent);
    c.or_(mantissa, sign);
    c.jmp(done);
    // Underflow: flush to zero
    // NOTE(review): this yields +0 even for negative inputs - confirm
    // dropping the sign here is intended.
    c.L(underflow);
    c.xor_(mantissa, mantissa);
    c.jmp(done);
    // Overflow: saturate to signed infinity
    c.L(overflow);
    c.mov(mantissa, 0x7C00);
    c.or_(mantissa, sign);
    c.jmp(done);
    // Zero or subnormal input: result is a signed zero. The mantissa register
    // still holds shifted mantissa bits here, so overwrite it with the sign;
    // the previous and_(mantissa, sign) leaked mantissa bit 12 into bit 15.
    c.L(zero_exp);
    c.mov(mantissa, sign);
    c.L(done);
    if (dest.isMEM()) {
        c.mov(dest, mantissa);
    } else {
        c.and_(mantissa, 0xFFFF);
    }
    ctx.PopTempGPReg();
    ctx.PopTempGPReg();
    if (dest.isMEM()) {
        // The third temporary is only reserved for memory destinations; the
        // unconditional third pop unbalanced the temp register stack when
        // `dest` was a register.
        ctx.PopTempGPReg();
    }
}
} // namespace Shader::Backend::X64

View file

@ -0,0 +1,40 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <xbyak/xbyak.h>
#include <xbyak/xbyak_util.h>
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/ir/type.h"
namespace Shader::Backend::X64 {
bool IsFloatingType(const IR::Value& value);
size_t GetRegBytesOfType(const IR::Value& value);
u8 GetNumComponentsOfType(const IR::Value& value);
Xbyak::Reg ResizeRegToType(const Xbyak::Reg& reg, const IR::Value& value);
void MovFloat(EmitContext& ctx, const OperandHolder& dst, const OperandHolder& src);
void MovDouble(EmitContext& ctx, const OperandHolder& dst, const OperandHolder& src);
void MovGP(EmitContext& ctx, const OperandHolder& dst, const OperandHolder& src);
void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src);
void EmitInlineF16ToF32(EmitContext& ctx, const Xbyak::Operand& dest, const Xbyak::Operand& src);
void EmitInlineF32ToF16(EmitContext& ctx, const Xbyak::Operand& dest, const Xbyak::Operand& src);
inline bool IsFloatingType(IR::Inst* inst) {
return IsFloatingType(IR::Value(inst));
}
inline size_t GetRegBytesOfType(IR::Inst* inst) {
return GetRegBytesOfType(IR::Value(inst));
}
inline u8 GetNumComponentsOfType(IR::Inst* inst) {
return GetNumComponentsOfType(IR::Value(inst));
}
inline Xbyak::Reg ResizeRegToType(const Xbyak::Reg& reg, IR::Inst* inst) {
return ResizeRegToType(reg, IR::Value(inst));
}
} // namespace Shader::Backend::X64

View file

@ -128,6 +128,10 @@ Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) {
return ud_reg;
}
void EmitSetUserData(EmitContext& ctx) {
UNREACHABLE_MSG("Unreachable instruction");
}
void EmitGetThreadBitScalarReg(EmitContext& ctx) {
UNREACHABLE_MSG("Unreachable instruction");
}
@ -163,13 +167,10 @@ void EmitGetGotoVariable(EmitContext&) {
using BufferAlias = EmitContext::BufferAlias;
Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) {
const u32 flatbuf_off_dw = inst->Flags<u32>();
const auto& srt_flatbuf = ctx.buffers.back();
ASSERT(srt_flatbuf.binding >= 0 && flatbuf_off_dw > 0 &&
srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
ASSERT(srt_flatbuf.binding >= 0 && srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
const Id ptr{
ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(flatbuf_off_dw))};
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.Def(inst->Arg(1)))};
return ctx.OpLoad(ctx.U32[1], ptr);
}

View file

@ -52,6 +52,7 @@ void EmitBarrier(EmitContext& ctx);
void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
void EmitDeviceMemoryBarrier(EmitContext& ctx);
Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg);
void EmitSetUserData(EmitContext& ctx);
void EmitGetThreadBitScalarReg(EmitContext& ctx);
void EmitSetThreadBitScalarReg(EmitContext& ctx);
void EmitGetScalarRegister(EmitContext& ctx);

View file

@ -39,21 +39,22 @@ void Translator::EmitScalarMemory(const GcnInst& inst) {
void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
const auto& smrd = inst.control.smrd;
const u32 dword_offset = [&] -> u32 {
const IR::U32 dword_offset = [&] -> IR::U32 {
if (smrd.imm) {
return smrd.offset;
return ir.Imm32(smrd.offset);
}
if (smrd.offset == SQ_SRC_LITERAL) {
return inst.src[1].code;
return ir.Imm32(inst.src[1].code);
}
UNREACHABLE();
return ir.ShiftRightLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), ir.Imm32(2));
}();
const IR::ScalarReg sbase{inst.src[0].code * 2};
const IR::Value base =
ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1));
IR::ScalarReg dst_reg{inst.dst[0].code};
for (u32 i = 0; i < num_dwords; i++) {
ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(dword_offset + i)));
const IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i));
ir.SetScalarReg(dst_reg++, ir.ReadConst(base, index));
}
}

View file

@ -255,8 +255,9 @@ struct Info {
std::memcpy(flattened_ud_buf.data(), user_data.data(), user_data.size_bytes());
// Run the JIT program to walk the SRT and write the leaves to a flat buffer
if (srt_info.walker_func) {
srt_info.walker_func(user_data.data(), flattened_ud_buf.data());
srt_info.walker_func(flattened_ud_buf.data());
}
}
void ReadTessConstantBuffer(TessellationDataConstantBuffer& tess_constants) const {

View file

@ -0,0 +1,44 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "abstract_syntax_list.h"
namespace Shader::IR {
// Renders one abstract syntax list node as a human-readable string, using
// the index maps for stable identifiers ($N for blocks, %N for instructions).
std::string DumpASLNode(const AbstractSyntaxNode& node,
                        const std::map<const Block*, size_t>& block_to_index,
                        const std::map<const Inst*, size_t>& inst_to_index) {
    switch (node.type) {
    case AbstractSyntaxNode::Type::Block:
        return fmt::format("Block: ${}", block_to_index.at(node.data.block));
    case AbstractSyntaxNode::Type::If:
        return fmt::format("If: cond = %{}, body = ${}, merge = ${}",
                           inst_to_index.at(node.data.if_node.cond.Inst()),
                           block_to_index.at(node.data.if_node.body),
                           block_to_index.at(node.data.if_node.merge));
    case AbstractSyntaxNode::Type::EndIf:
        return fmt::format("EndIf: merge = ${}", block_to_index.at(node.data.end_if.merge));
    case AbstractSyntaxNode::Type::Loop:
        return fmt::format("Loop: body = ${}, continue = ${}, merge = ${}",
                           block_to_index.at(node.data.loop.body),
                           block_to_index.at(node.data.loop.continue_block),
                           block_to_index.at(node.data.loop.merge));
    case AbstractSyntaxNode::Type::Repeat:
        return fmt::format("Repeat: cond = %{}, header = ${}, merge = ${}",
                           inst_to_index.at(node.data.repeat.cond.Inst()),
                           block_to_index.at(node.data.repeat.loop_header),
                           block_to_index.at(node.data.repeat.merge));
    case AbstractSyntaxNode::Type::Break:
        return fmt::format("Break: cond = %{}, merge = ${}, skip = ${}",
                           inst_to_index.at(node.data.break_node.cond.Inst()),
                           block_to_index.at(node.data.break_node.merge),
                           block_to_index.at(node.data.break_node.skip));
    case AbstractSyntaxNode::Type::Return:
        return "Return";
    case AbstractSyntaxNode::Type::Unreachable:
        return "Unreachable";
    } // Removed stray ';' after the switch (empty statement, -Wextra-semi)
    UNREACHABLE();
}
} // namespace Shader::IR

View file

@ -3,6 +3,7 @@
#pragma once
#include <map>
#include <vector>
#include "shader_recompiler/ir/value.h"
@ -53,4 +54,8 @@ struct AbstractSyntaxNode {
};
using AbstractSyntaxList = std::vector<AbstractSyntaxNode>;
std::string DumpASLNode(const AbstractSyntaxNode& node,
const std::map<const Block*, size_t>& block_to_index,
const std::map<const Inst*, size_t>& inst_to_index);
} // namespace Shader::IR

View file

@ -23,6 +23,12 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, const Inst& base
return instructions.insert(insertion_point, *inst);
}
// Creates a new argument-less instruction from the pool, parents it to this
// block, and inserts it before `insertion_point`.
Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, u32 flags) {
    Inst* const inst{inst_pool->Create(op, flags)};
    inst->SetParent(this);
    return instructions.insert(insertion_point, *inst);
}
Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
std::initializer_list<Value> args, u32 flags) {
Inst* const inst{inst_pool->Create(op, flags)};

View file

@ -11,6 +11,7 @@
#include "common/object_pool.h"
#include "common/types.h"
#include "shader_recompiler/ir/abstract_syntax_list.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/value.h"
@ -18,6 +19,12 @@ namespace Shader::IR {
class Block {
public:
struct ConditionalData {
u32 depth;
const ConditionalData* parent;
const AbstractSyntaxNode* asl_node;
};
using InstructionList = boost::intrusive::list<Inst>;
using size_type = InstructionList::size_type;
using iterator = InstructionList::iterator;
@ -40,6 +47,9 @@ public:
/// Prepends a copy of an instruction to this basic block before the insertion point.
iterator PrependNewInst(iterator insertion_point, const Inst& base_inst);
/// Prepends a new instruction to this basic block before the insertion point (without args).
iterator PrependNewInst(iterator insertion_point, Opcode op, u32 flags);
/// Prepends a new instruction to this basic block before the insertion point.
iterator PrependNewInst(iterator insertion_point, Opcode op,
std::initializer_list<Value> args = {}, u32 flags = 0);
@ -64,6 +74,24 @@ public:
[[nodiscard]] std::span<Block* const> ImmSuccessors() const noexcept {
return imm_successors;
}
// Returns if the block has a given immediate predecessor.
[[nodiscard]] bool HasImmPredecessor(const Block* block) const noexcept {
return std::ranges::find(imm_predecessors, block) != imm_predecessors.end();
}
// Returns if the block has a given immediate successor.
[[nodiscard]] bool HasImmSuccessor(const Block* block) const noexcept {
return std::ranges::find(imm_successors, block) != imm_successors.end();
}
// Set the conditional data for this block.
void SetConditionalData(const ConditionalData& data) {
cond_data = data;
}
// Get the conditional data for this block.
[[nodiscard]] const ConditionalData& CondData() const {
return cond_data;
}
/// Intrusively store the host definition of this instruction.
template <typename T>
@ -164,6 +192,9 @@ private:
/// Block immediate successors
std::vector<Block*> imm_successors;
// Conditional data
Block::ConditionalData cond_data;
/// Intrusively store if the block is sealed in the SSA pass.
bool is_ssa_sealed{false};

View file

@ -0,0 +1,94 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include "common/cartesian_invoke.h"
#include "common/func_traits.h"
#include "shader_recompiler/ir/compute_value/compute.h"
#include "shader_recompiler/ir/compute_value/do_bitcast.h"
#include "shader_recompiler/ir/compute_value/do_composite.h"
#include "shader_recompiler/ir/compute_value/do_convert.h"
#include "shader_recompiler/ir/compute_value/do_float_operations.h"
#include "shader_recompiler/ir/compute_value/do_integer_operations.h"
#include "shader_recompiler/ir/compute_value/do_logical_operations.h"
#include "shader_recompiler/ir/compute_value/do_nop_functions.h"
#include "shader_recompiler/ir/compute_value/do_packing.h"
namespace Shader::IR::ComputeValue {
// Unpacks the per-argument candidate lists into a call to `func` (the output
// list first, then one list per operand), using the index sequence to expand
// the array.
template <auto func, size_t... I>
static void Invoke(ImmValueList& inst_values, const std::array<ImmValueList, sizeof...(I)>& args,
                   std::index_sequence<I...>) {
    func(inst_values, args[I]...);
}
// Computes the candidate values of each of `inst`'s arguments, then forwards
// them to the Do* handler `func`.
template <auto func>
static void Invoke(Inst* inst, ImmValueList& inst_values, Cache& cache) {
    using Traits = Common::FuncTraits<decltype(func)>;
    // The handler's first parameter is the output list, hence the -1.
    constexpr size_t num_args = Traits::NUM_ARGS - 1;
    ASSERT(inst->NumArgs() >= num_args);
    std::array<ImmValueList, num_args> args{};
    for (size_t i = 0; i < num_args; ++i) {
        Compute(inst->Arg(i), args[i], cache);
    }
    Invoke<func>(inst_values, args, std::make_index_sequence<num_args>{});
}
// Dispatches `inst` to its Do<Opcode> handler; the case table is generated
// from opcodes.inc so every opcode gets a handler at compile time.
static void DoInstructionOperation(Inst* inst, ImmValueList& inst_values, Cache& cache) {
    switch (inst->GetOpcode()) {
#define OPCODE(name, result_type, ...)                                                             \
    case Opcode::name:                                                                             \
        Invoke<&Do##name>(inst, inst_values, cache);                                               \
        break;
#include "shader_recompiler/ir/opcodes.inc"
#undef OPCODE
    default:
        UNREACHABLE_MSG("Invalid opcode: {}", inst->GetOpcode());
    }
}
// True when `inst` is one of the typed Select opcodes.
static bool IsSelectInst(Inst* inst) {
    const Opcode op = inst->GetOpcode();
    return op == Opcode::SelectU1 || op == Opcode::SelectU8 || op == Opcode::SelectU16 ||
           op == Opcode::SelectU32 || op == Opcode::SelectU64 || op == Opcode::SelectF32 ||
           op == Opcode::SelectF64;
}
// Collects into `values` every immediate value that `value` may statically
// take. Results are memoized per instruction in `cache`; the cache entry is
// inserted BEFORE recursing, which also terminates cycles through phis (a
// phi reached again mid-computation just reads its partial entry).
void Compute(const Value& value, ImmValueList& values, Cache& cache) {
    Value resolved = value.Resolve();
    if (ImmValue::IsSupportedValue(resolved)) {
        values.insert(ImmValue(resolved));
        return;
    }
    if (resolved.IsImmediate()) {
        // Immediate of an unsupported type: nothing can be computed.
        return;
    }
    Inst* inst = resolved.InstRecursive();
    auto it = cache.find(inst);
    if (it != cache.end()) {
        values.insert(it->second.begin(), it->second.end());
        return;
    }
    // Reference stays valid across the recursive calls below:
    // std::unordered_map never relocates its nodes.
    auto& inst_values = cache.emplace(inst, ImmValueList{}).first->second;
    if (inst->GetOpcode() == Opcode::Phi) {
        // A phi may take the value of any of its incoming operands.
        for (size_t i = 0; i < inst->NumArgs(); ++i) {
            Compute(inst->Arg(i), inst_values, cache);
        }
    } else if (IsSelectInst(inst)) {
        // A select yields either the true (arg 1) or false (arg 2) operand.
        Compute(inst->Arg(1), inst_values, cache);
        Compute(inst->Arg(2), inst_values, cache);
    } else {
        DoInstructionOperation(inst, inst_values, cache);
    }
    values.insert(inst_values.begin(), inst_values.end());
}
} // namespace Shader::IR::ComputeValue

View file

@ -0,0 +1,22 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <unordered_map>
#include <unordered_set>
#include "shader_recompiler/ir/compute_value/imm_value.h"
#include "shader_recompiler/ir/value.h"
// Given a value (immediate or not), compute all the possible immediate values
// that it can represent. If the value cannot be computed statically, the list
// will be empty.
namespace Shader::IR::ComputeValue {
using ImmValueList = std::unordered_set<ImmValue>;
using Cache = std::unordered_map<Inst*, ImmValueList>;
void Compute(const Value& value, ImmValueList& values, Cache& cache);
} // namespace Shader::IR::ComputeValue

View file

@ -0,0 +1,32 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/ir/compute_value/do_bitcast.h"
namespace Shader::IR::ComputeValue {
// Bitcast handlers: candidates are forwarded unchanged since reinterpreting
// between integer and float of the same width does not alter the bits.
// NOTE(review): assumes ImmValue stores raw, type-agnostic bits - confirm
// against imm_value.h.
void DoBitCastU16F16(ImmValueList& inst_values, const ImmValueList& src_values) {
    inst_values.insert(src_values.begin(), src_values.end());
}
void DoBitCastU32F32(ImmValueList& inst_values, const ImmValueList& src_values) {
    inst_values.insert(src_values.begin(), src_values.end());
}
void DoBitCastU64F64(ImmValueList& inst_values, const ImmValueList& src_values) {
    inst_values.insert(src_values.begin(), src_values.end());
}
void DoBitCastF16U16(ImmValueList& inst_values, const ImmValueList& src_values) {
    inst_values.insert(src_values.begin(), src_values.end());
}
void DoBitCastF32U32(ImmValueList& inst_values, const ImmValueList& src_values) {
    inst_values.insert(src_values.begin(), src_values.end());
}
void DoBitCastF64U64(ImmValueList& inst_values, const ImmValueList& src_values) {
    inst_values.insert(src_values.begin(), src_values.end());
}
} // namespace Shader::IR::ComputeValue

View file

@ -0,0 +1,17 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "shader_recompiler/ir/compute_value/compute.h"
namespace Shader::IR::ComputeValue {
void DoBitCastU16F16(ImmValueList& inst_values, const ImmValueList& src_values);
void DoBitCastU32F32(ImmValueList& inst_values, const ImmValueList& src_values);
void DoBitCastU64F64(ImmValueList& inst_values, const ImmValueList& src_values);
void DoBitCastF16U16(ImmValueList& inst_values, const ImmValueList& src_values);
void DoBitCastF32U32(ImmValueList& inst_values, const ImmValueList& src_values);
void DoBitCastF64U64(ImmValueList& inst_values, const ImmValueList& src_values);
} // namespace Shader::IR::ComputeValue

View file

@ -0,0 +1,330 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/cartesian_invoke.h"
#include "shader_recompiler/ir/compute_value/do_composite.h"
namespace Shader::IR::ComputeValue {
// Forms the cartesian product of the candidate component values and packs
// each combination into a 2-component composite ImmValue.
static void CommonCompositeConstruct(ImmValueList& inst_values, const ImmValueList& arg0,
                                     const ImmValueList& arg1) {
    const auto op = [](const ImmValue& a, const ImmValue& b) { return ImmValue(a, b); };
    Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.begin()), arg0, arg1);
}
// 3-component variant.
static void CommonCompositeConstruct(ImmValueList& inst_values, const ImmValueList& arg0,
                                     const ImmValueList& arg1, const ImmValueList& arg2) {
    const auto op = [](const ImmValue& a, const ImmValue& b, const ImmValue& c) {
        return ImmValue(a, b, c);
    };
    Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.begin()), arg0, arg1,
                            arg2);
}
// 4-component variant.
static void CommonCompositeConstruct(ImmValueList& inst_values, const ImmValueList& arg0,
                                     const ImmValueList& arg1, const ImmValueList& arg2,
                                     const ImmValueList& arg3) {
    const auto op = [](const ImmValue& a, const ImmValue& b, const ImmValue& c, const ImmValue& d) {
        return ImmValue(a, b, c, d);
    };
    Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.begin()), arg0, arg1,
                            arg2, arg3);
}
void DoCompositeConstructU32x2(ImmValueList& inst_values, const ImmValueList& arg0,
const ImmValueList& arg1) {
CommonCompositeConstruct(inst_values, arg0, arg1);
}
void DoCompositeConstructU32x3(ImmValueList& inst_values, const ImmValueList& arg0,
const ImmValueList& arg1, const ImmValueList& arg2) {
CommonCompositeConstruct(inst_values, arg0, arg1, arg2);
}
void DoCompositeConstructU32x4(ImmValueList& inst_values, const ImmValueList& arg0,
const ImmValueList& arg1, const ImmValueList& arg2,
const ImmValueList& arg3) {
CommonCompositeConstruct(inst_values, arg0, arg1, arg2, arg3);
}
void DoCompositeConstructU32x2x2(ImmValueList& inst_values, const ImmValueList& arg0,
const ImmValueList& arg1) {
Common::CartesianInvoke(ImmValue::CompositeFrom2x2,
std::insert_iterator(inst_values, inst_values.begin()), arg0, arg1);
}
void DoCompositeExtractU32x2(ImmValueList& inst_values, const ImmValueList& vec,
const ImmValueList& idx) {
Common::CartesianInvoke(ImmValue::Extract,
std::insert_iterator(inst_values, inst_values.begin()), vec, idx);
}
void DoCompositeExtractU32x3(ImmValueList& inst_values, const ImmValueList& vec,
const ImmValueList& idx) {
Common::CartesianInvoke(ImmValue::Extract,
std::insert_iterator(inst_values, inst_values.begin()), vec, idx);
}
void DoCompositeExtractU32x4(ImmValueList& inst_values, const ImmValueList& vec,
const ImmValueList& idx) {
Common::CartesianInvoke(ImmValue::Extract,
std::insert_iterator(inst_values, inst_values.begin()), vec, idx);
}
void DoCompositeInsertU32x2(ImmValueList& inst_values, const ImmValueList& vec,
const ImmValueList& val, const ImmValueList& idx) {
Common::CartesianInvoke(ImmValue::Insert,
std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx);
}
void DoCompositeInsertU32x3(ImmValueList& inst_values, const ImmValueList& vec,
const ImmValueList& val, const ImmValueList& idx) {
Common::CartesianInvoke(ImmValue::Insert,
std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx);
}
void DoCompositeInsertU32x4(ImmValueList& inst_values, const ImmValueList& vec,
const ImmValueList& val, const ImmValueList& idx) {
Common::CartesianInvoke(ImmValue::Insert,
std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx);
}
// CompositeShuffle folding is not implemented for U32 at any width; reaching
// these handlers aborts loudly instead of silently producing no value.
void DoCompositeShuffleU32x2(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1) {
    UNREACHABLE_MSG("Unimplemented");
}
void DoCompositeShuffleU32x3(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2) {
    UNREACHABLE_MSG("Unimplemented");
}
void DoCompositeShuffleU32x4(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2,
                             const ImmValueList& idx3) {
    UNREACHABLE_MSG("Unimplemented");
}
// F16 composite handlers: construct, extract, insert and (unimplemented)
// shuffle over sets of possible immediate values.
void DoCompositeConstructF16x2(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1) {
    CommonCompositeConstruct(inst_values, arg0, arg1);
}
void DoCompositeConstructF16x3(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2) {
    CommonCompositeConstruct(inst_values, arg0, arg1, arg2);
}
void DoCompositeConstructF16x4(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2,
                               const ImmValueList& arg3) {
    CommonCompositeConstruct(inst_values, arg0, arg1, arg2, arg3);
}
// NOTE(review): F32x2x2 construction sits in the F16 section here; presumably
// ordering follows the opcode table rather than the element type — confirm.
void DoCompositeConstructF32x2x2(ImmValueList& inst_values, const ImmValueList& arg0,
                                 const ImmValueList& arg1) {
    Common::CartesianInvoke(ImmValue::CompositeFrom2x2,
                            std::insert_iterator(inst_values, inst_values.begin()), arg0, arg1);
}
void DoCompositeExtractF16x2(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Extract,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, idx);
}
void DoCompositeExtractF16x3(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Extract,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, idx);
}
void DoCompositeExtractF16x4(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Extract,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, idx);
}
void DoCompositeInsertF16x2(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Insert,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx);
}
void DoCompositeInsertF16x3(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Insert,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx);
}
void DoCompositeInsertF16x4(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Insert,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx);
}
// Shuffle folding is not implemented for F16.
void DoCompositeShuffleF16x2(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1) {
    UNREACHABLE_MSG("Unimplemented");
}
void DoCompositeShuffleF16x3(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2) {
    UNREACHABLE_MSG("Unimplemented");
}
void DoCompositeShuffleF16x4(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2,
                             const ImmValueList& idx3) {
    UNREACHABLE_MSG("Unimplemented");
}
// F32 composite handlers: construct, extract, insert and (unimplemented)
// shuffle over sets of possible immediate values.
void DoCompositeConstructF32x2(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1) {
    CommonCompositeConstruct(inst_values, arg0, arg1);
}
void DoCompositeConstructF32x3(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2) {
    CommonCompositeConstruct(inst_values, arg0, arg1, arg2);
}
void DoCompositeConstructF32x4(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2,
                               const ImmValueList& arg3) {
    CommonCompositeConstruct(inst_values, arg0, arg1, arg2, arg3);
}
void DoCompositeExtractF32x2(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Extract,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, idx);
}
void DoCompositeExtractF32x3(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Extract,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, idx);
}
void DoCompositeExtractF32x4(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Extract,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, idx);
}
void DoCompositeInsertF32x2(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Insert,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx);
}
void DoCompositeInsertF32x3(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Insert,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx);
}
void DoCompositeInsertF32x4(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Insert,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx);
}
// Shuffle folding is not implemented for F32.
void DoCompositeShuffleF32x2(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1) {
    UNREACHABLE_MSG("Unimplemented");
}
void DoCompositeShuffleF32x3(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2) {
    UNREACHABLE_MSG("Unimplemented");
}
void DoCompositeShuffleF32x4(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2,
                             const ImmValueList& idx3) {
    UNREACHABLE_MSG("Unimplemented");
}
// F64 composite handlers: construct, extract, insert and (unimplemented)
// shuffle over sets of possible immediate values.
void DoCompositeConstructF64x2(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1) {
    CommonCompositeConstruct(inst_values, arg0, arg1);
}
void DoCompositeConstructF64x3(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2) {
    CommonCompositeConstruct(inst_values, arg0, arg1, arg2);
}
void DoCompositeConstructF64x4(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2,
                               const ImmValueList& arg3) {
    CommonCompositeConstruct(inst_values, arg0, arg1, arg2, arg3);
}
void DoCompositeExtractF64x2(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Extract,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, idx);
}
void DoCompositeExtractF64x3(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Extract,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, idx);
}
void DoCompositeExtractF64x4(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Extract,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, idx);
}
void DoCompositeInsertF64x2(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Insert,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx);
}
void DoCompositeInsertF64x3(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Insert,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx);
}
void DoCompositeInsertF64x4(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Insert,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx);
}
// Shuffle folding is not implemented for F64.
void DoCompositeShuffleF64x2(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1) {
    UNREACHABLE_MSG("Unimplemented");
}
void DoCompositeShuffleF64x3(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2) {
    UNREACHABLE_MSG("Unimplemented");
}
void DoCompositeShuffleF64x4(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2,
                             const ImmValueList& idx3) {
    UNREACHABLE_MSG("Unimplemented");
}
} // namespace Shader::IR::ComputeValue

View file

@ -0,0 +1,134 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "shader_recompiler/ir/compute_value/compute.h"
namespace Shader::IR::ComputeValue {
// Constant-folding handlers for composite IR opcodes. Each handler receives
// the set of possible immediate values for every operand and appends all
// resulting values to inst_values. Shuffle variants are currently
// unimplemented in the corresponding .cpp.

// --- U32 composites ---
void DoCompositeConstructU32x2(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1);
void DoCompositeConstructU32x3(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2);
void DoCompositeConstructU32x4(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2,
                               const ImmValueList& arg3);
void DoCompositeConstructU32x2x2(ImmValueList& inst_values, const ImmValueList& arg0,
                                 const ImmValueList& arg1);
void DoCompositeExtractU32x2(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx);
void DoCompositeExtractU32x3(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx);
void DoCompositeExtractU32x4(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx);
void DoCompositeInsertU32x2(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx);
void DoCompositeInsertU32x3(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx);
void DoCompositeInsertU32x4(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx);
void DoCompositeShuffleU32x2(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1);
void DoCompositeShuffleU32x3(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2);
void DoCompositeShuffleU32x4(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2,
                             const ImmValueList& idx3);
// --- F16 composites ---
void DoCompositeConstructF16x2(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1);
void DoCompositeConstructF16x3(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2);
void DoCompositeConstructF16x4(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2,
                               const ImmValueList& arg3);
void DoCompositeExtractF16x2(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx);
void DoCompositeExtractF16x3(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx);
void DoCompositeExtractF16x4(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx);
void DoCompositeInsertF16x2(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx);
void DoCompositeInsertF16x3(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx);
void DoCompositeInsertF16x4(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx);
void DoCompositeShuffleF16x2(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1);
void DoCompositeShuffleF16x3(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2);
void DoCompositeShuffleF16x4(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2,
                             const ImmValueList& idx3);
// --- F32 composites ---
void DoCompositeConstructF32x2(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1);
void DoCompositeConstructF32x3(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2);
void DoCompositeConstructF32x4(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2,
                               const ImmValueList& arg3);
void DoCompositeConstructF32x2x2(ImmValueList& inst_values, const ImmValueList& arg0,
                                 const ImmValueList& arg1);
void DoCompositeExtractF32x2(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx);
void DoCompositeExtractF32x3(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx);
void DoCompositeExtractF32x4(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx);
void DoCompositeInsertF32x2(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx);
void DoCompositeInsertF32x3(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx);
void DoCompositeInsertF32x4(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx);
void DoCompositeShuffleF32x2(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1);
void DoCompositeShuffleF32x3(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2);
void DoCompositeShuffleF32x4(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2,
                             const ImmValueList& idx3);
// --- F64 composites ---
void DoCompositeConstructF64x2(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1);
void DoCompositeConstructF64x3(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2);
void DoCompositeConstructF64x4(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2,
                               const ImmValueList& arg3);
void DoCompositeExtractF64x2(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx);
void DoCompositeExtractF64x3(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx);
void DoCompositeExtractF64x4(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx);
void DoCompositeInsertF64x2(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx);
void DoCompositeInsertF64x3(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx);
void DoCompositeInsertF64x4(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx);
void DoCompositeShuffleF64x2(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1);
void DoCompositeShuffleF64x3(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2);
void DoCompositeShuffleF64x4(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2,
                             const ImmValueList& idx3);
} // namespace Shader::IR::ComputeValue

View file

@ -0,0 +1,81 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/cartesian_invoke.h"
#include "shader_recompiler/ir/compute_value/do_convert.h"
namespace Shader::IR::ComputeValue {
// Constant-folding handlers for conversion opcodes, named DoConvert<Dst><Src>.
// Each folds ImmValue::Convert over every possible immediate value in args.
// NOTE(review): signed 32-bit results use Type::U32 with the signedness flag
// set to true — presumably S32 is stored as U32 + sign flag; confirm against
// ImmValue::Convert's template parameters.
void DoConvertS32F32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Convert<Type::U32, true, Type::F32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoConvertS32F64(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Convert<Type::U32, true, Type::F64, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoConvertU32F32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Convert<Type::U32, false, Type::F32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
// F16 conversions are stubbed out until ImmValue grows half-float support.
void DoConvertF16F32(ImmValueList& inst_values, const ImmValueList& args) {
    // Common::CartesianInvoke(ImmValue::Convert<Type::F16, true, Type::F32, true>,
    //                         std::insert_iterator(inst_values, inst_values.begin()), args);
    UNREACHABLE_MSG("F32 to F16 conversion is not implemented");
}
void DoConvertF32F16(ImmValueList& inst_values, const ImmValueList& args) {
    // Common::CartesianInvoke(ImmValue::Convert<Type::F32, true, Type::F16, true>,
    //                         std::insert_iterator(inst_values, inst_values.begin()), args);
    UNREACHABLE_MSG("F16 to F32 conversion is not implemented");
}
void DoConvertF32F64(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Convert<Type::F32, true, Type::F64, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoConvertF64F32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Convert<Type::F64, true, Type::F32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoConvertF32S32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Convert<Type::F32, true, Type::U32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoConvertF32U32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Convert<Type::F32, true, Type::U32, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoConvertF64S32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Convert<Type::F64, true, Type::U32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoConvertF64U32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Convert<Type::F64, true, Type::U32, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoConvertF32U16(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Convert<Type::F32, true, Type::U16, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoConvertU16U32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Convert<Type::U16, false, Type::U32, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoConvertU32U16(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Convert<Type::U32, false, Type::U16, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
} // namespace Shader::IR::ComputeValue

View file

@ -0,0 +1,25 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "shader_recompiler/ir/compute_value/compute.h"
namespace Shader::IR::ComputeValue {
// Constant-folding handlers for conversion opcodes (DoConvert<Dst><Src>).
// The F16<->F32 pair is declared but currently unimplemented in the .cpp.
void DoConvertS32F32(ImmValueList& inst_values, const ImmValueList& args);
void DoConvertS32F64(ImmValueList& inst_values, const ImmValueList& args);
void DoConvertU32F32(ImmValueList& inst_values, const ImmValueList& args);
void DoConvertF16F32(ImmValueList& inst_values, const ImmValueList& args);
void DoConvertF32F16(ImmValueList& inst_values, const ImmValueList& args);
void DoConvertF32F64(ImmValueList& inst_values, const ImmValueList& args);
void DoConvertF64F32(ImmValueList& inst_values, const ImmValueList& args);
void DoConvertF32S32(ImmValueList& inst_values, const ImmValueList& args);
void DoConvertF32U32(ImmValueList& inst_values, const ImmValueList& args);
void DoConvertF64S32(ImmValueList& inst_values, const ImmValueList& args);
void DoConvertF64U32(ImmValueList& inst_values, const ImmValueList& args);
void DoConvertF32U16(ImmValueList& inst_values, const ImmValueList& args);
void DoConvertU16U32(ImmValueList& inst_values, const ImmValueList& args);
void DoConvertU32U16(ImmValueList& inst_values, const ImmValueList& args);
} // namespace Shader::IR::ComputeValue

View file

@ -0,0 +1,278 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/cartesian_invoke.h"
#include "shader_recompiler/ir/compute_value/do_float_operations.h"
namespace Shader::IR::ComputeValue {
// Constant-folding handlers for floating-point IR opcodes. Each handler folds
// the corresponding ImmValue operation over every combination of the
// operands' possible immediate values and appends the results to inst_values.
void DoFPAbs32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Abs<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoFPAbs64(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Abs<Type::F64>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoFPAdd32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Add<Type::F32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}
void DoFPAdd64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Add<Type::F64, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}
void DoFPSub32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Sub<Type::F32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}
void DoFPFma32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
               const ImmValueList& args2) {
    Common::CartesianInvoke(ImmValue::Fma<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1,
                            args2);
}
void DoFPFma64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
               const ImmValueList& args2) {
    Common::CartesianInvoke(ImmValue::Fma<Type::F64>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1,
                            args2);
}
// F32 max with a per-combination "legacy" flag: when set, a NaN operand is
// bypassed and the other operand is returned instead of propagating NaN.
void DoFPMax32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
               const ImmValueList& args_legacy) {
    const auto& op = [](const ImmValue& a, const ImmValue& b, const ImmValue& legacy) {
        if (legacy.U1()) {
            if (ImmValue::IsNan<Type::F32>(a))
                return b;
            if (ImmValue::IsNan<Type::F32>(b))
                return a;
        }
        return ImmValue::Max<Type::F32, true>(a, b);
    };
    Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.begin()), args0,
                            args1, args_legacy);
}
void DoFPMax64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Max<Type::F64, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}
// F32 min with a per-combination "legacy" flag: when set, a NaN operand is
// bypassed and the other operand is returned instead of propagating NaN,
// mirroring DoFPMax32 above.
void DoFPMin32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
               const ImmValueList& args_legacy) {
    const auto& op = [](const ImmValue& a, const ImmValue& b, const ImmValue& legacy) {
        if (legacy.U1()) {
            // Bugfix: the NaN checks previously used Type::F64 even though the
            // operands and the Min fold below are F32 (cf. DoFPMax32).
            if (ImmValue::IsNan<Type::F32>(a))
                return b;
            if (ImmValue::IsNan<Type::F32>(b))
                return a;
        }
        return ImmValue::Min<Type::F32, true>(a, b);
    };
    Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.begin()), args0,
                            args1, args_legacy);
}
// F64 min and the F32 three-operand min/max/median folds, followed by
// multiply and divide for both widths.
void DoFPMin64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Min<Type::F64, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}
void DoFPMaxTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                  const ImmValueList& args2) {
    Common::CartesianInvoke(ImmValue::MaxTri<Type::F32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1,
                            args2);
}
void DoFPMinTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                  const ImmValueList& args2) {
    Common::CartesianInvoke(ImmValue::MinTri<Type::F32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1,
                            args2);
}
void DoFPMedTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                  const ImmValueList& args2) {
    Common::CartesianInvoke(ImmValue::MedTri<Type::F32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1,
                            args2);
}
void DoFPMul32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Mul<Type::F32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}
void DoFPMul64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Mul<Type::F64, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}
void DoFPDiv32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Div<Type::F32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}
void DoFPDiv64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Div<Type::F64, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}
// Unary floating-point folds: negate, reciprocal, reciprocal square root,
// plus the F32-only transcendental ops and the (unimplemented) saturates.
void DoFPNeg32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Neg<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoFPNeg64(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Neg<Type::F64>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoFPRecip32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Recip<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoFPRecip64(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Recip<Type::F64>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoFPRecipSqrt32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Rsqrt<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoFPRecipSqrt64(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Rsqrt<Type::F64>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
// No width suffix: this handler folds F32 sqrt only.
void DoFPSqrt(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Sqrt<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoFPSin(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Sin<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoFPExp2(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Exp2<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
// Scales each value by 2^exponent (ldexp semantics), F32 only.
void DoFPLdexp(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& exponents) {
    Common::CartesianInvoke(ImmValue::Ldexp<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args,
                            exponents);
}
void DoFPCos(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Cos<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoFPLog2(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Log2<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
// Saturate folding is not implemented yet; reaching these aborts loudly.
void DoFPSaturate32(ImmValueList& inst_values, const ImmValueList& args) {
    UNREACHABLE_MSG("FPSaturate32 not implemented");
}
void DoFPSaturate64(ImmValueList& inst_values, const ImmValueList& args) {
    UNREACHABLE_MSG("FPSaturate64 not implemented");
}
// Clamp and rounding-family folds for both widths, plus the (unimplemented)
// frexp significand/exponent handlers.
void DoFPClamp32(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& mins,
                 const ImmValueList& maxs) {
    Common::CartesianInvoke(ImmValue::Clamp<Type::F32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args, mins,
                            maxs);
}
void DoFPClamp64(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& mins,
                 const ImmValueList& maxs) {
    Common::CartesianInvoke(ImmValue::Clamp<Type::F64, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args, mins,
                            maxs);
}
void DoFPRoundEven32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Round<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoFPRoundEven64(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Round<Type::F64>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoFPFloor32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Floor<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoFPFloor64(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Floor<Type::F64>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoFPCeil32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Ceil<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoFPCeil64(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Ceil<Type::F64>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoFPTrunc32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Trunc<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoFPTrunc64(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Trunc<Type::F64>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoFPFract32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Fract<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
void DoFPFract64(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Fract<Type::F64>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}
// Frexp folding is not implemented; reaching these aborts loudly.
void DoFPFrexpSig32(ImmValueList& inst_values, const ImmValueList& args) {
    UNREACHABLE_MSG("FPFrexpSig32 not implemented");
}
void DoFPFrexpSig64(ImmValueList& inst_values, const ImmValueList& args) {
    UNREACHABLE_MSG("FPFrexpSig64 not implemented");
}
void DoFPFrexpExp32(ImmValueList& inst_values, const ImmValueList& args) {
    UNREACHABLE_MSG("FPFrexpExp32 not implemented");
}
void DoFPFrexpExp64(ImmValueList& inst_values, const ImmValueList& args) {
    UNREACHABLE_MSG("FPFrexpExp64 not implemented");
}
} // namespace Shader::IR::ComputeValue

View file

@ -0,0 +1,68 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include "shader_recompiler/ir/compute_value/compute.h"

namespace Shader::IR::ComputeValue {

// Constant-folding handlers for the floating-point IR opcodes. Each handler
// receives the set of possible values of every operand and appends the result
// of every operand combination to inst_values.

void DoFPAbs32(ImmValueList& inst_values, const ImmValueList& args);
void DoFPAbs64(ImmValueList& inst_values, const ImmValueList& args);
void DoFPAdd32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoFPAdd64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoFPSub32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoFPFma32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
               const ImmValueList& args2);
void DoFPFma64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
               const ImmValueList& args2);
// NOTE(review): the 32-bit min/max take an extra operand list (args_legacy);
// presumably it carries the GCN "legacy" NaN-handling flag — confirm against
// the dispatch in compute.cpp.
void DoFPMax32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
               const ImmValueList& args_legacy);
void DoFPMax64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoFPMin32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
               const ImmValueList& args_legacy);
void DoFPMin64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoFPMinTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                  const ImmValueList& args2);
void DoFPMaxTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                  const ImmValueList& args2);
void DoFPMedTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                  const ImmValueList& args2);
void DoFPMul32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoFPMul64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoFPDiv32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoFPDiv64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoFPNeg32(ImmValueList& inst_values, const ImmValueList& args);
void DoFPNeg64(ImmValueList& inst_values, const ImmValueList& args);
void DoFPRecip32(ImmValueList& inst_values, const ImmValueList& args);
void DoFPRecip64(ImmValueList& inst_values, const ImmValueList& args);
void DoFPRecipSqrt32(ImmValueList& inst_values, const ImmValueList& args);
void DoFPRecipSqrt64(ImmValueList& inst_values, const ImmValueList& args);
void DoFPSqrt(ImmValueList& inst_values, const ImmValueList& args);
void DoFPSin(ImmValueList& inst_values, const ImmValueList& args);
void DoFPExp2(ImmValueList& inst_values, const ImmValueList& args);
void DoFPLdexp(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& exponents);
void DoFPCos(ImmValueList& inst_values, const ImmValueList& args);
void DoFPLog2(ImmValueList& inst_values, const ImmValueList& args);
void DoFPSaturate32(ImmValueList& inst_values, const ImmValueList& args);
void DoFPSaturate64(ImmValueList& inst_values, const ImmValueList& args);
void DoFPClamp32(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& mins,
                 const ImmValueList& maxs);
void DoFPClamp64(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& mins,
                 const ImmValueList& maxs);
void DoFPRoundEven32(ImmValueList& inst_values, const ImmValueList& args);
void DoFPRoundEven64(ImmValueList& inst_values, const ImmValueList& args);
void DoFPFloor32(ImmValueList& inst_values, const ImmValueList& args);
void DoFPFloor64(ImmValueList& inst_values, const ImmValueList& args);
void DoFPCeil32(ImmValueList& inst_values, const ImmValueList& args);
void DoFPCeil64(ImmValueList& inst_values, const ImmValueList& args);
void DoFPTrunc32(ImmValueList& inst_values, const ImmValueList& args);
void DoFPTrunc64(ImmValueList& inst_values, const ImmValueList& args);
void DoFPFract32(ImmValueList& inst_values, const ImmValueList& args);
void DoFPFract64(ImmValueList& inst_values, const ImmValueList& args);
// Frexp handlers are declared but currently unimplemented (they abort).
void DoFPFrexpSig32(ImmValueList& inst_values, const ImmValueList& args);
void DoFPFrexpSig64(ImmValueList& inst_values, const ImmValueList& args);
void DoFPFrexpExp32(ImmValueList& inst_values, const ImmValueList& args);
void DoFPFrexpExp64(ImmValueList& inst_values, const ImmValueList& args);

} // namespace Shader::IR::ComputeValue

View file

@ -0,0 +1,272 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/cartesian_invoke.h"
#include "shader_recompiler/ir/compute_value/do_integer_operations.h"

namespace Shader::IR::ComputeValue {

// Constant-folding handlers for the integer IR opcodes. Each handler receives
// the set of possible values of every operand and appends the result of every
// operand combination (cartesian product) to inst_values. Signed variants use
// the unsigned storage type and select signedness via the second template
// argument of the ImmValue helper.

// --- Add / subtract ---

void DoIAdd32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Add<Type::U32, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoIAdd64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Add<Type::U64, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

// NOTE(review): "Cary" looks like a typo for "Carry". The name is kept because
// it must match the declaration and the dispatch call site — rename tree-wide
// if confirmed.
void DoIAddCary32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::AddCarry<Type::U32, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoISub32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Sub<Type::U32, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoISub64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Sub<Type::U64, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

// --- Multiply / divide / modulo ---

void DoIMul32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Mul<Type::U32, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoIMul64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Mul<Type::U64, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

// Extended (wide) multiplies are not folded yet; hitting them is a hard error.
void DoSMulExt(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    UNREACHABLE_MSG("SMulExt not implemented");
}

void DoUMulExt(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    UNREACHABLE_MSG("UMulExt not implemented");
}

void DoSDiv32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Div<Type::U32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoUDiv32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Div<Type::U32, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoSMod32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Mod<Type::U32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoUMod32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Mod<Type::U32, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

// --- Unary ---

void DoINeg32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Neg<Type::U32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoINeg64(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Neg<Type::U64>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoIAbs32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Abs<Type::U32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

// --- Shifts ---

void DoShiftLeftLogical32(ImmValueList& inst_values, const ImmValueList& args,
                          const ImmValueList& shift) {
    Common::CartesianInvoke(ImmValue::LShift<Type::U32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args, shift);
}

void DoShiftLeftLogical64(ImmValueList& inst_values, const ImmValueList& args,
                          const ImmValueList& shift) {
    Common::CartesianInvoke(ImmValue::LShift<Type::U64>,
                            std::insert_iterator(inst_values, inst_values.begin()), args, shift);
}

void DoShiftRightLogical32(ImmValueList& inst_values, const ImmValueList& args,
                           const ImmValueList& shift) {
    Common::CartesianInvoke(ImmValue::RShift<Type::U32, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args, shift);
}

void DoShiftRightLogical64(ImmValueList& inst_values, const ImmValueList& args,
                           const ImmValueList& shift) {
    Common::CartesianInvoke(ImmValue::RShift<Type::U64, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args, shift);
}

void DoShiftRightArithmetic32(ImmValueList& inst_values, const ImmValueList& args,
                              const ImmValueList& shift) {
    Common::CartesianInvoke(ImmValue::RShift<Type::U32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args, shift);
}

void DoShiftRightArithmetic64(ImmValueList& inst_values, const ImmValueList& args,
                              const ImmValueList& shift) {
    Common::CartesianInvoke(ImmValue::RShift<Type::U64, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args, shift);
}

// --- Bitwise ---

void DoBitwiseAnd32(ImmValueList& inst_values, const ImmValueList& args0,
                    const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::And<Type::U32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoBitwiseAnd64(ImmValueList& inst_values, const ImmValueList& args0,
                    const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::And<Type::U64>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoBitwiseOr32(ImmValueList& inst_values, const ImmValueList& args0,
                   const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Or<Type::U32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoBitwiseOr64(ImmValueList& inst_values, const ImmValueList& args0,
                   const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Or<Type::U64>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoBitwiseXor32(ImmValueList& inst_values, const ImmValueList& args0,
                    const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Xor<Type::U32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

// Bit-field / bit-scan opcodes are not folded yet; hitting them is a hard error.

void DoBitFieldInsert(ImmValueList& inst_values, const ImmValueList& arg,
                      const ImmValueList& insert, const ImmValueList& offset,
                      const ImmValueList& count) {
    UNREACHABLE_MSG("BitFieldInsert not implemented");
}

void DoBitFieldSExtract(ImmValueList& inst_values, const ImmValueList& arg,
                        const ImmValueList& offset, const ImmValueList& count) {
    UNREACHABLE_MSG("BitFieldSExtract not implemented");
}

void DoBitFieldUExtract(ImmValueList& inst_values, const ImmValueList& arg,
                        const ImmValueList& offset, const ImmValueList& count) {
    UNREACHABLE_MSG("BitFieldUExtract not implemented");
}

void DoBitReverse32(ImmValueList& inst_values, const ImmValueList& arg) {
    UNREACHABLE_MSG("BitReverse32 not implemented");
}

void DoBitCount32(ImmValueList& inst_values, const ImmValueList& arg) {
    UNREACHABLE_MSG("BitCount32 not implemented");
}

void DoBitCount64(ImmValueList& inst_values, const ImmValueList& arg) {
    UNREACHABLE_MSG("BitCount64 not implemented");
}

void DoBitwiseNot32(ImmValueList& inst_values, const ImmValueList& arg) {
    Common::CartesianInvoke(ImmValue::Not<Type::U32>,
                            std::insert_iterator(inst_values, inst_values.begin()), arg);
}

void DoFindSMsb32(ImmValueList& inst_values, const ImmValueList& arg) {
    UNREACHABLE_MSG("FindSMsb32 not implemented");
}

void DoFindUMsb32(ImmValueList& inst_values, const ImmValueList& arg) {
    UNREACHABLE_MSG("FindUMsb32 not implemented");
}

void DoFindILsb32(ImmValueList& inst_values, const ImmValueList& arg) {
    UNREACHABLE_MSG("FindILsb32 not implemented");
}

void DoFindILsb64(ImmValueList& inst_values, const ImmValueList& arg) {
    UNREACHABLE_MSG("FindILsb64 not implemented");
}

// --- Min / max / clamp ---

void DoSMin32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Min<Type::U32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoUMin32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Min<Type::U32, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoSMax32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Max<Type::U32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoUMax32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Max<Type::U32, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoSMinTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                 const ImmValueList& args2) {
    Common::CartesianInvoke(ImmValue::MinTri<Type::U32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1, args2);
}

void DoUMinTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                 const ImmValueList& args2) {
    Common::CartesianInvoke(ImmValue::MinTri<Type::U32, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1, args2);
}

void DoSMaxTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                 const ImmValueList& args2) {
    Common::CartesianInvoke(ImmValue::MaxTri<Type::U32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1, args2);
}

void DoUMaxTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                 const ImmValueList& args2) {
    Common::CartesianInvoke(ImmValue::MaxTri<Type::U32, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1, args2);
}

void DoSMedTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                 const ImmValueList& args2) {
    Common::CartesianInvoke(ImmValue::MedTri<Type::U32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1, args2);
}

void DoUMedTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                 const ImmValueList& args2) {
    Common::CartesianInvoke(ImmValue::MedTri<Type::U32, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1, args2);
}

void DoSClamp32(ImmValueList& inst_values, const ImmValueList& value, const ImmValueList& min,
                const ImmValueList& max) {
    Common::CartesianInvoke(ImmValue::Clamp<Type::U32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), value, min,
                            max);
}

void DoUClamp32(ImmValueList& inst_values, const ImmValueList& value, const ImmValueList& min,
                const ImmValueList& max) {
    Common::CartesianInvoke(ImmValue::Clamp<Type::U32, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), value, min,
                            max);
}

} // namespace Shader::IR::ComputeValue

View file

@ -0,0 +1,76 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include "shader_recompiler/ir/compute_value/compute.h"

namespace Shader::IR::ComputeValue {

// Constant-folding handlers for the integer IR opcodes. Each handler receives
// the set of possible values of every operand and appends the result of every
// operand combination to inst_values.
// The *Tri32 declarations below were reflowed to the same wrap style used by
// the rest of this header (and the matching definitions).

void DoIAdd32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoIAdd64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
// NOTE(review): "Cary" looks like a typo for "Carry"; kept to match the
// definition and call sites — rename tree-wide if confirmed.
void DoIAddCary32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoISub32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoISub64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoIMul32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoIMul64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoSMulExt(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoUMulExt(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoSDiv32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoUDiv32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoSMod32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoUMod32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoINeg32(ImmValueList& inst_values, const ImmValueList& args);
void DoINeg64(ImmValueList& inst_values, const ImmValueList& args);
void DoIAbs32(ImmValueList& inst_values, const ImmValueList& args);
void DoShiftLeftLogical32(ImmValueList& inst_values, const ImmValueList& args,
                          const ImmValueList& shift);
void DoShiftLeftLogical64(ImmValueList& inst_values, const ImmValueList& args,
                          const ImmValueList& shift);
void DoShiftRightLogical32(ImmValueList& inst_values, const ImmValueList& args,
                           const ImmValueList& shift);
void DoShiftRightLogical64(ImmValueList& inst_values, const ImmValueList& args,
                           const ImmValueList& shift);
void DoShiftRightArithmetic32(ImmValueList& inst_values, const ImmValueList& args,
                              const ImmValueList& shift);
void DoShiftRightArithmetic64(ImmValueList& inst_values, const ImmValueList& args,
                              const ImmValueList& shift);
void DoBitwiseAnd32(ImmValueList& inst_values, const ImmValueList& args0,
                    const ImmValueList& args1);
void DoBitwiseAnd64(ImmValueList& inst_values, const ImmValueList& args0,
                    const ImmValueList& args1);
void DoBitwiseOr32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoBitwiseOr64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoBitwiseXor32(ImmValueList& inst_values, const ImmValueList& args0,
                    const ImmValueList& args1);
void DoBitFieldInsert(ImmValueList& inst_values, const ImmValueList& arg,
                      const ImmValueList& insert, const ImmValueList& offset,
                      const ImmValueList& count);
void DoBitFieldSExtract(ImmValueList& inst_values, const ImmValueList& arg,
                        const ImmValueList& offset, const ImmValueList& count);
void DoBitFieldUExtract(ImmValueList& inst_values, const ImmValueList& arg,
                        const ImmValueList& offset, const ImmValueList& count);
void DoBitReverse32(ImmValueList& inst_values, const ImmValueList& arg);
void DoBitCount32(ImmValueList& inst_values, const ImmValueList& arg);
void DoBitCount64(ImmValueList& inst_values, const ImmValueList& arg);
void DoBitwiseNot32(ImmValueList& inst_values, const ImmValueList& arg);
void DoFindSMsb32(ImmValueList& inst_values, const ImmValueList& arg);
void DoFindUMsb32(ImmValueList& inst_values, const ImmValueList& arg);
void DoFindILsb32(ImmValueList& inst_values, const ImmValueList& arg);
void DoFindILsb64(ImmValueList& inst_values, const ImmValueList& arg);
void DoSMin32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoUMin32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoSMax32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoUMax32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoSMinTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                 const ImmValueList& args2);
void DoUMinTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                 const ImmValueList& args2);
void DoSMaxTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                 const ImmValueList& args2);
void DoUMaxTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                 const ImmValueList& args2);
void DoSMedTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                 const ImmValueList& args2);
void DoUMedTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                 const ImmValueList& args2);
void DoSClamp32(ImmValueList& inst_values, const ImmValueList& value, const ImmValueList& min,
                const ImmValueList& max);
void DoUClamp32(ImmValueList& inst_values, const ImmValueList& value, const ImmValueList& min,
                const ImmValueList& max);

} // namespace Shader::IR::ComputeValue

View file

@ -0,0 +1,29 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/cartesian_invoke.h"
#include "shader_recompiler/ir/compute_value/do_logical_operations.h"

namespace Shader::IR::ComputeValue {

// Constant folding of the boolean IR opcodes: every combination of the
// operands' possible values is evaluated and appended to inst_values.

void DoLogicalOr(ImmValueList& inst_values, const ImmValueList& arg1, const ImmValueList& arg2) {
    auto sink = std::inserter(inst_values, inst_values.begin());
    Common::CartesianInvoke(ImmValue::Or<Type::U1>, sink, arg1, arg2);
}

void DoLogicalAnd(ImmValueList& inst_values, const ImmValueList& arg1, const ImmValueList& arg2) {
    auto sink = std::inserter(inst_values, inst_values.begin());
    Common::CartesianInvoke(ImmValue::And<Type::U1>, sink, arg1, arg2);
}

void DoLogicalXor(ImmValueList& inst_values, const ImmValueList& arg1, const ImmValueList& arg2) {
    auto sink = std::inserter(inst_values, inst_values.begin());
    Common::CartesianInvoke(ImmValue::Xor<Type::U1>, sink, arg1, arg2);
}

void DoLogicalNot(ImmValueList& inst_values, const ImmValueList& arg1) {
    auto sink = std::inserter(inst_values, inst_values.begin());
    Common::CartesianInvoke(ImmValue::Not<Type::U1>, sink, arg1);
}

} // namespace Shader::IR::ComputeValue

View file

@ -0,0 +1,15 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "shader_recompiler/ir/compute_value/compute.h"
namespace Shader::IR::ComputeValue {
void DoLogicalOr(ImmValueList& inst_values, const ImmValueList& arg1, const ImmValueList& arg2);
void DoLogicalAnd(ImmValueList& inst_values, const ImmValueList& arg1, const ImmValueList& arg2);
void DoLogicalXor(ImmValueList& inst_values, const ImmValueList& arg1, const ImmValueList& arg2);
void DoLogicalNot(ImmValueList& inst_values, const ImmValueList& arg1);
} // namespace Shader::IR::ComputeValue

View file

@ -0,0 +1,212 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
namespace Shader::IR::ComputeValue {
#define NOP_FUNCTION(name) \
inline void Do##name(ImmValueList& inst_values) {}
NOP_FUNCTION(Phi)
NOP_FUNCTION(Identity)
NOP_FUNCTION(Void)
NOP_FUNCTION(ConditionRef)
NOP_FUNCTION(Reference)
NOP_FUNCTION(PhiMove)
NOP_FUNCTION(Prologue)
NOP_FUNCTION(Epilogue)
NOP_FUNCTION(Discard)
NOP_FUNCTION(DiscardCond)
NOP_FUNCTION(DebugPrint)
NOP_FUNCTION(ReadConst)
NOP_FUNCTION(ReadConstBuffer)
NOP_FUNCTION(Barrier)
NOP_FUNCTION(WorkgroupMemoryBarrier)
NOP_FUNCTION(DeviceMemoryBarrier)
NOP_FUNCTION(EmitVertex)
NOP_FUNCTION(EmitPrimitive)
NOP_FUNCTION(LoadSharedU32)
NOP_FUNCTION(LoadSharedU64)
NOP_FUNCTION(WriteSharedU32)
NOP_FUNCTION(WriteSharedU64)
NOP_FUNCTION(SharedAtomicIAdd32)
NOP_FUNCTION(SharedAtomicSMin32)
NOP_FUNCTION(SharedAtomicUMin32)
NOP_FUNCTION(SharedAtomicSMax32)
NOP_FUNCTION(SharedAtomicUMax32)
NOP_FUNCTION(SharedAtomicAnd32)
NOP_FUNCTION(SharedAtomicOr32)
NOP_FUNCTION(SharedAtomicXor32)
NOP_FUNCTION(GetUserData)
NOP_FUNCTION(SetUserData)
NOP_FUNCTION(GetThreadBitScalarReg)
NOP_FUNCTION(SetThreadBitScalarReg)
NOP_FUNCTION(GetScalarRegister)
NOP_FUNCTION(SetScalarRegister)
NOP_FUNCTION(GetVectorRegister)
NOP_FUNCTION(SetVectorRegister)
NOP_FUNCTION(GetGotoVariable)
NOP_FUNCTION(SetGotoVariable)
NOP_FUNCTION(GetAttribute)
NOP_FUNCTION(GetAttributeU32)
NOP_FUNCTION(SetAttribute)
NOP_FUNCTION(GetPatch)
NOP_FUNCTION(SetPatch)
NOP_FUNCTION(GetTessGenericAttribute)
NOP_FUNCTION(SetTcsGenericAttribute)
NOP_FUNCTION(ReadTcsGenericOuputAttribute)
NOP_FUNCTION(GetScc)
NOP_FUNCTION(GetExec)
NOP_FUNCTION(GetVcc)
NOP_FUNCTION(GetVccLo)
NOP_FUNCTION(GetVccHi)
NOP_FUNCTION(GetM0)
NOP_FUNCTION(SetScc)
NOP_FUNCTION(SetExec)
NOP_FUNCTION(SetVcc)
NOP_FUNCTION(SetSccLo)
NOP_FUNCTION(SetVccLo)
NOP_FUNCTION(SetVccHi)
NOP_FUNCTION(SetM0)
NOP_FUNCTION(UndefU1)
NOP_FUNCTION(UndefU8)
NOP_FUNCTION(UndefU16)
NOP_FUNCTION(UndefU32)
NOP_FUNCTION(UndefU64)
NOP_FUNCTION(LoadBufferU8)
NOP_FUNCTION(LoadBufferU16)
NOP_FUNCTION(LoadBufferU32)
NOP_FUNCTION(LoadBufferU32x2)
NOP_FUNCTION(LoadBufferU32x3)
NOP_FUNCTION(LoadBufferU32x4)
NOP_FUNCTION(LoadBufferF32)
NOP_FUNCTION(LoadBufferF32x2)
NOP_FUNCTION(LoadBufferF32x3)
NOP_FUNCTION(LoadBufferF32x4)
NOP_FUNCTION(LoadBufferFormatF32)
NOP_FUNCTION(StoreBufferU8)
NOP_FUNCTION(StoreBufferU16)
NOP_FUNCTION(StoreBufferU32)
NOP_FUNCTION(StoreBufferU32x2)
NOP_FUNCTION(StoreBufferU32x3)
NOP_FUNCTION(StoreBufferU32x4)
NOP_FUNCTION(StoreBufferF32)
NOP_FUNCTION(StoreBufferF32x2)
NOP_FUNCTION(StoreBufferF32x3)
NOP_FUNCTION(StoreBufferF32x4)
NOP_FUNCTION(StoreBufferFormatF32)
NOP_FUNCTION(BufferAtomicIAdd32)
NOP_FUNCTION(BufferAtomicSMin32)
NOP_FUNCTION(BufferAtomicUMin32)
NOP_FUNCTION(BufferAtomicSMax32)
NOP_FUNCTION(BufferAtomicUMax32)
NOP_FUNCTION(BufferAtomicInc32)
NOP_FUNCTION(BufferAtomicDec32)
NOP_FUNCTION(BufferAtomicAnd32)
NOP_FUNCTION(BufferAtomicOr32)
NOP_FUNCTION(BufferAtomicXor32)
NOP_FUNCTION(BufferAtomicSwap32)
// Select instructions are handled separately
NOP_FUNCTION(SelectU1)
NOP_FUNCTION(SelectU8)
NOP_FUNCTION(SelectU16)
NOP_FUNCTION(SelectU32)
NOP_FUNCTION(SelectU64)
NOP_FUNCTION(SelectF32)
NOP_FUNCTION(SelectF64)
NOP_FUNCTION(FPOrdEqual32)
NOP_FUNCTION(FPOrdEqual64)
NOP_FUNCTION(FPUnordEqual32)
NOP_FUNCTION(FPUnordEqual64)
NOP_FUNCTION(FPOrdNotEqual32)
NOP_FUNCTION(FPOrdNotEqual64)
NOP_FUNCTION(FPUnordNotEqual32)
NOP_FUNCTION(FPUnordNotEqual64)
NOP_FUNCTION(FPOrdLessThan32)
NOP_FUNCTION(FPOrdLessThan64)
NOP_FUNCTION(FPUnordLessThan32)
NOP_FUNCTION(FPUnordLessThan64)
NOP_FUNCTION(FPOrdGreaterThan32)
NOP_FUNCTION(FPOrdGreaterThan64)
NOP_FUNCTION(FPUnordGreaterThan32)
NOP_FUNCTION(FPUnordGreaterThan64)
NOP_FUNCTION(FPOrdLessThanEqual32)
NOP_FUNCTION(FPOrdLessThanEqual64)
NOP_FUNCTION(FPUnordLessThanEqual32)
NOP_FUNCTION(FPUnordLessThanEqual64)
NOP_FUNCTION(FPOrdGreaterThanEqual32)
NOP_FUNCTION(FPOrdGreaterThanEqual64)
NOP_FUNCTION(FPUnordGreaterThanEqual32)
NOP_FUNCTION(FPUnordGreaterThanEqual64)
NOP_FUNCTION(FPIsNan32)
NOP_FUNCTION(FPIsNan64)
NOP_FUNCTION(FPIsInf32)
NOP_FUNCTION(FPIsInf64)
NOP_FUNCTION(FPCmpClass32)
NOP_FUNCTION(SLessThan32)
NOP_FUNCTION(SLessThan64)
NOP_FUNCTION(ULessThan32)
NOP_FUNCTION(ULessThan64)
NOP_FUNCTION(IEqual32)
NOP_FUNCTION(IEqual64)
NOP_FUNCTION(SLessThanEqual)
NOP_FUNCTION(ULessThanEqual)
NOP_FUNCTION(SGreaterThan)
NOP_FUNCTION(UGreaterThan)
NOP_FUNCTION(INotEqual32)
NOP_FUNCTION(INotEqual64)
NOP_FUNCTION(SGreaterThanEqual)
NOP_FUNCTION(UGreaterThanEqual)
NOP_FUNCTION(ImageSampleRaw)
NOP_FUNCTION(ImageSampleImplicitLod)
NOP_FUNCTION(ImageSampleExplicitLod)
NOP_FUNCTION(ImageSampleDrefImplicitLod)
NOP_FUNCTION(ImageSampleDrefExplicitLod)
NOP_FUNCTION(ImageGather)
NOP_FUNCTION(ImageGatherDref)
NOP_FUNCTION(ImageQueryDimensions)
NOP_FUNCTION(ImageQueryLod)
NOP_FUNCTION(ImageGradient)
NOP_FUNCTION(ImageRead)
NOP_FUNCTION(ImageWrite)
NOP_FUNCTION(ImageAtomicIAdd32)
NOP_FUNCTION(ImageAtomicSMin32)
NOP_FUNCTION(ImageAtomicUMin32)
NOP_FUNCTION(ImageAtomicSMax32)
NOP_FUNCTION(ImageAtomicUMax32)
NOP_FUNCTION(ImageAtomicInc32)
NOP_FUNCTION(ImageAtomicDec32)
NOP_FUNCTION(ImageAtomicAnd32)
NOP_FUNCTION(ImageAtomicOr32)
NOP_FUNCTION(ImageAtomicXor32)
NOP_FUNCTION(ImageAtomicExchange32)
NOP_FUNCTION(CubeFaceIndex)
NOP_FUNCTION(LaneId)
NOP_FUNCTION(WarpId)
NOP_FUNCTION(QuadShuffle)
NOP_FUNCTION(ReadFirstLane)
NOP_FUNCTION(ReadLane)
NOP_FUNCTION(WriteLane)
NOP_FUNCTION(DataAppend)
NOP_FUNCTION(DataConsume)
#undef NOP_FUNCTION
} // namespace Shader::IR::ComputeValue

View file

@ -0,0 +1,132 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shader_recompiler/ir/compute_value/do_packing.h"

namespace Shader::IR::ComputeValue {

// Constant-folding handlers for the pack/unpack conversion opcodes. None are
// implemented yet: reaching any handler in this file during value computation
// is a hard error (UNREACHABLE), so callers must not request folding of these
// opcodes until implementations land.

void DoPackUint2x32(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackUint2x32(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackFloat2x32(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackUnorm2x16(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackUnorm2x16(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackSnorm2x16(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackSnorm2x16(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackUint2x16(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackUint2x16(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackSint2x16(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackSint2x16(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackHalf2x16(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackHalf2x16(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackUnorm4x8(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackUnorm4x8(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackSnorm4x8(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackSnorm4x8(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackUint4x8(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackUint4x8(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackSint4x8(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackSint4x8(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackUfloat10_11_11(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackUfloat10_11_11(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackUnorm2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackUnorm2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackSnorm2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackSnorm2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackUint2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackUint2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackSint2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackSint2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

} // namespace Shader::IR::ComputeValue

View file

@ -0,0 +1,42 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "shader_recompiler/ir/compute_value/compute.h"
namespace Shader::IR::ComputeValue {

// Constant-folding handlers for the pack/unpack family of IR opcodes.
// Each handler takes the set of possible immediate values of the
// instruction's argument (args0) and, presumably, appends every resulting
// packed/unpacked immediate to inst_values — confirm against the
// implementations in do_packing.cpp (several are still unimplemented stubs).

// 2x32-bit packing.
void DoPackUint2x32(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackUint2x32(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackFloat2x32(ImmValueList& inst_values, const ImmValueList& args0);

// 2x16-bit packing (normalized, integer and half-float variants).
void DoPackUnorm2x16(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackUnorm2x16(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackSnorm2x16(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackSnorm2x16(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackUint2x16(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackUint2x16(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackSint2x16(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackSint2x16(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackHalf2x16(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackHalf2x16(ImmValueList& inst_values, const ImmValueList& args0);

// 4x8-bit packing.
void DoPackUnorm4x8(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackUnorm4x8(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackSnorm4x8(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackSnorm4x8(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackUint4x8(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackUint4x8(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackSint4x8(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackSint4x8(ImmValueList& inst_values, const ImmValueList& args0);

// 10/11/11-bit and 2/10/10/10-bit packed formats.
void DoPackUfloat10_11_11(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackUfloat10_11_11(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackUnorm2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackUnorm2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackSnorm2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackSnorm2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackUint2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackUint2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackSint2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackSint2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0);

} // namespace Shader::IR::ComputeValue

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,330 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <tuple>
#include <type_traits>
#include "common/assert.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/ir/type.h"
#include "shader_recompiler/ir/value.h"
namespace Shader::IR::ComputeValue {
// Holds an immediate value and provides helper functions to do arithmetic operations on it.
// Storage is a 4-wide array of scalar unions, so one ImmValue can represent a
// scalar (component 0) or a vector of up to four components.
class ImmValue {
public:
    // Scalar constructors store the value in component 0.
    ImmValue() noexcept = default;
    ImmValue(const ImmValue& value) noexcept = default;
    explicit ImmValue(const IR::Value& value) noexcept;
    explicit ImmValue(bool value) noexcept;
    explicit ImmValue(u8 value) noexcept;
    explicit ImmValue(s8 value) noexcept;
    explicit ImmValue(u16 value) noexcept;
    explicit ImmValue(s16 value) noexcept;
    explicit ImmValue(u32 value) noexcept;
    explicit ImmValue(s32 value) noexcept;
    explicit ImmValue(f32 value) noexcept;
    explicit ImmValue(u64 value) noexcept;
    explicit ImmValue(s64 value) noexcept;
    explicit ImmValue(f64 value) noexcept;
    // Vector constructors fill components 0..N-1 with the given scalars.
    ImmValue(u32 value1, u32 value2) noexcept;
    ImmValue(u32 value1, u32 value2, u32 value3) noexcept;
    ImmValue(u32 value1, u32 value2, u32 value3, u32 value4) noexcept;
    ImmValue(s32 value1, s32 value2) noexcept;
    ImmValue(s32 value1, s32 value2, s32 value3) noexcept;
    ImmValue(s32 value1, s32 value2, s32 value3, s32 value4) noexcept;
    ImmValue(f32 value1, f32 value2) noexcept;
    ImmValue(f32 value1, f32 value2, f32 value3) noexcept;
    ImmValue(f32 value1, f32 value2, f32 value3, f32 value4) noexcept;
    ImmValue(u64 value1, u64 value2) noexcept;
    ImmValue(u64 value1, u64 value2, u64 value3) noexcept;
    ImmValue(u64 value1, u64 value2, u64 value3, u64 value4) noexcept;
    ImmValue(s64 value1, s64 value2) noexcept;
    ImmValue(s64 value1, s64 value2, s64 value3) noexcept;
    ImmValue(s64 value1, s64 value2, s64 value3, s64 value4) noexcept;
    ImmValue(f64 value1, f64 value2) noexcept;
    ImmValue(f64 value1, f64 value2, f64 value3) noexcept;
    ImmValue(f64 value1, f64 value2, f64 value3, f64 value4) noexcept;
    // Composite constructors combine component 0 of each input.
    ImmValue(const ImmValue& value1, const ImmValue& value2) noexcept;
    ImmValue(const ImmValue& value1, const ImmValue& value2, const ImmValue& value3) noexcept;
    ImmValue(const ImmValue& value1, const ImmValue& value2, const ImmValue& value3,
             const ImmValue& value4) noexcept;

    // Builds a 4-component value from two 2-component values — TODO confirm
    // exact component layout against the .cpp implementation.
    [[nodiscard]] static ImmValue CompositeFrom2x2(const ImmValue& value1,
                                                   const ImmValue& value2) noexcept;

    // Scalar accessors: reinterpret component 0 as the requested type.
    [[nodiscard]] bool U1() const noexcept;
    [[nodiscard]] u8 U8() const noexcept;
    [[nodiscard]] s8 S8() const noexcept;
    [[nodiscard]] u16 U16() const noexcept;
    [[nodiscard]] s16 S16() const noexcept;
    [[nodiscard]] u32 U32() const noexcept;
    [[nodiscard]] s32 S32() const noexcept;
    [[nodiscard]] f32 F32() const noexcept;
    [[nodiscard]] u64 U64() const noexcept;
    [[nodiscard]] s64 S64() const noexcept;
    [[nodiscard]] f64 F64() const noexcept;
    // Vector accessors: reinterpret the first N components.
    [[nodiscard]] std::tuple<u32, u32> U32x2() const noexcept;
    [[nodiscard]] std::tuple<u32, u32, u32> U32x3() const noexcept;
    [[nodiscard]] std::tuple<u32, u32, u32, u32> U32x4() const noexcept;
    [[nodiscard]] std::tuple<s32, s32> S32x2() const noexcept;
    [[nodiscard]] std::tuple<s32, s32, s32> S32x3() const noexcept;
    [[nodiscard]] std::tuple<s32, s32, s32, s32> S32x4() const noexcept;
    [[nodiscard]] std::tuple<f32, f32> F32x2() const noexcept;
    [[nodiscard]] std::tuple<f32, f32, f32> F32x3() const noexcept;
    [[nodiscard]] std::tuple<f32, f32, f32, f32> F32x4() const noexcept;
    [[nodiscard]] std::tuple<f64, f64> F64x2() const noexcept;
    [[nodiscard]] std::tuple<f64, f64, f64> F64x3() const noexcept;
    [[nodiscard]] std::tuple<f64, f64, f64, f64> F64x4() const noexcept;

    ImmValue& operator=(const ImmValue& value) noexcept = default;

    [[nodiscard]] bool operator==(const ImmValue& other) const noexcept;
    [[nodiscard]] bool operator!=(const ImmValue& other) const noexcept;

    // Component extraction/insertion by dynamic index.
    [[nodiscard]] static ImmValue Extract(const ImmValue& vec, const ImmValue& index) noexcept;
    [[nodiscard]] static ImmValue Insert(const ImmValue& vec, const ImmValue& value,
                                         const ImmValue& index) noexcept;

    // Type conversion between IR types, parameterized on signedness.
    template <IR::Type NewType, bool NewSigned, IR::Type OldType, bool OldSigned>
    [[nodiscard]] static ImmValue Convert(const ImmValue& in) noexcept;

    // Arithmetic operations, templated on the IR type (and signedness where
    // the operation distinguishes signed/unsigned behavior).
    template <IR::Type Type, bool IsSigned>
    [[nodiscard]] static ImmValue Add(const ImmValue& a, const ImmValue& b) noexcept;
    template <IR::Type Type, bool IsSigned>
    [[nodiscard]] static ImmValue AddCarry(const ImmValue& a, const ImmValue& b) noexcept;
    template <IR::Type Type, bool IsSigned>
    [[nodiscard]] static ImmValue Sub(const ImmValue& a, const ImmValue& b) noexcept;
    template <IR::Type Type, bool IsSigned>
    [[nodiscard]] static ImmValue Mul(const ImmValue& a, const ImmValue& b) noexcept;
    // Note: Div is the only operation here that is not noexcept.
    template <IR::Type Type, bool IsSigned>
    [[nodiscard]] static ImmValue Div(const ImmValue& a, const ImmValue& b);
    template <IR::Type Type, bool IsSigned>
    [[nodiscard]] static ImmValue Mod(const ImmValue& a, const ImmValue& b) noexcept;
    // Bitwise operations.
    template <IR::Type Type>
    [[nodiscard]] static ImmValue And(const ImmValue& a, const ImmValue& b) noexcept;
    template <IR::Type Type>
    [[nodiscard]] static ImmValue Or(const ImmValue& a, const ImmValue& b) noexcept;
    template <IR::Type Type>
    [[nodiscard]] static ImmValue Xor(const ImmValue& a, const ImmValue& b) noexcept;
    template <IR::Type Type>
    [[nodiscard]] static ImmValue LShift(const ImmValue& a, const ImmValue& shift) noexcept;
    template <IR::Type Type, bool IsSigned>
    [[nodiscard]] static ImmValue RShift(const ImmValue& a, const ImmValue& shift) noexcept;
    template <IR::Type Type>
    [[nodiscard]] static ImmValue Not(const ImmValue& in) noexcept;
    // Unary math operations.
    template <IR::Type Type>
    [[nodiscard]] static ImmValue Neg(const ImmValue& in) noexcept;
    template <IR::Type Type>
    [[nodiscard]] static ImmValue Abs(const ImmValue& in) noexcept;
    template <IR::Type Type>
    [[nodiscard]] static ImmValue Recip(const ImmValue& in) noexcept;
    template <IR::Type Type>
    [[nodiscard]] static ImmValue Sqrt(const ImmValue& in) noexcept;
    template <IR::Type Type>
    [[nodiscard]] static ImmValue Rsqrt(const ImmValue& in) noexcept;
    template <IR::Type Type>
    [[nodiscard]] static ImmValue Sin(const ImmValue& in) noexcept;
    template <IR::Type Type>
    [[nodiscard]] static ImmValue Cos(const ImmValue& in) noexcept;
    template <IR::Type Type>
    [[nodiscard]] static ImmValue Exp2(const ImmValue& in) noexcept;
    template <IR::Type Type>
    [[nodiscard]] static ImmValue Ldexp(const ImmValue& in, const ImmValue& exp) noexcept;
    template <IR::Type Type>
    [[nodiscard]] static ImmValue Log2(const ImmValue& in) noexcept;
    // Min/max/clamp family.
    template <IR::Type Type, bool IsSigned>
    [[nodiscard]] static ImmValue Min(const ImmValue& a, const ImmValue& b) noexcept;
    template <IR::Type Type, bool IsSigned>
    [[nodiscard]] static ImmValue Max(const ImmValue& a, const ImmValue& b) noexcept;
    template <IR::Type Type, bool IsSigned>
    [[nodiscard]] static ImmValue MinTri(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept;
    template <IR::Type Type, bool IsSigned>
    [[nodiscard]] static ImmValue MaxTri(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept;
    template <IR::Type Type, bool IsSigned>
    [[nodiscard]] static ImmValue MedTri(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept;
    template <IR::Type Type, bool IsSigned>
    [[nodiscard]] static ImmValue Clamp(const ImmValue& in, const ImmValue& min,
                                        const ImmValue& max) noexcept;
    // Rounding / floating-point helpers.
    template <IR::Type Type>
    [[nodiscard]] static ImmValue Floor(const ImmValue& in) noexcept;
    template <IR::Type Type>
    [[nodiscard]] static ImmValue Ceil(const ImmValue& in) noexcept;
    template <IR::Type Type>
    [[nodiscard]] static ImmValue Round(const ImmValue& in) noexcept;
    template <IR::Type Type>
    [[nodiscard]] static ImmValue Trunc(const ImmValue& in) noexcept;
    template <IR::Type Type>
    [[nodiscard]] static ImmValue Fract(const ImmValue& in) noexcept;
    template <IR::Type Type>
    [[nodiscard]] static ImmValue Fma(const ImmValue& a, const ImmValue& b,
                                      const ImmValue& c) noexcept;
    template <IR::Type Type>
    [[nodiscard]] static bool IsNan(const ImmValue& in) noexcept;

    // True if the IR::Value can be represented as an ImmValue.
    [[nodiscard]] static bool IsSupportedValue(const IR::Value& value) noexcept;

private:
    // Untagged union: the accessor used must match the type that was stored;
    // the type is tracked by the caller (via the IR::Type template params).
    union Value {
        bool imm_u1;
        u8 imm_u8;
        s8 imm_s8;
        u16 imm_u16;
        s16 imm_s16;
        u32 imm_u32;
        s32 imm_s32;
        f32 imm_f32;
        u64 imm_u64;
        s64 imm_s64;
        f64 imm_f64;
    };
    // One slot per vector component.
    std::array<Value, 4> imm_values;

    friend class std::hash<ImmValue>;
};
// Trivially copyable is required so ImmValue can be freely copied/hased as raw
// bytes (see the std::hash specialization below).
static_assert(std::is_trivially_copyable_v<ImmValue>);

// Inline scalar accessors: each reinterprets component 0 through the union.

inline bool ImmValue::U1() const noexcept {
    return imm_values[0].imm_u1;
}

inline u8 ImmValue::U8() const noexcept {
    return imm_values[0].imm_u8;
}

inline s8 ImmValue::S8() const noexcept {
    return imm_values[0].imm_s8;
}

inline u16 ImmValue::U16() const noexcept {
    return imm_values[0].imm_u16;
}

inline s16 ImmValue::S16() const noexcept {
    return imm_values[0].imm_s16;
}

inline u32 ImmValue::U32() const noexcept {
    return imm_values[0].imm_u32;
}

inline s32 ImmValue::S32() const noexcept {
    return imm_values[0].imm_s32;
}

inline f32 ImmValue::F32() const noexcept {
    return imm_values[0].imm_f32;
}

inline u64 ImmValue::U64() const noexcept {
    return imm_values[0].imm_u64;
}

inline s64 ImmValue::S64() const noexcept {
    return imm_values[0].imm_s64;
}

inline f64 ImmValue::F64() const noexcept {
    return imm_values[0].imm_f64;
}

// Inline vector accessors: reinterpret the first N components as a tuple.

inline std::tuple<u32, u32> ImmValue::U32x2() const noexcept {
    return {imm_values[0].imm_u32, imm_values[1].imm_u32};
}

inline std::tuple<u32, u32, u32> ImmValue::U32x3() const noexcept {
    return {imm_values[0].imm_u32, imm_values[1].imm_u32, imm_values[2].imm_u32};
}

inline std::tuple<u32, u32, u32, u32> ImmValue::U32x4() const noexcept {
    return {imm_values[0].imm_u32, imm_values[1].imm_u32, imm_values[2].imm_u32,
            imm_values[3].imm_u32};
}

inline std::tuple<s32, s32> ImmValue::S32x2() const noexcept {
    return {imm_values[0].imm_s32, imm_values[1].imm_s32};
}

inline std::tuple<s32, s32, s32> ImmValue::S32x3() const noexcept {
    return {imm_values[0].imm_s32, imm_values[1].imm_s32, imm_values[2].imm_s32};
}

inline std::tuple<s32, s32, s32, s32> ImmValue::S32x4() const noexcept {
    return {imm_values[0].imm_s32, imm_values[1].imm_s32, imm_values[2].imm_s32,
            imm_values[3].imm_s32};
}

inline std::tuple<f32, f32> ImmValue::F32x2() const noexcept {
    return {imm_values[0].imm_f32, imm_values[1].imm_f32};
}

inline std::tuple<f32, f32, f32> ImmValue::F32x3() const noexcept {
    return {imm_values[0].imm_f32, imm_values[1].imm_f32, imm_values[2].imm_f32};
}

inline std::tuple<f32, f32, f32, f32> ImmValue::F32x4() const noexcept {
    return {imm_values[0].imm_f32, imm_values[1].imm_f32, imm_values[2].imm_f32,
            imm_values[3].imm_f32};
}

inline std::tuple<f64, f64> ImmValue::F64x2() const noexcept {
    return {imm_values[0].imm_f64, imm_values[1].imm_f64};
}

inline std::tuple<f64, f64, f64> ImmValue::F64x3() const noexcept {
    return {imm_values[0].imm_f64, imm_values[1].imm_f64, imm_values[2].imm_f64};
}

inline std::tuple<f64, f64, f64, f64> ImmValue::F64x4() const noexcept {
    return {imm_values[0].imm_f64, imm_values[1].imm_f64, imm_values[2].imm_f64,
            imm_values[3].imm_f64};
}

} // namespace Shader::IR::ComputeValue

// Allows ImmValue to be used as a key in unordered containers.
namespace std {
template <>
struct hash<Shader::IR::ComputeValue::ImmValue> {
    std::size_t operator()(const Shader::IR::ComputeValue::ImmValue& value) const;
};
} // namespace std

View file

@ -0,0 +1,65 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/conditional_tree.h"
#include <span>
// This can be used to get, for a given block, the list of conditions that
// must be true for the block to be executed. It can also be useful
// for determining the maximum number of times a block is executed.
namespace Shader::IR {
// Recursively walks one nesting level of the abstract syntax list, tagging
// every Block node with its ConditionalData (depth, parent and owning
// If/Loop node), then recursing into the body of each If/Loop it finds.
static void AddConditionalTree(std::span<AbstractSyntaxNode> asl_span,
                               Block::ConditionalData* parent) {
    // Returns the sub-span of nodes strictly between `node` and the node whose
    // block is `merge_block`, i.e. the body of the If/Loop construct.
    const auto get_span = [&asl_span](AbstractSyntaxNode& node,
                                      Block* merge_block) -> std::span<AbstractSyntaxNode> {
        auto it = std::find_if(asl_span.begin(), asl_span.end(),
                               [&node, &merge_block](const AbstractSyntaxNode& n) {
                                   return n.data.block == merge_block;
                               });
        ASSERT(it != asl_span.end());
        std::ptrdiff_t merge_index = std::distance(asl_span.begin(), it);
        return std::span<AbstractSyntaxNode>(&node + 1, asl_span.data() + merge_index);
    };
    // ConditionalData stored in the first Block node seen at this level; it is
    // reused as the parent for nested constructs so parent pointers remain
    // stable (they point into a Block, not at the stack-local `cond` below).
    const Block::ConditionalData* copied_parent = nullptr;
    for (auto it = asl_span.begin(); it < asl_span.end(); ++it) {
        AbstractSyntaxNode& node = *it;
        if (node.type == AbstractSyntaxNode::Type::If ||
            node.type == AbstractSyntaxNode::Type::Loop) {
            // A Block node must precede any If/Loop at this level.
            ASSERT(copied_parent);
            Block* merge_block;
            switch (node.type) {
            case AbstractSyntaxNode::Type::If:
                merge_block = node.data.if_node.merge;
                break;
            case AbstractSyntaxNode::Type::Loop:
                merge_block = node.data.loop.merge;
                break;
            default:
                UNREACHABLE();
            }
            auto subspan = get_span(node, merge_block);
            Block::ConditionalData cond{copied_parent->depth + 1, copied_parent, &node};
            AddConditionalTree(subspan, &cond);
            // Skip past the body we just recursed into; the loop's ++it then
            // lands on the merge node itself.
            it += subspan.size();
        } else if (node.type == AbstractSyntaxNode::Type::Block) {
            Block* block = node.data.block;
            if (!copied_parent) {
                block->SetConditionalData(*parent);
                copied_parent = &block->CondData();
            } else {
                block->SetConditionalData(*copied_parent);
            }
        }
    }
}
// Annotates every block in the syntax list with its conditional data,
// starting the recursion from a depth-0 root with no parent or ASL node.
void AddConditionalTreeFromASL(AbstractSyntaxList& syntax_list) {
    Block::ConditionalData root_cond{0, nullptr, nullptr};
    AddConditionalTree(syntax_list, &root_cond);
}
} // namespace Shader::IR

View file

@ -0,0 +1,12 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "shader_recompiler/ir/abstract_syntax_list.h"
namespace Shader::IR {

// Walks the abstract syntax list and annotates each block with its
// ConditionalData (chain of enclosing If/Loop constructs).
void AddConditionalTreeFromASL(AbstractSyntaxList& syntax_list);

} // namespace Shader::IR

View file

@ -102,6 +102,10 @@ void IREmitter::Reference(const Value& value) {
Inst(Opcode::Reference, value);
}
// Creates an empty Phi instruction of the given type; operands are attached
// afterwards via Inst::AddPhiOperand.
Value IREmitter::Phi(IR::Type type) {
    return Inst(Opcode::Phi, Flags(type));
}
// Emits a PhiMove that assigns `value` to the given Phi instruction.
void IREmitter::PhiMove(IR::Inst& phi, const Value& value) {
    Inst(Opcode::PhiMove, Value{&phi}, value);
}
@ -139,6 +143,10 @@ U32 IREmitter::GetUserData(IR::ScalarReg reg) {
return Inst<U32>(Opcode::GetUserData, reg);
}
// Emits a store of `data` at dword `offset`. The offset is a U32 value rather
// than a ScalarReg because callers do arithmetic on the register index
// (see the note next to the SetUserData opcode definition).
void IREmitter::SetUserData(const U32& offset, const U32& data) {
    Inst(Opcode::SetUserData, offset, data);
}
U1 IREmitter::GetThreadBitScalarReg(IR::ScalarReg reg) {
ASSERT(static_cast<u32>(reg) < IR::NumScalarRegs);
return Inst<U1>(Opcode::GetThreadBitScalarReg, reg);

View file

@ -18,6 +18,8 @@ namespace Shader::IR {
class IREmitter {
public:
explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {}
explicit IREmitter(IR::Inst& inst)
: block{inst.GetParent()}, insertion_point{Block::InstructionList::s_iterator_to(inst)} {}
explicit IREmitter(Block& block_, Block::iterator insertion_point_)
: block{&block_}, insertion_point{insertion_point_} {}
@ -39,6 +41,7 @@ public:
U1 ConditionRef(const U1& value);
void Reference(const Value& value);
[[nodiscard]] Value Phi(IR::Type type);
void PhiMove(IR::Inst& phi, const Value& value);
void Prologue();
@ -52,6 +55,7 @@ public:
void DeviceMemoryBarrier();
[[nodiscard]] U32 GetUserData(IR::ScalarReg reg);
void SetUserData(const U32& offset, const U32& data);
[[nodiscard]] U1 GetThreadBitScalarReg(IR::ScalarReg reg);
void SetThreadBitScalarReg(IR::ScalarReg reg, const U1& value);

View file

@ -103,6 +103,7 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::DebugPrint:
case Opcode::EmitVertex:
case Opcode::EmitPrimitive:
case Opcode::SetUserData:
return true;
default:
return false;

View file

@ -0,0 +1,64 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/cartesian_invoke.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/compute_value/compute.h"
#include "shader_recompiler/ir/num_executions.h"
namespace Shader::IR {
static bool Is64BitCondition(const Inst* inst) {
switch (inst->GetOpcode()) {
case Opcode::SLessThan64:
case Opcode::ULessThan64:
case Opcode::IEqual64:
case Opcode::INotEqual64:
return true;
default:
return false;
}
}
// Absolute difference between the two values read as u32, widened to u64.
static u64 GetDistance32(const ComputeValue::ImmValue& a, const ComputeValue::ImmValue& b) {
    const u32 lhs = a.U32();
    const u32 rhs = b.U32();
    return lhs > rhs ? lhs - rhs : rhs - lhs;
}
// Absolute difference between the two values read as u64.
static u64 GetDistance64(const ComputeValue::ImmValue& a, const ComputeValue::ImmValue& b) {
    const u64 lhs = a.U64();
    const u64 rhs = b.U64();
    return lhs > rhs ? lhs - rhs : rhs - lhs;
}
// Estimates how many times `inst` executes by walking its conditional tree
// upward and multiplying in an iteration-count bound for each enclosing loop.
// NOTE(review): the header comment says 0 when the count cannot be determined
// statically, but this path always returns >= 1 — confirm intended contract.
u64 GetNumExecutions(const Inst* inst) {
    u64 num_executions = 1;
    const auto* cond_data = &inst->GetParent()->CondData();
    // asl_node is null at the root, terminating the walk.
    while (cond_data->asl_node) {
        if (cond_data->asl_node->type == AbstractSyntaxNode::Type::Loop) {
            ComputeValue::ImmValueList cond_arg0, cond_arg1;
            ComputeValue::Cache cache;
            // The loop's continue block ends in the back-edge condition; its
            // two comparison operands bound the trip count.
            Block* cont_block = cond_data->asl_node->data.loop.continue_block;
            Inst* cond_inst = cont_block->back().Arg(0).InstRecursive();
            ASSERT(cond_inst);
            // Collect every statically-known value of each operand.
            ComputeValue::Compute(cond_inst->Arg(0), cond_arg0, cache);
            ComputeValue::Compute(cond_inst->Arg(1), cond_arg1, cache);
            // Distance for every pair of possible operand values.
            std::unordered_set<u64> distances;
            if (Is64BitCondition(cond_inst)) {
                Common::CartesianInvoke(GetDistance64,
                                        std::insert_iterator(distances, distances.end()), cond_arg0,
                                        cond_arg1);
            } else {
                Common::CartesianInvoke(GetDistance32,
                                        std::insert_iterator(distances, distances.end()), cond_arg0,
                                        cond_arg1);
            }
            if (!distances.empty()) {
                // We assume that the iterator changes by 1 each loop iteration.
                num_executions *=
                    std::max<u64>(1, *std::max_element(distances.begin(), distances.end())) + 1;
            }
        }
        cond_data = cond_data->parent;
    }
    return num_executions;
}
} // namespace Shader::IR

View file

@ -0,0 +1,16 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
#include "shader_recompiler/ir/type.h"
// Get the number of times an instruction will be executed.
// 0 if it cannot be determined statically.
namespace Shader::IR {

// Forward declaration: this header only includes type.h/common headers, so
// declare Inst here to keep it self-contained for standalone inclusion.
class Inst;

// See the file comment above for the contract of the returned count.
u64 GetNumExecutions(const Inst* inst);

} // namespace Shader::IR

View file

@ -50,6 +50,8 @@ OPCODE(SharedAtomicIDecrement32, U32, U32,
// Context getters/setters
OPCODE(GetUserData, U32, ScalarReg, )
// We don't use ScalarReg since we do arithmetics on the register index
OPCODE(SetUserData, Void, U32, U32 )
OPCODE(GetThreadBitScalarReg, U1, ScalarReg, )
OPCODE(SetThreadBitScalarReg, Void, ScalarReg, U1, )
OPCODE(GetScalarRegister, U32, ScalarReg, )

View file

@ -4,19 +4,25 @@
#include <unordered_map>
#include <boost/container/flat_map.hpp>
#include <xbyak/xbyak.h>
#include <xbyak/xbyak_util.h>
#include "common/arch.h"
#include "common/config.h"
#include "common/io_file.h"
#include "common/logging/log.h"
#include "common/path_util.h"
#include "shader_recompiler/info.h"
#ifdef ARCH_X86_64
#include "shader_recompiler/backend/asm_x64/emit_x64.h"
#endif
#include "shader_recompiler/ir/breadth_first_search.h"
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/ir/num_executions.h"
#include "shader_recompiler/ir/opcodes.h"
#include "shader_recompiler/ir/passes/ir_passes.h"
#include "shader_recompiler/ir/passes/srt.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/srt_gvn_table.h"
#include "shader_recompiler/ir/subprogram.h"
#include "shader_recompiler/ir/value.h"
#include "src/common/arch.h"
#include "src/common/decoder.h"
@ -57,28 +63,23 @@ static void DumpSrtProgram(const Shader::Info& info, const u8* code, size_t code
using namespace Shader;
struct PassInfo {
// map offset to inst
using PtrUserList = boost::container::flat_map<u32, Shader::IR::Inst*>;
struct ReadConstData {
u32 offset_dw;
u32 count_dw;
IR::Inst* unique_inst;
IR::Inst* original_inst;
};
Optimization::SrtGvnTable gvn_table;
// keys are GetUserData or ReadConst instructions that are used as pointers
std::unordered_map<IR::Inst*, PtrUserList> pointer_uses;
// GetUserData instructions corresponding to sgpr_base of SRT roots
boost::container::small_flat_map<IR::ScalarReg, IR::Inst*, 1> srt_roots;
// pick a single inst for a given value number
std::unordered_map<u32, IR::Inst*> vn_to_inst;
// map of all readconsts to their subprogram insts
boost::container::small_flat_map<IR::Inst*, IR::Inst*, 32> all_readconsts;
// subprogram insts mapped to their readconst data
boost::container::small_flat_map<IR::Inst*, ReadConstData, 32> readconst_data;
// Bumped during codegen to assign offsets to readconsts
u32 dst_off_dw;
PtrUserList* GetUsesAsPointer(IR::Inst* inst) {
auto it = pointer_uses.find(inst);
if (it != pointer_uses.end()) {
return &it->second;
}
return nullptr;
}
// Incremented during SRT program generation
u32 dst_off_dw = 0;
// Return a single instruction that this instruction is identical to, according
// to value number
@ -105,39 +106,84 @@ static inline void PopPtr(Xbyak::CodeGenerator& c) {
c.pop(rdi);
};
static void VisitPointer(u32 off_dw, IR::Inst* subtree, PassInfo& pass_info,
Xbyak::CodeGenerator& c) {
PushPtr(c, off_dw);
PassInfo::PtrUserList* use_list = pass_info.GetUsesAsPointer(subtree);
ASSERT(use_list);
// First copy all the src data from this tree level
// That way, all data that was contiguous in the guest SRT is also contiguous in the
// flattened buffer.
// TODO src and dst are contiguous. Optimize with wider loads/stores
// TODO if this subtree is dynamically indexed, don't compact it (keep it sparse)
for (auto [src_off_dw, use] : *use_list) {
c.mov(r10d, ptr[rdi + (src_off_dw << 2)]);
c.mov(ptr[rsi + (pass_info.dst_off_dw << 2)], r10d);
use->SetFlags<u32>(pass_info.dst_off_dw);
pass_info.dst_off_dw++;
static IR::U32 WrapInstWithCounter(IR::Inst* inst, u32 inital_value, IR::Block* first_block) {
const IR::Block::ConditionalData* loop_data = &inst->GetParent()->CondData();
while (loop_data != nullptr &&
loop_data->asl_node->type != IR::AbstractSyntaxNode::Type::Loop) {
loop_data = loop_data->parent;
}
// Then visit any children used as pointers
for (const auto [src_off_dw, use] : *use_list) {
if (pass_info.GetUsesAsPointer(use)) {
VisitPointer(src_off_dw, use, pass_info, c);
}
}
PopPtr(c);
ASSERT(loop_data != nullptr);
IR::Block* loop_body = loop_data->asl_node->data.loop.body;
// We are putting the Phi node in the loop header so that the counter is
// incremented each time the loop is executed. We point the Phi node to the
// first block so that the counter is not reset each time the loop is
// executed (nested loops)
IR::IREmitter ir_inst(*inst->GetParent(), ++IR::Block::InstructionList::s_iterator_to(*inst));
IR::IREmitter ir_loop_header(*loop_body->ImmPredecessors().front());
IR::Inst* phi = ir_loop_header.Phi(IR::Type::U32).Inst();
IR::U32 inc = ir_inst.IAdd(IR::U32(phi), ir_inst.Imm32(1));
phi->AddPhiOperand(first_block, ir_loop_header.Imm32(inital_value));
phi->AddPhiOperand(inst->GetParent(), inc);
return IR::U32(phi);
}
static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
Xbyak::CodeGenerator& c = g_srt_codegen;
static IR::Program GenerateSrtReadConstsSubProgram(IR::Program& program, PassInfo& pass_info, Pools& pools) {
IR::SubProgram sub_gen(&program, pools);
for (auto& [inst, sub_inst] : pass_info.all_readconsts) {
sub_inst = sub_gen.AddInst(inst);
pass_info.readconst_data[sub_inst] = {0, 0, pass_info.DeduplicateInstruction(sub_inst),
inst};
}
IR::Program sub_program = sub_gen.GetSubProgram();
IR::Block* original_first_block = program.blocks.front();
IR::Block* sub_first_block = sub_program.blocks.front();
for (auto& [inst, data] : pass_info.readconst_data) {
if (inst != data.unique_inst) {
PassInfo::ReadConstData& unique_data = pass_info.readconst_data[data.unique_inst];
data.offset_dw = unique_data.offset_dw;
// In this context, count_dw is always the same as unique_data.count_dw
// There are no duplicate instructions in different loops
data.count_dw = unique_data.count_dw;
} else {
u32 count = static_cast<u32>(IR::GetNumExecutions(inst));
ASSERT_MSG(count > 0, "Dynamic loop range not supported yet");
data.count_dw = count;
data.offset_dw = pass_info.dst_off_dw;
pass_info.dst_off_dw += count;
IR::U32 save_offset;
if (data.count_dw > 1) {
save_offset = WrapInstWithCounter(inst, data.offset_dw, sub_first_block);
} else {
IR::IREmitter ir(*inst);
save_offset = ir.Imm32(data.offset_dw);
}
IR::IREmitter ir(*inst->GetParent(),
++IR::Block::InstructionList::s_iterator_to(*inst));
ir.SetUserData(save_offset, IR::U32(inst));
}
data.original_inst->SetFlags<u32>(1);
IR::IREmitter ir(*data.original_inst);
data.original_inst->SetArg(0, ir.Imm32(0));
if (data.count_dw > 1) {
IR::U32 counter =
WrapInstWithCounter(data.original_inst, data.offset_dw, original_first_block);
data.original_inst->SetArg(1, counter);
} else {
data.original_inst->SetArg(1, ir.Imm32(data.offset_dw));
}
}
DeadCodeEliminationPass(sub_program);
IR::DumpProgram(sub_program, sub_program.info, "srt");
return sub_program;
}
if (info.srt_info.srt_reservations.empty() && pass_info.srt_roots.empty()) {
static void GenerateSrtProgram(IR::Program& program, PassInfo& pass_info, Pools& pools) {
#ifdef ARCH_X86_64
Xbyak::CodeGenerator& c = g_srt_codegen;
Shader::Info& info = program.info;
if (info.srt_info.srt_reservations.empty() && pass_info.all_readconsts.empty()) {
return;
}
@ -167,10 +213,13 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
ASSERT(pass_info.dst_off_dw == info.srt_info.flattened_bufsize_dw);
for (const auto& [sgpr_base, root] : pass_info.srt_roots) {
VisitPointer(static_cast<u32>(sgpr_base), root, pass_info, c);
if (!pass_info.all_readconsts.empty()) {
IR::Program sub_program = GenerateSrtReadConstsSubProgram(program, pass_info, pools);
Backend::X64::EmitX64(sub_program, c);
}
info.srt_info.flattened_bufsize_dw = pass_info.dst_off_dw;
c.ret();
c.ready();
@ -178,75 +227,30 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
size_t codesize = c.getCurr() - reinterpret_cast<const u8*>(info.srt_info.walker_func);
DumpSrtProgram(info, reinterpret_cast<const u8*>(info.srt_info.walker_func), codesize);
}
info.srt_info.flattened_bufsize_dw = pass_info.dst_off_dw;
#elif
if (info.srt_info.srt_reservations.empty() && pass_info.all_readconsts.empty()) {
UNREACHABLE_MSG("SRT program generation only supported on x86_64");
}
#endif
}
}; // namespace
void FlattenExtendedUserdataPass(IR::Program& program) {
void FlattenExtendedUserdataPass(IR::Program& program, Pools& pools) {
Shader::Info& info = program.info;
PassInfo pass_info;
// traverse at end and assign offsets to duplicate readconsts, using
// vn_to_inst as the source
boost::container::small_vector<IR::Inst*, 32> all_readconsts;
for (auto r_it = program.post_order_blocks.rbegin(); r_it != program.post_order_blocks.rend();
r_it++) {
IR::Block* block = *r_it;
for (IR::Inst& inst : *block) {
for (auto it = program.post_order_blocks.rbegin(); it != program.post_order_blocks.rend();
++it) {
IR::Block* block = *it;
for (auto& inst : block->Instructions()) {
if (inst.GetOpcode() == IR::Opcode::ReadConst) {
if (!inst.Arg(1).IsImmediate()) {
LOG_WARNING(Render_Recompiler, "ReadConst has non-immediate offset");
continue;
}
all_readconsts.push_back(&inst);
if (pass_info.DeduplicateInstruction(&inst) != &inst) {
// This is a duplicate of a readconst we've already visited
continue;
}
IR::Inst* ptr_composite = inst.Arg(0).InstRecursive();
const auto pred = [](IR::Inst* inst) -> std::optional<IR::Inst*> {
if (inst->GetOpcode() == IR::Opcode::GetUserData ||
inst->GetOpcode() == IR::Opcode::ReadConst) {
return inst;
}
return std::nullopt;
};
auto base0 = IR::BreadthFirstSearch(ptr_composite->Arg(0), pred);
auto base1 = IR::BreadthFirstSearch(ptr_composite->Arg(1), pred);
ASSERT_MSG(base0 && base1, "ReadConst not from constant memory");
IR::Inst* ptr_lo = base0.value();
ptr_lo = pass_info.DeduplicateInstruction(ptr_lo);
auto ptr_uses_kv =
pass_info.pointer_uses.try_emplace(ptr_lo, PassInfo::PtrUserList{});
PassInfo::PtrUserList& user_list = ptr_uses_kv.first->second;
user_list[inst.Arg(1).U32()] = &inst;
if (ptr_lo->GetOpcode() == IR::Opcode::GetUserData) {
IR::ScalarReg ud_reg = ptr_lo->Arg(0).ScalarReg();
pass_info.srt_roots[ud_reg] = ptr_lo;
}
pass_info.all_readconsts[&inst] = nullptr;
}
}
}
GenerateSrtProgram(info, pass_info);
// Assign offsets to duplicate readconsts
for (IR::Inst* readconst : all_readconsts) {
ASSERT(pass_info.vn_to_inst.contains(pass_info.gvn_table.GetValueNumber(readconst)));
IR::Inst* original = pass_info.DeduplicateInstruction(readconst);
readconst->SetFlags<u32>(original->Flags<u32>());
}
GenerateSrtProgram(program, pass_info, pools);
info.RefreshFlatBuf();
}

View file

@ -5,6 +5,7 @@
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/pools.h"
namespace Shader {
struct Profile;
@ -16,7 +17,7 @@ void SsaRewritePass(IR::BlockList& program);
void IdentityRemovalPass(IR::BlockList& program);
void DeadCodeEliminationPass(IR::Program& program);
void ConstantPropagationPass(IR::BlockList& program);
void FlattenExtendedUserdataPass(IR::Program& program);
void FlattenExtendedUserdataPass(IR::Program& program, Pools& pools);
void ReadLaneEliminationPass(IR::Program& program);
void ResourceTrackingPass(IR::Program& program);
void CollectShaderInfoPass(IR::Program& program);

View file

@ -247,17 +247,22 @@ SharpLocation AttemptTrackSharp(const IR::Inst* inst, auto& visited_insts) {
}
return std::nullopt;
};
// Value may be modified between the ReadConst/GetUserData and inst.
// We don't take this into account.
const auto result = IR::BreadthFirstSearch(inst, pred);
ASSERT_MSG(result, "Unable to track sharp source");
inst = result.value();
visited_insts.emplace_back(inst);
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
return static_cast<u32>(inst->Arg(0).ScalarReg());
} else {
ASSERT_MSG(inst->GetOpcode() == IR::Opcode::ReadConst,
"Sharp load not from constant memory");
return inst->Flags<u32>();
} else if (inst->GetOpcode() == IR::Opcode::ReadConst) {
// Sharp is stored in the offset argument.
// The value is not immediate if ReadConst is inside of a loop
// and the base or offset is different in each iteration. (we don't support this)
ASSERT(inst->Arg(1).IsImmediate());
return inst->Arg(1).U32();
}
UNREACHABLE_MSG("Sharp load not from constant memory or user data");
}
/// Tracks a sharp with validation of the chosen data type.

View file

@ -9,7 +9,7 @@
namespace Shader {
using PFN_SrtWalker = void PS4_SYSV_ABI (*)(const u32* /*user_data*/, u32* /*flat_dst*/);
using PFN_SrtWalker = void PS4_SYSV_ABI (*)(u32* /*flat_dst*/);
struct PersistentSrtInfo {
// Special case when fetch shader uses step rates.

View file

@ -6,13 +6,30 @@
#include <fmt/format.h>
#include "common/config.h"
#include "common/io_file.h"
#include "common/path_util.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/ir/value.h"
namespace Shader::IR {
std::string DumpProgram(const Program& program) {
void DumpProgram(const Program& program, const Info& info, const std::string& type) {
using namespace Common::FS;
if (!Config::dumpShaders()) {
return;
}
const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
if (!std::filesystem::exists(dump_dir)) {
std::filesystem::create_directories(dump_dir);
}
const auto ir_filename =
fmt::format("{}_{:#018x}.{}irprogram.txt", info.stage, info.pgm_hash, type);
const auto ir_file = IOFile{dump_dir / ir_filename, FileAccessMode::Write, FileType::TextFile};
size_t index{0};
std::map<const IR::Inst*, size_t> inst_to_index;
std::map<const IR::Block*, size_t> block_to_index;
@ -21,11 +38,20 @@ std::string DumpProgram(const Program& program) {
block_to_index.emplace(block, index);
++index;
}
std::string ret;
for (const auto& block : program.blocks) {
ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n';
std::string s = IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n';
ir_file.WriteString(s);
}
const auto asl_filename = fmt::format("{}_{:#018x}.{}asl.txt", info.stage, info.pgm_hash, type);
const auto asl_file =
IOFile{dump_dir / asl_filename, FileAccessMode::Write, FileType::TextFile};
for (const auto& node : program.syntax_list) {
std::string s = IR::DumpASLNode(node, block_to_index, inst_to_index) + '\n';
asl_file.WriteString(s);
}
return ret;
}
} // namespace Shader::IR

View file

@ -21,6 +21,6 @@ struct Program {
Info& info;
};
[[nodiscard]] std::string DumpProgram(const Program& program);
void DumpProgram(const Program& program, const Info& info, const std::string& type = "");
} // namespace Shader::IR

View file

@ -51,20 +51,6 @@ private:
u32 vn;
switch (inst->GetOpcode()) {
case IR::Opcode::Phi: {
const auto pred = [](IR::Inst* inst) -> std::optional<IR::Inst*> {
if (inst->GetOpcode() == IR::Opcode::GetUserData ||
inst->GetOpcode() == IR::Opcode::CompositeConstructU32x2 ||
inst->GetOpcode() == IR::Opcode::ReadConst) {
return inst;
}
return std::nullopt;
};
IR::Inst* source = IR::BreadthFirstSearch(inst, pred).value();
vn = GetValueNumber(source);
value_numbers[IR::Value(inst)] = vn;
break;
}
case IR::Opcode::GetUserData:
case IR::Opcode::CompositeConstructU32x2:
case IR::Opcode::ReadConst: {

View file

@ -0,0 +1,300 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <boost/container/flat_set.hpp>
#include "shader_recompiler/ir/conditional_tree.h"
#include "shader_recompiler/ir/post_order.h"
#include "shader_recompiler/ir/subprogram.h"
namespace Shader::IR {
// Binds the extraction context to the program being sliced (super_program) and
// to the object pools that back every block/instruction cloned into the slice.
SubProgram::SubProgram(Program* super_program, Pools& pools)
    : super_program(super_program), pools(pools) {}
// Returns the sub-program clone of orig_block, creating an empty clone on the
// first request. Uses a single map probe (try_emplace) instead of the original
// find-then-insert pair, so each call does one lookup rather than two.
Block* SubProgram::AddBlock(Block* orig_block) {
    const auto [it, inserted] = orig_block_to_block.try_emplace(orig_block, nullptr);
    if (inserted) {
        // First time we see this block: create its counterpart in the slice.
        it->second = pools.block_pool.Create(pools.inst_pool);
    }
    return it->second;
}
// Convenience overload: clones orig_inst with no explicit insertion point; the
// private overload picks the default position inside the cloned block.
Inst* SubProgram::AddInst(Inst* orig_inst) {
    return AddInst(orig_inst, std::nullopt);
}
// Lookup-only counterpart of AddBlock: returns the clone of orig_block if one
// was already created, or nullptr otherwise. Never creates a new block.
Block* SubProgram::GetBlock(Block* orig_block) {
    const auto it = orig_block_to_block.find(orig_block);
    return it == orig_block_to_block.end() ? nullptr : it->second;
}
// Lookup-only counterpart of AddInst: returns the clone of orig_inst if one
// was already created, or nullptr otherwise. Never clones a new instruction.
Inst* SubProgram::GetInst(Inst* orig_inst) {
    const auto it = orig_inst_to_inst.find(orig_inst);
    return it == orig_inst_to_inst.end() ? nullptr : it->second;
}
// Finalizes and returns the extracted sub-program. May only be called once per
// SubProgram instance, since it mutates the cloned blocks in place.
Program SubProgram::GetSubProgram() {
    ASSERT_MSG(!completed, "SubProgram already completed");
    completed = true;
    Program sub_program(super_program->info);
    // Order matters: the block/syntax lists must be built before the
    // prologue/epilogue are appended, and post-order / the conditional tree
    // are computed from the completed syntax list.
    BuildBlockListAndASL(sub_program);
    AddProlgueAndEpilogue(sub_program);
    sub_program.post_order_blocks = PostOrder(sub_program.syntax_list.front());
    AddConditionalTreeFromASL(sub_program.syntax_list);
    // Seal every block so phi resolution for the slice is complete.
    for (Block* block : sub_program.blocks) {
        block->SsaSeal();
    }
    return sub_program;
}
// Wraps the extracted body with the Prologue/Epilogue instructions and the
// trailing Return node a complete IR program is expected to carry.
// NOTE(review): the name is misspelled ("Prolgue"); kept as-is because the
// declaration in subprogram.h uses the same spelling.
void SubProgram::AddProlgueAndEpilogue(Program& sub_program) {
    // We may need to handle this better.
    Block* epilogue_block = pools.block_pool.Create(pools.inst_pool);
    Block* front_block = sub_program.blocks.front();
    // Route the last cloned block into the fresh epilogue block.
    sub_program.blocks.back()->AddBranch(epilogue_block);
    sub_program.blocks.push_back(epilogue_block);
    sub_program.syntax_list.push_back(AbstractSyntaxNode{.data = {.block = epilogue_block},
                                                         .type = AbstractSyntaxNode::Type::Block});
    sub_program.syntax_list.push_back(AbstractSyntaxNode{.type = AbstractSyntaxNode::Type::Return});
    epilogue_block->AppendNewInst(Opcode::Epilogue, {});
    // The prologue goes in front of the first existing instruction.
    front_block->PrependNewInst(front_block->begin(), Opcode::Prologue);
    epilogue_block->SsaSeal();
}
// Clones orig_inst into the sub-program (memoized via orig_inst_to_inst),
// recursively cloning its operands. insertion_point selects where inside the
// cloned block the new instruction is placed; if absent, it is appended at the
// end of the block, but before a trailing ConditionRef so that control-flow
// markers stay last.
Inst* SubProgram::AddInst(Inst* orig_inst,
                          std::optional<Block::InstructionList::iterator> insertion_point) {
    auto it = orig_inst_to_inst.find(orig_inst);
    if (it != orig_inst_to_inst.end()) {
        return it->second;
    }
    Block* block = AddBlock(orig_inst->GetParent());
    if (!insertion_point) {
        if (block->back().GetOpcode() == Opcode::ConditionRef) {
            // Keep the ConditionRef as the block's final instruction.
            insertion_point = --block->end();
        } else {
            insertion_point = block->end();
        }
    }
    Inst* inst = &(
        *block->PrependNewInst(*insertion_point, orig_inst->GetOpcode(), orig_inst->Flags<u32>()));
    // Record the mapping before cloning operands, so cyclic references
    // (e.g. through phis) terminate.
    orig_inst_to_inst[orig_inst] = inst;
    if (orig_inst->GetOpcode() == Opcode::Phi) {
        AddPhi(orig_inst, inst);
    } else {
        for (size_t i = 0; i < orig_inst->NumArgs(); ++i) {
            SetArg(inst, orig_inst, i);
        }
    }
    return inst;
}
// Clones a two-operand phi: clones both incoming blocks/values, then walks up
// the conditional tree from the deeper incoming block towards the shallower
// one, cloning every condition (If conditions, their negated else-conditions,
// and loop break conditions) the phi depends on, so that the sliced program
// keeps the control flow that selects between the phi operands.
void SubProgram::AddPhi(Inst* orig_phi, Inst* phi) {
    // Current IR only has Phis with 2 arguments.
    ASSERT(orig_phi->NumArgs() == 2);
    Block* orig_block0 = orig_phi->PhiBlock(0);
    Block* orig_block1 = orig_phi->PhiBlock(1);
    Block* block0 = AddBlock(orig_block0);
    Block* block1 = AddBlock(orig_block1);
    const Value& arg0 = orig_phi->Arg(0);
    const Value& arg1 = orig_phi->Arg(1);
    AddPhiOperand(phi, block0, arg0);
    AddPhiOperand(phi, block1, arg1);
    // Orders the two incoming blocks' conditional data as (deeper, shallower).
    const auto get_conds =
        [orig_block0,
         orig_block1]() -> std::pair<const Block::ConditionalData&, const Block::ConditionalData&> {
        const Block::ConditionalData& cond0 = orig_block0->CondData();
        const Block::ConditionalData& cond1 = orig_block1->CondData();
        if (cond0.depth > cond1.depth) {
            return {cond0, cond1};
        }
        return {cond1, cond0};
    };
    // True when one ConditionRef's condition is the logical negation of the
    // other's (either immediate booleans or a LogicalNot of the other value).
    const auto is_negated_cond = [](Inst* ref1, Inst* ref2) {
        IR::Value cond1 = ref1->Arg(0);
        IR::Value cond2 = ref2->Arg(0);
        if (cond1.IsImmediate() || cond2.IsImmediate()) {
            if (!cond1.IsImmediate() || !cond2.IsImmediate()) {
                return false;
            }
            return cond1.U1() != cond2.U1();
        }
        Inst* cond1_inst = cond1.InstRecursive();
        Inst* cond2_inst = cond2.InstRecursive();
        if (cond1_inst->GetOpcode() == Opcode::LogicalNot) {
            return cond1_inst->Arg(0) == cond2;
        }
        if (cond2_inst->GetOpcode() == Opcode::LogicalNot) {
            return cond2_inst->Arg(0) == cond1;
        }
        return false;
    };
    const auto& [start_cond, target_cond] = get_conds();
    const Block::ConditionalData* cond = &start_cond;
    // Walk outwards from the deeper condition until reaching the depth of the
    // shallower one, cloning every condition on the way.
    while (cond->depth > target_cond.depth) {
        if (cond->asl_node->type == AbstractSyntaxNode::Type::If) {
            Inst* cond_ref_inst = cond->asl_node->data.if_node.cond.InstRecursive();
            AddInst(cond_ref_inst);
            // Check if the condition has an else branch, and add it.
            Block* merge_block = cond->asl_node->data.if_node.merge;
            Inst* else_cond_ref_inst = &merge_block->back();
            if (else_cond_ref_inst->GetOpcode() == Opcode::ConditionRef) {
                // Check if one condition is the negation of the other.
                if (is_negated_cond(cond_ref_inst, else_cond_ref_inst)) {
                    AddInst(else_cond_ref_inst);
                }
            }
        } else if (cond->asl_node->type == AbstractSyntaxNode::Type::Loop) {
            // In case of loop, we need to add the loop itself and also
            // the break conditions.
            Block* loop_merge = cond->asl_node->data.loop.merge;
            for (Block* pred : loop_merge->ImmPredecessors()) {
                if (pred->CondData().asl_node == cond->asl_node) {
                    ASSERT(pred->back().GetOpcode() == IR::Opcode::ConditionRef);
                    AddInst(&pred->back());
                }
            }
        }
        // Stop once we reach the conditional scope of the phi's own block.
        if (orig_phi->GetParent()->CondData().asl_node == cond->asl_node) {
            break;
        }
        cond = cond->parent;
    }
}
// Copies operand `index` of orig_inst onto the cloned instruction `inst`.
// Immediates are copied verbatim; instruction operands are cloned first. When
// the operand's producer lives in the same block as orig_inst, the clone is
// inserted right before `inst` so intra-block ordering is preserved.
void SubProgram::SetArg(Inst* inst, Inst* orig_inst, size_t index) {
    const Value& orig_arg = orig_inst->Arg(index);
    if (orig_arg.IsImmediate()) {
        inst->SetArg(index, orig_arg);
        return;
    }
    Inst* const producer = orig_arg.InstRecursive();
    std::optional<Block::InstructionList::iterator> where;
    if (orig_inst->GetParent() == producer->GetParent()) {
        where = Block::InstructionList::s_iterator_to(*inst);
    }
    inst->SetArg(index, Value(AddInst(producer, where)));
}
// Appends one incoming (block, value) pair to the cloned phi. Immediate values
// pass through untouched; instruction values are cloned into the slice first.
void SubProgram::AddPhiOperand(Inst* phi, Block* block, const Value& arg) {
    const Value operand = arg.IsImmediate() ? arg : Value(AddInst(arg.InstRecursive()));
    phi->AddPhiOperand(block, operand);
}
// Walks the super-program's abstract syntax list in order and re-emits only
// the nodes whose blocks/conditions were cloned into the sub-program, wiring
// up branches between the kept blocks. Merge blocks of kept If/Loop nodes are
// recorded in filter_blocks so the matching EndIf nodes are kept too.
void SubProgram::BuildBlockListAndASL(Program& sub_program) {
    boost::container::flat_set<Block*> filter_blocks;
    for (const AbstractSyntaxNode& orig_asl_node : super_program->syntax_list) {
        AbstractSyntaxNode asl_node;
        asl_node.type = orig_asl_node.type;
        Block* orig_block = orig_asl_node.data.block;
        switch (orig_asl_node.type) {
        case AbstractSyntaxNode::Type::Block: {
            // Skip blocks that were never cloned into the slice.
            Block* block = GetBlock(orig_block);
            if (!block) {
                continue;
            }
            // Chain consecutive kept blocks with fallthrough branches.
            if (!sub_program.syntax_list.empty()) {
                Block* last_block = sub_program.blocks.back();
                if (!last_block->HasImmSuccessor(block)) {
                    last_block->AddBranch(block);
                }
            }
            asl_node.data.block = block;
            sub_program.blocks.push_back(block);
            break;
        }
        case AbstractSyntaxNode::Type::If: {
            // Keep the If only when its condition was cloned.
            Inst* cond = GetInst(orig_asl_node.data.if_node.cond.InstRecursive());
            if (!cond) {
                continue;
            }
            Block* block = cond->GetParent();
            Block* merge_block = AddBlock(orig_asl_node.data.if_node.merge);
            Block* body_block = AddBlock(orig_asl_node.data.if_node.body);
            asl_node.data.if_node.cond = U1(cond);
            asl_node.data.if_node.body = body_block;
            asl_node.data.if_node.merge = merge_block;
            block->AddBranch(body_block);
            block->AddBranch(merge_block);
            // Remember the merge so the matching EndIf is kept below.
            filter_blocks.insert(merge_block);
            break;
        }
        case AbstractSyntaxNode::Type::EndIf: {
            // Only keep EndIfs whose If was kept (merge registered above).
            Block* merge_block = GetBlock(orig_asl_node.data.end_if.merge);
            if (!filter_blocks.contains(merge_block)) {
                continue;
            }
            asl_node.data.end_if.merge = merge_block;
            break;
        }
        case AbstractSyntaxNode::Type::Loop: {
            // Keep the loop only when its continue block (and its trailing
            // ConditionRef) made it into the slice.
            Block* continue_block = GetBlock(orig_asl_node.data.loop.continue_block);
            if (!continue_block) {
                continue;
            }
            if (continue_block->back().GetOpcode() != Opcode::ConditionRef) {
                continue;
            }
            Block* merge_block = AddBlock(orig_asl_node.data.loop.merge);
            asl_node.data.loop.body = AddBlock(orig_asl_node.data.loop.body);
            asl_node.data.loop.continue_block = continue_block;
            asl_node.data.loop.merge = merge_block;
            filter_blocks.insert(merge_block);
            break;
        }
        case AbstractSyntaxNode::Type::Repeat: {
            Inst* cond = GetInst(orig_asl_node.data.repeat.cond.InstRecursive());
            if (!cond) {
                continue;
            }
            Block* block = cond->GetParent();
            Block* merge_block = AddBlock(orig_asl_node.data.repeat.merge);
            Block* loop_header_block = AddBlock(orig_asl_node.data.repeat.loop_header);
            asl_node.data.repeat.cond = U1(cond);
            asl_node.data.repeat.loop_header = loop_header_block;
            asl_node.data.repeat.merge = merge_block;
            block->AddBranch(loop_header_block);
            block->AddBranch(merge_block);
            break;
        }
        case AbstractSyntaxNode::Type::Break: {
            Inst* cond = GetInst(orig_asl_node.data.break_node.cond.InstRecursive());
            if (!cond) {
                continue;
            }
            Block* block = cond->GetParent();
            Block* merge_block = AddBlock(orig_asl_node.data.break_node.merge);
            Block* skip_block = AddBlock(orig_asl_node.data.break_node.skip);
            // NOTE(review): this uses &block->back() instead of the looked-up
            // cond (unlike the If/Repeat cases) — presumably the ConditionRef
            // is the block's last instruction; confirm they always coincide.
            asl_node.data.break_node.cond = U1(&block->back());
            asl_node.data.break_node.merge = merge_block;
            asl_node.data.break_node.skip = skip_block;
            block->AddBranch(skip_block);
            block->AddBranch(merge_block);
            break;
        }
        case AbstractSyntaxNode::Type::Unreachable:
        case AbstractSyntaxNode::Type::Return:
            // Terminators are re-added by AddProlgueAndEpilogue.
            continue;
        default:
            break;
        }
        sub_program.syntax_list.push_back(asl_node);
    }
    // Recompute the predecessor-count flag for the cloned CFG.
    for (Block* block : sub_program.blocks) {
        block->has_multiple_predecessors = block->ImmPredecessors().size() > 1;
    }
}
} // namespace Shader::IR

View file

@ -0,0 +1,46 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <boost/container/flat_map.hpp>
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/pools.h"
namespace Shader::IR {
// Given an IR program, this class is used to create a subprogram that contains
// only the blocks and instructions that are relevant to a group of given instructions.
// Taking into account only the given instructions, the instructions that it uses and
// conditions.
struct SubProgram {
    SubProgram(Program* super_program, Pools& pools);

    // Returns the sub-program clone of orig_block, creating it if needed.
    Block* AddBlock(Block* orig_block);
    // Clones orig_inst (and, transitively, its operands) into the sub-program.
    Inst* AddInst(Inst* orig_inst);
    // Lookup-only variants: return nullptr when no clone exists yet.
    Block* GetBlock(Block* orig_block);
    Inst* GetInst(Inst* orig_inst);
    // Finalizes and returns the sub-program; may be called at most once.
    Program GetSubProgram();

private:
    Inst* AddInst(Inst* orig_inst, std::optional<Block::InstructionList::iterator> insertion_point);
    void AddPhi(Inst* orig_phi, Inst* phi);
    void SetArg(Inst* inst, Inst* orig_inst, size_t index);
    void AddPhiOperand(Inst* phi, Block* block, const Value& arg);
    void BuildBlockListAndASL(Program& sub_program);
    void AddProlgueAndEpilogue(Program& sub_program);

    // Guards against GetSubProgram being invoked twice.
    bool completed = false;
    Program* super_program;
    Pools& pools;
    // Memoization maps from original blocks/instructions to their clones.
    boost::container::flat_map<Block*, Block*> orig_block_to_block;
    boost::container::flat_map<Inst*, Inst*> orig_inst_to_inst;
};
} // namespace Shader::IR

View file

@ -0,0 +1,26 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/object_pool.h"
#include "shader_recompiler/ir/basic_block.h"
namespace Shader {
// Bundles the object pools that back IR instruction/block allocation during
// shader translation, so they can be passed around and recycled together.
struct Pools {
    // Initial pool capacities (objects per chunk).
    static constexpr u32 InstPoolSize = 8192;
    static constexpr u32 BlockPoolSize = 32;

    Common::ObjectPool<IR::Inst> inst_pool;
    Common::ObjectPool<IR::Block> block_pool;

    explicit Pools() : inst_pool{InstPoolSize}, block_pool{BlockPoolSize} {}

    // Frees all pooled objects; pointers previously handed out become dangling.
    void ReleaseContents() {
        inst_pool.ReleaseContents();
        block_pool.ReleaseContents();
    }
};
} // namespace Shader

View file

@ -4,6 +4,7 @@
#include "shader_recompiler/frontend/control_flow_graph.h"
#include "shader_recompiler/frontend/decode.h"
#include "shader_recompiler/frontend/structured_control_flow.h"
#include "shader_recompiler/ir/conditional_tree.h"
#include "shader_recompiler/ir/passes/ir_passes.h"
#include "shader_recompiler/ir/post_order.h"
#include "shader_recompiler/recompiler.h"
@ -59,6 +60,8 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
program.blocks = GenerateBlocks(program.syntax_list);
program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
Shader::IR::AddConditionalTreeFromASL(program.syntax_list);
// Run optimization passes
Shader::Optimization::SsaRewritePass(program.post_order_blocks);
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
@ -72,7 +75,7 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
}
Shader::Optimization::RingAccessElimination(program, runtime_info);
Shader::Optimization::ReadLaneEliminationPass(program);
Shader::Optimization::FlattenExtendedUserdataPass(program);
Shader::Optimization::FlattenExtendedUserdataPass(program, pools);
Shader::Optimization::ResourceTrackingPass(program);
Shader::Optimization::LowerBufferFormatToRaw(program);
Shader::Optimization::SharedMemoryToStoragePass(program, runtime_info, profile);
@ -82,6 +85,8 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
Shader::Optimization::CollectShaderInfoPass(program);
Shader::IR::DumpProgram(program, info);
return program;
}

View file

@ -3,30 +3,15 @@
#pragma once
#include "common/object_pool.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/pools.h"
namespace Shader {
struct Profile;
struct RuntimeInfo;
struct Pools {
static constexpr u32 InstPoolSize = 8192;
static constexpr u32 BlockPoolSize = 32;
Common::ObjectPool<IR::Inst> inst_pool;
Common::ObjectPool<IR::Block> block_pool;
explicit Pools() : inst_pool{InstPoolSize}, block_pool{BlockPoolSize} {}
void ReleaseContents() {
inst_pool.ReleaseContents();
block_pool.ReleaseContents();
}
};
[[nodiscard]] IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info,
RuntimeInfo& runtime_info, const Profile& profile);