mirror of https://github.com/shadps4-emu/shadPS4.git (synced 2025-04-20 03:24:49 +00:00)

Merge 33d64c3159 into 69777e2ffa (commit d33b7ed04d)
72 changed files with 9315 additions and 161 deletions
CMakeLists.txt

@@ -635,6 +635,7 @@ set(COMMON src/common/logging/backend.cpp
    src/common/assert.h
    src/common/bit_field.h
    src/common/bounded_threadsafe_queue.h
    src/common/cartesian_invoke.h
    src/common/concepts.h
    src/common/config.cpp
    src/common/config.h
@@ -783,6 +784,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
    src/shader_recompiler/recompiler.cpp
    src/shader_recompiler/recompiler.h
    src/shader_recompiler/info.h
    src/shader_recompiler/pools.h
    src/shader_recompiler/params.h
    src/shader_recompiler/runtime_info.h
    src/shader_recompiler/specialization.h
@@ -847,15 +849,39 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
    src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp
    src/shader_recompiler/ir/passes/shared_memory_to_storage_pass.cpp
    src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
    src/shader_recompiler/ir/compute_value/compute.cpp
    src/shader_recompiler/ir/compute_value/compute.h
    src/shader_recompiler/ir/compute_value/do_bitcast.cpp
    src/shader_recompiler/ir/compute_value/do_bitcast.h
    src/shader_recompiler/ir/compute_value/do_composite.cpp
    src/shader_recompiler/ir/compute_value/do_composite.h
    src/shader_recompiler/ir/compute_value/do_convert.cpp
    src/shader_recompiler/ir/compute_value/do_convert.h
    src/shader_recompiler/ir/compute_value/do_float_operations.cpp
    src/shader_recompiler/ir/compute_value/do_float_operations.h
    src/shader_recompiler/ir/compute_value/do_integer_operations.cpp
    src/shader_recompiler/ir/compute_value/do_integer_operations.h
    src/shader_recompiler/ir/compute_value/do_logical_operations.cpp
    src/shader_recompiler/ir/compute_value/do_logical_operations.h
    src/shader_recompiler/ir/compute_value/do_nop_functions.h
    src/shader_recompiler/ir/compute_value/do_packing.cpp
    src/shader_recompiler/ir/compute_value/do_packing.h
    src/shader_recompiler/ir/compute_value/imm_value.cpp
    src/shader_recompiler/ir/compute_value/imm_value.h
    src/shader_recompiler/ir/abstract_syntax_list.cpp
    src/shader_recompiler/ir/abstract_syntax_list.h
    src/shader_recompiler/ir/attribute.cpp
    src/shader_recompiler/ir/attribute.h
    src/shader_recompiler/ir/basic_block.cpp
    src/shader_recompiler/ir/basic_block.h
    src/shader_recompiler/ir/condition.h
    src/shader_recompiler/ir/conditional_tree.cpp
    src/shader_recompiler/ir/conditional_tree.h
    src/shader_recompiler/ir/ir_emitter.cpp
    src/shader_recompiler/ir/ir_emitter.h
    src/shader_recompiler/ir/microinstruction.cpp
    src/shader_recompiler/ir/num_executions.cpp
    src/shader_recompiler/ir/num_executions.h
    src/shader_recompiler/ir/opcodes.cpp
    src/shader_recompiler/ir/opcodes.h
    src/shader_recompiler/ir/opcodes.inc
@@ -865,14 +891,43 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
    src/shader_recompiler/ir/post_order.h
    src/shader_recompiler/ir/program.cpp
    src/shader_recompiler/ir/program.h
    src/shader_recompiler/ir/reg.h
    src/shader_recompiler/ir/reinterpret.h
    src/shader_recompiler/ir/srt_gvn_table.h
    src/shader_recompiler/ir/subprogram.cpp
    src/shader_recompiler/ir/subprogram.h
    src/shader_recompiler/ir/type.cpp
    src/shader_recompiler/ir/type.h
    src/shader_recompiler/ir/value.cpp
    src/shader_recompiler/ir/value.h
)

if (ARCHITECTURE STREQUAL "x86_64")
    set(SHADER_RECOMPILER ${SHADER_RECOMPILER}
        src/shader_recompiler/backend/asm_x64/emit_x64_atomic.cpp
        src/shader_recompiler/backend/asm_x64/emit_x64_barrier.cpp
        src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp
        src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp
        src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp
        src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp
        src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp
        src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp
        src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
        src/shader_recompiler/backend/asm_x64/emit_x64_integer.cpp
        src/shader_recompiler/backend/asm_x64/emit_x64_logical.cpp
        src/shader_recompiler/backend/asm_x64/emit_x64_select.cpp
        src/shader_recompiler/backend/asm_x64/emit_x64_shared_memory.cpp
        src/shader_recompiler/backend/asm_x64/emit_x64_special.cpp
        src/shader_recompiler/backend/asm_x64/emit_x64_undefined.cpp
        src/shader_recompiler/backend/asm_x64/emit_x64_warp.cpp
        src/shader_recompiler/backend/asm_x64/emit_x64.cpp
        src/shader_recompiler/backend/asm_x64/emit_x64.h
        src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp
        src/shader_recompiler/backend/asm_x64/x64_emit_context.h
        src/shader_recompiler/backend/asm_x64/x64_utils.cpp
        src/shader_recompiler/backend/asm_x64/x64_utils.h)
endif()

set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
    src/video_core/amdgpu/liverpool.h
    src/video_core/amdgpu/pixel_format.cpp
src/common/cartesian_invoke.h (new file, 43 lines)

@@ -0,0 +1,43 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <cstddef>
#include <tuple>
#include <utility>

namespace Common {
namespace Detail {

template <typename Func, typename OutputIt, std::size_t N, std::size_t Level, typename... ArgLists>
void CartesianInvokeImpl(Func func, OutputIt out_it,
                         std::tuple<typename ArgLists::const_iterator...>& arglists_its,
                         const std::tuple<const ArgLists&...>& arglists_tuple) {
    if constexpr (Level == N) {
        // All levels fixed: gather the current element of each list and invoke.
        auto get_tuple = [&]<std::size_t... I>(std::index_sequence<I...>) {
            return std::forward_as_tuple(*std::get<I>(arglists_its)...);
        };
        out_it = std::move(std::apply(func, get_tuple(std::make_index_sequence<N>{})));
        return;
    } else {
        const auto& arglist = std::get<Level>(arglists_tuple);
        for (auto it = arglist.begin(); it != arglist.end(); ++it) {
            std::get<Level>(arglists_its) = it;
            CartesianInvokeImpl<Func, OutputIt, N, Level + 1, ArgLists...>(
                func, out_it, arglists_its, arglists_tuple);
        }
    }
}

} // namespace Detail

template <typename Func, typename OutputIt, typename... ArgLists>
void CartesianInvoke(Func func, OutputIt out_it, const ArgLists&... arg_lists) {
    constexpr std::size_t N = sizeof...(ArgLists);
    const std::tuple<const ArgLists&...> arglists_tuple = std::forward_as_tuple(arg_lists...);

    std::tuple<typename ArgLists::const_iterator...> arglists_it;
    Detail::CartesianInvokeImpl<Func, OutputIt, N, 0, ArgLists...>(func, out_it, arglists_it,
                                                                   arglists_tuple);
}

} // namespace Common
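A usage sketch for CartesianInvoke (illustrative; the containers and lambda below are hypothetical, not part of the diff). The helper walks the cartesian product of the argument lists, leftmost list outermost, and assigns each result of func through out_it:

    #include <iterator>
    #include <vector>
    #include "common/cartesian_invoke.h"

    std::vector<int> xs{1, 2};
    std::vector<int> ys{10, 20};
    std::vector<int> sums;
    // Visits (1,10), (1,20), (2,10), (2,20) in that order.
    Common::CartesianInvoke([](int a, int b) { return a + b; },
                            std::back_inserter(sums), xs, ys);
    // sums == {11, 21, 12, 22}

Assigning through out_it works with insert iterators such as std::back_insert_iterator, whose operator= appends to the underlying container.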
@@ -3,6 +3,7 @@

 #pragma once

 #include <cstddef>
 #include <tuple>

 namespace Common {
src/shader_recompiler/backend/asm_x64/emit_x64.cpp (new file, 268 lines)

@@ -0,0 +1,268 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/assert.h"
#include "common/func_traits.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/backend/asm_x64/emit_x64.h"
#include "shader_recompiler/backend/asm_x64/emit_x64_instructions.h"
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/backend/asm_x64/x64_utils.h"

namespace Shader::Backend::X64 {

using namespace Xbyak;
using namespace Xbyak::util;

namespace {

static void EmitCondition(EmitContext& ctx, const IR::Inst* ref, Label& label, bool invert) {
    CodeGenerator& c = ctx.Code();
    IR::Value cond = ref->Arg(0);
    if (cond.IsImmediate()) {
        // If the condition is an immediate, evaluate it at compile time.
        if (cond.U1() != invert) {
            c.jmp(label, CodeGenerator::LabelType::T_NEAR);
        }
    } else {
        const OperandHolder& op = ctx.Def(cond.InstRecursive())[0];
        c.test(op.Op(), 0x1);
        if (invert) {
            c.jz(label, CodeGenerator::LabelType::T_NEAR);
        } else {
            c.jnz(label, CodeGenerator::LabelType::T_NEAR);
        }
    }
}

template <typename ArgType>
std::remove_reference_t<ArgType> Arg(EmitContext& ctx, const IR::Value& arg) {
    if constexpr (std::is_same_v<ArgType, const Operands&>) {
        return ctx.Def(arg);
    } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) {
        return arg;
    } else if constexpr (std::is_same_v<ArgType, u32>) {
        return arg.U32();
    } else if constexpr (std::is_same_v<ArgType, u64>) {
        return arg.U64();
    } else if constexpr (std::is_same_v<ArgType, bool>) {
        return arg.U1();
    } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
        return arg.Attribute();
    } else if constexpr (std::is_same_v<ArgType, IR::ScalarReg>) {
        return arg.ScalarReg();
    } else if constexpr (std::is_same_v<ArgType, IR::VectorReg>) {
        return arg.VectorReg();
    } else if constexpr (std::is_same_v<ArgType, const char*>) {
        return arg.StringLiteral();
    } else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
        return arg.Patch();
    }
    UNREACHABLE();
}

template <auto func, bool is_first_arg_inst, bool has_dest, size_t... I>
void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
    using Traits = Common::FuncTraits<decltype(func)>;
    if constexpr (has_dest) {
        if constexpr (is_first_arg_inst) {
            func(ctx, inst, ctx.Def(inst),
                 Arg<typename Traits::template ArgType<I + 3>>(ctx, inst->Arg(I))...);
        } else {
            func(ctx, ctx.Def(inst),
                 Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
        }
    } else {
        if constexpr (is_first_arg_inst) {
            func(ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
        } else {
            func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
        }
    }
}

template <auto func, bool has_dest>
void Invoke(EmitContext& ctx, IR::Inst* inst) {
    using Traits = Common::FuncTraits<decltype(func)>;
    static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments");
    if constexpr (Traits::NUM_ARGS == 1) {
        Invoke<func, false, false>(ctx, inst, std::make_index_sequence<0>{});
    } else {
        using FirstArgType = typename Traits::template ArgType<1>;
        static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst*>;
        static constexpr size_t num_inst_args = Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1);
        if constexpr (num_inst_args > 0 && has_dest) {
            Invoke<func, is_first_arg_inst, true>(ctx, inst,
                                                  std::make_index_sequence<num_inst_args - 1>{});
        } else {
            Invoke<func, is_first_arg_inst, false>(ctx, inst,
                                                   std::make_index_sequence<num_inst_args>{});
        }
    }
}

void EmitInst(EmitContext& ctx, IR::Inst* inst) {
    switch (inst->GetOpcode()) {
#define OPCODE(name, result_type, ...)                                                             \
    case IR::Opcode::name:                                                                         \
        Invoke<&Emit##name, IR::Type::result_type != IR::Type::Void>(ctx, inst);                   \
        return;
#include "shader_recompiler/ir/opcodes.inc"
#undef OPCODE
    }
    UNREACHABLE_MSG("Invalid opcode {}", inst->GetOpcode());
}
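For illustration, an opcodes.inc entry of the form OPCODE(BitCastU32F32, U32, F32) (representative; the actual table lives in opcodes.inc) expands under this macro to roughly:

    case IR::Opcode::BitCastU32F32:
        // U32 != Void, so has_dest is true and Invoke passes ctx.Def(inst)
        // as the destination operands ahead of the instruction's arguments.
        Invoke<&EmitBitCastU32F32, true>(ctx, inst);
        return;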
static bool IsLastInst(const IR::AbstractSyntaxList& list,
                       IR::AbstractSyntaxList::const_iterator it) {
    // True only if every node after `it` is structural, i.e. emits no further code.
    for (; it != list.end(); ++it) {
        switch (it->type) {
        case IR::AbstractSyntaxNode::Type::Return:
        case IR::AbstractSyntaxNode::Type::Loop:
        case IR::AbstractSyntaxNode::Type::EndIf:
            continue;
        default:
            return false;
        }
    }
    return true;
}

void Traverse(EmitContext& ctx, const IR::Program& program) {
    CodeGenerator& c = ctx.Code();
    for (auto it = program.syntax_list.begin(); it != program.syntax_list.end(); ++it) {
        const IR::AbstractSyntaxNode& node = *it;
        ctx.ResetTempRegs();
        switch (node.type) {
        case IR::AbstractSyntaxNode::Type::Block: {
            IR::Block* block = node.data.block;
            c.L(ctx.BlockLabel(block));
            for (IR::Inst& inst : *block) {
                ctx.ResetTempRegs();
                EmitInst(ctx, &inst);
            }
            const auto& phi_assignments = ctx.PhiAssignments(block);
            if (phi_assignments) {
                for (const auto& [phi, value] : phi_assignments->get()) {
                    MovValue(ctx, ctx.Def(phi), value);
                }
            }
            if (ctx.EndFlag() && IsLastInst(program.syntax_list, it)) {
                c.jmp(ctx.EndLabel());
            }
            break;
        }
        case IR::AbstractSyntaxNode::Type::If: {
            IR::Inst* ref = node.data.if_node.cond.InstRecursive();
            Label& merge = ctx.BlockLabel(node.data.if_node.merge);
            EmitCondition(ctx, ref, merge, true);
            break;
        }
        case IR::AbstractSyntaxNode::Type::Repeat: {
            IR::Inst* ref = node.data.repeat.cond.InstRecursive();
            Label& loop_header = ctx.BlockLabel(node.data.repeat.loop_header);
            EmitCondition(ctx, ref, loop_header, false);
            break;
        }
        case IR::AbstractSyntaxNode::Type::Break: {
            IR::Inst* ref = node.data.break_node.cond.InstRecursive();
            Label& merge = ctx.BlockLabel(node.data.break_node.merge);
            EmitCondition(ctx, ref, merge, true);
            c.jz(merge);
            break;
        }
        case IR::AbstractSyntaxNode::Type::Unreachable: {
            c.int3();
            break;
        }
        case IR::AbstractSyntaxNode::Type::Return:
        case IR::AbstractSyntaxNode::Type::Loop:
        case IR::AbstractSyntaxNode::Type::EndIf:
            break;
        }
    }
}

} // Anonymous namespace

void EmitX64(const IR::Program& program, Xbyak::CodeGenerator& c) {
    EmitContext context(program, c);
    Traverse(context, program);
    context.Code().L(context.EndLabel());
    context.Epilogue();
}
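A minimal, hypothetical call site (how the IR::Program is produced, and the signature of the jitted code, are outside this diff):

    Xbyak::CodeGenerator code(64 * 1024); // buffer size is an assumption
    Shader::Backend::X64::EmitX64(program, code);
    const auto* jitted = code.getCode(); // entry point of the emitted block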
void EmitPhi(EmitContext& ctx) {}

void EmitVoid(EmitContext&) {}

void EmitIdentity(EmitContext& ctx) {
    throw NotImplementedException("Forward identity declaration");
}

void EmitConditionRef(EmitContext& ctx) {}

void EmitReference(EmitContext&) {}

void EmitPhiMove(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitGetScc(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitGetExec(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitGetVcc(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitGetSccLo(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitGetVccLo(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitGetVccHi(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitGetM0(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitSetScc(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitSetExec(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitSetVcc(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitSetSccLo(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitSetVccLo(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitSetVccHi(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitSetM0(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}

} // namespace Shader::Backend::X64
src/shader_recompiler/backend/asm_x64/emit_x64.h (new file, 15 lines)

@@ -0,0 +1,15 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <vector>
#include <xbyak/xbyak.h>
#include <xbyak/xbyak_util.h>
#include "shader_recompiler/ir/program.h"

namespace Shader::Backend::X64 {

void EmitX64(const IR::Program& program, Xbyak::CodeGenerator& c);

} // namespace Shader::Backend::X64
src/shader_recompiler/backend/asm_x64/emit_x64_atomic.cpp (new file, 138 lines)

@@ -0,0 +1,138 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shader_recompiler/exception.h"
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"

namespace Shader::Backend::X64 {

void EmitSharedAtomicIAdd32(EmitContext& ctx) {
    throw NotImplementedException("SharedAtomicIAdd32");
}

void EmitSharedAtomicUMax32(EmitContext& ctx) {
    throw NotImplementedException("SharedAtomicUMax32");
}

void EmitSharedAtomicSMax32(EmitContext& ctx) {
    throw NotImplementedException("SharedAtomicSMax32");
}

void EmitSharedAtomicUMin32(EmitContext& ctx) {
    throw NotImplementedException("SharedAtomicUMin32");
}

void EmitSharedAtomicSMin32(EmitContext& ctx) {
    throw NotImplementedException("SharedAtomicSMin32");
}

void EmitSharedAtomicAnd32(EmitContext& ctx) {
    throw NotImplementedException("SharedAtomicAnd32");
}

void EmitSharedAtomicOr32(EmitContext& ctx) {
    throw NotImplementedException("SharedAtomicOr32");
}

void EmitSharedAtomicXor32(EmitContext& ctx) {
    throw NotImplementedException("SharedAtomicXor32");
}

void EmitBufferAtomicIAdd32(EmitContext& ctx) {
    throw NotImplementedException("BufferAtomicIAdd32");
}

void EmitBufferAtomicSMin32(EmitContext& ctx) {
    throw NotImplementedException("BufferAtomicSMin32");
}

void EmitBufferAtomicUMin32(EmitContext& ctx) {
    throw NotImplementedException("BufferAtomicUMin32");
}

void EmitBufferAtomicSMax32(EmitContext& ctx) {
    throw NotImplementedException("BufferAtomicSMax32");
}

void EmitBufferAtomicUMax32(EmitContext& ctx) {
    throw NotImplementedException("BufferAtomicUMax32");
}

void EmitBufferAtomicInc32(EmitContext& ctx) {
    throw NotImplementedException("BufferAtomicInc32");
}

void EmitBufferAtomicDec32(EmitContext& ctx) {
    throw NotImplementedException("BufferAtomicDec32");
}

void EmitBufferAtomicAnd32(EmitContext& ctx) {
    throw NotImplementedException("BufferAtomicAnd32");
}

void EmitBufferAtomicOr32(EmitContext& ctx) {
    throw NotImplementedException("BufferAtomicOr32");
}

void EmitBufferAtomicXor32(EmitContext& ctx) {
    throw NotImplementedException("BufferAtomicXor32");
}

void EmitBufferAtomicSwap32(EmitContext& ctx) {
    throw NotImplementedException("BufferAtomicSwap32");
}

void EmitImageAtomicIAdd32(EmitContext& ctx) {
    throw NotImplementedException("ImageAtomicIAdd32");
}

void EmitImageAtomicSMin32(EmitContext& ctx) {
    throw NotImplementedException("ImageAtomicSMin32");
}

void EmitImageAtomicUMin32(EmitContext& ctx) {
    throw NotImplementedException("ImageAtomicUMin32");
}

void EmitImageAtomicSMax32(EmitContext& ctx) {
    throw NotImplementedException("ImageAtomicSMax32");
}

void EmitImageAtomicUMax32(EmitContext& ctx) {
    throw NotImplementedException("ImageAtomicUMax32");
}

void EmitImageAtomicInc32(EmitContext& ctx) {
    throw NotImplementedException("ImageAtomicInc32");
}

void EmitImageAtomicDec32(EmitContext& ctx) {
    throw NotImplementedException("ImageAtomicDec32");
}

void EmitImageAtomicAnd32(EmitContext& ctx) {
    throw NotImplementedException("ImageAtomicAnd32");
}

void EmitImageAtomicOr32(EmitContext& ctx) {
    throw NotImplementedException("ImageAtomicOr32");
}

void EmitImageAtomicXor32(EmitContext& ctx) {
    throw NotImplementedException("ImageAtomicXor32");
}

void EmitImageAtomicExchange32(EmitContext& ctx) {
    throw NotImplementedException("ImageAtomicExchange32");
}

void EmitDataAppend(EmitContext& ctx) {
    throw NotImplementedException("DataAppend");
}

void EmitDataConsume(EmitContext& ctx) {
    throw NotImplementedException("DataConsume");
}

} // namespace Shader::Backend::X64
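Every handler in this file is still a stub in this commit. If implemented later, the shared-memory atomics would presumably map onto x86 lock-prefixed read-modify-write instructions; a purely speculative sketch for IAdd32, assuming the operands were already resolved to an address `addr` and a GP register `value`:

    // Hypothetical, not from the diff: atomically add `value` to the dword at `addr`,
    // leaving the previous value in `value` (xadd exchanges and adds).
    ctx.Code().lock();
    ctx.Code().xadd(dword[addr], value);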
src/shader_recompiler/backend/asm_x64/emit_x64_barrier.cpp (new file, 20 lines)

@@ -0,0 +1,20 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"

namespace Shader::Backend::X64 {

void EmitBarrier(EmitContext& ctx) {}

void EmitWorkgroupMemoryBarrier(EmitContext& ctx) {}

void EmitDeviceMemoryBarrier(EmitContext& ctx) {}

} // namespace Shader::Backend::X64
src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp (new file, 204 lines)

@@ -0,0 +1,204 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shader_recompiler/exception.h"
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/backend/asm_x64/x64_utils.h"

using namespace Xbyak;
using namespace Xbyak::util;

namespace Shader::Backend::X64 {

void EmitBitCastU16F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // 16-bit floats are handled in general-purpose registers.
    MovGP(ctx, dest[0], src[0]);
}

void EmitBitCastU32F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    if (src[0].IsMem()) {
        MovGP(ctx, dest[0], src[0]);
    } else if (dest[0].IsMem()) {
        ctx.Code().movd(dest[0].Mem(), src[0].Xmm());
    } else {
        // XMM -> GP transfer through scratch space just below rsp.
        ctx.Code().movd(dword[rsp - 4], src[0].Xmm());
        MovGP(ctx, dest[0], dword[rsp - 4]);
    }
}

void EmitBitCastU64F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    if (src[0].IsMem()) {
        MovGP(ctx, dest[0], src[0]);
    } else if (dest[0].IsMem()) {
        ctx.Code().movq(dest[0].Mem(), src[0].Xmm());
    } else {
        ctx.Code().movq(qword[rsp - 8], src[0].Xmm());
        MovGP(ctx, dest[0], qword[rsp - 8]);
    }
}

void EmitBitCastF16U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    MovGP(ctx, dest[0], src[0]);
}

void EmitBitCastF32U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    if (dest[0].IsMem()) {
        MovGP(ctx, dest[0], src[0]);
    } else if (src[0].IsMem()) {
        ctx.Code().movd(dest[0].Xmm(), src[0].Mem());
    } else {
        MovGP(ctx, dword[rsp - 4], src[0]);
        ctx.Code().movd(dest[0].Xmm(), dword[rsp - 4]);
    }
}

void EmitBitCastF64U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    if (dest[0].IsMem()) {
        MovGP(ctx, dest[0], src[0]);
    } else if (src[0].IsMem()) {
        ctx.Code().movq(dest[0].Xmm(), src[0].Mem());
    } else {
        MovGP(ctx, qword[rsp - 8], src[0]);
        // Load the 64-bit pattern into the XMM destination (movq, since the
        // destination is an XMM register).
        ctx.Code().movq(dest[0].Xmm(), qword[rsp - 8]);
    }
}
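These bitcasts reinterpret bits without numeric conversion; as a plain C++ reference for the 64-bit pair (illustrative, not emitter code):

    #include <bit>
    #include <cstdint>

    double ToF64(std::uint64_t v) { return std::bit_cast<double>(v); }
    std::uint64_t ToU64(double v) { return std::bit_cast<std::uint64_t>(v); }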
void EmitPackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    const bool is_mem = dest[0].IsMem() && (src[0].IsMem() || src[1].IsMem());
    Reg tmp = is_mem ? ctx.TempGPReg() : dest[0].Reg();
    MovGP(ctx, tmp, src[1]);
    ctx.Code().shl(tmp, 32);
    ctx.Code().or_(tmp, src[0].Op());
    MovGP(ctx, dest[0], tmp);
}

void EmitUnpackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg src0 = src[0].IsMem() ? ctx.TempGPReg() : src[0].Reg();
    MovGP(ctx, src0, src[0]);
    Reg dest1 = dest[1].IsMem() ? ctx.TempGPReg() : dest[1].Reg().changeBit(64);
    MovGP(ctx, dest1, src0);
    ctx.Code().shr(dest1, 32);
    MovGP(ctx, dest[1], dest1);
    MovGP(ctx, dest[0], src0.cvt32());
}
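The pair implements the usual 64-bit split, with src[1] landing in the upper half; a scalar reference (illustrative):

    std::uint64_t PackUint2x32(std::uint32_t lo, std::uint32_t hi) {
        return (std::uint64_t{hi} << 32) | lo;
    }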
void EmitPackFloat2x32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovFloat(ctx, tmp, src[0]);
    ctx.Code().pinsrd(tmp, src[1].Op(), 1);
    MovFloat(ctx, dest[0], tmp);
}

void EmitPackUnorm2x16(EmitContext& ctx) {
    throw NotImplementedException("PackUnorm2x16");
}

void EmitUnpackUnorm2x16(EmitContext& ctx) {
    throw NotImplementedException("UnpackUnorm2x16");
}

void EmitPackSnorm2x16(EmitContext& ctx) {
    throw NotImplementedException("PackSnorm2x16");
}

void EmitUnpackSnorm2x16(EmitContext& ctx) {
    throw NotImplementedException("UnpackSnorm2x16");
}

void EmitPackUint2x16(EmitContext& ctx) {
    throw NotImplementedException("PackUint2x16");
}

void EmitUnpackUint2x16(EmitContext& ctx) {
    throw NotImplementedException("UnpackUint2x16");
}

void EmitPackSint2x16(EmitContext& ctx) {
    throw NotImplementedException("PackSint2x16");
}

void EmitUnpackSint2x16(EmitContext& ctx) {
    throw NotImplementedException("UnpackSint2x16");
}

void EmitPackHalf2x16(EmitContext& ctx) {
    throw NotImplementedException("PackHalf2x16");
}

void EmitUnpackHalf2x16(EmitContext& ctx) {
    throw NotImplementedException("UnpackHalf2x16");
}

void EmitPackUnorm4x8(EmitContext& ctx) {
    throw NotImplementedException("PackUnorm4x8");
}

void EmitUnpackUnorm4x8(EmitContext& ctx) {
    throw NotImplementedException("UnpackUnorm4x8");
}

void EmitPackSnorm4x8(EmitContext& ctx) {
    throw NotImplementedException("PackSnorm4x8");
}

void EmitUnpackSnorm4x8(EmitContext& ctx) {
    throw NotImplementedException("UnpackSnorm4x8");
}

void EmitPackUint4x8(EmitContext& ctx) {
    throw NotImplementedException("PackUint4x8");
}

void EmitUnpackUint4x8(EmitContext& ctx) {
    throw NotImplementedException("UnpackUint4x8");
}

void EmitPackSint4x8(EmitContext& ctx) {
    throw NotImplementedException("PackSint4x8");
}

void EmitUnpackSint4x8(EmitContext& ctx) {
    throw NotImplementedException("UnpackSint4x8");
}

void EmitPackUfloat10_11_11(EmitContext& ctx) {
    throw NotImplementedException("PackUfloat10_11_11");
}

void EmitUnpackUfloat10_11_11(EmitContext& ctx) {
    throw NotImplementedException("UnpackUfloat10_11_11");
}

void EmitPackUnorm2_10_10_10(EmitContext& ctx) {
    throw NotImplementedException("PackUnorm2_10_10_10");
}

void EmitUnpackUnorm2_10_10_10(EmitContext& ctx) {
    throw NotImplementedException("UnpackUnorm2_10_10_10");
}

void EmitPackSnorm2_10_10_10(EmitContext& ctx) {
    throw NotImplementedException("PackSnorm2_10_10_10");
}

void EmitUnpackSnorm2_10_10_10(EmitContext& ctx) {
    throw NotImplementedException("UnpackSnorm2_10_10_10");
}

void EmitPackUint2_10_10_10(EmitContext& ctx) {
    throw NotImplementedException("PackUint2_10_10_10");
}

void EmitUnpackUint2_10_10_10(EmitContext& ctx) {
    throw NotImplementedException("UnpackUint2_10_10_10");
}

void EmitPackSint2_10_10_10(EmitContext& ctx) {
    throw NotImplementedException("PackSint2_10_10_10");
}

void EmitUnpackSint2_10_10_10(EmitContext& ctx) {
    throw NotImplementedException("UnpackSint2_10_10_10");
}

} // namespace Shader::Backend::X64
src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp (new file, 350 lines)

@@ -0,0 +1,350 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shader_recompiler/backend/asm_x64/x64_utils.h"
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"

namespace Shader::Backend::X64 {

using namespace Xbyak;
using namespace Xbyak::util;

namespace {

template <u32 N>
static const OperandHolder& GetShuffleOperand(const Operands& comp1, const Operands& comp2,
                                              u32 index) {
    // Indices [0, N) select from comp1, [N, 2N) from comp2.
    if (index < N) {
        return comp1[index];
    } else {
        return comp2[index - N];
    }
}

} // Anonymous namespace

void EmitCompositeConstructU32x2(EmitContext& ctx, const Operands& dest, const Operands& src1,
                                 const Operands& src2) {
    MovGP(ctx, dest[0], src1[0]);
    MovGP(ctx, dest[1], src2[0]);
}

void EmitCompositeConstructU32x3(EmitContext& ctx, const Operands& dest, const Operands& src1,
                                 const Operands& src2, const Operands& src3) {
    MovGP(ctx, dest[0], src1[0]);
    MovGP(ctx, dest[1], src2[0]);
    MovGP(ctx, dest[2], src3[0]);
}

void EmitCompositeConstructU32x4(EmitContext& ctx, const Operands& dest, const Operands& src1,
                                 const Operands& src2, const Operands& src3,
                                 const Operands& src4) {
    MovGP(ctx, dest[0], src1[0]);
    MovGP(ctx, dest[1], src2[0]);
    MovGP(ctx, dest[2], src3[0]);
    MovGP(ctx, dest[3], src4[0]);
}

void EmitCompositeConstructU32x2x2(EmitContext& ctx, const Operands& dest, const Operands& src1,
                                   const Operands& src2) {
    MovGP(ctx, dest[0], src1[0]);
    MovGP(ctx, dest[1], src2[0]);
    MovGP(ctx, dest[2], src1[1]);
    MovGP(ctx, dest[3], src2[1]);
}

void EmitCompositeExtractU32x2(EmitContext& ctx, const Operands& dest, const Operands& composite,
                               u32 index) {
    MovGP(ctx, dest[0], composite[index]);
}

void EmitCompositeExtractU32x3(EmitContext& ctx, const Operands& dest, const Operands& composite,
                               u32 index) {
    MovGP(ctx, dest[0], composite[index]);
}

void EmitCompositeExtractU32x4(EmitContext& ctx, const Operands& dest, const Operands& composite,
                               u32 index) {
    MovGP(ctx, dest[0], composite[index]);
}

void EmitCompositeInsertU32x2(EmitContext& ctx, const Operands& dest, const Operands& composite,
                              const Operands& object, u32 index) {
    if (index == 0) {
        MovGP(ctx, dest[0], object[0]);
        MovGP(ctx, dest[1], composite[1]);
    } else {
        MovGP(ctx, dest[0], composite[0]);
        MovGP(ctx, dest[1], object[0]);
    }
}

void EmitCompositeInsertU32x3(EmitContext& ctx, const Operands& dest, const Operands& composite,
                              const Operands& object, u32 index) {
    for (u32 i = 0; i < 3; ++i) {
        if (i == index) {
            MovGP(ctx, dest[i], object[0]);
        } else {
            MovGP(ctx, dest[i], composite[i]);
        }
    }
}

void EmitCompositeInsertU32x4(EmitContext& ctx, const Operands& dest, const Operands& composite,
                              const Operands& object, u32 index) {
    for (u32 i = 0; i < 4; ++i) {
        if (i == index) {
            MovGP(ctx, dest[i], object[0]);
        } else {
            MovGP(ctx, dest[i], composite[i]);
        }
    }
}

void EmitCompositeShuffleU32x2(EmitContext& ctx, const Operands& dest, const Operands& composite1,
                               const Operands& composite2, u32 idx1, u32 idx2) {
    MovGP(ctx, dest[0], GetShuffleOperand<2>(composite1, composite2, idx1));
    MovGP(ctx, dest[1], GetShuffleOperand<2>(composite1, composite2, idx2));
}

void EmitCompositeShuffleU32x3(EmitContext& ctx, const Operands& dest, const Operands& composite1,
                               const Operands& composite2, u32 idx1, u32 idx2, u32 idx3) {
    MovGP(ctx, dest[0], GetShuffleOperand<3>(composite1, composite2, idx1));
    MovGP(ctx, dest[1], GetShuffleOperand<3>(composite1, composite2, idx2));
    MovGP(ctx, dest[2], GetShuffleOperand<3>(composite1, composite2, idx3));
}

void EmitCompositeShuffleU32x4(EmitContext& ctx, const Operands& dest, const Operands& composite1,
                               const Operands& composite2, u32 idx1, u32 idx2, u32 idx3,
                               u32 idx4) {
    MovGP(ctx, dest[0], GetShuffleOperand<4>(composite1, composite2, idx1));
    MovGP(ctx, dest[1], GetShuffleOperand<4>(composite1, composite2, idx2));
    MovGP(ctx, dest[2], GetShuffleOperand<4>(composite1, composite2, idx3));
    MovGP(ctx, dest[3], GetShuffleOperand<4>(composite1, composite2, idx4));
}

void EmitCompositeConstructF16x2(EmitContext& ctx, const Operands& dest, const Operands& src1,
                                 const Operands& src2) {
    MovGP(ctx, dest[0], src1[0]);
    MovGP(ctx, dest[1], src2[0]);
}

void EmitCompositeConstructF16x3(EmitContext& ctx, const Operands& dest, const Operands& src1,
                                 const Operands& src2, const Operands& src3) {
    MovGP(ctx, dest[0], src1[0]);
    MovGP(ctx, dest[1], src2[0]);
    MovGP(ctx, dest[2], src3[0]);
}

void EmitCompositeConstructF16x4(EmitContext& ctx, const Operands& dest, const Operands& src1,
                                 const Operands& src2, const Operands& src3,
                                 const Operands& src4) {
    MovGP(ctx, dest[0], src1[0]);
    MovGP(ctx, dest[1], src2[0]);
    MovGP(ctx, dest[2], src3[0]);
    MovGP(ctx, dest[3], src4[0]);
}

void EmitCompositeExtractF16x2(EmitContext& ctx, const Operands& dest, const Operands& composite,
                               u32 index) {
    MovGP(ctx, dest[0], composite[index]);
}

void EmitCompositeExtractF16x3(EmitContext& ctx, const Operands& dest, const Operands& composite,
                               u32 index) {
    MovGP(ctx, dest[0], composite[index]);
}

void EmitCompositeExtractF16x4(EmitContext& ctx, const Operands& dest, const Operands& composite,
                               u32 index) {
    MovGP(ctx, dest[0], composite[index]);
}

void EmitCompositeInsertF16x2(EmitContext& ctx, const Operands& dest, const Operands& composite,
                              const Operands& object, u32 index) {
    if (index == 0) {
        MovGP(ctx, dest[0], object[0]);
        MovGP(ctx, dest[1], composite[1]);
    } else {
        MovGP(ctx, dest[0], composite[0]);
        MovGP(ctx, dest[1], object[0]);
    }
}

void EmitCompositeInsertF16x3(EmitContext& ctx, const Operands& dest, const Operands& composite,
                              const Operands& object, u32 index) {
    for (u32 i = 0; i < 3; ++i) {
        if (i == index) {
            MovGP(ctx, dest[i], object[0]);
        } else {
            MovGP(ctx, dest[i], composite[i]);
        }
    }
}

void EmitCompositeInsertF16x4(EmitContext& ctx, const Operands& dest, const Operands& composite,
                              const Operands& object, u32 index) {
    for (u32 i = 0; i < 4; ++i) {
        if (i == index) {
            MovGP(ctx, dest[i], object[0]);
        } else {
            MovGP(ctx, dest[i], composite[i]);
        }
    }
}

void EmitCompositeShuffleF16x2(EmitContext& ctx, const Operands& dest, const Operands& composite1,
                               const Operands& composite2, u32 idx1, u32 idx2) {
    MovGP(ctx, dest[0], GetShuffleOperand<2>(composite1, composite2, idx1));
    MovGP(ctx, dest[1], GetShuffleOperand<2>(composite1, composite2, idx2));
}

void EmitCompositeShuffleF16x3(EmitContext& ctx, const Operands& dest, const Operands& composite1,
                               const Operands& composite2, u32 idx1, u32 idx2, u32 idx3) {
    MovGP(ctx, dest[0], GetShuffleOperand<3>(composite1, composite2, idx1));
    MovGP(ctx, dest[1], GetShuffleOperand<3>(composite1, composite2, idx2));
    MovGP(ctx, dest[2], GetShuffleOperand<3>(composite1, composite2, idx3));
}

void EmitCompositeShuffleF16x4(EmitContext& ctx, const Operands& dest, const Operands& composite1,
                               const Operands& composite2, u32 idx1, u32 idx2, u32 idx3,
                               u32 idx4) {
    MovGP(ctx, dest[0], GetShuffleOperand<4>(composite1, composite2, idx1));
    MovGP(ctx, dest[1], GetShuffleOperand<4>(composite1, composite2, idx2));
    MovGP(ctx, dest[2], GetShuffleOperand<4>(composite1, composite2, idx3));
    MovGP(ctx, dest[3], GetShuffleOperand<4>(composite1, composite2, idx4));
}

void EmitCompositeConstructF32x2(EmitContext& ctx, const Operands& dest, const Operands& src1,
                                 const Operands& src2) {
    MovFloat(ctx, dest[0], src1[0]);
    MovFloat(ctx, dest[1], src2[0]);
}

void EmitCompositeConstructF32x3(EmitContext& ctx, const Operands& dest, const Operands& src1,
                                 const Operands& src2, const Operands& src3) {
    MovFloat(ctx, dest[0], src1[0]);
    MovFloat(ctx, dest[1], src2[0]);
    MovFloat(ctx, dest[2], src3[0]);
}

void EmitCompositeConstructF32x4(EmitContext& ctx, const Operands& dest, const Operands& src1,
                                 const Operands& src2, const Operands& src3,
                                 const Operands& src4) {
    MovFloat(ctx, dest[0], src1[0]);
    MovFloat(ctx, dest[1], src2[0]);
    MovFloat(ctx, dest[2], src3[0]);
    MovFloat(ctx, dest[3], src4[0]);
}

void EmitCompositeConstructF32x2x2(EmitContext& ctx, const Operands& dest, const Operands& src1,
                                   const Operands& src2) {
    MovFloat(ctx, dest[0], src1[0]);
    MovFloat(ctx, dest[1], src2[0]);
    MovFloat(ctx, dest[2], src1[1]);
    MovFloat(ctx, dest[3], src2[1]);
}

void EmitCompositeExtractF32x2(EmitContext& ctx, const Operands& dest, const Operands& composite,
                               u32 index) {
    MovFloat(ctx, dest[0], composite[index]);
}

void EmitCompositeExtractF32x3(EmitContext& ctx, const Operands& dest, const Operands& composite,
                               u32 index) {
    MovFloat(ctx, dest[0], composite[index]);
}

void EmitCompositeExtractF32x4(EmitContext& ctx, const Operands& dest, const Operands& composite,
                               u32 index) {
    MovFloat(ctx, dest[0], composite[index]);
}

void EmitCompositeInsertF32x2(EmitContext& ctx, const Operands& dest, const Operands& composite,
                              const Operands& object, u32 index) {
    if (index == 0) {
        MovFloat(ctx, dest[0], object[0]);
        MovFloat(ctx, dest[1], composite[1]);
    } else {
        MovFloat(ctx, dest[0], composite[0]);
        MovFloat(ctx, dest[1], object[0]);
    }
}

void EmitCompositeInsertF32x3(EmitContext& ctx, const Operands& dest, const Operands& composite,
                              const Operands& object, u32 index) {
    for (u32 i = 0; i < 3; ++i) {
        if (i == index) {
            MovFloat(ctx, dest[i], object[0]);
        } else {
            MovFloat(ctx, dest[i], composite[i]);
        }
    }
}

void EmitCompositeInsertF32x4(EmitContext& ctx, const Operands& dest, const Operands& composite,
                              const Operands& object, u32 index) {
    for (u32 i = 0; i < 4; ++i) {
        if (i == index) {
            MovFloat(ctx, dest[i], object[0]);
        } else {
            MovFloat(ctx, dest[i], composite[i]);
        }
    }
}

void EmitCompositeShuffleF32x2(EmitContext& ctx, const Operands& dest, const Operands& composite1,
                               const Operands& composite2, u32 idx1, u32 idx2) {
    MovFloat(ctx, dest[0], GetShuffleOperand<2>(composite1, composite2, idx1));
    MovFloat(ctx, dest[1], GetShuffleOperand<2>(composite1, composite2, idx2));
}

void EmitCompositeShuffleF32x3(EmitContext& ctx, const Operands& dest, const Operands& composite1,
                               const Operands& composite2, u32 idx1, u32 idx2, u32 idx3) {
    MovFloat(ctx, dest[0], GetShuffleOperand<3>(composite1, composite2, idx1));
    MovFloat(ctx, dest[1], GetShuffleOperand<3>(composite1, composite2, idx2));
    MovFloat(ctx, dest[2], GetShuffleOperand<3>(composite1, composite2, idx3));
}

void EmitCompositeShuffleF32x4(EmitContext& ctx, const Operands& dest, const Operands& composite1,
                               const Operands& composite2, u32 idx1, u32 idx2, u32 idx3,
                               u32 idx4) {
    MovFloat(ctx, dest[0], GetShuffleOperand<4>(composite1, composite2, idx1));
    MovFloat(ctx, dest[1], GetShuffleOperand<4>(composite1, composite2, idx2));
    MovFloat(ctx, dest[2], GetShuffleOperand<4>(composite1, composite2, idx3));
    MovFloat(ctx, dest[3], GetShuffleOperand<4>(composite1, composite2, idx4));
}

void EmitCompositeConstructF64x2(EmitContext& ctx, const Operands& dest, const Operands& src1,
                                 const Operands& src2) {
    MovDouble(ctx, dest[0], src1[0]);
    MovDouble(ctx, dest[1], src2[0]);
}

void EmitCompositeConstructF64x3(EmitContext& ctx, const Operands& dest, const Operands& src1,
                                 const Operands& src2, const Operands& src3) {
    MovDouble(ctx, dest[0], src1[0]);
    MovDouble(ctx, dest[1], src2[0]);
    MovDouble(ctx, dest[2], src3[0]);
}

void EmitCompositeConstructF64x4(EmitContext& ctx, const Operands& dest, const Operands& src1,
                                 const Operands& src2, const Operands& src3,
                                 const Operands& src4) {
    MovDouble(ctx, dest[0], src1[0]);
    MovDouble(ctx, dest[1], src2[0]);
    MovDouble(ctx, dest[2], src3[0]);
    MovDouble(ctx, dest[3], src4[0]);
}

void EmitCompositeExtractF64x2(EmitContext& ctx, const Operands& dest, const Operands& composite,
                               u32 index) {
    MovDouble(ctx, dest[0], composite[index]);
}

void EmitCompositeExtractF64x3(EmitContext& ctx, const Operands& dest, const Operands& composite,
                               u32 index) {
    MovDouble(ctx, dest[0], composite[index]);
}

void EmitCompositeExtractF64x4(EmitContext& ctx, const Operands& dest, const Operands& composite,
                               u32 index) {
    MovDouble(ctx, dest[0], composite[index]);
}

void EmitCompositeInsertF64x2(EmitContext& ctx, const Operands& dest, const Operands& composite,
                              const Operands& object, u32 index) {
    if (index == 0) {
        MovDouble(ctx, dest[0], object[0]);
        MovDouble(ctx, dest[1], composite[1]);
    } else {
        MovDouble(ctx, dest[0], composite[0]);
        MovDouble(ctx, dest[1], object[0]);
    }
}

void EmitCompositeInsertF64x3(EmitContext& ctx, const Operands& dest, const Operands& composite,
                              const Operands& object, u32 index) {
    for (u32 i = 0; i < 3; ++i) {
        if (i == index) {
            MovDouble(ctx, dest[i], object[0]);
        } else {
            MovDouble(ctx, dest[i], composite[i]);
        }
    }
}

void EmitCompositeInsertF64x4(EmitContext& ctx, const Operands& dest, const Operands& composite,
                              const Operands& object, u32 index) {
    for (u32 i = 0; i < 4; ++i) {
        if (i == index) {
            MovDouble(ctx, dest[i], object[0]);
        } else {
            MovDouble(ctx, dest[i], composite[i]);
        }
    }
}

void EmitCompositeShuffleF64x2(EmitContext& ctx, const Operands& dest, const Operands& composite1,
                               const Operands& composite2, u32 idx1, u32 idx2) {
    MovDouble(ctx, dest[0], GetShuffleOperand<2>(composite1, composite2, idx1));
    MovDouble(ctx, dest[1], GetShuffleOperand<2>(composite1, composite2, idx2));
}

void EmitCompositeShuffleF64x3(EmitContext& ctx, const Operands& dest, const Operands& composite1,
                               const Operands& composite2, u32 idx1, u32 idx2, u32 idx3) {
    MovDouble(ctx, dest[0], GetShuffleOperand<3>(composite1, composite2, idx1));
    MovDouble(ctx, dest[1], GetShuffleOperand<3>(composite1, composite2, idx2));
    MovDouble(ctx, dest[2], GetShuffleOperand<3>(composite1, composite2, idx3));
}

void EmitCompositeShuffleF64x4(EmitContext& ctx, const Operands& dest, const Operands& composite1,
                               const Operands& composite2, u32 idx1, u32 idx2, u32 idx3,
                               u32 idx4) {
    MovDouble(ctx, dest[0], GetShuffleOperand<4>(composite1, composite2, idx1));
    MovDouble(ctx, dest[1], GetShuffleOperand<4>(composite1, composite2, idx2));
    MovDouble(ctx, dest[2], GetShuffleOperand<4>(composite1, composite2, idx3));
    MovDouble(ctx, dest[3], GetShuffleOperand<4>(composite1, composite2, idx4));
}

} // namespace Shader::Backend::X64
src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp (new file, 221 lines)

@@ -0,0 +1,221 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/assert.h"
#include "common/logging/log.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/backend/asm_x64/x64_utils.h"

namespace Shader::Backend::X64 {

using namespace Xbyak;
using namespace Xbyak::util;

void EmitGetUserData(EmitContext& ctx, const Operands& dest, IR::ScalarReg reg) {
    const u32 offset = static_cast<u32>(reg) << 2;
    Reg tmp = ctx.TempGPReg();
    ctx.Code().lea(tmp, ptr[ctx.UserData() + offset]);
    MovGP(ctx, dest[0], dword[tmp]);
}

void EmitSetUserData(EmitContext& ctx, const Operands& offset, const Operands& value) {
    Reg tmp = ctx.TempGPReg();
    MovGP(ctx, tmp, offset[0]);
    ctx.Code().lea(tmp, ptr[ctx.UserData() + tmp * 4]);
    MovGP(ctx, dword[tmp], value[0]);
}

void EmitGetThreadBitScalarReg(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitSetThreadBitScalarReg(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitGetScalarRegister(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitSetScalarRegister(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitGetVectorRegister(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitSetVectorRegister(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitSetGotoVariable(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitGetGotoVariable(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitReadConst(EmitContext& ctx, const Operands& dest, const Operands& base,
                   const Operands& offset) {
    Reg tmp = dest[0].IsMem() ? ctx.TempGPReg() : dest[0].Reg().changeBit(64);
    Reg off_tmp = offset[0].IsMem() ? ctx.TempGPReg() : offset[0].Reg().changeBit(64);
    // Reassemble the 64-bit base address from its two 32-bit halves.
    MovGP(ctx, tmp, base[1]);
    MovGP(ctx, off_tmp, offset[0]);
    ctx.Code().shl(tmp, 32);
    ctx.Code().or_(tmp, base[0].Op());
    // Load the dword at base + offset * 4.
    ctx.Code().lea(tmp, ptr[tmp + off_tmp * 4]);
    MovGP(ctx, dest[0], dword[tmp]);
}
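In scalar terms the emitted sequence computes the following (illustrative reference, not emitter code):

    // base[0] holds the low 32 bits of the pointer, base[1] the high 32 bits.
    std::uint32_t ReadConst(std::uint32_t lo, std::uint32_t hi, std::uint32_t offset) {
        const std::uint64_t addr = (std::uint64_t{hi} << 32) | lo;
        return *reinterpret_cast<const std::uint32_t*>(addr + std::uint64_t{offset} * 4);
    }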
void EmitReadConstBuffer(EmitContext& ctx, const Operands& dest, const Operands& handle,
                         const Operands& offset) {
    Reg tmp = dest[0].IsMem() ? ctx.TempGPReg() : dest[0].Reg().changeBit(64);
    // Reconstruct the base address from the buffer handle.
    Reg off_tmp = ctx.TempGPReg();
    MovGP(ctx, tmp, handle[1]);
    ctx.Code().and_(tmp, 0xFFF);
    ctx.Code().shl(tmp, 32);
    MovGP(ctx, off_tmp.cvt32(), handle[0]);
    ctx.Code().and_(off_tmp.cvt32(), 0xFFFFFFFF);
    ctx.Code().or_(tmp, off_tmp);
    // TODO: we should correctly clamp the offset
    MovGP(ctx, off_tmp, offset[0]);
    ctx.Code().lea(tmp, ptr[tmp + off_tmp * 4]);
    MovGP(ctx, dest[0], dword[tmp]);
}

void EmitReadStepRate(EmitContext& ctx) {
    throw NotImplementedException("ReadStepRate");
}

void EmitGetAttribute(EmitContext& ctx, const Operands& dest) {
    LOG_WARNING(Render_Recompiler, "GetAttribute stubbed, setting to 0.0");
    if (dest[0].IsMem()) {
        ctx.Code().mov(dest[0].Mem(), 0);
    } else {
        ctx.Code().pxor(dest[0].Xmm(), dest[0].Xmm());
    }
}

void EmitGetAttributeU32(EmitContext& ctx, const Operands& dest) {
    LOG_WARNING(Render_Recompiler, "GetAttributeU32 stubbed, setting to 0");
    if (dest[0].IsMem()) {
        ctx.Code().mov(dest[0].Mem(), 0);
    } else {
        ctx.Code().xor_(dest[0].Reg(), dest[0].Reg());
    }
}

void EmitSetAttribute(EmitContext& ctx) {
    throw NotImplementedException("SetAttribute");
}

void EmitGetTessGenericAttribute(EmitContext& ctx) {
    throw NotImplementedException("GetTessGenericAttribute");
}

void EmitReadTcsGenericOuputAttribute(EmitContext& ctx) {
    throw NotImplementedException("ReadTcsGenericOuputAttribute");
}

void EmitSetTcsGenericAttribute(EmitContext& ctx) {
    throw NotImplementedException("SetTcsGenericAttribute");
}

void EmitGetPatch(EmitContext& ctx) {
    throw NotImplementedException("GetPatch");
}

void EmitSetPatch(EmitContext& ctx) {
    throw NotImplementedException("SetPatch");
}

void EmitLoadBufferU8(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferU8");
}

void EmitLoadBufferU16(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferU16");
}

void EmitLoadBufferU32(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferU32");
}

void EmitLoadBufferU32x2(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferU32x2");
}

void EmitLoadBufferU32x3(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferU32x3");
}

void EmitLoadBufferU32x4(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferU32x4");
}

void EmitLoadBufferF32(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferF32");
}

void EmitLoadBufferF32x2(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferF32x2");
}

void EmitLoadBufferF32x3(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferF32x3");
}

void EmitLoadBufferF32x4(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferF32x4");
}

void EmitLoadBufferFormatF32(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferFormatF32");
}

void EmitStoreBufferU8(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferU8");
}

void EmitStoreBufferU16(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferU16");
}

void EmitStoreBufferU32(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferU32");
}

void EmitStoreBufferU32x2(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferU32x2");
}

void EmitStoreBufferU32x3(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferU32x3");
}

void EmitStoreBufferU32x4(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferU32x4");
}

void EmitStoreBufferF32(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferF32");
}

void EmitStoreBufferF32x2(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferF32x2");
}

void EmitStoreBufferF32x3(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferF32x3");
}

void EmitStoreBufferF32x4(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferF32x4");
}

void EmitStoreBufferFormatF32(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferFormatF32");
}

} // namespace Shader::Backend::X64
src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp (new file, 279 lines)

@@ -0,0 +1,279 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/backend/asm_x64/x64_utils.h"

namespace Shader::Backend::X64 {

using namespace Xbyak;
using namespace Xbyak::util;

void EmitConvertS16F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm tmp_xmm = ctx.TempXmmReg();
    Reg tmp_reg = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg().cvt32();
    EmitInlineF16ToF32(ctx, tmp_xmm, src[0].Op());
    ctx.Code().cvttss2si(tmp_reg, tmp_xmm);
    ctx.Code().and_(tmp_reg, 0xFFFF);
    MovGP(ctx, dest[0], tmp_reg);
}

void EmitConvertS16F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg().cvt32();
    ctx.Code().cvttss2si(tmp, src[0].Op());
    ctx.Code().and_(tmp, 0xFFFF);
    MovGP(ctx, dest[0], tmp);
}
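The masking with 0xFFFF keeps only the low 16 bits of the truncated integer, so the scalar equivalent is (illustrative):

    std::uint32_t ConvertS16F32(float f) {
        return static_cast<std::uint32_t>(static_cast<std::int32_t>(f)) & 0xFFFF;
    }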
|
||||
|
||||
void EmitConvertS16F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg().cvt32();
|
||||
ctx.Code().cvttsd2si(tmp, src[0].Op());
|
||||
ctx.Code().and_(tmp, 0xFFFF);
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitConvertS32F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
Xmm tmp_xmm = ctx.TempXmmReg();
|
||||
Reg tmp_reg = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
|
||||
EmitInlineF16ToF32(ctx, tmp_xmm, src[0].Op());
|
||||
ctx.Code().cvttss2si(tmp_reg, tmp_xmm);
|
||||
MovGP(ctx, dest[0], tmp_reg);
|
||||
}
|
||||
|
||||
void EmitConvertS32F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
|
||||
ctx.Code().cvttss2si(tmp, src[0].Op());
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitConvertS32F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
|
||||
ctx.Code().cvttsd2si(tmp, src[0].Op());
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitConvertS64F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
Xmm tmp_xmm = ctx.TempXmmReg();
|
||||
Reg tmp_reg = dest[0].IsMem() ? ctx.TempGPReg() : dest[0].Reg();
|
||||
EmitInlineF16ToF32(ctx, tmp_xmm, src[0].Op());
|
||||
ctx.Code().cvttss2si(tmp_reg, tmp_xmm);
|
||||
MovGP(ctx, dest[0], tmp_reg);
|
||||
}
|
||||
|
||||
void EmitConvertS64F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
Reg tmp = dest[0].IsMem() ? ctx.TempGPReg() : dest[0].Reg();
|
||||
ctx.Code().cvttss2si(tmp, src[0].Op());
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitConvertS64F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
Reg tmp = dest[0].IsMem() ? ctx.TempGPReg() : dest[0].Reg();
|
||||
ctx.Code().cvttsd2si(tmp, src[0].Op());
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitConvertU16F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
EmitConvertS16F16(ctx, dest, src);
|
||||
}
|
||||
|
||||
void EmitConvertU16F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
EmitConvertS16F32(ctx, dest, src);
|
||||
}
|
||||
|
||||
void EmitConvertU16F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
EmitConvertS16F64(ctx, dest, src);
|
||||
}
|
||||
|
||||
void EmitConvertU32F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
EmitConvertS32F16(ctx, dest, src);
|
||||
}
|
||||
|
||||
void EmitConvertU32F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
EmitConvertS32F32(ctx, dest, src);
|
||||
}
|
||||
|
||||
void EmitConvertU32F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
EmitConvertS32F64(ctx, dest, src);
|
||||
}
|
||||
|
||||
void EmitConvertU64F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
EmitConvertS64F16(ctx, dest, src);
|
||||
}
|
||||
|
||||
void EmitConvertU64F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
EmitConvertS64F32(ctx, dest, src);
|
||||
}
|
||||
|
||||
void EmitConvertU64F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
EmitConvertS64F64(ctx, dest, src);
|
||||
}
|
||||
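
// Note: the unsigned variants above alias the signed conversions. cvttss2si
// and cvttsd2si are signed instructions, so this presumably assumes inputs
// that fit in the signed range of the destination width.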

void EmitConvertU64U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    MovGP(ctx, dest[0], src[0]);
}

void EmitConvertU32U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    MovGP(ctx, dest[0], src[0]);
}

void EmitConvertF16F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitInlineF32ToF16(ctx, dest[0].Op(), src[0].Op());
}

void EmitConvertF32F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitInlineF16ToF32(ctx, dest[0].Op(), src[0].Op());
}

void EmitConvertF32F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().cvtsd2ss(tmp, src[0].Op());
    MovFloat(ctx, dest[0], tmp);
}

void EmitConvertF64F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().cvtss2sd(tmp, src[0].Op());
    MovDouble(ctx, dest[0], tmp);
}

void EmitConvertF16S8(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg tmp_reg = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg().cvt32();
    Xmm tmp_xmm = ctx.TempXmmReg();
    MovGP(ctx, tmp_reg, src[0]);
    ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg);
    EmitInlineF32ToF16(ctx, dest[0].Op(), tmp_xmm);
}

void EmitConvertF16S16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg tmp_reg = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg().cvt32();
    Xmm tmp_xmm = ctx.TempXmmReg();
    MovGP(ctx, tmp_reg, src[0]);
    ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg);
    EmitInlineF32ToF16(ctx, dest[0].Op(), tmp_xmm);
}

void EmitConvertF16S32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm tmp = ctx.TempXmmReg();
    ctx.Code().cvtsi2ss(tmp, src[0].Op());
    EmitInlineF32ToF16(ctx, dest[0].Op(), tmp);
}

void EmitConvertF16S64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm tmp = ctx.TempXmmReg();
    ctx.Code().cvtsi2ss(tmp, src[0].Op());
    EmitInlineF32ToF16(ctx, dest[0].Op(), tmp);
}

void EmitConvertF16U8(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF16S8(ctx, dest, src);
}

void EmitConvertF16U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF16S16(ctx, dest, src);
}

void EmitConvertF16U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF16S32(ctx, dest, src);
}

void EmitConvertF16U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF16S64(ctx, dest, src);
}

void EmitConvertF32S8(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg tmp_reg = ctx.TempGPReg().cvt32();
    Xmm tmp_xmm = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovGP(ctx, tmp_reg, src[0]);
    ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg);
    MovFloat(ctx, dest[0], tmp_xmm);
}

void EmitConvertF32S16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg tmp_reg = ctx.TempGPReg().cvt32();
    Xmm tmp_xmm = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovGP(ctx, tmp_reg, src[0]);
    ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg);
    MovFloat(ctx, dest[0], tmp_xmm);
}

void EmitConvertF32S32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().cvtsi2ss(tmp, src[0].Op());
    MovFloat(ctx, dest[0], tmp);
}

void EmitConvertF32S64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().cvtsi2ss(tmp, src[0].Op());
    MovFloat(ctx, dest[0], tmp);
}

void EmitConvertF32U8(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF32S8(ctx, dest, src);
}

void EmitConvertF32U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF32S16(ctx, dest, src);
}

void EmitConvertF32U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF32S32(ctx, dest, src);
}

void EmitConvertF32U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF32S64(ctx, dest, src);
}

void EmitConvertF64S8(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg tmp_reg = ctx.TempGPReg().cvt32();
    Xmm tmp_xmm = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovGP(ctx, tmp_reg, src[0]);
    ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
    MovDouble(ctx, dest[0], tmp_xmm);
}

void EmitConvertF64S16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg tmp_reg = ctx.TempGPReg().cvt32();
    Xmm tmp_xmm = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovGP(ctx, tmp_reg, src[0]);
    ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
    MovDouble(ctx, dest[0], tmp_xmm);
}

void EmitConvertF64S32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().cvtsi2sd(tmp, src[0].Op());
    MovDouble(ctx, dest[0], tmp);
}

void EmitConvertF64S64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().cvtsi2sd(tmp, src[0].Op());
    MovDouble(ctx, dest[0], tmp);
}

void EmitConvertF64U8(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF64S8(ctx, dest, src);
}

void EmitConvertF64U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF64S16(ctx, dest, src);
}

void EmitConvertF64U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF64S32(ctx, dest, src);
}

void EmitConvertF64U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF64S64(ctx, dest, src);
}

void EmitConvertU16U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    MovGP(ctx, dest[0], src[0]);
}

void EmitConvertU32U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    MovGP(ctx, dest[0], src[0]);
}

} // namespace Shader::Backend::X64
@@ -0,0 +1,766 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shader_recompiler/exception.h"
#include "shader_recompiler/backend/asm_x64/emit_x64_instructions.h"
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/backend/asm_x64/x64_utils.h"

namespace Shader::Backend::X64 {

using namespace Xbyak;
using namespace Xbyak::util;

void EmitFPAbs16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    MovGP(ctx, dest[0], src[0]);
    ctx.Code().and_(dest[0].Op(), 0x7FFF);
}

void EmitFPAbs32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg reg_tmp = ctx.TempGPReg().cvt32();
    Xmm xmm_tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().mov(reg_tmp, 0x7FFFFFFF);
    ctx.Code().movd(xmm_tmp, reg_tmp);
    ctx.Code().andps(xmm_tmp, src[0].Op());
    MovFloat(ctx, dest[0], xmm_tmp);
}

void EmitFPAbs64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg reg_tmp = ctx.TempGPReg();
    Xmm xmm_tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().mov(reg_tmp, 0x7FFFFFFFFFFFFFFF);
    ctx.Code().movq(xmm_tmp, reg_tmp);
    ctx.Code().andpd(xmm_tmp, src[0].Op());
    MovFloat(ctx, dest[0], xmm_tmp);
}

void EmitFPAdd16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    Xmm tmp1 = ctx.TempXmmReg();
    Xmm tmp2 = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, tmp1, op1[0].Op());
    EmitInlineF16ToF32(ctx, tmp2, op2[0].Op());
    ctx.Code().addss(tmp1, tmp2);
    EmitInlineF32ToF16(ctx, dest[0].Op(), tmp1);
}

void EmitFPAdd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovFloat(ctx, tmp, op1[0]);
    ctx.Code().addss(tmp, op2[0].Op());
    MovFloat(ctx, dest[0], tmp);
}

void EmitFPAdd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovDouble(ctx, tmp, op1[0]);
    ctx.Code().addsd(tmp, op2[0].Op());
    MovDouble(ctx, dest[0], tmp);
}

void EmitFPSub32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovFloat(ctx, tmp, op1[0]);
    ctx.Code().subss(tmp, op2[0].Op());
    MovFloat(ctx, dest[0], tmp);
}

void EmitFPFma16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
    Xmm tmp1 = ctx.TempXmmReg();
    Xmm tmp2 = ctx.TempXmmReg();
    Xmm tmp3 = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, tmp1, op1[0].Op());
    EmitInlineF16ToF32(ctx, tmp2, op2[0].Op());
    EmitInlineF16ToF32(ctx, tmp3, op3[0].Op());
    ctx.Code().vfmadd132ss(tmp1, tmp3, tmp2);
    EmitInlineF32ToF16(ctx, dest[0].Op(), tmp1);
}

void EmitFPFma32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
    Xmm tmp1 = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    Xmm tmp2 = op2[0].IsMem() ? ctx.TempXmmReg() : op2[0].Xmm();
    MovFloat(ctx, tmp1, op3[0]);
    MovFloat(ctx, tmp2, op2[0]);
    ctx.Code().vfmadd132ss(tmp2, tmp1, op1[0].Op());
    MovFloat(ctx, dest[0], tmp2);
}

void EmitFPFma64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
    Xmm tmp1 = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    Xmm tmp2 = op2[0].IsMem() ? ctx.TempXmmReg() : op2[0].Xmm();
    MovDouble(ctx, tmp1, op3[0]);
    MovDouble(ctx, tmp2, op2[0]);
    ctx.Code().vfmadd132sd(tmp2, tmp1, op1[0].Op());
    MovDouble(ctx, dest[0], tmp2);
}
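
// Note: vfmadd132ss/vfmadd132sd compute dst = dst * src2 + src1. With
// tmp2 = op2 and tmp1 = op3, the FPFma32/FPFma64 calls above evaluate
// op2 * op1 + op3, i.e. fma(op1, op2, op3).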

void EmitFPMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, bool is_legacy) {
    if (is_legacy) {
        Xmm tmp1 = ctx.TempXmmReg();
        Xmm tmp2 = ctx.TempXmmReg();
        MovFloat(ctx, tmp1, op1[0].Op());
        MovFloat(ctx, tmp2, op1[0].Op());
        ctx.Code().maxss(tmp2, op2[0].Op());
        ctx.Code().cmpunordss(tmp1, tmp1);
        ctx.Code().andps(tmp1, op2[0].Op());
        ctx.Code().orps(tmp2, tmp1);
        MovFloat(ctx, dest[0], tmp2);
    } else {
        Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
        MovFloat(ctx, tmp, op1[0]);
        ctx.Code().maxss(tmp, op2[0].Op());
        MovFloat(ctx, dest[0], tmp);
    }
}

void EmitFPMax64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovDouble(ctx, tmp, op1[0]);
    ctx.Code().maxsd(tmp, op2[0].Op());
    MovDouble(ctx, dest[0], tmp);
}

void EmitFPMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, bool is_legacy) {
    if (is_legacy) {
        Xmm tmp1 = ctx.TempXmmReg();
        Xmm tmp2 = ctx.TempXmmReg();
        MovFloat(ctx, tmp1, op1[0].Op());
        MovFloat(ctx, tmp2, op1[0].Op());
        ctx.Code().minss(tmp2, op2[0].Op());
        ctx.Code().cmpunordss(tmp1, tmp1);
        ctx.Code().andps(tmp1, op2[0].Op());
        ctx.Code().orps(tmp2, tmp1);
        MovFloat(ctx, dest[0], tmp2);
    } else {
        Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
        MovFloat(ctx, tmp, op1[0]);
        ctx.Code().minss(tmp, op2[0].Op());
        MovFloat(ctx, dest[0], tmp);
    }
}

void EmitFPMin64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovDouble(ctx, tmp, op1[0]);
    ctx.Code().minsd(tmp, op2[0].Op());
    MovDouble(ctx, dest[0], tmp);
}
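
// Note: maxss/minss return the second (source) operand whenever either input
// is NaN. The legacy paths above exploit this: cmpunordss builds an all-ones
// mask when op1 is NaN, which is then used to select op2, so a NaN first
// operand yields the other operand instead of NaN.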

void EmitFPMinTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovFloat(ctx, tmp, op1[0]);
    ctx.Code().minss(tmp, op2[0].Op());
    ctx.Code().minss(tmp, op3[0].Op());
    MovFloat(ctx, dest[0], tmp);
}

void EmitFPMaxTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovFloat(ctx, tmp, op1[0]);
    ctx.Code().maxss(tmp, op2[0].Op());
    ctx.Code().maxss(tmp, op3[0].Op());
    MovFloat(ctx, dest[0], tmp);
}

void EmitFPMedTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    Xmm tmp2 = ctx.TempXmmReg();
    MovFloat(ctx, tmp2, op1[0]);
    ctx.Code().maxss(tmp2, op2[0].Op());
    ctx.Code().minss(tmp2, op3[0].Op());
    MovFloat(ctx, tmp, op1[0]);
    ctx.Code().minss(tmp, op2[0].Op());
    ctx.Code().maxss(tmp, tmp2);
    MovFloat(ctx, dest[0], tmp);
}

void EmitFPMul16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    Xmm tmp1 = ctx.TempXmmReg();
    Xmm tmp2 = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, tmp1, op1[0].Op());
    EmitInlineF16ToF32(ctx, tmp2, op2[0].Op());
    ctx.Code().mulss(tmp1, tmp2);
    EmitInlineF32ToF16(ctx, dest[0].Op(), tmp1);
}

void EmitFPMul32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovFloat(ctx, tmp, op1[0]);
    ctx.Code().mulss(tmp, op2[0].Op());
    MovFloat(ctx, dest[0], tmp);
}

void EmitFPMul64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovDouble(ctx, tmp, op1[0]);
    ctx.Code().mulsd(tmp, op2[0].Op());
    MovDouble(ctx, dest[0], tmp);
}

void EmitFPDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovFloat(ctx, tmp, op1[0]);
    ctx.Code().divss(tmp, op2[0].Op());
    MovFloat(ctx, dest[0], tmp);
}

void EmitFPDiv64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovDouble(ctx, tmp, op1[0]);
    ctx.Code().divsd(tmp, op2[0].Op());
    MovDouble(ctx, dest[0], tmp);
}

void EmitFPNeg16(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    MovGP(ctx, dest[0], op1[0]);
    ctx.Code().xor_(dest[0].Op(), 0x8000);
}

void EmitFPNeg32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm tmp_xmm = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    Reg tmp_reg = ctx.TempGPReg().cvt32();
    ctx.Code().mov(tmp_reg, 0x80000000);
    ctx.Code().movd(tmp_xmm, tmp_reg);
    ctx.Code().xorps(tmp_xmm, op1[0].Op());
    MovFloat(ctx, dest[0], tmp_xmm);
}

void EmitFPNeg64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm tmp_xmm = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    Reg tmp_reg = ctx.TempGPReg();
    ctx.Code().mov(tmp_reg, 0x8000000000000000);
    ctx.Code().movq(tmp_xmm, tmp_reg);
    ctx.Code().xorpd(tmp_xmm, op1[0].Op());
    MovDouble(ctx, dest[0], tmp_xmm);
}

void EmitFPSin(EmitContext& ctx) {
    throw NotImplementedException("FPSin");
}

void EmitFPCos(EmitContext& ctx) {
    throw NotImplementedException("FPCos");
}

void EmitFPExp2(EmitContext& ctx) {
    throw NotImplementedException("FPExp2");
}

void EmitFPLdexp(EmitContext& ctx) {
    throw NotImplementedException("FPLdexp");
}

void EmitFPLog2(EmitContext& ctx) {
    throw NotImplementedException("FPLog2");
}

void EmitFPRecip32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().rcpss(tmp, op1[0].Op());
    MovFloat(ctx, dest[0], tmp);
}

void EmitFPRecip64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm tmp_xmm = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    Reg tmp_reg = ctx.TempGPReg();
    ctx.Code().mov(tmp_reg, 1);
    ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
    ctx.Code().divsd(tmp_xmm, op1[0].Op());
    MovDouble(ctx, dest[0], tmp_xmm);
}

void EmitFPRecipSqrt32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().rsqrtss(tmp, op1[0].Op());
    MovFloat(ctx, dest[0], tmp);
}

void EmitFPRecipSqrt64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm tmp_xmm = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    Reg tmp_reg = ctx.TempGPReg();
    ctx.Code().mov(tmp_reg, 1);
    ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
    ctx.Code().divsd(tmp_xmm, op1[0].Op());
    ctx.Code().sqrtsd(tmp_xmm, tmp_xmm);
    MovDouble(ctx, dest[0], tmp_xmm);
}
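
// Note: rcpss and rsqrtss are fast single-precision approximations (roughly
// 12 bits of precision) with no double-precision counterparts, which is
// presumably why the 64-bit paths above compute an exact 1.0 / x with divsd
// (plus sqrtsd for the reciprocal square root) instead.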

void EmitFPSqrt(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().sqrtss(tmp, op1[0].Op());
    MovFloat(ctx, dest[0], tmp);
}

void EmitFPSaturate16(EmitContext& ctx) {
    throw NotImplementedException("FPSaturate16");
}

void EmitFPSaturate32(EmitContext& ctx) {
    throw NotImplementedException("FPSaturate32");
}

void EmitFPSaturate64(EmitContext& ctx) {
    throw NotImplementedException("FPSaturate64");
}

void EmitFPClamp16(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max) {
    Xmm tmp1 = ctx.TempXmmReg();
    Xmm tmp2 = ctx.TempXmmReg();
    Xmm tmp3 = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, tmp1, op[0].Op());
    EmitInlineF16ToF32(ctx, tmp2, min[0].Op());
    EmitInlineF16ToF32(ctx, tmp3, max[0].Op());
    ctx.Code().maxss(tmp1, tmp2);
    ctx.Code().minss(tmp1, tmp3);
    EmitInlineF32ToF16(ctx, dest[0].Op(), tmp1);
}

void EmitFPClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovFloat(ctx, tmp, op[0]);
    ctx.Code().maxss(tmp, min[0].Op());
    ctx.Code().minss(tmp, max[0].Op());
    MovFloat(ctx, dest[0], tmp);
}

void EmitFPClamp64(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    MovDouble(ctx, tmp, op[0]);
    ctx.Code().maxsd(tmp, min[0].Op());
    ctx.Code().minsd(tmp, max[0].Op());
    MovDouble(ctx, dest[0], tmp);
}

void EmitFPRoundEven16(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm tmp = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, tmp, op1[0].Op());
    ctx.Code().roundss(tmp, tmp, 0x00);
    EmitInlineF32ToF16(ctx, dest[0].Op(), tmp);
}

void EmitFPRoundEven32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().roundss(tmp, op1[0].Op(), 0x00);
    MovFloat(ctx, dest[0], tmp);
}

void EmitFPRoundEven64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().roundsd(tmp, op1[0].Op(), 0x00);
    MovDouble(ctx, dest[0], tmp);
}

void EmitFPFloor16(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm tmp = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, tmp, op1[0].Op());
    ctx.Code().roundss(tmp, tmp, 0x01);
    EmitInlineF32ToF16(ctx, dest[0].Op(), tmp);
}

void EmitFPFloor32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().roundss(tmp, op1[0].Op(), 0x01);
    MovFloat(ctx, dest[0], tmp);
}

void EmitFPFloor64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().roundsd(tmp, op1[0].Op(), 0x01);
    MovDouble(ctx, dest[0], tmp);
}

void EmitFPCeil16(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm tmp = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, tmp, op1[0].Op());
    ctx.Code().roundss(tmp, tmp, 0x02);
    EmitInlineF32ToF16(ctx, dest[0].Op(), tmp);
}

void EmitFPCeil32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().roundss(tmp, op1[0].Op(), 0x02);
    MovFloat(ctx, dest[0], tmp);
}

void EmitFPCeil64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    ctx.Code().roundsd(tmp, op1[0].Op(), 0x02);
    MovDouble(ctx, dest[0], tmp);
}
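
// Note: the roundss/roundsd immediate selects the SSE4.1 rounding mode:
// 0x00 = round to nearest even, 0x01 = round down (floor), 0x02 = round up
// (ceil). The trunc helpers below instead round-trip through cvttss2si and
// cvtsi2ss, which truncates toward zero but only for values representable in
// the integer width used.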

void EmitFPTrunc16(EmitContext& ctx, const Operands& dest, const Operands& op) {
    Xmm tmp_xmm = ctx.TempXmmReg();
    Reg tmp_reg = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg().cvt32();
    EmitInlineF16ToF32(ctx, tmp_xmm, op[0].Op());
    ctx.Code().cvttss2si(tmp_reg, tmp_xmm);
    ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg);
    EmitInlineF32ToF16(ctx, dest[0].Op(), tmp_xmm);
}

void EmitFPTrunc32(EmitContext& ctx, const Operands& dest, const Operands& op) {
    Xmm tmp_xmm = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    Reg tmp_reg = ctx.TempGPReg().cvt32();
    ctx.Code().cvttss2si(tmp_reg, op[0].Op());
    ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg);
    MovFloat(ctx, dest[0], tmp_xmm);
}

void EmitFPTrunc64(EmitContext& ctx, const Operands& dest, const Operands& op) {
    Xmm tmp_xmm = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    Reg tmp_reg = ctx.TempGPReg();
    ctx.Code().cvttsd2si(tmp_reg, op[0].Op());
    ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
    MovDouble(ctx, dest[0], tmp_xmm);
}

void EmitFPFract32(EmitContext& ctx, const Operands& dest, const Operands& op) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    Xmm tmp2 = ctx.TempXmmReg();
    MovFloat(ctx, tmp, op[0]);
    ctx.Code().roundss(tmp2, tmp, 0x01);
    ctx.Code().subss(tmp, tmp2);
    MovFloat(ctx, dest[0], tmp);
}

void EmitFPFract64(EmitContext& ctx, const Operands& dest, const Operands& op) {
    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
    Xmm tmp2 = ctx.TempXmmReg();
    MovDouble(ctx, tmp, op[0]);
    ctx.Code().roundsd(tmp2, tmp, 0x01);
    ctx.Code().subsd(tmp, tmp2);
    MovDouble(ctx, dest[0], tmp);
}

void EmitFPFrexpSig32(EmitContext& ctx) {
    throw NotImplementedException("FPFrexpSig32");
}

void EmitFPFrexpSig64(EmitContext& ctx) {
    throw NotImplementedException("FPFrexpSig64");
}

void EmitFPFrexpExp32(EmitContext& ctx) {
    throw NotImplementedException("FPFrexpExp32");
}

void EmitFPFrexpExp64(EmitContext& ctx) {
    throw NotImplementedException("FPFrexpExp64");
}

void EmitFPOrdEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label not_nan;
    EmitFPUnordEqual16(ctx, dest, lhs, rhs);
    ctx.Code().jnp(not_nan);
    ctx.Code().mov(dest[0].Op(), 0);
    ctx.Code().L(not_nan);
}

void EmitFPOrdEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label not_nan;
    EmitFPUnordEqual32(ctx, dest, lhs, rhs);
    ctx.Code().jnp(not_nan);
    ctx.Code().mov(dest[0].Op(), 0);
    ctx.Code().L(not_nan);
}

void EmitFPOrdEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label not_nan;
    EmitFPUnordEqual64(ctx, dest, lhs, rhs);
    ctx.Code().jnp(not_nan);
    ctx.Code().mov(dest[0].Op(), 0);
    ctx.Code().L(not_nan);
}
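
// Note: ucomiss/ucomisd set PF=1 when the comparison is unordered, i.e. when
// either input is NaN. The FPOrd* wrappers reuse the FPUnord* result and then
// zero it unless jnp (PF=0) confirms the operands were ordered, forcing
// ordered comparisons with NaN operands to false.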

void EmitFPUnordEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Xmm tmp1 = ctx.TempXmmReg();
    Xmm tmp2 = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, tmp1, lhs[0].Op());
    EmitInlineF16ToF32(ctx, tmp2, rhs[0].Op());
    ctx.Code().ucomiss(tmp1, tmp2);
    ctx.Code().sete(dest[0].Op());
}

void EmitFPUnordEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
    MovFloat(ctx, tmp, lhs[0]);
    ctx.Code().ucomiss(tmp, rhs[0].Op());
    ctx.Code().sete(dest[0].Op());
}

void EmitFPUnordEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
    MovDouble(ctx, tmp, lhs[0]);
    ctx.Code().ucomisd(tmp, rhs[0].Op());
    ctx.Code().sete(dest[0].Op());
}

void EmitFPOrdNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label not_nan;
    EmitFPUnordNotEqual16(ctx, dest, lhs, rhs);
    ctx.Code().jnp(not_nan);
    ctx.Code().mov(dest[0].Op(), 0);
    ctx.Code().L(not_nan);
}

void EmitFPOrdNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label not_nan;
    EmitFPUnordNotEqual32(ctx, dest, lhs, rhs);
    ctx.Code().jnp(not_nan);
    ctx.Code().mov(dest[0].Op(), 0);
    ctx.Code().L(not_nan);
}

void EmitFPOrdNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label not_nan;
    EmitFPUnordNotEqual64(ctx, dest, lhs, rhs);
    ctx.Code().jnp(not_nan);
    ctx.Code().mov(dest[0].Op(), 0);
    ctx.Code().L(not_nan);
}

void EmitFPUnordNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Xmm tmp1 = ctx.TempXmmReg();
    Xmm tmp2 = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, tmp1, lhs[0].Op());
    EmitInlineF16ToF32(ctx, tmp2, rhs[0].Op());
    ctx.Code().ucomiss(tmp1, tmp2);
    ctx.Code().setne(dest[0].Op());
}

void EmitFPUnordNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
    MovFloat(ctx, tmp, lhs[0]);
    ctx.Code().ucomiss(tmp, rhs[0].Op());
    ctx.Code().setne(dest[0].Op());
}

void EmitFPUnordNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
    MovDouble(ctx, tmp, lhs[0]);
    ctx.Code().ucomisd(tmp, rhs[0].Op());
    ctx.Code().setne(dest[0].Op());
}

void EmitFPOrdLessThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label not_nan;
    EmitFPUnordLessThan16(ctx, dest, lhs, rhs);
    ctx.Code().jnp(not_nan);
    ctx.Code().mov(dest[0].Op(), 0);
    ctx.Code().L(not_nan);
}

void EmitFPOrdLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label not_nan;
    EmitFPUnordLessThan32(ctx, dest, lhs, rhs);
    ctx.Code().jnp(not_nan);
    ctx.Code().mov(dest[0].Op(), 0);
    ctx.Code().L(not_nan);
}

void EmitFPOrdLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label not_nan;
    EmitFPUnordLessThan64(ctx, dest, lhs, rhs);
    ctx.Code().jnp(not_nan);
    ctx.Code().mov(dest[0].Op(), 0);
    ctx.Code().L(not_nan);
}

void EmitFPUnordLessThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Xmm tmp1 = ctx.TempXmmReg();
    Xmm tmp2 = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, tmp1, lhs[0].Op());
    EmitInlineF16ToF32(ctx, tmp2, rhs[0].Op());
    ctx.Code().ucomiss(tmp1, tmp2);
    ctx.Code().setb(dest[0].Op());
}

void EmitFPUnordLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
    MovFloat(ctx, tmp, lhs[0]);
    ctx.Code().ucomiss(tmp, rhs[0].Op());
    ctx.Code().setb(dest[0].Op());
}

void EmitFPUnordLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
    MovDouble(ctx, tmp, lhs[0]);
    ctx.Code().ucomisd(tmp, rhs[0].Op());
    ctx.Code().setb(dest[0].Op());
}

void EmitFPOrdGreaterThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label not_nan;
    EmitFPUnordGreaterThan16(ctx, dest, lhs, rhs);
    ctx.Code().jnp(not_nan);
    ctx.Code().mov(dest[0].Op(), 0);
    ctx.Code().L(not_nan);
}

void EmitFPOrdGreaterThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label not_nan;
    EmitFPUnordGreaterThan32(ctx, dest, lhs, rhs);
    ctx.Code().jnp(not_nan);
    ctx.Code().mov(dest[0].Op(), 0);
    ctx.Code().L(not_nan);
}

void EmitFPOrdGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label not_nan;
    EmitFPUnordGreaterThan64(ctx, dest, lhs, rhs);
    ctx.Code().jnp(not_nan);
    ctx.Code().mov(dest[0].Op(), 0);
    ctx.Code().L(not_nan);
}

void EmitFPUnordGreaterThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Xmm tmp1 = ctx.TempXmmReg();
    Xmm tmp2 = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, tmp1, lhs[0].Op());
    EmitInlineF16ToF32(ctx, tmp2, rhs[0].Op());
    ctx.Code().ucomiss(tmp1, tmp2);
    ctx.Code().seta(dest[0].Op());
}

void EmitFPUnordGreaterThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
    MovFloat(ctx, tmp, lhs[0]);
    ctx.Code().ucomiss(tmp, rhs[0].Op());
    ctx.Code().seta(dest[0].Op());
}

void EmitFPUnordGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
    MovDouble(ctx, tmp, lhs[0]);
    ctx.Code().ucomisd(tmp, rhs[0].Op());
    ctx.Code().seta(dest[0].Op());
}

void EmitFPOrdLessThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label not_nan;
    EmitFPUnordLessThanEqual16(ctx, dest, lhs, rhs);
    ctx.Code().jnp(not_nan);
    ctx.Code().mov(dest[0].Op(), 0);
    ctx.Code().L(not_nan);
}

void EmitFPOrdLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label not_nan;
    EmitFPUnordLessThanEqual32(ctx, dest, lhs, rhs);
    ctx.Code().jnp(not_nan);
    ctx.Code().mov(dest[0].Op(), 0);
    ctx.Code().L(not_nan);
}

void EmitFPOrdLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label not_nan;
    EmitFPUnordLessThanEqual64(ctx, dest, lhs, rhs);
    ctx.Code().jnp(not_nan);
    ctx.Code().mov(dest[0].Op(), 0);
    ctx.Code().L(not_nan);
}

void EmitFPUnordLessThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Xmm tmp1 = ctx.TempXmmReg();
    Xmm tmp2 = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, tmp1, lhs[0].Op());
    EmitInlineF16ToF32(ctx, tmp2, rhs[0].Op());
    ctx.Code().ucomiss(tmp1, tmp2);
    ctx.Code().setbe(dest[0].Op());
}

void EmitFPUnordLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
    MovFloat(ctx, tmp, lhs[0]);
    ctx.Code().ucomiss(tmp, rhs[0].Op());
    ctx.Code().setbe(dest[0].Op());
}

void EmitFPUnordLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
    MovDouble(ctx, tmp, lhs[0]);
    ctx.Code().ucomisd(tmp, rhs[0].Op());
    ctx.Code().setbe(dest[0].Op());
}

void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label not_nan;
    EmitFPUnordGreaterThanEqual16(ctx, dest, lhs, rhs);
    ctx.Code().jnp(not_nan);
    ctx.Code().mov(dest[0].Op(), 0);
    ctx.Code().L(not_nan);
}

void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label not_nan;
    EmitFPUnordGreaterThanEqual32(ctx, dest, lhs, rhs);
    ctx.Code().jnp(not_nan);
    ctx.Code().mov(dest[0].Op(), 0);
    ctx.Code().L(not_nan);
}

void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label not_nan;
    EmitFPUnordGreaterThanEqual64(ctx, dest, lhs, rhs);
    ctx.Code().jnp(not_nan);
    ctx.Code().mov(dest[0].Op(), 0);
    ctx.Code().L(not_nan);
}

void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Xmm tmp1 = ctx.TempXmmReg();
    Xmm tmp2 = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, tmp1, lhs[0].Op());
    EmitInlineF16ToF32(ctx, tmp2, rhs[0].Op());
    ctx.Code().ucomiss(tmp1, tmp2);
    ctx.Code().setae(dest[0].Op());
}

void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
    MovFloat(ctx, tmp, lhs[0]);
    ctx.Code().ucomiss(tmp, rhs[0].Op());
    ctx.Code().setae(dest[0].Op());
}

void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
    MovDouble(ctx, tmp, lhs[0]);
    ctx.Code().ucomisd(tmp, rhs[0].Op());
    ctx.Code().setae(dest[0].Op());
}

void EmitFPIsNan16(EmitContext& ctx, const Operands& dest, const Operands& op) {
    Xmm tmp = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, tmp, op[0].Op());
    ctx.Code().ucomiss(tmp, tmp);
    ctx.Code().setp(dest[0].Op());
}

void EmitFPIsNan32(EmitContext& ctx, const Operands& dest, const Operands& op) {
    Xmm tmp = op[0].IsMem() ? ctx.TempXmmReg() : op[0].Xmm();
    MovFloat(ctx, tmp, op[0]);
    ctx.Code().ucomiss(tmp, tmp);
    ctx.Code().setp(dest[0].Op());
}

void EmitFPIsNan64(EmitContext& ctx, const Operands& dest, const Operands& op) {
    Xmm tmp = op[0].IsMem() ? ctx.TempXmmReg() : op[0].Xmm();
    MovDouble(ctx, tmp, op[0]);
    ctx.Code().ucomisd(tmp, tmp);
    ctx.Code().setp(dest[0].Op());
}

void EmitFPIsInf32(EmitContext& ctx) {
    throw NotImplementedException("FPIsInf32");
}

void EmitFPIsInf64(EmitContext& ctx) {
    throw NotImplementedException("FPIsInf64");
}

void EmitFPCmpClass32(EmitContext&) {
    UNREACHABLE();
}

} // namespace Shader::Backend::X64
62 src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp Normal file
@@ -0,0 +1,62 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shader_recompiler/exception.h"
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"

namespace Shader::Backend::X64 {

void EmitImageSampleRaw(EmitContext& ctx) {
    // We can reach this point because the resource tracking pass has not been run yet.
    throw NotImplementedException("ImageSampleRaw");
}

void EmitImageSampleImplicitLod(EmitContext& ctx) {
    throw NotImplementedException("ImageSampleImplicitLod");
}

void EmitImageSampleExplicitLod(EmitContext& ctx) {
    throw NotImplementedException("ImageSampleExplicitLod");
}

void EmitImageSampleDrefImplicitLod(EmitContext& ctx) {
    throw NotImplementedException("ImageSampleDrefImplicitLod");
}

void EmitImageSampleDrefExplicitLod(EmitContext& ctx) {
    throw NotImplementedException("ImageSampleDrefExplicitLod");
}

void EmitImageGather(EmitContext& ctx) {
    throw NotImplementedException("ImageGather");
}

void EmitImageGatherDref(EmitContext& ctx) {
    throw NotImplementedException("ImageGatherDref");
}

void EmitImageQueryDimensions(EmitContext& ctx) {
    throw NotImplementedException("ImageQueryDimensions");
}

void EmitImageQueryLod(EmitContext& ctx) {
    throw NotImplementedException("ImageQueryLod");
}

void EmitImageGradient(EmitContext& ctx) {
    throw NotImplementedException("ImageGradient");
}

void EmitImageRead(EmitContext& ctx) {
    throw NotImplementedException("ImageRead");
}

void EmitImageWrite(EmitContext& ctx) {
    throw NotImplementedException("ImageWrite");
}

void EmitCubeFaceIndex(EmitContext& ctx) {
    throw NotImplementedException("CubeFaceIndex");
}

} // namespace Shader::Backend::X64
482 src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h Normal file
@@ -0,0 +1,482 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <boost/container/static_vector.hpp>
#include <xbyak/xbyak.h>
#include "common/types.h"
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"

namespace Shader::IR {
enum class Attribute : u64;
enum class ScalarReg : u32;
enum class Patch : u64;
class Inst;
class Value;
} // namespace Shader::IR

namespace Shader::Backend::X64 {

// Microinstruction emitters
void EmitPhi(EmitContext& ctx);
void EmitVoid(EmitContext& ctx);
void EmitIdentity(EmitContext& ctx);
void EmitConditionRef(EmitContext& ctx);
void EmitReference(EmitContext&);
void EmitPhiMove(EmitContext&);
void EmitJoin(EmitContext& ctx);
void EmitGetScc(EmitContext& ctx);
void EmitGetExec(EmitContext& ctx);
void EmitGetVcc(EmitContext& ctx);
void EmitGetSccLo(EmitContext& ctx);
void EmitGetVccLo(EmitContext& ctx);
void EmitGetVccHi(EmitContext& ctx);
void EmitGetM0(EmitContext& ctx);
void EmitSetScc(EmitContext& ctx);
void EmitSetExec(EmitContext& ctx);
void EmitSetVcc(EmitContext& ctx);
void EmitSetSccLo(EmitContext& ctx);
void EmitSetVccLo(EmitContext& ctx);
void EmitSetVccHi(EmitContext& ctx);
void EmitSetM0(EmitContext& ctx);
void EmitFPCmpClass32(EmitContext& ctx);
void EmitPrologue(EmitContext& ctx);
void EmitEpilogue(EmitContext& ctx);
void EmitDiscard(EmitContext& ctx);
void EmitDiscardCond(EmitContext& ctx, const Operands& condition);
void EmitDebugPrint(EmitContext& ctx);
void EmitBarrier(EmitContext& ctx);
void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
void EmitDeviceMemoryBarrier(EmitContext& ctx);
void EmitGetUserData(EmitContext& ctx, const Operands& dest, IR::ScalarReg reg);
void EmitSetUserData(EmitContext& ctx, const Operands& offset, const Operands& value);
void EmitGetThreadBitScalarReg(EmitContext& ctx);
void EmitSetThreadBitScalarReg(EmitContext& ctx);
void EmitGetScalarRegister(EmitContext& ctx);
void EmitSetScalarRegister(EmitContext& ctx);
void EmitGetVectorRegister(EmitContext& ctx);
void EmitSetVectorRegister(EmitContext& ctx);
void EmitSetGotoVariable(EmitContext& ctx);
void EmitGetGotoVariable(EmitContext& ctx);
void EmitSetScc(EmitContext& ctx);
void EmitReadConst(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset);
void EmitReadConstBuffer(EmitContext& ctx, const Operands& dest, const Operands& handle, const Operands& offset);
void EmitLoadBufferU8(EmitContext& ctx);
void EmitLoadBufferU16(EmitContext& ctx);
void EmitLoadBufferU32(EmitContext& ctx);
void EmitLoadBufferU32x2(EmitContext& ctx);
void EmitLoadBufferU32x3(EmitContext& ctx);
void EmitLoadBufferU32x4(EmitContext& ctx);
void EmitLoadBufferF32(EmitContext& ctx);
void EmitLoadBufferF32x2(EmitContext& ctx);
void EmitLoadBufferF32x3(EmitContext& ctx);
void EmitLoadBufferF32x4(EmitContext& ctx);
void EmitLoadBufferFormatF32(EmitContext& ctx);
void EmitStoreBufferU8(EmitContext& ctx);
void EmitStoreBufferU16(EmitContext& ctx);
void EmitStoreBufferU32(EmitContext& ctx);
void EmitStoreBufferU32x2(EmitContext& ctx);
void EmitStoreBufferU32x3(EmitContext& ctx);
void EmitStoreBufferU32x4(EmitContext& ctx);
void EmitStoreBufferF32(EmitContext& ctx);
void EmitStoreBufferF32x2(EmitContext& ctx);
void EmitStoreBufferF32x3(EmitContext& ctx);
void EmitStoreBufferF32x4(EmitContext& ctx);
void EmitStoreBufferFormatF32(EmitContext& ctx);
void EmitBufferAtomicIAdd32(EmitContext& ctx);
void EmitBufferAtomicSMin32(EmitContext& ctx);
void EmitBufferAtomicUMin32(EmitContext& ctx);
void EmitBufferAtomicSMax32(EmitContext& ctx);
void EmitBufferAtomicUMax32(EmitContext& ctx);
void EmitBufferAtomicInc32(EmitContext& ctx);
void EmitBufferAtomicDec32(EmitContext& ctx);
void EmitBufferAtomicAnd32(EmitContext& ctx);
void EmitBufferAtomicOr32(EmitContext& ctx);
void EmitBufferAtomicXor32(EmitContext& ctx);
void EmitBufferAtomicSwap32(EmitContext& ctx);
void EmitGetAttribute(EmitContext& ctx, const Operands& dest);
void EmitGetAttributeU32(EmitContext& ctx, const Operands& dest);
void EmitSetAttribute(EmitContext& ctx);
void EmitGetTessGenericAttribute(EmitContext& ctx);
void EmitSetTcsGenericAttribute(EmitContext& ctx);
void EmitReadTcsGenericOuputAttribute(EmitContext& ctx);
void EmitGetPatch(EmitContext& ctx);
void EmitSetPatch(EmitContext& ctx);
void EmitSetFragColor(EmitContext& ctx);
void EmitSetSampleMask(EmitContext& ctx);
void EmitSetFragDepth(EmitContext& ctx);
void EmitWorkgroupId(EmitContext& ctx);
void EmitLocalInvocationId(EmitContext& ctx);
void EmitInvocationId(EmitContext& ctx);
void EmitInvocationInfo(EmitContext& ctx);
void EmitSampleId(EmitContext& ctx);
void EmitUndefU1(EmitContext& ctx);
void EmitUndefU8(EmitContext& ctx);
void EmitUndefU16(EmitContext& ctx);
void EmitUndefU32(EmitContext& ctx);
void EmitUndefU64(EmitContext& ctx);
void EmitLoadSharedU32(EmitContext& ctx, const Operands& dest, const Operands& offset);
void EmitLoadSharedU64(EmitContext& ctx, const Operands& dest, const Operands& offset);
void EmitWriteSharedU32(EmitContext& ctx);
void EmitWriteSharedU64(EmitContext& ctx);
void EmitSharedAtomicIAdd32(EmitContext& ctx);
void EmitSharedAtomicUMax32(EmitContext& ctx);
void EmitSharedAtomicSMax32(EmitContext& ctx);
void EmitSharedAtomicUMin32(EmitContext& ctx);
void EmitSharedAtomicSMin32(EmitContext& ctx);
void EmitSharedAtomicAnd32(EmitContext& ctx);
void EmitSharedAtomicOr32(EmitContext& ctx);
void EmitSharedAtomicXor32(EmitContext& ctx);
void EmitCompositeConstructU32x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2);
void EmitCompositeConstructU32x3(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3);
void EmitCompositeConstructU32x4(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3, const Operands& src4);
void EmitCompositeConstructU32x2x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2);
void EmitCompositeExtractU32x2(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
void EmitCompositeExtractU32x3(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
void EmitCompositeExtractU32x4(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
void EmitCompositeInsertU32x2(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
void EmitCompositeInsertU32x3(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
void EmitCompositeInsertU32x4(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
void EmitCompositeShuffleU32x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2);
void EmitCompositeShuffleU32x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3);
void EmitCompositeShuffleU32x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4);
void EmitCompositeConstructF16x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2);
void EmitCompositeConstructF16x3(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3);
void EmitCompositeConstructF16x4(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3, const Operands& src4);
void EmitCompositeExtractF16x2(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
void EmitCompositeExtractF16x3(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
void EmitCompositeExtractF16x4(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
void EmitCompositeInsertF16x2(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
void EmitCompositeInsertF16x3(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
void EmitCompositeInsertF16x4(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
void EmitCompositeShuffleF16x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2);
void EmitCompositeShuffleF16x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3);
void EmitCompositeShuffleF16x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4);
void EmitCompositeConstructF32x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2);
void EmitCompositeConstructF32x3(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3);
void EmitCompositeConstructF32x4(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3, const Operands& src4);
void EmitCompositeConstructF32x2x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2);
void EmitCompositeExtractF32x2(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
void EmitCompositeExtractF32x3(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
void EmitCompositeExtractF32x4(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
void EmitCompositeInsertF32x2(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
void EmitCompositeInsertF32x3(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
void EmitCompositeInsertF32x4(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
void EmitCompositeShuffleF32x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2);
void EmitCompositeShuffleF32x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3);
void EmitCompositeShuffleF32x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4);
void EmitCompositeConstructF64x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2);
void EmitCompositeConstructF64x3(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3);
void EmitCompositeConstructF64x4(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3, const Operands& src4);
void EmitCompositeExtractF64x2(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
void EmitCompositeExtractF64x3(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
void EmitCompositeExtractF64x4(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
void EmitCompositeInsertF64x2(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
void EmitCompositeInsertF64x3(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
void EmitCompositeInsertF64x4(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
void EmitCompositeShuffleF64x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2);
void EmitCompositeShuffleF64x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3);
void EmitCompositeShuffleF64x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4);
void EmitSelectU1(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value);
void EmitSelectU8(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value);
void EmitSelectU16(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value);
void EmitSelectU32(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value);
void EmitSelectU64(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value);
void EmitSelectF16(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value);
void EmitSelectF32(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value);
void EmitSelectF64(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value);
void EmitBitCastU16F16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitBitCastU32F32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitBitCastU64F64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitBitCastF16U16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitBitCastF32U32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitBitCastF64U64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitPackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitUnpackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitPackFloat2x32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitPackUnorm2x16(EmitContext& ctx);
void EmitUnpackUnorm2x16(EmitContext& ctx);
void EmitPackSnorm2x16(EmitContext& ctx);
void EmitUnpackSnorm2x16(EmitContext& ctx);
void EmitPackUint2x16(EmitContext& ctx);
void EmitUnpackUint2x16(EmitContext& ctx);
void EmitPackSint2x16(EmitContext& ctx);
void EmitUnpackSint2x16(EmitContext& ctx);
void EmitPackHalf2x16(EmitContext& ctx);
void EmitUnpackHalf2x16(EmitContext& ctx);
void EmitPackUnorm4x8(EmitContext& ctx);
void EmitUnpackUnorm4x8(EmitContext& ctx);
void EmitPackSnorm4x8(EmitContext& ctx);
void EmitUnpackSnorm4x8(EmitContext& ctx);
void EmitPackUint4x8(EmitContext& ctx);
void EmitUnpackUint4x8(EmitContext& ctx);
void EmitPackSint4x8(EmitContext& ctx);
void EmitUnpackSint4x8(EmitContext& ctx);
void EmitPackUfloat10_11_11(EmitContext& ctx);
void EmitUnpackUfloat10_11_11(EmitContext& ctx);
void EmitPackUnorm2_10_10_10(EmitContext& ctx);
void EmitUnpackUnorm2_10_10_10(EmitContext& ctx);
void EmitPackSnorm2_10_10_10(EmitContext& ctx);
void EmitUnpackSnorm2_10_10_10(EmitContext& ctx);
void EmitPackUint2_10_10_10(EmitContext& ctx);
void EmitUnpackUint2_10_10_10(EmitContext& ctx);
void EmitPackSint2_10_10_10(EmitContext& ctx);
void EmitUnpackSint2_10_10_10(EmitContext& ctx);
void EmitFPAbs16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPAbs32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPAbs64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPAdd16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPAdd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPAdd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPSub32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPFma16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
|
||||
void EmitFPFma32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
|
||||
void EmitFPFma64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
|
||||
void EmitFPMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, bool is_legacy = false);
|
||||
void EmitFPMax64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitFPMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, bool is_legacy = false);
|
||||
void EmitFPMin64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitFPMinTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
|
||||
void EmitFPMaxTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
|
||||
void EmitFPMedTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
|
||||
void EmitFPMul16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitFPMul32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitFPMul64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitFPDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitFPDiv64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitFPNeg16(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitFPNeg32(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitFPNeg64(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitFPSin(EmitContext& ctx);
|
||||
void EmitFPCos(EmitContext& ctx);
|
||||
void EmitFPExp2(EmitContext& ctx);
|
||||
void EmitFPLdexp(EmitContext& ctx);
|
||||
void EmitFPLog2(EmitContext& ctx);
|
||||
void EmitFPRecip32(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitFPRecip64(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitFPRecipSqrt32(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitFPRecipSqrt64(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitFPSqrt(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitFPSaturate16(EmitContext& ctx);
|
||||
void EmitFPSaturate32(EmitContext& ctx);
|
||||
void EmitFPSaturate64(EmitContext& ctx);
|
||||
void EmitFPClamp16(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max);
|
||||
void EmitFPClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max);
|
||||
void EmitFPClamp64(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max);
|
||||
void EmitFPRoundEven16(EmitContext& ctx, const Operands& dest, const Operands& op1);
|
||||
void EmitFPRoundEven32(EmitContext& ctx, const Operands& dest, const Operands& op1);
|
||||
void EmitFPRoundEven64(EmitContext& ctx, const Operands& dest, const Operands& op1);
|
||||
void EmitFPFloor16(EmitContext& ctx, const Operands& dest, const Operands& op1);
|
||||
void EmitFPFloor32(EmitContext& ctx, const Operands& dest, const Operands& op1);
|
||||
void EmitFPFloor64(EmitContext& ctx, const Operands& dest, const Operands& op1);
|
||||
void EmitFPCeil16(EmitContext& ctx, const Operands& dest, const Operands& op1);
|
||||
void EmitFPCeil32(EmitContext& ctx, const Operands& dest, const Operands& op1);
|
||||
void EmitFPCeil64(EmitContext& ctx, const Operands& dest, const Operands& op1);
|
||||
void EmitFPTrunc16(EmitContext& ctx, const Operands& dest, const Operands& op);
|
||||
void EmitFPTrunc32(EmitContext& ctx, const Operands& dest, const Operands& op);
|
||||
void EmitFPTrunc64(EmitContext& ctx, const Operands& dest, const Operands& op);
|
||||
void EmitFPFract32(EmitContext& ctx, const Operands& dest, const Operands& op);
|
||||
void EmitFPFract64(EmitContext& ctx, const Operands& dest, const Operands& op);
|
||||
void EmitFPFrexpSig32(EmitContext& ctx);
|
||||
void EmitFPFrexpSig64(EmitContext& ctx);
|
||||
void EmitFPFrexpExp32(EmitContext& ctx);
|
||||
void EmitFPFrexpExp64(EmitContext& ctx);
|
||||
void EmitFPOrdEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPOrdEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPOrdEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPUnordEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPUnordEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPUnordEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPOrdNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPOrdNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPOrdNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPUnordNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPUnordNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPUnordNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPOrdLessThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPOrdLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPOrdLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPUnordLessThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPUnordLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPUnordLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPOrdGreaterThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPOrdGreaterThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPOrdGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPUnordGreaterThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPUnordGreaterThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPUnordGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPOrdLessThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPOrdLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPOrdLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPUnordLessThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPUnordLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPUnordLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitFPIsNan16(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitFPIsNan32(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitFPIsNan64(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitFPIsInf32(EmitContext& ctx);
|
||||
void EmitFPIsInf64(EmitContext& ctx);
|
||||
void EmitIAdd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitIAdd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitIAddCary32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitISub32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitISub64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitSMulExt(EmitContext& ctx);
|
||||
void EmitUMulExt(EmitContext& ctx);
|
||||
void EmitIMul32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitIMul64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitSDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitUDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitSMod32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitUMod32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitINeg32(EmitContext& ctx, const Operands& dest, const Operands& op);
|
||||
void EmitINeg64(EmitContext& ctx, const Operands& dest, const Operands& op);
|
||||
void EmitIAbs32(EmitContext& ctx, const Operands& dest, const Operands& op);
|
||||
void EmitShiftLeftLogical32(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift);
|
||||
void EmitShiftLeftLogical64(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift);
|
||||
void EmitShiftRightLogical32(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift);
|
||||
void EmitShiftRightLogical64(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift);
|
||||
void EmitShiftRightArithmetic32(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift);
|
||||
void EmitShiftRightArithmetic64(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift);
|
||||
void EmitBitwiseAnd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitBitwiseAnd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitBitwiseOr32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitBitwiseOr64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitBitwiseXor32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitBitFieldInsert(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& insert, const Operands& offset, const Operands& count);
|
||||
void EmitBitFieldSExtract(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset, const Operands& count);
|
||||
void EmitBitFieldUExtract(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset, const Operands& count);
|
||||
void EmitBitReverse32(EmitContext& ctx);
|
||||
void EmitBitCount32(EmitContext& ctx);
|
||||
void EmitBitCount64(EmitContext& ctx);
|
||||
void EmitBitwiseNot32(EmitContext& ctx, const Operands& dest, const Operands& op);
|
||||
void EmitFindSMsb32(EmitContext& ctx);
|
||||
void EmitFindUMsb32(EmitContext& ctx);
|
||||
void EmitFindILsb32(EmitContext& ctx);
|
||||
void EmitFindILsb64(EmitContext& ctx);
|
||||
void EmitSMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitUMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitSMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitUMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitSMinTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
|
||||
void EmitUMinTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
|
||||
void EmitSMaxTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
|
||||
void EmitUMaxTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
|
||||
void EmitSMedTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
|
||||
void EmitUMedTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
|
||||
void EmitSClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max);
|
||||
void EmitUClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max);
|
||||
void EmitSLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitSLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitULessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitULessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitIEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitIEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitSLessThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitULessThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitSGreaterThan(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitUGreaterThan(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitINotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitINotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitSGreaterThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitUGreaterThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
|
||||
void EmitLogicalOr(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitLogicalAnd(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitLogicalXor(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||
void EmitLogicalNot(EmitContext& ctx, const Operands& dest, const Operands& op);
|
||||
void EmitConvertS16F16(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertS16F32(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertS16F64(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertS32F16(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertS32F32(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertS32F64(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertS64F16(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertS64F32(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertS64F64(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertU16F16(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertU16F32(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertU16F64(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertU32F16(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertU32F32(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertU32F64(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertU64F16(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertU64F32(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertU64F64(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertU64U32(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertU32U64(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF16F32(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF32F16(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF32F64(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF64F32(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF16S8(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF16S16(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF16S32(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF16S64(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF16U8(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF16U16(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF16U32(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF16U64(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF32S8(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF32S16(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF32S32(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF32S64(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF32U8(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF32U16(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF32U32(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF32U64(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF64S8(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF64S16(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF64S32(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF64S64(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF64U8(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF64U16(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF64U32(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertF64U64(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertU16U32(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
void EmitConvertU32U16(EmitContext& ctx, const Operands& dest, const Operands& src);
|
||||
|
||||
void EmitImageSampleRaw(EmitContext& ctx);
|
||||
void EmitImageSampleImplicitLod(EmitContext& ctx);
|
||||
void EmitImageSampleExplicitLod(EmitContext& ctx);
|
||||
void EmitImageSampleDrefImplicitLod(EmitContext& ctx);
|
||||
void EmitImageSampleDrefExplicitLod(EmitContext& ctx);
|
||||
void EmitImageGather(EmitContext& ctx);
|
||||
void EmitImageGatherDref(EmitContext& ctx);
|
||||
void EmitImageQueryDimensions(EmitContext& ctx);
|
||||
void EmitImageQueryLod(EmitContext& ctx);
|
||||
void EmitImageGradient(EmitContext& ctx);
|
||||
void EmitImageRead(EmitContext& ctx);
|
||||
void EmitImageWrite(EmitContext& ctx);
|
||||
|
||||
void EmitImageAtomicIAdd32(EmitContext& ctx);
|
||||
void EmitImageAtomicSMin32(EmitContext& ctx);
|
||||
void EmitImageAtomicUMin32(EmitContext& ctx);
|
||||
void EmitImageAtomicSMax32(EmitContext& ctx);
|
||||
void EmitImageAtomicUMax32(EmitContext& ctx);
|
||||
void EmitImageAtomicInc32(EmitContext& ctx);
|
||||
void EmitImageAtomicDec32(EmitContext& ctx);
|
||||
void EmitImageAtomicAnd32(EmitContext& ctx);
|
||||
void EmitImageAtomicOr32(EmitContext& ctx);
|
||||
void EmitImageAtomicXor32(EmitContext& ctx);
|
||||
void EmitImageAtomicExchange32(EmitContext& ctx);
|
||||
void EmitCubeFaceIndex(EmitContext& ctx);
|
||||
void EmitLaneId(EmitContext& ctx);
|
||||
void EmitWarpId(EmitContext& ctx);
|
||||
void EmitQuadShuffle(EmitContext& ctx);
|
||||
void EmitReadFirstLane(EmitContext& ctx);
|
||||
void EmitReadLane(EmitContext& ctx);
|
||||
void EmitWriteLane(EmitContext& ctx);
|
||||
void EmitDataAppend(EmitContext& ctx);
|
||||
void EmitDataConsume(EmitContext& ctx);
|
||||
|
||||
void EmitEmitVertex(EmitContext& ctx);
|
||||
void EmitEmitPrimitive(EmitContext& ctx);
|
||||
|
||||
} // namespace Shader::Backend::X64
|
624
src/shader_recompiler/backend/asm_x64/emit_x64_integer.cpp
Normal file
|
@ -0,0 +1,624 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
|
||||
#include "shader_recompiler/backend/asm_x64/x64_utils.h"
|
||||
|
||||
namespace Shader::Backend::X64 {
|
||||
|
||||
using namespace Xbyak;
|
||||
using namespace Xbyak::util;
|
||||
|
||||
namespace {
|
||||
|
||||
static bool IsReg(const OperandHolder& op, const Reg& reg) {
|
||||
return op.IsReg() && op.Reg().getIdx() == reg.getIdx();
|
||||
}
|
||||
|
||||
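// Pushes `save` so it can be used as scratch, unless the destination already is
// that register (its old value is dead). Returns true when a matching pop is needed.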
static bool EmitSaveRegTemp(EmitContext& ctx, const Reg& save, const OperandHolder& dest) {
|
||||
if (IsReg(dest, save)) {
|
||||
// Destination is the save register itself; its old value is dead, so skip the push
|
||||
return false;
|
||||
}
|
||||
ctx.Code().push(save);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void EmitRestoreRegTemp(EmitContext& ctx, const Reg& save) {
|
||||
ctx.Code().pop(save);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
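// With all operands in registers a single lea computes op1 + op2 in one
// instruction; otherwise the add goes through a temporary so that at most one
// operand of the x86 add is in memory.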
void EmitIAdd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
if (dest[0].IsReg() && op1[0].IsReg() && op2[0].IsReg()) {
|
||||
ctx.Code().lea(dest[0].Reg(), ptr[op1[0].Reg() + op2[0].Reg()]);
|
||||
} else {
|
||||
OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0];
|
||||
MovGP(ctx, tmp, op1[0]);
|
||||
ctx.Code().add(tmp.Op(), op2[0].Op());
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitIAdd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
if (dest[0].IsReg() && op1[0].IsReg() && op2[0].IsReg()) {
|
||||
ctx.Code().lea(dest[0].Reg(), ptr[op1[0].Reg() + op2[0].Reg()]);
|
||||
} else {
|
||||
OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg() : dest[0];
|
||||
MovGP(ctx, tmp, op1[0]);
|
||||
ctx.Code().add(tmp.Op(), op2[0].Op());
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
}
|
||||
|
||||
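// dest[0] receives the 32-bit sum; dest[1] receives the carry flag captured with setc.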
void EmitIAddCary32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0];
|
||||
OperandHolder carry = dest[1];
|
||||
carry.Op().setBit(1);
|
||||
MovGP(ctx, tmp, op1[0]);
|
||||
ctx.Code().add(tmp.Op(), op2[0].Op());
|
||||
ctx.Code().setc(carry.Op());
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitISub32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0];
|
||||
MovGP(ctx, tmp, op1[0]);
|
||||
ctx.Code().sub(tmp.Op(), op2[0].Op());
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitISub64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg() : dest[0];
|
||||
MovGP(ctx, tmp, op1[0]);
|
||||
ctx.Code().sub(tmp.Op(), op2[0].Op());
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitSMulExt(EmitContext& ctx) {
|
||||
throw NotImplementedException("SMulExtended");
|
||||
}
|
||||
|
||||
void EmitUMulExt(EmitContext& ctx) {
|
||||
throw NotImplementedException("UMulExtended");
|
||||
}
|
||||
|
||||
void EmitIMul32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
|
||||
MovGP(ctx, tmp, op1[0]);
|
||||
ctx.Code().imul(tmp, op2[0].Op());
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitIMul64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
Reg tmp = dest[0].IsMem() ? ctx.TempGPReg() : dest[0].Reg();
|
||||
MovGP(ctx, tmp, op1[0]);
|
||||
ctx.Code().imul(tmp, op2[0].Op());
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
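// idiv/div leave the quotient in eax and the remainder in edx, so rax and rdx
// are preserved unless the destination already owns them, and the divisor is
// first moved out of those registers.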
void EmitSDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
bool rax_saved = EmitSaveRegTemp(ctx, rax, dest[0]);
|
||||
bool rdx_saved = EmitSaveRegTemp(ctx, rdx, dest[0]);
|
||||
OperandHolder tmp = op2[0];
|
||||
// The divisor must not live in rax or rdx; both are clobbered by idiv.
while (IsReg(tmp, rax) || IsReg(tmp, rdx)) {
|
||||
tmp = ctx.TempGPReg().cvt32();
|
||||
}
|
||||
MovGP(ctx, tmp, op2[0]);
|
||||
MovGP(ctx, eax, op1[0]);
ctx.Code().cdq(); // Sign-extend eax into edx; idiv divides edx:eax.
|
||||
ctx.Code().idiv(tmp.Op());
|
||||
MovGP(ctx, dest[0], eax);
|
||||
if (rdx_saved) {
|
||||
EmitRestoreRegTemp(ctx, rdx);
|
||||
}
|
||||
if (rax_saved) {
|
||||
EmitRestoreRegTemp(ctx, rax);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitUDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
bool rax_saved = EmitSaveRegTemp(ctx, rax, dest[0]);
|
||||
bool rdx_saved = EmitSaveRegTemp(ctx, rdx, dest[0]);
|
||||
OperandHolder tmp = op2[0];
|
||||
// The divisor must not live in rax or rdx; both are clobbered by div.
while (IsReg(tmp, rax) || IsReg(tmp, rdx)) {
|
||||
tmp = ctx.TempGPReg().cvt32();
|
||||
}
|
||||
MovGP(ctx, tmp, op2[0]);
|
||||
MovGP(ctx, eax, op1[0]);
ctx.Code().xor_(edx, edx); // Zero-extend the dividend; div divides edx:eax.
|
||||
ctx.Code().div(tmp.Op());
|
||||
MovGP(ctx, dest[0], eax);
|
||||
if (rdx_saved) {
|
||||
EmitRestoreRegTemp(ctx, rdx);
|
||||
}
|
||||
if (rax_saved) {
|
||||
EmitRestoreRegTemp(ctx, rax);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitSMod32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
bool rax_saved = EmitSaveRegTemp(ctx, rax, dest[0]);
|
||||
bool rdx_saved = EmitSaveRegTemp(ctx, rdx, dest[0]);
|
||||
OperandHolder tmp = op2[0];
|
||||
// The divisor must not live in rax or rdx; both are clobbered by idiv.
while (IsReg(tmp, rax) || IsReg(tmp, rdx)) {
|
||||
tmp = ctx.TempGPReg().cvt32();
|
||||
}
|
||||
MovGP(ctx, tmp, op2[0]);
|
||||
MovGP(ctx, eax, op1[0]);
ctx.Code().cdq(); // Sign-extend eax into edx; idiv divides edx:eax.
|
||||
ctx.Code().idiv(tmp.Op());
|
||||
MovGP(ctx, dest[0], edx);
|
||||
if (rdx_saved) {
|
||||
EmitRestoreRegTemp(ctx, rdx);
|
||||
}
|
||||
if (rax_saved) {
|
||||
EmitRestoreRegTemp(ctx, rax);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitUMod32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
bool rax_saved = EmitSaveRegTemp(ctx, rax, dest[0]);
|
||||
bool rdx_saved = EmitSaveRegTemp(ctx, rdx, dest[0]);
|
||||
OperandHolder tmp = op2[0];
|
||||
// The divisor must not live in rax or rdx; both are clobbered by div.
while (IsReg(tmp, rax) || IsReg(tmp, rdx)) {
|
||||
tmp = ctx.TempGPReg().cvt32();
|
||||
}
|
||||
MovGP(ctx, tmp, op2[0]);
|
||||
MovGP(ctx, eax, op1[0]);
ctx.Code().xor_(edx, edx); // Zero-extend the dividend; div divides edx:eax.
|
||||
ctx.Code().div(tmp.Op());
|
||||
MovGP(ctx, dest[0], edx);
|
||||
if (rdx_saved) {
|
||||
EmitRestoreRegTemp(ctx, rdx);
|
||||
}
|
||||
if (rax_saved) {
|
||||
EmitRestoreRegTemp(ctx, rax);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitINeg32(EmitContext& ctx, const Operands& dest, const Operands& op) {
|
||||
MovGP(ctx, dest[0], op[0]);
|
||||
ctx.Code().neg(dest[0].Op());
|
||||
}
|
||||
|
||||
void EmitINeg64(EmitContext& ctx, const Operands& dest, const Operands& op) {
|
||||
MovGP(ctx, dest[0], op[0]);
|
||||
ctx.Code().neg(dest[0].Op());
|
||||
}
|
||||
|
||||
void EmitIAbs32(EmitContext& ctx, const Operands& dest, const Operands& op) {
|
||||
Label done;
|
||||
MovGP(ctx, dest[0], op[0]);
|
||||
ctx.Code().cmp(dest[0].Op(), 0);
|
||||
ctx.Code().jns(done);
|
||||
ctx.Code().neg(dest[0].Op());
|
||||
ctx.Code().L(done);
|
||||
}
|
||||
|
||||
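// x86 variable shifts take the count in cl, so rcx is saved unless the
// destination already owns it.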
void EmitShiftLeftLogical32(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) {
|
||||
bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
|
||||
OperandHolder tmp = IsReg(dest[0], rcx) ? ctx.TempGPReg().cvt32() : dest[0];
|
||||
MovGP(ctx, tmp, base[0]);
|
||||
MovGP(ctx, cl, shift[0]);
|
||||
ctx.Code().shl(tmp.Op(), cl);
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
if (rcx_saved) {
|
||||
EmitRestoreRegTemp(ctx, rcx);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitShiftLeftLogical64(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) {
|
||||
bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
|
||||
OperandHolder tmp = IsReg(dest[0], rcx) ? ctx.TempGPReg() : dest[0];
|
||||
MovGP(ctx, tmp, base[0]);
|
||||
MovGP(ctx, cl, shift[0]);
|
||||
ctx.Code().shl(tmp.Op(), cl);
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
if (rcx_saved) {
|
||||
EmitRestoreRegTemp(ctx, rcx);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitShiftRightLogical32(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) {
|
||||
bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
|
||||
OperandHolder tmp = IsReg(dest[0], rcx) ? ctx.TempGPReg().cvt32() : dest[0];
|
||||
MovGP(ctx, tmp, base[0]);
|
||||
MovGP(ctx, cl, shift[0]);
|
||||
ctx.Code().shr(tmp.Op(), cl);
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
if (rcx_saved) {
|
||||
EmitRestoreRegTemp(ctx, rcx);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitShiftRightLogical64(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) {
|
||||
bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
|
||||
OperandHolder tmp = IsReg(dest[0], rcx) ? ctx.TempGPReg() : dest[0];
|
||||
MovGP(ctx, tmp, base[0]);
|
||||
MovGP(ctx, cl, shift[0]);
|
||||
ctx.Code().shr(tmp.Op(), cl);
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
if (rcx_saved) {
|
||||
EmitRestoreRegTemp(ctx, rcx);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitShiftRightArithmetic32(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) {
|
||||
bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
|
||||
OperandHolder tmp = IsReg(dest[0], rcx) ? ctx.TempGPReg().cvt32() : dest[0];
|
||||
MovGP(ctx, tmp, base[0]);
|
||||
MovGP(ctx, cl, shift[0]);
|
||||
ctx.Code().sar(tmp.Op(), cl);
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
if (rcx_saved) {
|
||||
EmitRestoreRegTemp(ctx, rcx);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitShiftRightArithmetic64(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) {
|
||||
bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
|
||||
OperandHolder tmp = IsReg(dest[0], rcx) ? ctx.TempGPReg() : dest[0];
|
||||
MovGP(ctx, tmp, base[0]);
|
||||
MovGP(ctx, cl, shift[0]);
|
||||
ctx.Code().sar(tmp.Op(), cl);
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
if (rcx_saved) {
|
||||
EmitRestoreRegTemp(ctx, rcx);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitBitwiseAnd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0];
|
||||
MovGP(ctx, tmp, op1[0]);
|
||||
ctx.Code().and_(tmp.Op(), op2[0].Op());
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitBitwiseAnd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg() : dest[0];
|
||||
MovGP(ctx, tmp, op1[0]);
|
||||
ctx.Code().and_(tmp.Op(), op2[0].Op());
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitBitwiseOr32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0];
|
||||
MovGP(ctx, tmp, op1[0]);
|
||||
ctx.Code().or_(tmp.Op(), op2[0].Op());
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitBitwiseOr64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg() : dest[0];
|
||||
MovGP(ctx, tmp, op1[0]);
|
||||
ctx.Code().or_(tmp.Op(), op2[0].Op());
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitBitwiseXor32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0];
|
||||
MovGP(ctx, tmp, op1[0]);
|
||||
ctx.Code().xor_(tmp.Op(), op2[0].Op());
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
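// mask = ((1 << count) - 1) << offset;
// dest = (base & ~mask) | ((insert << offset) & mask)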
void EmitBitFieldInsert(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& insert, const Operands& offset, const Operands& count) {
|
||||
bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
|
||||
OperandHolder tmp = IsReg(dest[0], rcx) ? ctx.TempGPReg().cvt32() : dest[0];
|
||||
Reg mask = ctx.TempGPReg().cvt32();
|
||||
Reg tmp2 = ctx.TempGPReg().cvt32();
|
||||
MovGP(ctx, tmp, base[0]);
|
||||
MovGP(ctx, cl, count[0]);
|
||||
MovGP(ctx, tmp2, insert[0]);
|
||||
ctx.Code().mov(mask, 1);
|
||||
ctx.Code().shl(mask, cl);
|
||||
ctx.Code().sub(mask, 1);
|
||||
MovGP(ctx, cl, offset[0]);
|
||||
ctx.Code().shl(mask, cl);
|
||||
ctx.Code().shl(tmp2, cl);
|
||||
ctx.Code().and_(tmp2, mask);
|
||||
ctx.Code().not_(mask);
|
||||
ctx.Code().and_(tmp.Op(), mask);
|
||||
ctx.Code().or_(tmp.Op(), tmp2);
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
if (rcx_saved) {
|
||||
EmitRestoreRegTemp(ctx, rcx);
|
||||
}
|
||||
}
|
||||
|
||||
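// Extracts the field to bit 0, then sign-extends it by shifting left by
// (32 - count) and arithmetic-shifting back down.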
void EmitBitFieldSExtract(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset, const Operands& count) {
|
||||
bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
|
||||
OperandHolder tmp = IsReg(dest[0], rcx) ? ctx.TempGPReg().cvt32() : dest[0];
|
||||
Reg mask = ctx.TempGPReg().cvt32();
|
||||
MovGP(ctx, tmp, base[0]);
|
||||
MovGP(ctx, cl, count[0]);
|
||||
ctx.Code().mov(mask, 1);
|
||||
ctx.Code().shl(mask, cl);
|
||||
ctx.Code().sub(mask, 1);
|
||||
MovGP(ctx, cl, offset[0]);
|
||||
ctx.Code().shl(mask, cl);
|
||||
ctx.Code().and_(tmp.Op(), mask);
|
||||
ctx.Code().shr(tmp.Op(), cl);
|
||||
ctx.Code().mov(ecx, 0x20);
|
||||
ctx.Code().sub(ecx, count[0].Op());
|
||||
ctx.Code().shl(tmp.Op(), cl);
|
||||
ctx.Code().sar(tmp.Op(), cl);
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
if (rcx_saved) {
|
||||
EmitRestoreRegTemp(ctx, rcx);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitBitFieldUExtract(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset, const Operands& count) {
|
||||
bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
|
||||
OperandHolder tmp = IsReg(dest[0], rcx) ? ctx.TempGPReg().cvt32() : dest[0];
|
||||
Reg mask = ctx.TempGPReg().cvt32();
|
||||
MovGP(ctx, tmp, base[0]);
|
||||
MovGP(ctx, cl, count[0]);
|
||||
ctx.Code().mov(mask, 1);
|
||||
ctx.Code().shl(mask, cl);
|
||||
ctx.Code().sub(mask, 1);
|
||||
MovGP(ctx, cl, offset[0]);
|
||||
ctx.Code().shl(mask, cl);
|
||||
ctx.Code().and_(tmp.Op(), mask);
|
||||
ctx.Code().shr(tmp.Op(), cl);
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
if (rcx_saved) {
|
||||
EmitRestoreRegTemp(ctx, rcx);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitBitReverse32(EmitContext& ctx) {
|
||||
throw NotImplementedException("BitReverse32");
|
||||
}
|
||||
|
||||
void EmitBitCount32(EmitContext& ctx) {
|
||||
throw NotImplementedException("BitCount32");
|
||||
}
|
||||
|
||||
void EmitBitCount64(EmitContext& ctx) {
|
||||
throw NotImplementedException("BitCount64");
|
||||
}
|
||||
|
||||
void EmitBitwiseNot32(EmitContext& ctx, const Operands& dest, const Operands& op) {
|
||||
MovGP(ctx, dest[0], op[0]);
|
||||
ctx.Code().not_(dest[0].Op());
|
||||
}
|
||||
|
||||
void EmitFindSMsb32(EmitContext& ctx) {
|
||||
throw NotImplementedException("FindSMsb32");
|
||||
}
|
||||
|
||||
void EmitFindUMsb32(EmitContext& ctx) {
|
||||
throw NotImplementedException("FindUMsb32");
|
||||
}
|
||||
|
||||
void EmitFindILsb32(EmitContext& ctx) {
|
||||
throw NotImplementedException("FindILsb32");
|
||||
}
|
||||
|
||||
void EmitFindILsb64(EmitContext& ctx) {
|
||||
throw NotImplementedException("FindILsb64");
|
||||
}
|
||||
|
||||
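// Branchless min/max: cmp, then conditionally move the other operand in.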
void EmitSMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
|
||||
MovGP(ctx, tmp, op1[0]);
|
||||
ctx.Code().cmp(tmp, op2[0].Op());
|
||||
ctx.Code().cmovg(tmp, op2[0].Op());
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitUMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
|
||||
MovGP(ctx, tmp, op1[0]);
|
||||
ctx.Code().cmp(tmp, op2[0].Op());
|
||||
ctx.Code().cmova(tmp, op2[0].Op());
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitSMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
|
||||
MovGP(ctx, tmp, op1[0]);
|
||||
ctx.Code().cmp(tmp, op2[0].Op());
|
||||
ctx.Code().cmovl(tmp, op2[0].Op());
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitUMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
|
||||
MovGP(ctx, tmp, op1[0]);
|
||||
ctx.Code().cmp(tmp, op2[0].Op());
|
||||
ctx.Code().cmovb(tmp, op2[0].Op());
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitSMinTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
|
||||
Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
|
||||
MovGP(ctx, tmp, op1[0]);
|
||||
ctx.Code().cmp(tmp, op2[0].Op());
|
||||
ctx.Code().cmovg(tmp, op2[0].Op());
|
||||
ctx.Code().cmp(tmp, op3[0].Op());
|
||||
ctx.Code().cmovg(tmp, op3[0].Op());
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitUMinTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
|
||||
Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
|
||||
MovGP(ctx, tmp, op1[0]);
|
||||
ctx.Code().cmp(tmp, op2[0].Op());
|
||||
ctx.Code().cmova(tmp, op2[0].Op());
|
||||
ctx.Code().cmp(tmp, op3[0].Op());
|
||||
ctx.Code().cmova(tmp, op3[0].Op());
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitSMaxTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
|
||||
Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
|
||||
MovGP(ctx, tmp, op1[0]);
|
||||
ctx.Code().cmp(tmp, op2[0].Op());
|
||||
ctx.Code().cmovl(tmp, op2[0].Op());
|
||||
ctx.Code().cmp(tmp, op3[0].Op());
|
||||
ctx.Code().cmovl(tmp, op3[0].Op());
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitUMaxTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
|
||||
Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
|
||||
MovGP(ctx, tmp, op1[0]);
|
||||
ctx.Code().cmp(tmp, op2[0].Op());
|
||||
ctx.Code().cmovb(tmp, op2[0].Op());
|
||||
ctx.Code().cmp(tmp, op3[0].Op());
|
||||
ctx.Code().cmovb(tmp, op3[0].Op());
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
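// Median of three: tmp2 = min(max(a, b), c), tmp = min(a, b),
// result = max(tmp, tmp2).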
void EmitSMedTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
|
||||
Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
|
||||
Reg tmp2 = ctx.TempGPReg().cvt32();
|
||||
MovGP(ctx, tmp2, op1[0]);
|
||||
ctx.Code().cmp(tmp2, op2[0].Op());
|
||||
ctx.Code().cmovl(tmp2, op2[0].Op());
|
||||
ctx.Code().cmp(tmp2, op3[0].Op());
|
||||
ctx.Code().cmovg(tmp2, op3[0].Op());
|
||||
MovGP(ctx, tmp, op1[0]);
|
||||
ctx.Code().cmp(tmp, op2[0].Op());
|
||||
ctx.Code().cmovg(tmp, op2[0].Op());
|
||||
ctx.Code().cmp(tmp, tmp2);
|
||||
ctx.Code().cmovl(tmp, tmp2);
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitUMedTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
|
||||
Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
|
||||
Reg tmp2 = ctx.TempGPReg().cvt32();
|
||||
MovGP(ctx, tmp, op1[0]);
|
||||
ctx.Code().cmp(tmp, op2[0].Op());
|
||||
ctx.Code().cmova(tmp, op2[0].Op());
|
||||
ctx.Code().cmp(tmp, op3[0].Op());
|
||||
ctx.Code().cmovb(tmp, op3[0].Op());
|
||||
MovGP(ctx, tmp2, op1[0]);
|
||||
ctx.Code().cmp(tmp2, op2[0].Op());
|
||||
ctx.Code().cmovb(tmp2, op2[0].Op());
|
||||
ctx.Code().cmp(tmp2, tmp);
|
||||
ctx.Code().cmova(tmp2, tmp);
|
||||
MovGP(ctx, dest[0], tmp2);
|
||||
}
|
||||
|
||||
void EmitSClamp32(EmitContext& ctx, const Operands& dest, const Operands& value, const Operands& min, const Operands& max) {
|
||||
Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
|
||||
MovGP(ctx, tmp, value[0]);
|
||||
ctx.Code().cmp(tmp, min[0].Op());
|
||||
ctx.Code().cmovl(tmp, min[0].Op());
|
||||
ctx.Code().cmp(tmp, max[0].Op());
|
||||
ctx.Code().cmovg(tmp, max[0].Op());
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitUClamp32(EmitContext& ctx, const Operands& dest, const Operands& value, const Operands& min, const Operands& max) {
|
||||
Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
|
||||
MovGP(ctx, tmp, value[0]);
|
||||
ctx.Code().cmp(tmp, min[0].Op());
|
||||
ctx.Code().cmovb(tmp, min[0].Op());
|
||||
ctx.Code().cmp(tmp, max[0].Op());
|
||||
ctx.Code().cmova(tmp, max[0].Op());
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
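// Comparisons emit cmp followed by setcc into the 8-bit boolean destination.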
void EmitSLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0];
|
||||
MovGP(ctx, tmp, lhs[0]);
|
||||
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
|
||||
ctx.Code().setl(dest[0].Op());
|
||||
}
|
||||
|
||||
void EmitSLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg() : lhs[0];
|
||||
MovGP(ctx, tmp, lhs[0]);
|
||||
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
|
||||
ctx.Code().setl(dest[0].Op());
|
||||
}
|
||||
|
||||
void EmitULessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0];
|
||||
MovGP(ctx, tmp, lhs[0]);
|
||||
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
|
||||
ctx.Code().setb(dest[0].Op());
|
||||
}
|
||||
|
||||
void EmitULessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg() : lhs[0];
|
||||
MovGP(ctx, tmp, lhs[0]);
|
||||
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
|
||||
ctx.Code().setb(dest[0].Op());
|
||||
}
|
||||
|
||||
void EmitIEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0];
|
||||
MovGP(ctx, tmp, lhs[0]);
|
||||
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
|
||||
ctx.Code().sete(dest[0].Op());
|
||||
}
|
||||
|
||||
void EmitIEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg() : lhs[0];
|
||||
MovGP(ctx, tmp, lhs[0]);
|
||||
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
|
||||
ctx.Code().sete(dest[0].Op());
|
||||
}
|
||||
|
||||
void EmitSLessThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0];
|
||||
MovGP(ctx, tmp, lhs[0]);
|
||||
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
|
||||
ctx.Code().setle(dest[0].Op());
|
||||
}
|
||||
|
||||
void EmitULessThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0];
|
||||
MovGP(ctx, tmp, lhs[0]);
|
||||
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
|
||||
ctx.Code().setbe(dest[0].Op());
|
||||
}
|
||||
|
||||
void EmitSGreaterThan(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0];
|
||||
MovGP(ctx, tmp, lhs[0]);
|
||||
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
|
||||
ctx.Code().setg(dest[0].Op());
|
||||
}
|
||||
|
||||
void EmitUGreaterThan(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0];
|
||||
MovGP(ctx, tmp, lhs[0]);
|
||||
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
|
||||
ctx.Code().seta(dest[0].Op());
|
||||
}
|
||||
|
||||
void EmitINotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0];
|
||||
MovGP(ctx, tmp, lhs[0]);
|
||||
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
|
||||
ctx.Code().setne(dest[0].Op());
|
||||
}
|
||||
|
||||
void EmitINotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg() : lhs[0];
|
||||
MovGP(ctx, tmp, lhs[0]);
|
||||
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
|
||||
ctx.Code().setne(dest[0].Op());
|
||||
}
|
||||
|
||||
void EmitSGreaterThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0];
|
||||
MovGP(ctx, tmp, lhs[0]);
|
||||
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
|
||||
ctx.Code().setge(dest[0].Op());
|
||||
}
|
||||
|
||||
void EmitUGreaterThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0];
|
||||
MovGP(ctx, tmp, lhs[0]);
|
||||
ctx.Code().cmp(tmp.Op(), rhs[0].Op());
|
||||
ctx.Code().setae(dest[0].Op());
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::X64
|
42
src/shader_recompiler/backend/asm_x64/emit_x64_logical.cpp
Normal file
|
@ -0,0 +1,42 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
|
||||
#include "shader_recompiler/backend/asm_x64/x64_utils.h"
|
||||
|
||||
namespace Shader::Backend::X64 {
|
||||
|
||||
using namespace Xbyak;
|
||||
using namespace Xbyak::util;
|
||||
|
||||
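// IR booleans are stored as 0/1 bytes; results are masked with `and 1` to keep
// the value canonical.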
void EmitLogicalOr(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg().cvt8() : dest[0];
|
||||
MovGP(ctx, tmp, op1[0]);
|
||||
ctx.Code().or_(tmp.Op(), op2[0].Op());
|
||||
ctx.Code().and_(tmp.Op(), 1);
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitLogicalAnd(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg().cvt8() : dest[0];
|
||||
MovGP(ctx, tmp, op1[0]);
|
||||
ctx.Code().and_(tmp.Op(), op2[0].Op());
|
||||
ctx.Code().and_(tmp.Op(), 1);
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitLogicalXor(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg().cvt8() : dest[0];
|
||||
MovGP(ctx, tmp, op1[0]);
|
||||
ctx.Code().xor_(tmp.Op(), op2[0].Op());
|
||||
ctx.Code().and_(tmp.Op(), 1);
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitLogicalNot(EmitContext& ctx, const Operands& dest, const Operands& op) {
|
||||
MovGP(ctx, dest[0], op[0]);
|
||||
ctx.Code().not_(dest[0].Op());
|
||||
ctx.Code().and_(dest[0].Op(), 1);
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::X64
|
71
src/shader_recompiler/backend/asm_x64/emit_x64_select.cpp
Normal file
|
@ -0,0 +1,71 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
|
||||
#include "shader_recompiler/backend/asm_x64/x64_utils.h"
|
||||
|
||||
namespace Shader::Backend::X64 {
|
||||
|
||||
using namespace Xbyak;
|
||||
using namespace Xbyak::util;
|
||||
|
||||
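// Either value may live in memory, so the select is emitted as a
// test-and-branch over two moves rather than a cmov.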
void EmitSelectU1(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) {
|
||||
    Label false_label, end_label;
    Reg tmp = cond[0].IsMem() ? ctx.TempGPReg().cvt8() : cond[0].Reg().cvt8();
    MovGP(ctx, tmp, cond[0]);
    ctx.Code().test(tmp, tmp);
    ctx.Code().jz(false_label);
    MovGP(ctx, dest[0], true_value[0]);
    ctx.Code().jmp(end_label);
    ctx.Code().L(false_label);
    MovGP(ctx, dest[0], false_value[0]);
    ctx.Code().L(end_label);
}

void EmitSelectU8(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) {
    EmitSelectU1(ctx, dest, cond, true_value, false_value);
}

void EmitSelectU16(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) {
    EmitSelectU1(ctx, dest, cond, true_value, false_value);
}

void EmitSelectU32(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) {
    EmitSelectU1(ctx, dest, cond, true_value, false_value);
}

void EmitSelectU64(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) {
    EmitSelectU1(ctx, dest, cond, true_value, false_value);
}

void EmitSelectF16(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) {
    EmitSelectU1(ctx, dest, cond, true_value, false_value);
}

void EmitSelectF32(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) {
    Label false_label, end_label;
    Reg tmp = cond[0].IsMem() ? ctx.TempGPReg().cvt8() : cond[0].Reg().cvt8();
    MovGP(ctx, tmp, cond[0]);
    ctx.Code().test(tmp, tmp);
    ctx.Code().jz(false_label);
    MovFloat(ctx, dest[0], true_value[0]);
    ctx.Code().jmp(end_label);
    ctx.Code().L(false_label);
    MovFloat(ctx, dest[0], false_value[0]);
    ctx.Code().L(end_label);
}

void EmitSelectF64(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) {
    Label false_label, end_label;
    Reg tmp = cond[0].IsMem() ? ctx.TempGPReg().cvt8() : cond[0].Reg().cvt8();
    MovGP(ctx, tmp, cond[0]);
    ctx.Code().test(tmp, tmp);
    ctx.Code().jz(false_label);
    MovDouble(ctx, dest[0], true_value[0]);
    ctx.Code().jmp(end_label);
    ctx.Code().L(false_label);
    MovDouble(ctx, dest[0], false_value[0]);
    ctx.Code().L(end_label);
}

} // namespace Shader::Backend::X64
@ -0,0 +1,39 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/logging/log.h"
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/exception.h"

namespace Shader::Backend::X64 {

void EmitLoadSharedU32(EmitContext& ctx, const Operands& dest, const Operands& offset) {
    LOG_WARNING(Render_Recompiler, "EmitLoadSharedU32 stubbed, setting to 0");
    if (dest[0].IsMem()) {
        ctx.Code().mov(dest[0].Mem(), 0);
    } else {
        ctx.Code().xor_(dest[0].Reg(), dest[0].Reg());
    }
}

void EmitLoadSharedU64(EmitContext& ctx, const Operands& dest, const Operands& offset) {
    LOG_WARNING(Render_Recompiler, "EmitLoadSharedU64 stubbed, setting to 0");
    if (dest[0].IsMem()) {
        ctx.Code().mov(dest[0].Mem(), 0);
    } else {
        ctx.Code().xor_(dest[0].Reg(), dest[0].Reg());
    }
    if (dest[1].IsMem()) {
        ctx.Code().mov(dest[1].Mem(), 0);
    } else {
        ctx.Code().xor_(dest[1].Reg(), dest[1].Reg());
    }
}

void EmitWriteSharedU32(EmitContext& ctx) {
    throw NotImplementedException("WriteSharedU32");
}

void EmitWriteSharedU64(EmitContext& ctx) {
    throw NotImplementedException("WriteSharedU64");
}

} // namespace Shader::Backend::X64
55 src/shader_recompiler/backend/asm_x64/emit_x64_special.cpp Normal file
@ -0,0 +1,55 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/backend/asm_x64/x64_utils.h"

namespace Shader::Backend::X64 {

using namespace Xbyak;
using namespace Xbyak::util;

void EmitPrologue(EmitContext& ctx) {
    ctx.Prologue();
}

void ConvertDepthMode(EmitContext& ctx) {}

void ConvertPositionToClipSpace(EmitContext& ctx) {}

void EmitEpilogue(EmitContext& ctx) {
    ctx.SetEndFlag();
}

void EmitDiscard(EmitContext& ctx) {
    ctx.SetEndFlag();
}

void EmitDiscardCond(EmitContext& ctx, const Operands& condition) {
    Reg tmp = condition[0].IsMem() ? ctx.TempGPReg().cvt8() : condition[0].Reg().cvt8();
    MovGP(ctx, tmp, condition[0]);
    ctx.Code().test(tmp, tmp);
    ctx.Code().jnz(ctx.EndLabel());
}

void EmitEmitVertex(EmitContext& ctx) {}

void EmitEmitPrimitive(EmitContext& ctx) {}

void EmitEndPrimitive(EmitContext& ctx) {}

void EmitDebugPrint(EmitContext& ctx) {}

} // namespace Shader::Backend::X64
28 src/shader_recompiler/backend/asm_x64/emit_x64_undefined.cpp Normal file
@ -0,0 +1,28 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"

namespace Shader::Backend::X64 {

void EmitUndefU1(EmitContext&) {
    UNREACHABLE_MSG("x64 Instruction");
}

void EmitUndefU8(EmitContext&) {
    UNREACHABLE_MSG("x64 Instruction");
}

void EmitUndefU16(EmitContext&) {
    UNREACHABLE_MSG("x64 Instruction");
}

void EmitUndefU32(EmitContext&) {
    UNREACHABLE_MSG("x64 Instruction");
}

void EmitUndefU64(EmitContext&) {
    UNREACHABLE_MSG("x64 Instruction");
}

} // namespace Shader::Backend::X64
32 src/shader_recompiler/backend/asm_x64/emit_x64_warp.cpp Normal file
@ -0,0 +1,32 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"

namespace Shader::Backend::X64 {

void EmitWarpId(EmitContext& ctx) {}

void EmitLaneId(EmitContext& ctx) {}

void EmitQuadShuffle(EmitContext& ctx) {}

void EmitReadFirstLane(EmitContext& ctx) {}

void EmitReadLane(EmitContext& ctx) {}

void EmitWriteLane(EmitContext& ctx) {}

} // namespace Shader::Backend::X64
372 src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp Normal file
@ -0,0 +1,372 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/backend/asm_x64/x64_utils.h"

using namespace Xbyak;
using namespace Xbyak::util;

namespace Shader::Backend::X64 {

EmitContext::EmitContext(const IR::Program& program_, Xbyak::CodeGenerator& code_)
    : program(program_), code(code_) {
    for (IR::Block* block : program.blocks) {
        block_labels[block] = {};
    }
    AllocateRegisters();
}
Reg64& EmitContext::TempGPReg(bool reserve) {
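    // Hands out the next temporary GP register; registers beyond the scratch
    // set are callee-saved, so they are saved once here and restored in
    // Epilogue().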
    ASSERT(temp_gp_reg_index < temp_gp_regs.size());
    u64 idx = temp_gp_reg_index;
    if (reserve) {
        temp_gp_reg_index++;
    }
    Reg64& reg = temp_gp_regs[idx];
    // Indices at or beyond the scratch count refer to callee-saved registers.
    if (idx >= num_scratch_gp_regs &&
        std::ranges::find(preserved_regs, reg) == preserved_regs.end()) {
        preserved_regs.push_back(reg);
        code.sub(rsp, 8);
        code.mov(ptr[rsp], reg);
    }
    return reg;
}

Xmm& EmitContext::TempXmmReg(bool reserve) {
    ASSERT(temp_xmm_reg_index < temp_xmm_regs.size());
    u64 idx = temp_xmm_reg_index;
    if (reserve) {
        temp_xmm_reg_index++;
    }
    Xmm& reg = temp_xmm_regs[idx];
    // Indices at or beyond the scratch count refer to callee-saved registers.
    if (idx >= num_scratch_xmm_regs &&
        std::ranges::find(preserved_regs, reg) == preserved_regs.end()) {
        preserved_regs.push_back(reg);
        code.sub(rsp, 16);
        code.movups(ptr[rsp], reg);
    }
    return reg;
}

void EmitContext::PopTempGPReg() {
    ASSERT(temp_gp_reg_index > 0);
    temp_gp_reg_index--;
}

void EmitContext::PopTempXmmReg() {
    ASSERT(temp_xmm_reg_index > 0);
    temp_xmm_reg_index--;
}

void EmitContext::ResetTempRegs() {
    temp_gp_reg_index = 0;
    temp_xmm_reg_index = 0;
}

const Operands& EmitContext::Def(IR::Inst* inst) {
    return inst_to_operands.at(inst);
}

Operands EmitContext::Def(const IR::Value& value) {
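    // Immediates have no pre-assigned operands, so they are materialized into
    // freshly reserved temporary registers on demand.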
    if (!value.IsImmediate()) {
        return Def(value.InstRecursive());
    }
    Operands operands;
    Reg64& tmp = TempGPReg(false);
    switch (value.Type()) {
    case IR::Type::U1:
        operands.push_back(TempGPReg().cvt8());
        code.mov(operands.back().Reg(), value.U1());
        break;
    case IR::Type::U8:
        operands.push_back(TempGPReg().cvt8());
        code.mov(operands.back().Reg(), value.U8());
        break;
    case IR::Type::U16:
        operands.push_back(TempGPReg().cvt16());
        code.mov(operands.back().Reg(), value.U16());
        break;
    case IR::Type::U32:
        operands.push_back(TempGPReg().cvt32());
        code.mov(operands.back().Reg(), value.U32());
        break;
    case IR::Type::F32: {
        code.mov(tmp.cvt32(), std::bit_cast<u32>(value.F32()));
        Xmm& xmm32 = TempXmmReg();
        code.movd(xmm32, tmp.cvt32());
        operands.push_back(xmm32);
        break;
    }
    case IR::Type::U64:
        operands.push_back(TempGPReg());
        code.mov(operands.back().Reg(), value.U64());
        break;
    case IR::Type::F64: {
        code.mov(tmp, std::bit_cast<u64>(value.F64()));
        Xmm& xmm64 = TempXmmReg();
        code.movq(xmm64, tmp);
        operands.push_back(xmm64);
        break;
    }
    case IR::Type::ScalarReg:
        operands.push_back(TempGPReg().cvt32());
        code.mov(operands.back().Reg(), std::bit_cast<u32>(value.ScalarReg()));
        break;
    case IR::Type::VectorReg:
        // Vector register indices are plain integers, so they live in a GP
        // temporary (an XMM register cannot be narrowed to 32 bits here).
        operands.push_back(TempGPReg().cvt32());
        code.mov(operands.back().Reg(), std::bit_cast<u32>(value.VectorReg()));
        break;
    case IR::Type::Attribute:
        operands.push_back(TempGPReg());
        code.mov(operands.back().Reg(), std::bit_cast<u64>(value.Attribute()));
        break;
    case IR::Type::Patch:
        operands.push_back(TempGPReg());
        code.mov(operands.back().Reg(), std::bit_cast<u64>(value.Patch()));
        break;
    default:
        UNREACHABLE_MSG("Unsupported value type: {}", IR::NameOf(value.Type()));
        break;
    }
    return operands;
}

std::optional<std::reference_wrapper<const EmitContext::PhiAssignmentList>>
EmitContext::PhiAssignments(IR::Block* block) const {
    auto it = phi_assignments.find(block);
    if (it != phi_assignments.end()) {
        return std::cref(it->second);
    }
    return std::nullopt;
}

void EmitContext::Prologue() {
    if (inst_stack_space > 0) {
        code.sub(rsp, inst_stack_space);
        code.mov(r11, rsp);
    }
}

void EmitContext::Epilogue() {
    for (auto it = preserved_regs.rbegin(); it != preserved_regs.rend(); ++it) {
        Reg& reg = *it;
        if (reg.isXMM()) { // XMM saves are 16 bytes wide
            code.movups(reg.cvt128(), ptr[rsp]);
            code.add(rsp, 16);
        } else {
            code.mov(reg, ptr[rsp]);
            code.add(rsp, 8);
        }
    }
    preserved_regs.clear();
    if (inst_stack_space > 0) {
        code.add(rsp, inst_stack_space);
    }
}

void EmitContext::SpillInst(RegAllocContext& ctx, const ActiveInstInterval& interval,
                            ActiveIntervalList& active_intervals) {
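    // Linear-scan style spilling: evict the active interval that ends last,
    // unless the incoming interval outlives all of them, in which case the new
    // value itself is placed on the stack.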
    const auto get_operand = [&](IR::Inst* inst) -> Address {
        size_t current_sp = inst_stack_space;
        inst_stack_space += 8;
        switch (GetRegBytesOfType(IR::Value(inst))) {
        case 1:
            return byte[r11 + current_sp];
        case 2:
            return word[r11 + current_sp];
        case 4:
            return dword[r11 + current_sp];
        case 8:
            return qword[r11 + current_sp];
        default:
            UNREACHABLE_MSG("Unsupported register size: {}", GetRegBytesOfType(inst));
            return ptr[r11 + current_sp];
        }
    };
    auto spill_candidate = std::max_element(
        active_intervals.begin(), active_intervals.end(),
        [](const ActiveInstInterval& a, const ActiveInstInterval& b) { return a.end < b.end; });
    if (spill_candidate == active_intervals.end() || spill_candidate->end <= interval.start) {
        inst_to_operands[interval.inst][interval.component] = get_operand(interval.inst);
    } else {
        Operands& operands = inst_to_operands[spill_candidate->inst];
        OperandHolder op = operands[spill_candidate->component];
        inst_to_operands[interval.inst][interval.component] =
            op.IsXmm() ? op : ResizeRegToType(op.Reg(), interval.inst);
        operands[spill_candidate->component] = get_operand(spill_candidate->inst);
        *spill_candidate = interval;
    }
}

void EmitContext::AdjustInstInterval(InstInterval& interval, const FlatInstList& insts) {
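    // Computes the live range [first position, last use] of the instruction
    // over the flattened instruction list, widening it for phi nodes and for
    // uses that sit inside deeper loops than the definition.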
    IR::Inst* inst = interval.inst;
    size_t dist = std::distance(insts.begin(), std::find(insts.begin(), insts.end(), inst));
    interval.start = dist;
    interval.end = dist;
    const auto enlarge_interval = [&](IR::Inst* inst) {
        size_t position = std::distance(insts.begin(), std::find(insts.begin(), insts.end(), inst));
        interval.start = std::min(interval.start, position);
        interval.end = std::max(interval.end, position);
    };
    for (const auto& use : inst->Uses()) {
        IR::Inst* target_inst = use.user;
        if (use.user->GetOpcode() == IR::Opcode::Phi) {
            // We assign the value at the end of the phi block
            target_inst = &use.user->PhiBlock(use.operand)->back();
        }
        // If the user is in a loop and the instruction is not, we need to extend the interval
        // to the end of the loop
        u32 target_depth = inst->GetParent()->CondData().depth;
        const auto* cond_data = &target_inst->GetParent()->CondData();
        const IR::AbstractSyntaxNode* target_loop = nullptr;
        while (cond_data && cond_data->depth > target_depth) {
            if (cond_data->asl_node->type == IR::AbstractSyntaxNode::Type::Loop) {
                target_loop = cond_data->asl_node;
            }
            cond_data = cond_data->parent;
        }
        if (target_loop) {
            IR::Block* cont_block = target_loop->data.loop.continue_block;
            target_inst = &cont_block->back();
            ASSERT(target_inst->GetOpcode() == IR::Opcode::ConditionRef);
        }
        enlarge_interval(target_inst);
    }
    if (inst->GetOpcode() == IR::Opcode::Phi) {
        for (size_t i = 0; i < inst->NumArgs(); i++) {
            IR::Block* block = inst->PhiBlock(i);
            enlarge_interval(&block->back());
            phi_assignments[block].emplace_back(inst, inst->Arg(i));
        }
        // Extend to predecessors
        // Phis in loop headers need to extend to the end of the loop
        for (IR::Block* pred : inst->GetParent()->ImmPredecessors()) {
            IR::Inst* last_inst = &pred->back();
            if (last_inst->GetOpcode() == IR::Opcode::ConditionRef) {
                enlarge_interval(last_inst);
            }
        }
    }
}

// Register utilization:
// Instruction registers:
// General purpose registers: rcx, rdx, rsi, r8, r9, r10
// XMM registers: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6
//
// Value / temporary registers:
// General purpose registers: rax (scratch), rbx, r12, r13, r14, r15
// XMM registers: xmm7 (scratch), xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14,
// xmm15
//
// r11: Stack pointer for spilled instructions
// rdi: User data pointer
// rsp: Stack pointer
//
// Instruction registers that are never used serve as extra temporary registers.
void EmitContext::AllocateRegisters() {
    const std::array<Reg64, 6> initial_gp_inst_regs = {rcx, rdx, rsi, r8, r9, r10};
    const std::array<Xmm, 7> initial_xmm_inst_regs = {xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6};
    const std::array<Reg64, 6> initial_gp_temp_regs = {rax, rbx, r12, r13, r14, r15};
    const std::array<Xmm, 9> initial_xmm_temp_regs = {xmm7, xmm8, xmm9, xmm10, xmm11,
                                                      xmm12, xmm13, xmm14, xmm15};

    boost::container::small_vector<InstInterval, 64> intervals;
    FlatInstList insts;
    // We copy insts to the flat list for faster iteration
    for (IR::Block* block : program.blocks) {
        insts.reserve(insts.size() + block->size());
        for (IR::Inst& inst : *block) {
            insts.push_back(&inst);
        }
    }
    for (IR::Inst* inst : insts) {
        if (inst->GetOpcode() == IR::Opcode::ConditionRef || inst->Type() == IR::Type::Void) {
            continue;
        }
        intervals.emplace_back(inst, 0, 0);
        AdjustInstInterval(intervals.back(), insts);
    }
    std::sort(intervals.begin(), intervals.end(),
              [](const InstInterval& a, const InstInterval& b) { return a.start < b.start; });
    RegAllocContext ctx;
    ctx.free_gp_regs.insert(ctx.free_gp_regs.end(), initial_gp_inst_regs.begin(),
                            initial_gp_inst_regs.end());
    ctx.free_xmm_regs.insert(ctx.free_xmm_regs.end(), initial_xmm_inst_regs.begin(),
                             initial_xmm_inst_regs.end());
    boost::container::static_vector<Reg64, 6> unused_gp_inst_regs;
    boost::container::static_vector<Xmm, 7> unused_xmm_inst_regs;
    unused_gp_inst_regs.insert(unused_gp_inst_regs.end(), ctx.free_gp_regs.begin(),
                               ctx.free_gp_regs.end());
    unused_xmm_inst_regs.insert(unused_xmm_inst_regs.end(), ctx.free_xmm_regs.begin(),
                                ctx.free_xmm_regs.end());
    for (const InstInterval& interval : intervals) {
        // Free old interval resources
        for (auto it = ctx.active_gp_intervals.begin(); it != ctx.active_gp_intervals.end();) {
            if (it->end < interval.start) {
                Reg64 reg = inst_to_operands[it->inst][it->component].Reg().cvt64();
                ctx.free_gp_regs.push_back(reg);
                it = ctx.active_gp_intervals.erase(it);
            } else {
                ++it;
            }
        }
        for (auto it = ctx.active_xmm_intervals.begin(); it != ctx.active_xmm_intervals.end();) {
            if (it->end < interval.start) {
                Xmm reg = inst_to_operands[it->inst][it->component].Xmm();
                ctx.free_xmm_regs.push_back(reg);
                it = ctx.active_xmm_intervals.erase(it);
            } else {
                ++it;
            }
        }
        u8 num_components = GetNumComponentsOfType(interval.inst);
        bool is_floating = IsFloatingType(interval.inst);
        auto& operands = inst_to_operands[interval.inst];
        operands.resize(num_components);
        if (is_floating) {
            for (size_t i = 0; i < num_components; ++i) {
                ActiveInstInterval active(interval, i);
                if (!ctx.free_xmm_regs.empty()) {
                    Xmm& reg = ctx.free_xmm_regs.back();
                    ctx.free_xmm_regs.pop_back();
                    operands[active.component] = reg;
                    unused_xmm_inst_regs.erase(
                        std::remove(unused_xmm_inst_regs.begin(), unused_xmm_inst_regs.end(), reg),
                        unused_xmm_inst_regs.end());
                    ctx.active_xmm_intervals.push_back(active);
                } else {
                    SpillInst(ctx, active, ctx.active_xmm_intervals);
                }
            }
        } else {
            for (size_t i = 0; i < num_components; ++i) {
                ActiveInstInterval active(interval, i);
                if (!ctx.free_gp_regs.empty()) {
                    Reg64& reg = ctx.free_gp_regs.back();
                    ctx.free_gp_regs.pop_back();
                    operands[active.component] = ResizeRegToType(reg, active.inst);
                    unused_gp_inst_regs.erase(
                        std::remove(unused_gp_inst_regs.begin(), unused_gp_inst_regs.end(), reg),
                        unused_gp_inst_regs.end());
                    ctx.active_gp_intervals.push_back(active);
                } else {
                    SpillInst(ctx, active, ctx.active_gp_intervals);
                }
            }
        }
    }
    temp_gp_regs.insert(temp_gp_regs.end(), unused_gp_inst_regs.begin(), unused_gp_inst_regs.end());
    temp_xmm_regs.insert(temp_xmm_regs.end(), unused_xmm_inst_regs.begin(),
                         unused_xmm_inst_regs.end());
    num_scratch_gp_regs = unused_gp_inst_regs.size() + 1;   // rax is scratch
    num_scratch_xmm_regs = unused_xmm_inst_regs.size() + 1; // xmm7 is scratch
    temp_gp_regs.insert(temp_gp_regs.end(), initial_gp_temp_regs.begin(),
                        initial_gp_temp_regs.end());
    temp_xmm_regs.insert(temp_xmm_regs.end(), initial_xmm_temp_regs.begin(),
                         initial_xmm_temp_regs.end());
}

} // namespace Shader::Backend::X64
199 src/shader_recompiler/backend/asm_x64/x64_emit_context.h Normal file
@ -0,0 +1,199 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <boost/container/flat_map.hpp>
#include <boost/container/small_vector.hpp>
#include <boost/container/static_vector.hpp>
#include <xbyak/xbyak.h>
#include <xbyak/xbyak_util.h>
#include "shader_recompiler/ir/program.h"

namespace Shader::Backend::X64 {

class OperandHolder {
public:
    OperandHolder() : op() {}
    OperandHolder(const OperandHolder&) = default;
    OperandHolder(OperandHolder&&) = default;
    OperandHolder& operator=(const OperandHolder&) = default;
    OperandHolder& operator=(OperandHolder&&) = default;

    OperandHolder(const Xbyak::Reg& reg_) : reg(reg_) {}
    OperandHolder(const Xbyak::Xmm& xmm_) : xmm(xmm_) {}
    OperandHolder(const Xbyak::Address& mem_) : mem(mem_) {}
    OperandHolder(const Xbyak::Operand& op_) : op(op_) {}

    [[nodiscard]] inline Xbyak::Operand& Op() {
        return op;
    }

    [[nodiscard]] inline const Xbyak::Operand& Op() const {
        return op;
    }

    [[nodiscard]] inline Xbyak::Reg& Reg() {
        ASSERT(IsReg());
        return reg;
    }

    [[nodiscard]] inline const Xbyak::Reg& Reg() const {
        ASSERT(IsReg());
        return reg;
    }

    [[nodiscard]] inline Xbyak::Xmm& Xmm() {
        ASSERT(IsXmm());
        return xmm;
    }

    [[nodiscard]] inline const Xbyak::Xmm& Xmm() const {
        ASSERT(IsXmm());
        return xmm;
    }

    [[nodiscard]] inline Xbyak::Address& Mem() {
        ASSERT(IsMem());
        return mem;
    }

    [[nodiscard]] inline const Xbyak::Address& Mem() const {
        ASSERT(IsMem());
        return mem;
    }

    [[nodiscard]] inline bool IsReg() const {
        return op.isREG();
    }

    [[nodiscard]] inline bool IsXmm() const {
        return op.isXMM();
    }

    [[nodiscard]] inline bool IsMem() const {
        return op.isMEM();
    }

private:
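    // Every Xbyak operand type derives from Xbyak::Operand and keeps its kind
    // in the common base, so this union together with the Is*() checks above
    // behaves like a tagged variant without a separate discriminator.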
    union {
        Xbyak::Operand op;
        Xbyak::Reg reg;
        Xbyak::Xmm xmm;
        Xbyak::Address mem;
    };
};

using Operands = boost::container::static_vector<OperandHolder, 4>;

class EmitContext {
public:
    static constexpr size_t NumGPRegs = 16;
    static constexpr size_t NumXmmRegs = 16;

    using PhiAssignmentList = boost::container::small_vector<std::pair<IR::Inst*, IR::Value>, 4>;

    EmitContext(const IR::Program& program_, Xbyak::CodeGenerator& code_);

    [[nodiscard]] Xbyak::CodeGenerator& Code() const {
        return code;
    }

    [[nodiscard]] const IR::Program& Program() const {
        return program;
    }

    [[nodiscard]] Xbyak::Label& EndLabel() {
        return end_label;
    }

    [[nodiscard]] Xbyak::Label& BlockLabel(IR::Block* block) {
        return block_labels.at(block);
    }

    void SetEndFlag() {
        end_flag = true;
    }

    [[nodiscard]] bool EndFlag() {
        bool flag = end_flag;
        end_flag = false;
        return flag;
    }

    [[nodiscard]] Xbyak::Reg64& TempGPReg(bool reserve = true);
    [[nodiscard]] Xbyak::Xmm& TempXmmReg(bool reserve = true);
    void PopTempGPReg();
    void PopTempXmmReg();
    void ResetTempRegs();

    [[nodiscard]] const Xbyak::Reg64& UserData() const {
        return Xbyak::util::rdi;
    }

    [[nodiscard]] const Operands& Def(IR::Inst* inst);
    [[nodiscard]] Operands Def(const IR::Value& value);
    [[nodiscard]] std::optional<std::reference_wrapper<const EmitContext::PhiAssignmentList>>
    PhiAssignments(IR::Block* block) const;

    void Prologue();
    void Epilogue();

private:
    struct InstInterval {
        IR::Inst* inst;
        size_t start;
        size_t end;
    };

    struct ActiveInstInterval : InstInterval {
        size_t component;

        ActiveInstInterval(const InstInterval& interval, size_t component_)
            : InstInterval(interval), component(component_) {}
    };
    using ActiveIntervalList = boost::container::small_vector<ActiveInstInterval, 8>;

    struct RegAllocContext {
        boost::container::static_vector<Xbyak::Reg64, NumGPRegs> free_gp_regs;
        boost::container::static_vector<Xbyak::Xmm, NumXmmRegs> free_xmm_regs;
        ActiveIntervalList active_gp_intervals;
        ActiveIntervalList active_xmm_intervals;
    };

    using FlatInstList = boost::container::small_vector<IR::Inst*, 64>;

    const IR::Program& program;
    Xbyak::CodeGenerator& code;

    // Map of blocks to their phi assignments
    boost::container::small_flat_map<IR::Block*, PhiAssignmentList, 8> phi_assignments;

    // Map of instructions to their operands
    boost::container::small_flat_map<IR::Inst*, Operands, 64> inst_to_operands;

    // Space used for spilled instructions
    size_t inst_stack_space = 0;

    // Temporary register allocation
    boost::container::static_vector<Xbyak::Reg64, NumGPRegs> temp_gp_regs;
    boost::container::static_vector<Xbyak::Xmm, NumXmmRegs> temp_xmm_regs;
    size_t temp_gp_reg_index = 0;
    size_t temp_xmm_reg_index = 0;
    size_t num_scratch_gp_regs = 0;
    size_t num_scratch_xmm_regs = 0;

    // Preserved registers
    boost::container::static_vector<Xbyak::Reg, NumGPRegs + NumXmmRegs> preserved_regs;

    // Labels
    boost::container::small_flat_map<IR::Block*, Xbyak::Label, 8> block_labels;
    Xbyak::Label end_label;

    // End flag, used to defer jump to end label
    bool end_flag = false;

    void SpillInst(RegAllocContext& ctx, const ActiveInstInterval& interval,
                   ActiveIntervalList& active_intervals);
    void AdjustInstInterval(InstInterval& interval, const FlatInstList& insts);
    void AllocateRegisters();
};

} // namespace Shader::Backend::X64
403 src/shader_recompiler/backend/asm_x64/x64_utils.cpp Normal file
@ -0,0 +1,403 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shader_recompiler/backend/asm_x64/x64_utils.h"

using namespace Xbyak;
using namespace Xbyak::util;

namespace Shader::Backend::X64 {

bool IsFloatingType(const IR::Value& value) {
    // We store F16 on general purpose registers since we don't do
    // arithmetic on them
    IR::Type type = value.Type();
    return type == IR::Type::F32 || type == IR::Type::F64;
}

size_t GetRegBytesOfType(const IR::Value& value) {
    switch (value.Type()) {
    case IR::Type::U1:
    case IR::Type::U8:
        return 1;
    case IR::Type::U16:
    case IR::Type::F16:
    case IR::Type::F16x2:
    case IR::Type::F16x3:
    case IR::Type::F16x4:
        return 2;
    case IR::Type::U32:
    case IR::Type::U32x2:
    case IR::Type::U32x3:
    case IR::Type::U32x4:
    case IR::Type::F32:
    case IR::Type::F32x2:
    case IR::Type::F32x3:
    case IR::Type::F32x4:
    case IR::Type::ScalarReg:
    case IR::Type::VectorReg:
        return 4;
    case IR::Type::U64:
    case IR::Type::F64:
    case IR::Type::F64x2:
    case IR::Type::F64x3:
    case IR::Type::F64x4:
    case IR::Type::Attribute:
    case IR::Type::Patch:
        return 8;
    default:
        break;
    }
    UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(value.Type()));
    return 0;
}

u8 GetNumComponentsOfType(const IR::Value& value) {
    switch (value.Type()) {
    case IR::Type::U1:
    case IR::Type::U8:
    case IR::Type::U16:
    case IR::Type::F16:
    case IR::Type::U32:
    case IR::Type::F32:
    case IR::Type::U64:
    case IR::Type::F64:
    case IR::Type::ScalarReg:
    case IR::Type::VectorReg:
    case IR::Type::Attribute:
    case IR::Type::Patch:
        return 1;
    case IR::Type::U32x2:
    case IR::Type::F32x2:
    case IR::Type::F16x2:
    case IR::Type::F64x2:
        return 2;
    case IR::Type::U32x3:
    case IR::Type::F32x3:
    case IR::Type::F16x3:
    case IR::Type::F64x3:
        return 3;
    case IR::Type::U32x4:
    case IR::Type::F32x4:
    case IR::Type::F16x4:
    case IR::Type::F64x4:
        return 4;
    default:
        break;
    }
    UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(value.Type()));
    return 0;
}

Reg ResizeRegToType(const Reg& reg, const IR::Value& value) {
    ASSERT(reg.getKind() == Operand::Kind::REG);
    switch (GetRegBytesOfType(value)) {
    case 1:
        return reg.cvt8();
    case 2:
        return reg.cvt16();
    case 4:
        return reg.cvt32();
    case 8:
        return reg.cvt64();
    default:
        break;
    }
    UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(value.Type()));
    return reg;
}

void MovFloat(EmitContext& ctx, const OperandHolder& dst, const OperandHolder& src) {
    CodeGenerator& c = ctx.Code();
    if (src.Op() == dst.Op()) {
        return;
    }
    if (src.IsMem() && dst.IsMem()) {
        Reg tmp = ctx.TempGPReg(false).cvt32();
        c.mov(tmp, src.Mem());
        c.mov(dst.Mem(), tmp);
    } else if (src.IsMem() && dst.IsXmm()) {
        c.movss(dst.Xmm(), src.Mem());
    } else if (src.IsXmm() && dst.IsMem()) {
        c.movss(dst.Mem(), src.Xmm());
    } else if (src.IsXmm() && dst.IsXmm()) {
        c.movaps(dst.Xmm(), src.Xmm());
    } else {
        UNREACHABLE_MSG("Unsupported mov float {} {}", src.Op().toString(), dst.Op().toString());
    }
}

void MovDouble(EmitContext& ctx, const OperandHolder& dst, const OperandHolder& src) {
    CodeGenerator& c = ctx.Code();
    if (src.Op() == dst.Op()) {
        return;
    }
    if (src.IsMem() && dst.IsMem()) {
        const Reg64& tmp = ctx.TempGPReg(false);
        c.mov(tmp, src.Mem());
        c.mov(dst.Mem(), tmp);
    } else if (src.IsMem() && dst.IsXmm()) {
        c.movsd(dst.Xmm(), src.Mem());
    } else if (src.IsXmm() && dst.IsMem()) {
        c.movsd(dst.Mem(), src.Xmm());
    } else if (src.IsXmm() && dst.IsXmm()) {
        c.movapd(dst.Xmm(), src.Xmm());
    } else {
        UNREACHABLE_MSG("Unsupported mov double {} {}", src.Op().toString(), dst.Op().toString());
    }
}

void MovGP(EmitContext& ctx, const OperandHolder& dst, const OperandHolder& src) {
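    // General-purpose move that tolerates mismatched operand widths:
    // zero-extend when widening, truncate when narrowing, and bounce through a
    // temporary register for memory-to-memory transfers.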
    CodeGenerator& c = ctx.Code();
    if (src.Op() == dst.Op()) {
        return;
    }
    const bool is_mem2mem = src.IsMem() && dst.IsMem();
    const u32 src_bit = src.Op().getBit();
    const u32 dst_bit = dst.Op().getBit();
    OperandHolder tmp = is_mem2mem ? ctx.TempGPReg(false).changeBit(dst_bit) : dst;
    if (src_bit < dst_bit) {
        if (!tmp.IsMem() && !src.Op().isBit(32)) {
            c.movzx(tmp.Reg(), src.Op());
        } else if (tmp.IsMem()) {
            Address addr = tmp.Mem();
            c.mov(addr, 0);
            // Store the source at its own width; the wider destination was zeroed above.
            addr.setBit(src_bit);
            c.mov(addr, src.Reg());
        } else {
            c.mov(tmp.Reg().cvt32(), src.Op());
        }
    } else if (src_bit > dst_bit) {
        OperandHolder src_tmp = src;
        src_tmp.Op().setBit(dst_bit);
        c.mov(tmp.Op(), src_tmp.Op());
    } else {
        c.mov(tmp.Op(), src.Op());
    }
    if (is_mem2mem) {
        c.mov(dst.Op(), tmp.Op());
    }
}

void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src) {
    if (!src.IsImmediate()) {
        IR::Inst* src_inst = src.InstRecursive();
        const Operands& src_op = ctx.Def(src_inst);
        if (IsFloatingType(src)) {
            // GetRegBytesOfType reports bytes, so F32 is 4 and F64 is 8.
            switch (GetRegBytesOfType(src)) {
            case 4:
                for (size_t i = 0; i < src_op.size(); i++) {
                    MovFloat(ctx, dst[i], src_op[i]);
                }
                break;
            case 8:
                for (size_t i = 0; i < src_op.size(); i++) {
                    MovDouble(ctx, dst[i], src_op[i]);
                }
                break;
            default:
                UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(src.Type()));
                break;
            }
        } else {
            for (size_t i = 0; i < src_op.size(); i++) {
                MovGP(ctx, dst[i], src_op[i]);
            }
        }
    } else {
        CodeGenerator& c = ctx.Code();
        const bool is_mem = dst[0].IsMem();
        Reg64& tmp = ctx.TempGPReg(false);
        switch (src.Type()) {
        case IR::Type::U1:
            c.mov(is_mem ? tmp.cvt8() : dst[0].Reg(), src.U1());
            break;
        case IR::Type::U8:
            c.mov(is_mem ? tmp.cvt8() : dst[0].Reg(), src.U8());
            break;
        case IR::Type::U16:
            c.mov(is_mem ? tmp.cvt16() : dst[0].Reg(), src.U16());
            break;
        case IR::Type::U32:
            c.mov(is_mem ? tmp.cvt32() : dst[0].Reg(), src.U32());
            break;
        case IR::Type::F32:
            // bit_cast, not static_cast: the bit pattern must be preserved.
            c.mov(tmp.cvt32(), std::bit_cast<u32>(src.F32()));
            if (!is_mem) {
                c.movd(dst[0].Xmm(), tmp.cvt32());
                return;
            }
            break;
        case IR::Type::U64:
            c.mov(is_mem ? tmp : dst[0].Reg(), src.U64());
            break;
        case IR::Type::F64:
            c.mov(tmp, std::bit_cast<u64>(src.F64()));
            if (!is_mem) {
                c.movq(dst[0].Xmm(), tmp);
                return;
            }
            break;
        case IR::Type::ScalarReg:
            c.mov(is_mem ? tmp.cvt32() : dst[0].Reg(), static_cast<u32>(src.ScalarReg()));
            break;
        case IR::Type::VectorReg:
            c.mov(is_mem ? tmp.cvt32() : dst[0].Reg(), static_cast<u32>(src.VectorReg()));
            break;
        case IR::Type::Attribute:
            c.mov(is_mem ? tmp : dst[0].Reg(), std::bit_cast<u64>(src.Attribute()));
            break;
        case IR::Type::Patch:
            c.mov(is_mem ? tmp : dst[0].Reg(), std::bit_cast<u64>(src.Patch()));
            break;
        default:
            UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(src.Type()));
            break;
        }
        if (is_mem) {
            // Match the store width to the value so the memory operand sizes agree.
            c.mov(dst[0].Mem(), ResizeRegToType(tmp, src));
        }
    }
}

void EmitInlineF16ToF32(EmitContext& ctx, const Operand& dest, const Operand& src) {
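    // Software half-to-single conversion (no F16C dependency): unpack the
    // sign/exponent/mantissa fields, renormalize subnormals, then reassemble
    // the value as an IEEE 754 binary32.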
    CodeGenerator& c = ctx.Code();
    Label nonzero_exp, zero_mantissa, norm_loop, norm_done, normal, done;
    Reg sign = ctx.TempGPReg().cvt32();
    Reg exponent = ctx.TempGPReg().cvt32();
    Reg mantissa = ctx.TempGPReg().cvt32();

    c.movzx(mantissa, src);

    // Extract sign, exponent, and mantissa
    c.mov(sign, mantissa);
    c.and_(sign, 0x8000);
    c.shl(sign, 16);
    c.mov(exponent, mantissa);
    c.and_(exponent, 0x7C00);
    c.shr(exponent, 10);
    c.and_(mantissa, 0x03FF);

    // Check for zero exponent and mantissa
    c.test(exponent, exponent);
    c.jnz(nonzero_exp);
    c.test(mantissa, mantissa);
    c.jz(zero_mantissa);

    // Normalize subnormal number
    c.mov(exponent, 1);
    c.L(norm_loop);
    c.test(mantissa, 0x400);
    c.jnz(norm_done);
    c.shl(mantissa, 1);
    c.dec(exponent);
    c.jmp(norm_loop);
    c.L(norm_done);
    c.and_(mantissa, 0x03FF);
    c.jmp(normal);

    // Zero mantissa: the result is just the (shifted) sign bit
    c.L(zero_mantissa);
    c.mov(mantissa, sign);
    c.jmp(done);

    // Non-zero exponent
    c.L(nonzero_exp);
    c.cmp(exponent, 0x1F);
    c.jne(normal);

    // Infinite or NaN
    c.shl(mantissa, 13);
    c.or_(mantissa, sign);
    c.or_(mantissa, 0x7F800000);
    c.jmp(done);

    // Normal number
    c.L(normal);
    c.add(exponent, 112);
    c.shl(exponent, 23);
    c.shl(mantissa, 13);
    c.or_(mantissa, sign);
    c.or_(mantissa, exponent);

    c.L(done);
    if (dest.isMEM()) {
        c.mov(dest, mantissa);
    } else {
        c.movd(dest.getReg().cvt128(), mantissa);
    }

    ctx.PopTempGPReg();
    ctx.PopTempGPReg();
    ctx.PopTempGPReg();
}

void EmitInlineF32ToF16(EmitContext& ctx, const Operand& dest, const Operand& src) {
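    // Software single-to-half conversion mirroring EmitInlineF16ToF32:
    // subnormal inputs flush to zero and out-of-range exponents clamp to
    // zero/infinity.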
    CodeGenerator& c = ctx.Code();
    Label zero_exp, underflow, overflow, done;
    Reg sign = ctx.TempGPReg().cvt32();
    Reg exponent = ctx.TempGPReg().cvt32();
    Reg mantissa = dest.isMEM() ? ctx.TempGPReg().cvt32() : dest.getReg().cvt32();

    if (src.isMEM()) {
        c.mov(mantissa, src);
    } else {
        c.movd(mantissa, src.getReg().cvt128());
    }

    // Extract sign, exponent, and mantissa
    c.mov(exponent, mantissa);
    c.mov(sign, mantissa);
    c.and_(exponent, 0x7F800000);
    c.and_(mantissa, 0x007FFFFF);
    c.shr(exponent, 23);
    c.shl(mantissa, 3);
    c.shr(sign, 16);
    c.and_(sign, 0x8000);

    // Subnormal numbers will be zero
    c.test(exponent, exponent);
    c.jz(zero_exp);

    // Check for overflow and underflow
    c.sub(exponent, 112);
    c.cmp(exponent, 0);
    c.jle(underflow);
    c.cmp(exponent, 0x1F);
    c.jge(overflow);

    // Normal number
    c.shl(exponent, 10);
    c.shr(mantissa, 13);
    c.or_(mantissa, exponent);
    c.or_(mantissa, sign);
    c.jmp(done);

    // Underflow
    c.L(underflow);
    c.xor_(mantissa, mantissa);
    c.jmp(done);

    // Overflow
    c.L(overflow);
    c.mov(mantissa, 0x7C00);
    c.or_(mantissa, sign);
    c.jmp(done);

    // Zero value: keep only the sign bit
    c.L(zero_exp);
    c.mov(mantissa, sign);

    c.L(done);
    if (dest.isMEM()) {
        c.mov(dest, mantissa);
    } else {
        c.and_(mantissa, 0xFFFF);
    }

    ctx.PopTempGPReg();
    ctx.PopTempGPReg();
    if (dest.isMEM()) {
        // A third temporary was reserved only for the in-memory destination.
        ctx.PopTempGPReg();
    }
}

} // namespace Shader::Backend::X64
40 src/shader_recompiler/backend/asm_x64/x64_utils.h Normal file
@ -0,0 +1,40 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <xbyak/xbyak.h>
#include <xbyak/xbyak_util.h>
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/ir/type.h"

namespace Shader::Backend::X64 {

bool IsFloatingType(const IR::Value& value);
size_t GetRegBytesOfType(const IR::Value& value);
u8 GetNumComponentsOfType(const IR::Value& value);
Xbyak::Reg ResizeRegToType(const Xbyak::Reg& reg, const IR::Value& value);
void MovFloat(EmitContext& ctx, const OperandHolder& dst, const OperandHolder& src);
void MovDouble(EmitContext& ctx, const OperandHolder& dst, const OperandHolder& src);
void MovGP(EmitContext& ctx, const OperandHolder& dst, const OperandHolder& src);
void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src);
void EmitInlineF16ToF32(EmitContext& ctx, const Xbyak::Operand& dest, const Xbyak::Operand& src);
void EmitInlineF32ToF16(EmitContext& ctx, const Xbyak::Operand& dest, const Xbyak::Operand& src);

inline bool IsFloatingType(IR::Inst* inst) {
    return IsFloatingType(IR::Value(inst));
}

inline size_t GetRegBytesOfType(IR::Inst* inst) {
    return GetRegBytesOfType(IR::Value(inst));
}

inline u8 GetNumComponentsOfType(IR::Inst* inst) {
    return GetNumComponentsOfType(IR::Value(inst));
}

inline Xbyak::Reg ResizeRegToType(const Xbyak::Reg& reg, IR::Inst* inst) {
    return ResizeRegToType(reg, IR::Value(inst));
}

} // namespace Shader::Backend::X64
@ -128,6 +128,10 @@ Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) {
    return ud_reg;
}

void EmitSetUserData(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitGetThreadBitScalarReg(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
}
@ -163,13 +167,10 @@ void EmitGetGotoVariable(EmitContext&) {
using BufferAlias = EmitContext::BufferAlias;

Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) {
    const u32 flatbuf_off_dw = inst->Flags<u32>();
    const auto& srt_flatbuf = ctx.buffers.back();
-    ASSERT(srt_flatbuf.binding >= 0 && flatbuf_off_dw > 0 &&
-           srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
+    ASSERT(srt_flatbuf.binding >= 0 && srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
    const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
-    const Id ptr{
-        ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(flatbuf_off_dw))};
+    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.Def(inst->Arg(1)))};
    return ctx.OpLoad(ctx.U32[1], ptr);
}
@ -52,6 +52,7 @@ void EmitBarrier(EmitContext& ctx);
void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
void EmitDeviceMemoryBarrier(EmitContext& ctx);
Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg);
void EmitSetUserData(EmitContext& ctx);
void EmitGetThreadBitScalarReg(EmitContext& ctx);
void EmitSetThreadBitScalarReg(EmitContext& ctx);
void EmitGetScalarRegister(EmitContext& ctx);
@ -39,21 +39,22 @@ void Translator::EmitScalarMemory(const GcnInst& inst) {
void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
    const auto& smrd = inst.control.smrd;
-    const u32 dword_offset = [&] -> u32 {
+    const IR::U32 dword_offset = [&] -> IR::U32 {
        if (smrd.imm) {
-            return smrd.offset;
+            return ir.Imm32(smrd.offset);
        }
        if (smrd.offset == SQ_SRC_LITERAL) {
-            return inst.src[1].code;
+            return ir.Imm32(inst.src[1].code);
        }
-        UNREACHABLE();
+        return ir.ShiftRightLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), ir.Imm32(2));
    }();
    const IR::ScalarReg sbase{inst.src[0].code * 2};
    const IR::Value base =
        ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1));
    IR::ScalarReg dst_reg{inst.dst[0].code};
    for (u32 i = 0; i < num_dwords; i++) {
-        ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(dword_offset + i)));
+        const IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i));
+        ir.SetScalarReg(dst_reg++, ir.ReadConst(base, index));
    }
}
@ -255,8 +255,9 @@ struct Info {
        std::memcpy(flattened_ud_buf.data(), user_data.data(), user_data.size_bytes());
        // Run the JIT program to walk the SRT and write the leaves to a flat buffer
        if (srt_info.walker_func) {
-            srt_info.walker_func(user_data.data(), flattened_ud_buf.data());
+            srt_info.walker_func(flattened_ud_buf.data());
        }
    }

    void ReadTessConstantBuffer(TessellationDataConstantBuffer& tess_constants) const {
44 src/shader_recompiler/ir/abstract_syntax_list.cpp Normal file
@ -0,0 +1,44 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "abstract_syntax_list.h"

namespace Shader::IR {

std::string DumpASLNode(const AbstractSyntaxNode& node,
                        const std::map<const Block*, size_t>& block_to_index,
                        const std::map<const Inst*, size_t>& inst_to_index) {
    switch (node.type) {
    case AbstractSyntaxNode::Type::Block:
        return fmt::format("Block: ${}", block_to_index.at(node.data.block));
    case AbstractSyntaxNode::Type::If:
        return fmt::format("If: cond = %{}, body = ${}, merge = ${}",
                           inst_to_index.at(node.data.if_node.cond.Inst()),
                           block_to_index.at(node.data.if_node.body),
                           block_to_index.at(node.data.if_node.merge));
    case AbstractSyntaxNode::Type::EndIf:
        return fmt::format("EndIf: merge = ${}", block_to_index.at(node.data.end_if.merge));
    case AbstractSyntaxNode::Type::Loop:
        return fmt::format("Loop: body = ${}, continue = ${}, merge = ${}",
                           block_to_index.at(node.data.loop.body),
                           block_to_index.at(node.data.loop.continue_block),
                           block_to_index.at(node.data.loop.merge));
    case AbstractSyntaxNode::Type::Repeat:
        return fmt::format("Repeat: cond = %{}, header = ${}, merge = ${}",
                           inst_to_index.at(node.data.repeat.cond.Inst()),
                           block_to_index.at(node.data.repeat.loop_header),
                           block_to_index.at(node.data.repeat.merge));
    case AbstractSyntaxNode::Type::Break:
        return fmt::format("Break: cond = %{}, merge = ${}, skip = ${}",
                           inst_to_index.at(node.data.break_node.cond.Inst()),
                           block_to_index.at(node.data.break_node.merge),
                           block_to_index.at(node.data.break_node.skip));
    case AbstractSyntaxNode::Type::Return:
        return "Return";
    case AbstractSyntaxNode::Type::Unreachable:
        return "Unreachable";
    }
    UNREACHABLE();
}

} // namespace Shader::IR
@ -3,6 +3,7 @@
#pragma once

#include <map>
#include <vector>
#include "shader_recompiler/ir/value.h"
@ -53,4 +54,8 @@ struct AbstractSyntaxNode {
};
using AbstractSyntaxList = std::vector<AbstractSyntaxNode>;

std::string DumpASLNode(const AbstractSyntaxNode& node,
                        const std::map<const Block*, size_t>& block_to_index,
                        const std::map<const Inst*, size_t>& inst_to_index);

} // namespace Shader::IR
@ -23,6 +23,12 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, const Inst& base
    return instructions.insert(insertion_point, *inst);
}

Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, u32 flags) {
    Inst* const inst{inst_pool->Create(op, flags)};
    inst->SetParent(this);
    return instructions.insert(insertion_point, *inst);
}

Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
                                      std::initializer_list<Value> args, u32 flags) {
    Inst* const inst{inst_pool->Create(op, flags)};
@ -11,6 +11,7 @@
#include "common/object_pool.h"
#include "common/types.h"
#include "shader_recompiler/ir/abstract_syntax_list.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/value.h"
@ -18,6 +19,12 @@ namespace Shader::IR {
class Block {
public:
    struct ConditionalData {
        u32 depth;
        const ConditionalData* parent;
        const AbstractSyntaxNode* asl_node;
    };

    using InstructionList = boost::intrusive::list<Inst>;
    using size_type = InstructionList::size_type;
    using iterator = InstructionList::iterator;
@ -40,6 +47,9 @@ public:
    /// Prepends a copy of an instruction to this basic block before the insertion point.
    iterator PrependNewInst(iterator insertion_point, const Inst& base_inst);

    /// Prepends a new instruction to this basic block before the insertion point (without args).
    iterator PrependNewInst(iterator insertion_point, Opcode op, u32 flags);

    /// Prepends a new instruction to this basic block before the insertion point.
    iterator PrependNewInst(iterator insertion_point, Opcode op,
                            std::initializer_list<Value> args = {}, u32 flags = 0);
@ -64,6 +74,24 @@ public:
    [[nodiscard]] std::span<Block* const> ImmSuccessors() const noexcept {
        return imm_successors;
    }

    // Returns if the block has a given immediate predecessor.
    [[nodiscard]] bool HasImmPredecessor(const Block* block) const noexcept {
        return std::ranges::find(imm_predecessors, block) != imm_predecessors.end();
    }

    // Returns if the block has a given immediate successor.
    [[nodiscard]] bool HasImmSuccessor(const Block* block) const noexcept {
        return std::ranges::find(imm_successors, block) != imm_successors.end();
    }

    // Set the conditional data for this block.
    void SetConditionalData(const ConditionalData& data) {
        cond_data = data;
    }

    // Get the conditional data for this block.
    [[nodiscard]] const ConditionalData& CondData() const {
        return cond_data;
    }

    /// Intrusively store the host definition of this instruction.
    template <typename T>
@ -164,6 +192,9 @@ private:
    /// Block immediate successors
    std::vector<Block*> imm_successors;

    // Conditional data
    Block::ConditionalData cond_data;

    /// Intrusively store if the block is sealed in the SSA pass.
    bool is_ssa_sealed{false};
94 src/shader_recompiler/ir/compute_value/compute.cpp Normal file
@ -0,0 +1,94 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include <array>
#include "common/cartesian_invoke.h"
#include "common/func_traits.h"
#include "shader_recompiler/ir/compute_value/compute.h"
#include "shader_recompiler/ir/compute_value/do_bitcast.h"
#include "shader_recompiler/ir/compute_value/do_composite.h"
#include "shader_recompiler/ir/compute_value/do_convert.h"
#include "shader_recompiler/ir/compute_value/do_float_operations.h"
#include "shader_recompiler/ir/compute_value/do_integer_operations.h"
#include "shader_recompiler/ir/compute_value/do_logical_operations.h"
#include "shader_recompiler/ir/compute_value/do_nop_functions.h"
#include "shader_recompiler/ir/compute_value/do_packing.h"

namespace Shader::IR::ComputeValue {

template <auto func, size_t... I>
static void Invoke(ImmValueList& inst_values, const std::array<ImmValueList, sizeof...(I)>& args,
                   std::index_sequence<I...>) {
    func(inst_values, args[I]...);
}

template <auto func>
static void Invoke(Inst* inst, ImmValueList& inst_values, Cache& cache) {
    using Traits = Common::FuncTraits<decltype(func)>;
    constexpr size_t num_args = Traits::NUM_ARGS - 1;
    ASSERT(inst->NumArgs() >= num_args);
    std::array<ImmValueList, num_args> args{};
    for (size_t i = 0; i < num_args; ++i) {
        Compute(inst->Arg(i), args[i], cache);
    }
    Invoke<func>(inst_values, args, std::make_index_sequence<num_args>{});
}

static void DoInstructionOperation(Inst* inst, ImmValueList& inst_values, Cache& cache) {
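    // X-macro dispatch: opcodes.inc expands to one case per opcode, each
    // forwarding to the matching Do* handler with the computed argument sets.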
    switch (inst->GetOpcode()) {
#define OPCODE(name, result_type, ...)                                                             \
    case Opcode::name:                                                                             \
        Invoke<&Do##name>(inst, inst_values, cache);                                               \
        break;
#include "shader_recompiler/ir/opcodes.inc"
#undef OPCODE
    default:
        UNREACHABLE_MSG("Invalid opcode: {}", inst->GetOpcode());
    }
}

static bool IsSelectInst(Inst* inst) {
    switch (inst->GetOpcode()) {
    case Opcode::SelectU1:
    case Opcode::SelectU8:
    case Opcode::SelectU16:
    case Opcode::SelectU32:
    case Opcode::SelectU64:
    case Opcode::SelectF32:
    case Opcode::SelectF64:
        return true;
    default:
        return false;
    }
}

void Compute(const Value& value, ImmValueList& values, Cache& cache) {
    Value resolved = value.Resolve();
    if (ImmValue::IsSupportedValue(resolved)) {
        values.insert(ImmValue(resolved));
        return;
    }
    if (resolved.IsImmediate()) {
        return;
    }
    Inst* inst = resolved.InstRecursive();
    auto it = cache.find(inst);
    if (it != cache.end()) {
        values.insert(it->second.begin(), it->second.end());
        return;
    }
    auto& inst_values = cache.emplace(inst, ImmValueList{}).first->second;
    if (inst->GetOpcode() == Opcode::Phi) {
        for (size_t i = 0; i < inst->NumArgs(); ++i) {
            Compute(inst->Arg(i), inst_values, cache);
        }
    } else if (IsSelectInst(inst)) {
        Compute(inst->Arg(1), inst_values, cache);
        Compute(inst->Arg(2), inst_values, cache);
    } else {
        DoInstructionOperation(inst, inst_values, cache);
    }
    values.insert(inst_values.begin(), inst_values.end());
}

} // namespace Shader::IR::ComputeValue
22 src/shader_recompiler/ir/compute_value/compute.h Normal file
@ -0,0 +1,22 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <unordered_map>
#include <unordered_set>
#include "shader_recompiler/ir/compute_value/imm_value.h"
#include "shader_recompiler/ir/value.h"

// Given a value (immediate or not), compute all the possible immediate values
// that it can represent. If the value can't be computed statically, the list
// will be empty.

namespace Shader::IR::ComputeValue {

using ImmValueList = std::unordered_set<ImmValue>;
using Cache = std::unordered_map<Inst*, ImmValueList>;

void Compute(const Value& value, ImmValueList& values, Cache& cache);
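
// Usage sketch (with a hypothetical instruction pointer `inst`):
//   ImmValueList values;
//   Cache cache;
//   Compute(inst->Arg(0), values, cache);
//   // `values` now holds every statically-known immediate the argument can take;
//   // reusing `cache` across calls avoids recomputing shared subexpressions.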

} // namespace Shader::IR::ComputeValue
32 src/shader_recompiler/ir/compute_value/do_bitcast.cpp Normal file
@ -0,0 +1,32 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shader_recompiler/ir/compute_value/do_bitcast.h"

namespace Shader::IR::ComputeValue {

void DoBitCastU16F16(ImmValueList& inst_values, const ImmValueList& src_values) {
    inst_values.insert(src_values.begin(), src_values.end());
}

void DoBitCastU32F32(ImmValueList& inst_values, const ImmValueList& src_values) {
    inst_values.insert(src_values.begin(), src_values.end());
}

void DoBitCastU64F64(ImmValueList& inst_values, const ImmValueList& src_values) {
    inst_values.insert(src_values.begin(), src_values.end());
}

void DoBitCastF16U16(ImmValueList& inst_values, const ImmValueList& src_values) {
    inst_values.insert(src_values.begin(), src_values.end());
}

void DoBitCastF32U32(ImmValueList& inst_values, const ImmValueList& src_values) {
    inst_values.insert(src_values.begin(), src_values.end());
}

void DoBitCastF64U64(ImmValueList& inst_values, const ImmValueList& src_values) {
    inst_values.insert(src_values.begin(), src_values.end());
}

} // namespace Shader::IR::ComputeValue
17 src/shader_recompiler/ir/compute_value/do_bitcast.h Normal file
@ -0,0 +1,17 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shader_recompiler/ir/compute_value/compute.h"
|
||||
|
||||
namespace Shader::IR::ComputeValue {
|
||||
|
||||
void DoBitCastU16F16(ImmValueList& inst_values, const ImmValueList& src_values);
|
||||
void DoBitCastU32F32(ImmValueList& inst_values, const ImmValueList& src_values);
|
||||
void DoBitCastU64F64(ImmValueList& inst_values, const ImmValueList& src_values);
|
||||
void DoBitCastF16U16(ImmValueList& inst_values, const ImmValueList& src_values);
|
||||
void DoBitCastF32U32(ImmValueList& inst_values, const ImmValueList& src_values);
|
||||
void DoBitCastF64U64(ImmValueList& inst_values, const ImmValueList& src_values);
|
||||
|
||||
} // namespace Shader::IR::ComputeValue

330 src/shader_recompiler/ir/compute_value/do_composite.cpp Normal file

@@ -0,0 +1,330 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/cartesian_invoke.h"
#include "shader_recompiler/ir/compute_value/do_composite.h"

namespace Shader::IR::ComputeValue {

static void CommonCompositeConstruct(ImmValueList& inst_values, const ImmValueList& arg0,
                                     const ImmValueList& arg1) {
    const auto op = [](const ImmValue& a, const ImmValue& b) { return ImmValue(a, b); };
    Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.begin()), arg0, arg1);
}

static void CommonCompositeConstruct(ImmValueList& inst_values, const ImmValueList& arg0,
                                     const ImmValueList& arg1, const ImmValueList& arg2) {
    const auto op = [](const ImmValue& a, const ImmValue& b, const ImmValue& c) {
        return ImmValue(a, b, c);
    };
    Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.begin()), arg0, arg1,
                            arg2);
}

static void CommonCompositeConstruct(ImmValueList& inst_values, const ImmValueList& arg0,
                                     const ImmValueList& arg1, const ImmValueList& arg2,
                                     const ImmValueList& arg3) {
    const auto op = [](const ImmValue& a, const ImmValue& b, const ImmValue& c, const ImmValue& d) {
        return ImmValue(a, b, c, d);
    };
    Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.begin()), arg0, arg1,
                            arg2, arg3);
}

void DoCompositeConstructU32x2(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1) {
    CommonCompositeConstruct(inst_values, arg0, arg1);
}

void DoCompositeConstructU32x3(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2) {
    CommonCompositeConstruct(inst_values, arg0, arg1, arg2);
}

void DoCompositeConstructU32x4(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2,
                               const ImmValueList& arg3) {
    CommonCompositeConstruct(inst_values, arg0, arg1, arg2, arg3);
}

void DoCompositeConstructU32x2x2(ImmValueList& inst_values, const ImmValueList& arg0,
                                 const ImmValueList& arg1) {
    Common::CartesianInvoke(ImmValue::CompositeFrom2x2,
                            std::insert_iterator(inst_values, inst_values.begin()), arg0, arg1);
}

void DoCompositeExtractU32x2(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Extract,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, idx);
}

void DoCompositeExtractU32x3(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Extract,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, idx);
}

void DoCompositeExtractU32x4(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Extract,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, idx);
}

void DoCompositeInsertU32x2(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Insert,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx);
}

void DoCompositeInsertU32x3(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Insert,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx);
}

void DoCompositeInsertU32x4(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Insert,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx);
}

void DoCompositeShuffleU32x2(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoCompositeShuffleU32x3(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoCompositeShuffleU32x4(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2,
                             const ImmValueList& idx3) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoCompositeConstructF16x2(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1) {
    CommonCompositeConstruct(inst_values, arg0, arg1);
}

void DoCompositeConstructF16x3(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2) {
    CommonCompositeConstruct(inst_values, arg0, arg1, arg2);
}

void DoCompositeConstructF16x4(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2,
                               const ImmValueList& arg3) {
    CommonCompositeConstruct(inst_values, arg0, arg1, arg2, arg3);
}

void DoCompositeConstructF32x2x2(ImmValueList& inst_values, const ImmValueList& arg0,
                                 const ImmValueList& arg1) {
    Common::CartesianInvoke(ImmValue::CompositeFrom2x2,
                            std::insert_iterator(inst_values, inst_values.begin()), arg0, arg1);
}

void DoCompositeExtractF16x2(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Extract,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, idx);
}

void DoCompositeExtractF16x3(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Extract,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, idx);
}

void DoCompositeExtractF16x4(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Extract,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, idx);
}

void DoCompositeInsertF16x2(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Insert,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx);
}

void DoCompositeInsertF16x3(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Insert,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx);
}

void DoCompositeInsertF16x4(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Insert,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx);
}

void DoCompositeShuffleF16x2(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoCompositeShuffleF16x3(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoCompositeShuffleF16x4(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2,
                             const ImmValueList& idx3) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoCompositeConstructF32x2(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1) {
    CommonCompositeConstruct(inst_values, arg0, arg1);
}

void DoCompositeConstructF32x3(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2) {
    CommonCompositeConstruct(inst_values, arg0, arg1, arg2);
}

void DoCompositeConstructF32x4(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2,
                               const ImmValueList& arg3) {
    CommonCompositeConstruct(inst_values, arg0, arg1, arg2, arg3);
}

void DoCompositeExtractF32x2(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Extract,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, idx);
}

void DoCompositeExtractF32x3(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Extract,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, idx);
}

void DoCompositeExtractF32x4(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Extract,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, idx);
}

void DoCompositeInsertF32x2(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Insert,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx);
}

void DoCompositeInsertF32x3(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Insert,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx);
}

void DoCompositeInsertF32x4(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Insert,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx);
}

void DoCompositeShuffleF32x2(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoCompositeShuffleF32x3(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoCompositeShuffleF32x4(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2,
                             const ImmValueList& idx3) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoCompositeConstructF64x2(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1) {
    CommonCompositeConstruct(inst_values, arg0, arg1);
}

void DoCompositeConstructF64x3(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2) {
    CommonCompositeConstruct(inst_values, arg0, arg1, arg2);
}

void DoCompositeConstructF64x4(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2,
                               const ImmValueList& arg3) {
    CommonCompositeConstruct(inst_values, arg0, arg1, arg2, arg3);
}

void DoCompositeExtractF64x2(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Extract,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, idx);
}

void DoCompositeExtractF64x3(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Extract,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, idx);
}

void DoCompositeExtractF64x4(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Extract,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, idx);
}

void DoCompositeInsertF64x2(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Insert,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx);
}

void DoCompositeInsertF64x3(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Insert,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx);
}

void DoCompositeInsertF64x4(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx) {
    Common::CartesianInvoke(ImmValue::Insert,
                            std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx);
}

void DoCompositeShuffleF64x2(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoCompositeShuffleF64x3(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoCompositeShuffleF64x4(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2,
                             const ImmValueList& idx3) {
    UNREACHABLE_MSG("Unimplemented");
}

} // namespace Shader::IR::ComputeValue
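
Reviewer note: Common::CartesianInvoke (from common/cartesian_invoke.h, added elsewhere in this commit) does the heavy lifting in every handler above. A hedged reference sketch of its two-range behavior, inferred from the call sites rather than from the actual header:

    // Inferred semantics: invoke `op` on every combination drawn from the
    // input ranges and write each result through the output iterator.
    template <typename Op, typename OutputIt, typename Range>
    void CartesianInvoke2(Op&& op, OutputIt out, const Range& arg0, const Range& arg1) {
        for (const auto& a : arg0) {
            for (const auto& b : arg1) {
                *out++ = op(a, b);
            }
        }
    }

Since the output iterator is an std::insert_iterator over an unordered_set, duplicate results collapse automatically.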

134 src/shader_recompiler/ir/compute_value/do_composite.h Normal file

@@ -0,0 +1,134 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include "shader_recompiler/ir/compute_value/compute.h"

namespace Shader::IR::ComputeValue {

void DoCompositeConstructU32x2(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1);
void DoCompositeConstructU32x3(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2);
void DoCompositeConstructU32x4(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2,
                               const ImmValueList& arg3);
void DoCompositeConstructU32x2x2(ImmValueList& inst_values, const ImmValueList& arg0,
                                 const ImmValueList& arg1);
void DoCompositeExtractU32x2(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx);
void DoCompositeExtractU32x3(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx);
void DoCompositeExtractU32x4(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx);
void DoCompositeInsertU32x2(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx);
void DoCompositeInsertU32x3(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx);
void DoCompositeInsertU32x4(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx);
void DoCompositeShuffleU32x2(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1);
void DoCompositeShuffleU32x3(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2);
void DoCompositeShuffleU32x4(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2,
                             const ImmValueList& idx3);

void DoCompositeConstructF16x2(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1);
void DoCompositeConstructF16x3(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2);
void DoCompositeConstructF16x4(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2,
                               const ImmValueList& arg3);
void DoCompositeExtractF16x2(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx);
void DoCompositeExtractF16x3(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx);
void DoCompositeExtractF16x4(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx);
void DoCompositeInsertF16x2(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx);
void DoCompositeInsertF16x3(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx);
void DoCompositeInsertF16x4(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx);
void DoCompositeShuffleF16x2(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1);
void DoCompositeShuffleF16x3(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2);
void DoCompositeShuffleF16x4(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2,
                             const ImmValueList& idx3);

void DoCompositeConstructF32x2(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1);
void DoCompositeConstructF32x3(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2);
void DoCompositeConstructF32x4(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2,
                               const ImmValueList& arg3);
void DoCompositeConstructF32x2x2(ImmValueList& inst_values, const ImmValueList& arg0,
                                 const ImmValueList& arg1);
void DoCompositeExtractF32x2(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx);
void DoCompositeExtractF32x3(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx);
void DoCompositeExtractF32x4(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx);
void DoCompositeInsertF32x2(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx);
void DoCompositeInsertF32x3(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx);
void DoCompositeInsertF32x4(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx);
void DoCompositeShuffleF32x2(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1);
void DoCompositeShuffleF32x3(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2);
void DoCompositeShuffleF32x4(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2,
                             const ImmValueList& idx3);

void DoCompositeConstructF64x2(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1);
void DoCompositeConstructF64x3(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2);
void DoCompositeConstructF64x4(ImmValueList& inst_values, const ImmValueList& arg0,
                               const ImmValueList& arg1, const ImmValueList& arg2,
                               const ImmValueList& arg3);
void DoCompositeExtractF64x2(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx);
void DoCompositeExtractF64x3(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx);
void DoCompositeExtractF64x4(ImmValueList& inst_values, const ImmValueList& vec,
                             const ImmValueList& idx);
void DoCompositeInsertF64x2(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx);
void DoCompositeInsertF64x3(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx);
void DoCompositeInsertF64x4(ImmValueList& inst_values, const ImmValueList& vec,
                            const ImmValueList& val, const ImmValueList& idx);
void DoCompositeShuffleF64x2(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1);
void DoCompositeShuffleF64x3(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2);
void DoCompositeShuffleF64x4(ImmValueList& inst_values, const ImmValueList& vec0,
                             const ImmValueList& vec1, const ImmValueList& idx0,
                             const ImmValueList& idx1, const ImmValueList& idx2,
                             const ImmValueList& idx3);

} // namespace Shader::IR::ComputeValue

81 src/shader_recompiler/ir/compute_value/do_convert.cpp Normal file

@@ -0,0 +1,81 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/cartesian_invoke.h"
#include "shader_recompiler/ir/compute_value/do_convert.h"

namespace Shader::IR::ComputeValue {

void DoConvertS32F32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Convert<Type::U32, true, Type::F32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoConvertS32F64(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Convert<Type::U32, true, Type::F64, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoConvertU32F32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Convert<Type::U32, false, Type::F32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoConvertF16F32(ImmValueList& inst_values, const ImmValueList& args) {
    // Common::CartesianInvoke(ImmValue::Convert<Type::F16, true, Type::F32, true>,
    //                         std::insert_iterator(inst_values, inst_values.begin()), args);
    UNREACHABLE_MSG("F32 to F16 conversion is not implemented");
}

void DoConvertF32F16(ImmValueList& inst_values, const ImmValueList& args) {
    // Common::CartesianInvoke(ImmValue::Convert<Type::F32, true, Type::F16, true>,
    //                         std::insert_iterator(inst_values, inst_values.begin()), args);
    UNREACHABLE_MSG("F16 to F32 conversion is not implemented");
}

void DoConvertF32F64(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Convert<Type::F32, true, Type::F64, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoConvertF64F32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Convert<Type::F64, true, Type::F32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoConvertF32S32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Convert<Type::F32, true, Type::U32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoConvertF32U32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Convert<Type::F32, true, Type::U32, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoConvertF64S32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Convert<Type::F64, true, Type::U32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoConvertF64U32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Convert<Type::F64, true, Type::U32, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoConvertF32U16(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Convert<Type::F32, true, Type::U16, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoConvertU16U32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Convert<Type::U16, false, Type::U32, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoConvertU32U16(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Convert<Type::U32, false, Type::U16, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

} // namespace Shader::IR::ComputeValue
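
Reviewer note: the template arguments of ImmValue::Convert<Dst, DstSigned, Src, SrcSigned> encode the destination and source types plus their signedness. A hedged scalar analogue of what DoConvertU32F32 folds for one value, in standard C++ (not the ImmValue implementation, which presumably also defines behavior for out-of-range inputs):

    #include <cstdint>

    // Float -> unsigned 32-bit with truncation toward zero.
    // Out-of-range inputs are UB in plain C++; treating them as defined
    // here is an assumption about ImmValue::Convert.
    inline std::uint32_t ConvertU32F32(float f) {
        return static_cast<std::uint32_t>(f);
    }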

25 src/shader_recompiler/ir/compute_value/do_convert.h Normal file

@@ -0,0 +1,25 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include "shader_recompiler/ir/compute_value/compute.h"

namespace Shader::IR::ComputeValue {

void DoConvertS32F32(ImmValueList& inst_values, const ImmValueList& args);
void DoConvertS32F64(ImmValueList& inst_values, const ImmValueList& args);
void DoConvertU32F32(ImmValueList& inst_values, const ImmValueList& args);
void DoConvertF16F32(ImmValueList& inst_values, const ImmValueList& args);
void DoConvertF32F16(ImmValueList& inst_values, const ImmValueList& args);
void DoConvertF32F64(ImmValueList& inst_values, const ImmValueList& args);
void DoConvertF64F32(ImmValueList& inst_values, const ImmValueList& args);
void DoConvertF32S32(ImmValueList& inst_values, const ImmValueList& args);
void DoConvertF32U32(ImmValueList& inst_values, const ImmValueList& args);
void DoConvertF64S32(ImmValueList& inst_values, const ImmValueList& args);
void DoConvertF64U32(ImmValueList& inst_values, const ImmValueList& args);
void DoConvertF32U16(ImmValueList& inst_values, const ImmValueList& args);
void DoConvertU16U32(ImmValueList& inst_values, const ImmValueList& args);
void DoConvertU32U16(ImmValueList& inst_values, const ImmValueList& args);

} // namespace Shader::IR::ComputeValue

278 src/shader_recompiler/ir/compute_value/do_float_operations.cpp Normal file

@@ -0,0 +1,278 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/cartesian_invoke.h"
#include "shader_recompiler/ir/compute_value/do_float_operations.h"

namespace Shader::IR::ComputeValue {

void DoFPAbs32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Abs<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoFPAbs64(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Abs<Type::F64>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoFPAdd32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Add<Type::F32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoFPAdd64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Add<Type::F64, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoFPSub32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Sub<Type::F32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoFPFma32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
               const ImmValueList& args2) {
    Common::CartesianInvoke(ImmValue::Fma<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1,
                            args2);
}

void DoFPFma64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
               const ImmValueList& args2) {
    Common::CartesianInvoke(ImmValue::Fma<Type::F64>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1,
                            args2);
}

void DoFPMax32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
               const ImmValueList& args_legacy) {
    const auto& op = [](const ImmValue& a, const ImmValue& b, const ImmValue& legacy) {
        if (legacy.U1()) {
            if (ImmValue::IsNan<Type::F32>(a))
                return b;
            if (ImmValue::IsNan<Type::F32>(b))
                return a;
        }
        return ImmValue::Max<Type::F32, true>(a, b);
    };
    Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.begin()), args0,
                            args1, args_legacy);
}

void DoFPMax64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Max<Type::F64, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoFPMin32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
               const ImmValueList& args_legacy) {
    const auto& op = [](const ImmValue& a, const ImmValue& b, const ImmValue& legacy) {
        if (legacy.U1()) {
            if (ImmValue::IsNan<Type::F32>(a))
                return b;
            if (ImmValue::IsNan<Type::F32>(b))
                return a;
        }
        return ImmValue::Min<Type::F32, true>(a, b);
    };
    Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.begin()), args0,
                            args1, args_legacy);
}

void DoFPMin64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Min<Type::F64, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoFPMaxTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                  const ImmValueList& args2) {
    Common::CartesianInvoke(ImmValue::MaxTri<Type::F32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1,
                            args2);
}

void DoFPMinTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                  const ImmValueList& args2) {
    Common::CartesianInvoke(ImmValue::MinTri<Type::F32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1,
                            args2);
}

void DoFPMedTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                  const ImmValueList& args2) {
    Common::CartesianInvoke(ImmValue::MedTri<Type::F32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1,
                            args2);
}

void DoFPMul32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Mul<Type::F32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoFPMul64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Mul<Type::F64, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoFPDiv32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Div<Type::F32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoFPDiv64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Div<Type::F64, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoFPNeg32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Neg<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoFPNeg64(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Neg<Type::F64>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoFPRecip32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Recip<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoFPRecip64(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Recip<Type::F64>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoFPRecipSqrt32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Rsqrt<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoFPRecipSqrt64(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Rsqrt<Type::F64>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoFPSqrt(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Sqrt<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoFPSin(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Sin<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoFPExp2(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Exp2<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoFPLdexp(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& exponents) {
    Common::CartesianInvoke(ImmValue::Ldexp<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args,
                            exponents);
}

void DoFPCos(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Cos<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoFPLog2(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Log2<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoFPSaturate32(ImmValueList& inst_values, const ImmValueList& args) {
    UNREACHABLE_MSG("FPSaturate32 not implemented");
}

void DoFPSaturate64(ImmValueList& inst_values, const ImmValueList& args) {
    UNREACHABLE_MSG("FPSaturate64 not implemented");
}

void DoFPClamp32(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& mins,
                 const ImmValueList& maxs) {
    Common::CartesianInvoke(ImmValue::Clamp<Type::F32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args, mins,
                            maxs);
}

void DoFPClamp64(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& mins,
                 const ImmValueList& maxs) {
    Common::CartesianInvoke(ImmValue::Clamp<Type::F64, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args, mins,
                            maxs);
}

void DoFPRoundEven32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Round<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoFPRoundEven64(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Round<Type::F64>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoFPFloor32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Floor<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoFPFloor64(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Floor<Type::F64>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoFPCeil32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Ceil<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoFPCeil64(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Ceil<Type::F64>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoFPTrunc32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Trunc<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoFPTrunc64(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Trunc<Type::F64>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoFPFract32(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Fract<Type::F32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoFPFract64(ImmValueList& inst_values, const ImmValueList& args) {
    Common::CartesianInvoke(ImmValue::Fract<Type::F64>,
                            std::insert_iterator(inst_values, inst_values.begin()), args);
}

void DoFPFrexpSig32(ImmValueList& inst_values, const ImmValueList& args) {
    UNREACHABLE_MSG("FPFrexpSig32 not implemented");
}

void DoFPFrexpSig64(ImmValueList& inst_values, const ImmValueList& args) {
    UNREACHABLE_MSG("FPFrexpSig64 not implemented");
}

void DoFPFrexpExp32(ImmValueList& inst_values, const ImmValueList& args) {
    UNREACHABLE_MSG("FPFrexpExp32 not implemented");
}

void DoFPFrexpExp64(ImmValueList& inst_values, const ImmValueList& args) {
    UNREACHABLE_MSG("FPFrexpExp64 not implemented");
}

} // namespace Shader::IR::ComputeValue
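
Reviewer note: the DoFPMin32 lambda originally tested IsNan<Type::F64> inside the 32-bit handler; this reads as a copy-paste slip and is normalized to F32 above to match DoFPMax32 and the Min<Type::F32> result. The `legacy` flag appears to mirror GCN-style min/max_legacy semantics, where a NaN operand yields the other operand instead of propagating. A hedged scalar restatement of that lambda, illustrative only:

    #include <cmath>

    // Legacy max: a NaN operand loses to the other operand.
    // Non-legacy path: NaN comparisons are false, so b is picked as-is.
    inline float FPMaxLegacy(float a, float b, bool legacy) {
        if (legacy) {
            if (std::isnan(a))
                return b;
            if (std::isnan(b))
                return a;
        }
        return a > b ? a : b;
    }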

68 src/shader_recompiler/ir/compute_value/do_float_operations.h Normal file

@@ -0,0 +1,68 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include "shader_recompiler/ir/compute_value/compute.h"

namespace Shader::IR::ComputeValue {

void DoFPAbs32(ImmValueList& inst_values, const ImmValueList& args);
void DoFPAbs64(ImmValueList& inst_values, const ImmValueList& args);
void DoFPAdd32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoFPAdd64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoFPSub32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoFPFma32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
               const ImmValueList& args2);
void DoFPFma64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
               const ImmValueList& args2);
void DoFPMax32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
               const ImmValueList& args_legacy);
void DoFPMax64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoFPMin32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
               const ImmValueList& args_legacy);
void DoFPMin64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoFPMinTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                  const ImmValueList& args2);
void DoFPMaxTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                  const ImmValueList& args2);
void DoFPMedTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                  const ImmValueList& args2);
void DoFPMul32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoFPMul64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoFPDiv32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoFPDiv64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoFPNeg32(ImmValueList& inst_values, const ImmValueList& args);
void DoFPNeg64(ImmValueList& inst_values, const ImmValueList& args);
void DoFPRecip32(ImmValueList& inst_values, const ImmValueList& args);
void DoFPRecip64(ImmValueList& inst_values, const ImmValueList& args);
void DoFPRecipSqrt32(ImmValueList& inst_values, const ImmValueList& args);
void DoFPRecipSqrt64(ImmValueList& inst_values, const ImmValueList& args);
void DoFPSqrt(ImmValueList& inst_values, const ImmValueList& args);
void DoFPSin(ImmValueList& inst_values, const ImmValueList& args);
void DoFPExp2(ImmValueList& inst_values, const ImmValueList& args);
void DoFPLdexp(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& exponents);
void DoFPCos(ImmValueList& inst_values, const ImmValueList& args);
void DoFPLog2(ImmValueList& inst_values, const ImmValueList& args);
void DoFPSaturate32(ImmValueList& inst_values, const ImmValueList& args);
void DoFPSaturate64(ImmValueList& inst_values, const ImmValueList& args);
void DoFPClamp32(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& mins,
                 const ImmValueList& maxs);
void DoFPClamp64(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& mins,
                 const ImmValueList& maxs);
void DoFPRoundEven32(ImmValueList& inst_values, const ImmValueList& args);
void DoFPRoundEven64(ImmValueList& inst_values, const ImmValueList& args);
void DoFPFloor32(ImmValueList& inst_values, const ImmValueList& args);
void DoFPFloor64(ImmValueList& inst_values, const ImmValueList& args);
void DoFPCeil32(ImmValueList& inst_values, const ImmValueList& args);
void DoFPCeil64(ImmValueList& inst_values, const ImmValueList& args);
void DoFPTrunc32(ImmValueList& inst_values, const ImmValueList& args);
void DoFPTrunc64(ImmValueList& inst_values, const ImmValueList& args);
void DoFPFract32(ImmValueList& inst_values, const ImmValueList& args);
void DoFPFract64(ImmValueList& inst_values, const ImmValueList& args);
void DoFPFrexpSig32(ImmValueList& inst_values, const ImmValueList& args);
void DoFPFrexpSig64(ImmValueList& inst_values, const ImmValueList& args);
void DoFPFrexpExp32(ImmValueList& inst_values, const ImmValueList& args);
void DoFPFrexpExp64(ImmValueList& inst_values, const ImmValueList& args);

} // namespace Shader::IR::ComputeValue
|
272
src/shader_recompiler/ir/compute_value/do_integer_operations.cpp
Normal file
272
src/shader_recompiler/ir/compute_value/do_integer_operations.cpp
Normal file
|
@ -0,0 +1,272 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/cartesian_invoke.h"
|
||||
#include "shader_recompiler/ir/compute_value/do_integer_operations.h"
|
||||
|
||||
namespace Shader::IR::ComputeValue {
|
||||
|
||||
void DoIAdd32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
|
||||
Common::CartesianInvoke(ImmValue::Add<Type::U32, false>,
|
||||
std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
|
||||
}
|
||||
|
||||
void DoIAdd64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
|
||||
Common::CartesianInvoke(ImmValue::Add<Type::U64, false>,
|
||||
std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
|
||||
}
|
||||
|
||||
void DoIAddCary32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
|
||||
Common::CartesianInvoke(ImmValue::AddCarry<Type::U32, false>,
|
||||
std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
|
||||
}
|
||||
|
||||
void DoISub32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
|
||||
Common::CartesianInvoke(ImmValue::Sub<Type::U32, false>,
|
||||
std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
|
||||
}
|
||||
|
||||
void DoISub64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
|
||||
Common::CartesianInvoke(ImmValue::Sub<Type::U64, false>,
|
||||
std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
|
||||
}
|
||||
|
||||
void DoIMul32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
|
||||
Common::CartesianInvoke(ImmValue::Mul<Type::U32, false>,
|
||||
std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
|
||||
}
|
||||
|
||||
void DoIMul64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
|
||||
Common::CartesianInvoke(ImmValue::Mul<Type::U64, false>,
|
||||
std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
|
||||
}
|
||||
|
||||
void DoSMulExt(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
|
||||
UNREACHABLE_MSG("SMulExt not implemented");
|
||||
}
|
||||
|
||||
void DoUMulExt(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
|
||||
UNREACHABLE_MSG("UMulExt not implemented");
|
||||
}
|
||||
|
||||
void DoSDiv32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
|
||||
Common::CartesianInvoke(ImmValue::Div<Type::U32, true>,
|
||||
std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
|
||||
}
|
||||
|
||||
void DoUDiv32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
|
||||
Common::CartesianInvoke(ImmValue::Div<Type::U32, false>,
|
||||
std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
|
||||
}
|
||||
|
||||
void DoSMod32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
|
||||
Common::CartesianInvoke(ImmValue::Mod<Type::U32, true>,
|
||||
std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
|
||||
}
|
||||
|
||||
void DoUMod32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
|
||||
Common::CartesianInvoke(ImmValue::Mod<Type::U32, false>,
|
||||
std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
|
||||
}
|
||||
|
||||
void DoINeg32(ImmValueList& inst_values, const ImmValueList& args) {
|
||||
Common::CartesianInvoke(ImmValue::Neg<Type::U32>,
|
||||
std::insert_iterator(inst_values, inst_values.begin()), args);
|
||||
}
|
||||
|
||||
void DoINeg64(ImmValueList& inst_values, const ImmValueList& args) {
|
||||
Common::CartesianInvoke(ImmValue::Neg<Type::U64>,
|
||||
std::insert_iterator(inst_values, inst_values.begin()), args);
|
||||
}
|
||||
|
||||
void DoIAbs32(ImmValueList& inst_values, const ImmValueList& args) {
|
||||
Common::CartesianInvoke(ImmValue::Abs<Type::U32>,
|
||||
std::insert_iterator(inst_values, inst_values.begin()), args);
|
||||
}
|
||||
|
||||
void DoShiftLeftLogical32(ImmValueList& inst_values, const ImmValueList& args,
|
||||
const ImmValueList& shift) {
|
||||
Common::CartesianInvoke(ImmValue::LShift<Type::U32>,
|
||||
std::insert_iterator(inst_values, inst_values.begin()), args, shift);
|
||||
}
|
||||
|
||||
void DoShiftLeftLogical64(ImmValueList& inst_values, const ImmValueList& args,
|
||||
const ImmValueList& shift) {
|
||||
Common::CartesianInvoke(ImmValue::LShift<Type::U64>,
|
||||
std::insert_iterator(inst_values, inst_values.begin()), args, shift);
|
||||
}
|
||||
|
||||
void DoShiftRightLogical32(ImmValueList& inst_values, const ImmValueList& args,
|
||||
const ImmValueList& shift) {
|
||||
Common::CartesianInvoke(ImmValue::RShift<Type::U32, false>,
|
||||
std::insert_iterator(inst_values, inst_values.begin()), args, shift);
|
||||
}
|
||||
|
||||
void DoShiftRightLogical64(ImmValueList& inst_values, const ImmValueList& args,
|
||||
const ImmValueList& shift) {
|
||||
Common::CartesianInvoke(ImmValue::RShift<Type::U64, false>,
|
||||
std::insert_iterator(inst_values, inst_values.begin()), args, shift);
|
||||
}
|
||||
|
||||
void DoShiftRightArithmetic32(ImmValueList& inst_values, const ImmValueList& args,
|
||||
const ImmValueList& shift) {
|
||||
Common::CartesianInvoke(ImmValue::RShift<Type::U32, true>,
|
||||
std::insert_iterator(inst_values, inst_values.begin()), args, shift);
|
||||
}
|
||||
|
||||
void DoShiftRightArithmetic64(ImmValueList& inst_values, const ImmValueList& args,
|
||||
const ImmValueList& shift) {
|
||||
Common::CartesianInvoke(ImmValue::RShift<Type::U64, true>,
|
||||
std::insert_iterator(inst_values, inst_values.begin()), args, shift);
|
||||
}
|
||||
|
||||
void DoBitwiseAnd32(ImmValueList& inst_values, const ImmValueList& args0,
|
||||
const ImmValueList& args1) {
|
||||
Common::CartesianInvoke(ImmValue::And<Type::U32>,
|
||||
std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
|
||||
}
|
||||
|
||||
void DoBitwiseAnd64(ImmValueList& inst_values, const ImmValueList& args0,
|
||||
const ImmValueList& args1) {
|
||||
    Common::CartesianInvoke(ImmValue::And<Type::U64>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoBitwiseOr32(ImmValueList& inst_values, const ImmValueList& args0,
                   const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Or<Type::U32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoBitwiseOr64(ImmValueList& inst_values, const ImmValueList& args0,
                   const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Or<Type::U64>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoBitwiseXor32(ImmValueList& inst_values, const ImmValueList& args0,
                    const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Xor<Type::U32>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoBitFieldInsert(ImmValueList& inst_values, const ImmValueList& arg,
                      const ImmValueList& insert, const ImmValueList& offset,
                      const ImmValueList& count) {
    UNREACHABLE_MSG("BitFieldInsert not implemented");
}

void DoBitFieldSExtract(ImmValueList& inst_values, const ImmValueList& arg,
                        const ImmValueList& offset, const ImmValueList& count) {
    UNREACHABLE_MSG("BitFieldSExtract not implemented");
}

void DoBitFieldUExtract(ImmValueList& inst_values, const ImmValueList& arg,
                        const ImmValueList& offset, const ImmValueList& count) {
    UNREACHABLE_MSG("BitFieldUExtract not implemented");
}

void DoBitReverse32(ImmValueList& inst_values, const ImmValueList& arg) {
    UNREACHABLE_MSG("BitReverse32 not implemented");
}

void DoBitCount32(ImmValueList& inst_values, const ImmValueList& arg) {
    UNREACHABLE_MSG("BitCount32 not implemented");
}

void DoBitCount64(ImmValueList& inst_values, const ImmValueList& arg) {
    UNREACHABLE_MSG("BitCount64 not implemented");
}

void DoBitwiseNot32(ImmValueList& inst_values, const ImmValueList& arg) {
    Common::CartesianInvoke(ImmValue::Not<Type::U32>,
                            std::insert_iterator(inst_values, inst_values.begin()), arg);
}

void DoFindSMsb32(ImmValueList& inst_values, const ImmValueList& arg) {
    UNREACHABLE_MSG("FindSMsb32 not implemented");
}

void DoFindUMsb32(ImmValueList& inst_values, const ImmValueList& arg) {
    UNREACHABLE_MSG("FindUMsb32 not implemented");
}

void DoFindILsb32(ImmValueList& inst_values, const ImmValueList& arg) {
    UNREACHABLE_MSG("FindILsb32 not implemented");
}

void DoFindILsb64(ImmValueList& inst_values, const ImmValueList& arg) {
    UNREACHABLE_MSG("FindILsb64 not implemented");
}

void DoSMin32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Min<Type::U32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoUMin32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Min<Type::U32, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoSMax32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Max<Type::U32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoUMax32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
    Common::CartesianInvoke(ImmValue::Max<Type::U32, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
}

void DoSMinTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                 const ImmValueList& args2) {
    Common::CartesianInvoke(ImmValue::MinTri<Type::U32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1,
                            args2);
}

void DoUMinTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                 const ImmValueList& args2) {
    Common::CartesianInvoke(ImmValue::MinTri<Type::U32, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1,
                            args2);
}

void DoSMaxTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                 const ImmValueList& args2) {
    Common::CartesianInvoke(ImmValue::MaxTri<Type::U32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1,
                            args2);
}

void DoUMaxTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                 const ImmValueList& args2) {
    Common::CartesianInvoke(ImmValue::MaxTri<Type::U32, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1,
                            args2);
}

void DoSMedTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                 const ImmValueList& args2) {
    Common::CartesianInvoke(ImmValue::MedTri<Type::U32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1,
                            args2);
}

void DoUMedTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                 const ImmValueList& args2) {
    Common::CartesianInvoke(ImmValue::MedTri<Type::U32, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1,
                            args2);
}

void DoSClamp32(ImmValueList& inst_values, const ImmValueList& value, const ImmValueList& min,
                const ImmValueList& max) {
    Common::CartesianInvoke(ImmValue::Clamp<Type::U32, true>,
                            std::insert_iterator(inst_values, inst_values.begin()), value, min,
                            max);
}

void DoUClamp32(ImmValueList& inst_values, const ImmValueList& value, const ImmValueList& min,
                const ImmValueList& max) {
    Common::CartesianInvoke(ImmValue::Clamp<Type::U32, false>,
                            std::insert_iterator(inst_values, inst_values.begin()), value, min,
                            max);
}

} // namespace Shader::IR::ComputeValue
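All of the implemented handlers above share one shape: Common::CartesianInvoke calls the given ImmValue operation on every combination of values drawn from the argument lists and writes each result through the supplied insert iterator, so inst_values ends up holding every value the instruction could possibly produce. The header common/cartesian_invoke.h itself is not visible in this part of the diff; the following is only a minimal sketch of the idea, and everything under detail is a hypothetical name:

// Sketch only: the real implementation lives in src/common/cartesian_invoke.h,
// which is not shown in this diff.
#include <tuple>

namespace Common {
namespace detail {

template <typename Func, typename OutputIt, typename Tuple>
void CartesianInvokeImpl(Func& func, OutputIt& out, const Tuple& collected) {
    // Every argument list has contributed one element: emit one result.
    *out++ = std::apply(func, collected);
}

template <typename Func, typename OutputIt, typename Tuple, typename List, typename... Rest>
void CartesianInvokeImpl(Func& func, OutputIt& out, const Tuple& collected, const List& list,
                         const Rest&... rest) {
    // One recursion level per argument list; the nesting enumerates the
    // cartesian product of all lists.
    for (const auto& value : list) {
        CartesianInvokeImpl(func, out, std::tuple_cat(collected, std::tie(value)), rest...);
    }
}

} // namespace detail

// Invoke func on every combination of elements from the given lists and
// write each result through the output iterator.
template <typename Func, typename OutputIt, typename... Lists>
void CartesianInvoke(Func&& func, OutputIt out, const Lists&... lists) {
    detail::CartesianInvokeImpl(func, out, std::tuple<>{}, lists...);
}

} // namespace Common

For DoBitwiseOr32, for example, this yields a | b for every a in args0 and every b in args1.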
@@ -0,0 +1,76 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include "shader_recompiler/ir/compute_value/compute.h"

namespace Shader::IR::ComputeValue {

void DoIAdd32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoIAdd64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoIAddCary32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoISub32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoISub64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoIMul32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoIMul64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoSMulExt(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoUMulExt(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoSDiv32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoUDiv32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoSMod32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoUMod32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoINeg32(ImmValueList& inst_values, const ImmValueList& args);
void DoINeg64(ImmValueList& inst_values, const ImmValueList& args);
void DoIAbs32(ImmValueList& inst_values, const ImmValueList& args);
void DoShiftLeftLogical32(ImmValueList& inst_values, const ImmValueList& args,
                          const ImmValueList& shift);
void DoShiftLeftLogical64(ImmValueList& inst_values, const ImmValueList& args,
                          const ImmValueList& shift);
void DoShiftRightLogical32(ImmValueList& inst_values, const ImmValueList& args,
                           const ImmValueList& shift);
void DoShiftRightLogical64(ImmValueList& inst_values, const ImmValueList& args,
                           const ImmValueList& shift);
void DoShiftRightArithmetic32(ImmValueList& inst_values, const ImmValueList& args,
                              const ImmValueList& shift);
void DoShiftRightArithmetic64(ImmValueList& inst_values, const ImmValueList& args,
                              const ImmValueList& shift);
void DoBitwiseAnd32(ImmValueList& inst_values, const ImmValueList& args0,
                    const ImmValueList& args1);
void DoBitwiseAnd64(ImmValueList& inst_values, const ImmValueList& args0,
                    const ImmValueList& args1);
void DoBitwiseOr32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoBitwiseOr64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoBitwiseXor32(ImmValueList& inst_values, const ImmValueList& args0,
                    const ImmValueList& args1);
void DoBitFieldInsert(ImmValueList& inst_values, const ImmValueList& arg,
                      const ImmValueList& insert, const ImmValueList& offset,
                      const ImmValueList& count);
void DoBitFieldSExtract(ImmValueList& inst_values, const ImmValueList& arg,
                        const ImmValueList& offset, const ImmValueList& count);
void DoBitFieldUExtract(ImmValueList& inst_values, const ImmValueList& arg,
                        const ImmValueList& offset, const ImmValueList& count);
void DoBitReverse32(ImmValueList& inst_values, const ImmValueList& arg);
void DoBitCount32(ImmValueList& inst_values, const ImmValueList& arg);
void DoBitCount64(ImmValueList& inst_values, const ImmValueList& arg);
void DoBitwiseNot32(ImmValueList& inst_values, const ImmValueList& arg);
void DoFindSMsb32(ImmValueList& inst_values, const ImmValueList& arg);
void DoFindUMsb32(ImmValueList& inst_values, const ImmValueList& arg);
void DoFindILsb32(ImmValueList& inst_values, const ImmValueList& arg);
void DoFindILsb64(ImmValueList& inst_values, const ImmValueList& arg);
void DoSMin32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoUMin32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoSMax32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoUMax32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
void DoSMinTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                 const ImmValueList& args2);
void DoUMinTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                 const ImmValueList& args2);
void DoSMaxTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                 const ImmValueList& args2);
void DoUMaxTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                 const ImmValueList& args2);
void DoSMedTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                 const ImmValueList& args2);
void DoUMedTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
                 const ImmValueList& args2);
void DoSClamp32(ImmValueList& inst_values, const ImmValueList& value, const ImmValueList& min,
                const ImmValueList& max);
void DoUClamp32(ImmValueList& inst_values, const ImmValueList& value, const ImmValueList& min,
                const ImmValueList& max);

} // namespace Shader::IR::ComputeValue
@@ -0,0 +1,29 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/cartesian_invoke.h"
#include "shader_recompiler/ir/compute_value/do_logical_operations.h"

namespace Shader::IR::ComputeValue {

void DoLogicalOr(ImmValueList& inst_values, const ImmValueList& arg1, const ImmValueList& arg2) {
    Common::CartesianInvoke(ImmValue::Or<Type::U1>,
                            std::insert_iterator(inst_values, inst_values.begin()), arg1, arg2);
}

void DoLogicalAnd(ImmValueList& inst_values, const ImmValueList& arg1, const ImmValueList& arg2) {
    Common::CartesianInvoke(ImmValue::And<Type::U1>,
                            std::insert_iterator(inst_values, inst_values.begin()), arg1, arg2);
}

void DoLogicalXor(ImmValueList& inst_values, const ImmValueList& arg1, const ImmValueList& arg2) {
    Common::CartesianInvoke(ImmValue::Xor<Type::U1>,
                            std::insert_iterator(inst_values, inst_values.begin()), arg1, arg2);
}

void DoLogicalNot(ImmValueList& inst_values, const ImmValueList& arg1) {
    Common::CartesianInvoke(ImmValue::Not<Type::U1>,
                            std::insert_iterator(inst_values, inst_values.begin()), arg1);
}

} // namespace Shader::IR::ComputeValue

@@ -0,0 +1,15 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include "shader_recompiler/ir/compute_value/compute.h"

namespace Shader::IR::ComputeValue {

void DoLogicalOr(ImmValueList& inst_values, const ImmValueList& arg1, const ImmValueList& arg2);
void DoLogicalAnd(ImmValueList& inst_values, const ImmValueList& arg1, const ImmValueList& arg2);
void DoLogicalXor(ImmValueList& inst_values, const ImmValueList& arg1, const ImmValueList& arg2);
void DoLogicalNot(ImmValueList& inst_values, const ImmValueList& arg1);

} // namespace Shader::IR::ComputeValue
212 src/shader_recompiler/ir/compute_value/do_nop_functions.h Normal file
@@ -0,0 +1,212 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include "shader_recompiler/ir/compute_value/compute.h"

namespace Shader::IR::ComputeValue {

#define NOP_FUNCTION(name) \
    inline void Do##name(ImmValueList& inst_values) {}

NOP_FUNCTION(Phi)
NOP_FUNCTION(Identity)
NOP_FUNCTION(Void)
NOP_FUNCTION(ConditionRef)
NOP_FUNCTION(Reference)
NOP_FUNCTION(PhiMove)

NOP_FUNCTION(Prologue)
NOP_FUNCTION(Epilogue)
NOP_FUNCTION(Discard)
NOP_FUNCTION(DiscardCond)
NOP_FUNCTION(DebugPrint)

NOP_FUNCTION(ReadConst)
NOP_FUNCTION(ReadConstBuffer)

NOP_FUNCTION(Barrier)
NOP_FUNCTION(WorkgroupMemoryBarrier)
NOP_FUNCTION(DeviceMemoryBarrier)

NOP_FUNCTION(EmitVertex)
NOP_FUNCTION(EmitPrimitive)

NOP_FUNCTION(LoadSharedU32)
NOP_FUNCTION(LoadSharedU64)
NOP_FUNCTION(WriteSharedU32)
NOP_FUNCTION(WriteSharedU64)

NOP_FUNCTION(SharedAtomicIAdd32)
NOP_FUNCTION(SharedAtomicSMin32)
NOP_FUNCTION(SharedAtomicUMin32)
NOP_FUNCTION(SharedAtomicSMax32)
NOP_FUNCTION(SharedAtomicUMax32)
NOP_FUNCTION(SharedAtomicAnd32)
NOP_FUNCTION(SharedAtomicOr32)
NOP_FUNCTION(SharedAtomicXor32)

NOP_FUNCTION(GetUserData)
NOP_FUNCTION(SetUserData)
NOP_FUNCTION(GetThreadBitScalarReg)
NOP_FUNCTION(SetThreadBitScalarReg)
NOP_FUNCTION(GetScalarRegister)
NOP_FUNCTION(SetScalarRegister)
NOP_FUNCTION(GetVectorRegister)
NOP_FUNCTION(SetVectorRegister)
NOP_FUNCTION(GetGotoVariable)
NOP_FUNCTION(SetGotoVariable)
NOP_FUNCTION(GetAttribute)
NOP_FUNCTION(GetAttributeU32)
NOP_FUNCTION(SetAttribute)
NOP_FUNCTION(GetPatch)
NOP_FUNCTION(SetPatch)
NOP_FUNCTION(GetTessGenericAttribute)
NOP_FUNCTION(SetTcsGenericAttribute)
NOP_FUNCTION(ReadTcsGenericOuputAttribute)

NOP_FUNCTION(GetScc)
NOP_FUNCTION(GetExec)
NOP_FUNCTION(GetVcc)
NOP_FUNCTION(GetVccLo)
NOP_FUNCTION(GetVccHi)
NOP_FUNCTION(GetM0)
NOP_FUNCTION(SetScc)
NOP_FUNCTION(SetExec)
NOP_FUNCTION(SetVcc)
NOP_FUNCTION(SetSccLo)
NOP_FUNCTION(SetVccLo)
NOP_FUNCTION(SetVccHi)
NOP_FUNCTION(SetM0)

NOP_FUNCTION(UndefU1)
NOP_FUNCTION(UndefU8)
NOP_FUNCTION(UndefU16)
NOP_FUNCTION(UndefU32)
NOP_FUNCTION(UndefU64)

NOP_FUNCTION(LoadBufferU8)
NOP_FUNCTION(LoadBufferU16)
NOP_FUNCTION(LoadBufferU32)
NOP_FUNCTION(LoadBufferU32x2)
NOP_FUNCTION(LoadBufferU32x3)
NOP_FUNCTION(LoadBufferU32x4)
NOP_FUNCTION(LoadBufferF32)
NOP_FUNCTION(LoadBufferF32x2)
NOP_FUNCTION(LoadBufferF32x3)
NOP_FUNCTION(LoadBufferF32x4)
NOP_FUNCTION(LoadBufferFormatF32)
NOP_FUNCTION(StoreBufferU8)
NOP_FUNCTION(StoreBufferU16)
NOP_FUNCTION(StoreBufferU32)
NOP_FUNCTION(StoreBufferU32x2)
NOP_FUNCTION(StoreBufferU32x3)
NOP_FUNCTION(StoreBufferU32x4)
NOP_FUNCTION(StoreBufferF32)
NOP_FUNCTION(StoreBufferF32x2)
NOP_FUNCTION(StoreBufferF32x3)
NOP_FUNCTION(StoreBufferF32x4)
NOP_FUNCTION(StoreBufferFormatF32)

NOP_FUNCTION(BufferAtomicIAdd32)
NOP_FUNCTION(BufferAtomicSMin32)
NOP_FUNCTION(BufferAtomicUMin32)
NOP_FUNCTION(BufferAtomicSMax32)
NOP_FUNCTION(BufferAtomicUMax32)
NOP_FUNCTION(BufferAtomicInc32)
NOP_FUNCTION(BufferAtomicDec32)
NOP_FUNCTION(BufferAtomicAnd32)
NOP_FUNCTION(BufferAtomicOr32)
NOP_FUNCTION(BufferAtomicXor32)
NOP_FUNCTION(BufferAtomicSwap32)

// Select instructions are handled separately
NOP_FUNCTION(SelectU1)
NOP_FUNCTION(SelectU8)
NOP_FUNCTION(SelectU16)
NOP_FUNCTION(SelectU32)
NOP_FUNCTION(SelectU64)
NOP_FUNCTION(SelectF32)
NOP_FUNCTION(SelectF64)

NOP_FUNCTION(FPOrdEqual32)
NOP_FUNCTION(FPOrdEqual64)
NOP_FUNCTION(FPUnordEqual32)
NOP_FUNCTION(FPUnordEqual64)
NOP_FUNCTION(FPOrdNotEqual32)
NOP_FUNCTION(FPOrdNotEqual64)
NOP_FUNCTION(FPUnordNotEqual32)
NOP_FUNCTION(FPUnordNotEqual64)
NOP_FUNCTION(FPOrdLessThan32)
NOP_FUNCTION(FPOrdLessThan64)
NOP_FUNCTION(FPUnordLessThan32)
NOP_FUNCTION(FPUnordLessThan64)
NOP_FUNCTION(FPOrdGreaterThan32)
NOP_FUNCTION(FPOrdGreaterThan64)
NOP_FUNCTION(FPUnordGreaterThan32)
NOP_FUNCTION(FPUnordGreaterThan64)
NOP_FUNCTION(FPOrdLessThanEqual32)
NOP_FUNCTION(FPOrdLessThanEqual64)
NOP_FUNCTION(FPUnordLessThanEqual32)
NOP_FUNCTION(FPUnordLessThanEqual64)
NOP_FUNCTION(FPOrdGreaterThanEqual32)
NOP_FUNCTION(FPOrdGreaterThanEqual64)
NOP_FUNCTION(FPUnordGreaterThanEqual32)
NOP_FUNCTION(FPUnordGreaterThanEqual64)
NOP_FUNCTION(FPIsNan32)
NOP_FUNCTION(FPIsNan64)
NOP_FUNCTION(FPIsInf32)
NOP_FUNCTION(FPIsInf64)
NOP_FUNCTION(FPCmpClass32)

NOP_FUNCTION(SLessThan32)
NOP_FUNCTION(SLessThan64)
NOP_FUNCTION(ULessThan32)
NOP_FUNCTION(ULessThan64)
NOP_FUNCTION(IEqual32)
NOP_FUNCTION(IEqual64)
NOP_FUNCTION(SLessThanEqual)
NOP_FUNCTION(ULessThanEqual)
NOP_FUNCTION(SGreaterThan)
NOP_FUNCTION(UGreaterThan)
NOP_FUNCTION(INotEqual32)
NOP_FUNCTION(INotEqual64)
NOP_FUNCTION(SGreaterThanEqual)
NOP_FUNCTION(UGreaterThanEqual)

NOP_FUNCTION(ImageSampleRaw)
NOP_FUNCTION(ImageSampleImplicitLod)
NOP_FUNCTION(ImageSampleExplicitLod)
NOP_FUNCTION(ImageSampleDrefImplicitLod)
NOP_FUNCTION(ImageSampleDrefExplicitLod)
NOP_FUNCTION(ImageGather)
NOP_FUNCTION(ImageGatherDref)
NOP_FUNCTION(ImageQueryDimensions)
NOP_FUNCTION(ImageQueryLod)
NOP_FUNCTION(ImageGradient)
NOP_FUNCTION(ImageRead)
NOP_FUNCTION(ImageWrite)

NOP_FUNCTION(ImageAtomicIAdd32)
NOP_FUNCTION(ImageAtomicSMin32)
NOP_FUNCTION(ImageAtomicUMin32)
NOP_FUNCTION(ImageAtomicSMax32)
NOP_FUNCTION(ImageAtomicUMax32)
NOP_FUNCTION(ImageAtomicInc32)
NOP_FUNCTION(ImageAtomicDec32)
NOP_FUNCTION(ImageAtomicAnd32)
NOP_FUNCTION(ImageAtomicOr32)
NOP_FUNCTION(ImageAtomicXor32)
NOP_FUNCTION(ImageAtomicExchange32)

NOP_FUNCTION(CubeFaceIndex)

NOP_FUNCTION(LaneId)
NOP_FUNCTION(WarpId)
NOP_FUNCTION(QuadShuffle)
NOP_FUNCTION(ReadFirstLane)
NOP_FUNCTION(ReadLane)
NOP_FUNCTION(WriteLane)
NOP_FUNCTION(DataAppend)
NOP_FUNCTION(DataConsume)

#undef NOP_FUNCTION

} // namespace Shader::IR::ComputeValue
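For clarity, each NOP_FUNCTION(name) above stamps out an empty inline handler, so opcodes whose results cannot be constant-folded still have a well-formed Do* entry point. For example:

// NOP_FUNCTION(LaneId) expands to:
inline void DoLaneId(ImmValueList& inst_values) {}
// i.e. the value list is left untouched: "no statically known values".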
132 src/shader_recompiler/ir/compute_value/do_packing.cpp Normal file
@@ -0,0 +1,132 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shader_recompiler/ir/compute_value/do_packing.h"

namespace Shader::IR::ComputeValue {

void DoPackUint2x32(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackUint2x32(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackFloat2x32(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackUnorm2x16(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackUnorm2x16(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackSnorm2x16(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackSnorm2x16(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackUint2x16(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackUint2x16(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackSint2x16(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackSint2x16(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackHalf2x16(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackHalf2x16(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackUnorm4x8(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackUnorm4x8(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackSnorm4x8(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackSnorm4x8(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackUint4x8(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackUint4x8(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackSint4x8(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackSint4x8(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackUfloat10_11_11(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackUfloat10_11_11(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackUnorm2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackUnorm2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackSnorm2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackSnorm2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackUint2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackUint2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoPackSint2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

void DoUnpackSint2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0) {
    UNREACHABLE_MSG("Unimplemented");
}

} // namespace Shader::IR::ComputeValue
42 src/shader_recompiler/ir/compute_value/do_packing.h Normal file
@@ -0,0 +1,42 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include "shader_recompiler/ir/compute_value/compute.h"

namespace Shader::IR::ComputeValue {

void DoPackUint2x32(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackUint2x32(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackFloat2x32(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackUnorm2x16(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackUnorm2x16(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackSnorm2x16(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackSnorm2x16(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackUint2x16(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackUint2x16(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackSint2x16(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackSint2x16(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackHalf2x16(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackHalf2x16(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackUnorm4x8(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackUnorm4x8(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackSnorm4x8(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackSnorm4x8(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackUint4x8(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackUint4x8(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackSint4x8(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackSint4x8(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackUfloat10_11_11(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackUfloat10_11_11(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackUnorm2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackUnorm2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackSnorm2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackSnorm2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackUint2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackUint2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0);
void DoPackSint2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0);
void DoUnpackSint2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0);

} // namespace Shader::IR::ComputeValue
1510 src/shader_recompiler/ir/compute_value/imm_value.cpp Normal file
File diff suppressed because it is too large.
330 src/shader_recompiler/ir/compute_value/imm_value.h Normal file
@@ -0,0 +1,330 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <array>
#include <tuple>
#include <type_traits>
#include "common/assert.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/ir/type.h"
#include "shader_recompiler/ir/value.h"

namespace Shader::IR::ComputeValue {

// Holds an immediate value and provides helper functions to do arithmetic operations on it.
class ImmValue {
public:
    ImmValue() noexcept = default;
    ImmValue(const ImmValue& value) noexcept = default;
    explicit ImmValue(const IR::Value& value) noexcept;
    explicit ImmValue(bool value) noexcept;
    explicit ImmValue(u8 value) noexcept;
    explicit ImmValue(s8 value) noexcept;
    explicit ImmValue(u16 value) noexcept;
    explicit ImmValue(s16 value) noexcept;
    explicit ImmValue(u32 value) noexcept;
    explicit ImmValue(s32 value) noexcept;
    explicit ImmValue(f32 value) noexcept;
    explicit ImmValue(u64 value) noexcept;
    explicit ImmValue(s64 value) noexcept;
    explicit ImmValue(f64 value) noexcept;
    ImmValue(u32 value1, u32 value2) noexcept;
    ImmValue(u32 value1, u32 value2, u32 value3) noexcept;
    ImmValue(u32 value1, u32 value2, u32 value3, u32 value4) noexcept;
    ImmValue(s32 value1, s32 value2) noexcept;
    ImmValue(s32 value1, s32 value2, s32 value3) noexcept;
    ImmValue(s32 value1, s32 value2, s32 value3, s32 value4) noexcept;
    ImmValue(f32 value1, f32 value2) noexcept;
    ImmValue(f32 value1, f32 value2, f32 value3) noexcept;
    ImmValue(f32 value1, f32 value2, f32 value3, f32 value4) noexcept;
    ImmValue(u64 value1, u64 value2) noexcept;
    ImmValue(u64 value1, u64 value2, u64 value3) noexcept;
    ImmValue(u64 value1, u64 value2, u64 value3, u64 value4) noexcept;
    ImmValue(s64 value1, s64 value2) noexcept;
    ImmValue(s64 value1, s64 value2, s64 value3) noexcept;
    ImmValue(s64 value1, s64 value2, s64 value3, s64 value4) noexcept;
    ImmValue(f64 value1, f64 value2) noexcept;
    ImmValue(f64 value1, f64 value2, f64 value3) noexcept;
    ImmValue(f64 value1, f64 value2, f64 value3, f64 value4) noexcept;
    ImmValue(const ImmValue& value1, const ImmValue& value2) noexcept;
    ImmValue(const ImmValue& value1, const ImmValue& value2, const ImmValue& value3) noexcept;
    ImmValue(const ImmValue& value1, const ImmValue& value2, const ImmValue& value3,
             const ImmValue& value4) noexcept;

    [[nodiscard]] static ImmValue CompositeFrom2x2(const ImmValue& value1,
                                                   const ImmValue& value2) noexcept;

    [[nodiscard]] bool U1() const noexcept;
    [[nodiscard]] u8 U8() const noexcept;
    [[nodiscard]] s8 S8() const noexcept;
    [[nodiscard]] u16 U16() const noexcept;
    [[nodiscard]] s16 S16() const noexcept;
    [[nodiscard]] u32 U32() const noexcept;
    [[nodiscard]] s32 S32() const noexcept;
    [[nodiscard]] f32 F32() const noexcept;
    [[nodiscard]] u64 U64() const noexcept;
    [[nodiscard]] s64 S64() const noexcept;
    [[nodiscard]] f64 F64() const noexcept;

    [[nodiscard]] std::tuple<u32, u32> U32x2() const noexcept;
    [[nodiscard]] std::tuple<u32, u32, u32> U32x3() const noexcept;
    [[nodiscard]] std::tuple<u32, u32, u32, u32> U32x4() const noexcept;
    [[nodiscard]] std::tuple<s32, s32> S32x2() const noexcept;
    [[nodiscard]] std::tuple<s32, s32, s32> S32x3() const noexcept;
    [[nodiscard]] std::tuple<s32, s32, s32, s32> S32x4() const noexcept;
    [[nodiscard]] std::tuple<f32, f32> F32x2() const noexcept;
    [[nodiscard]] std::tuple<f32, f32, f32> F32x3() const noexcept;
    [[nodiscard]] std::tuple<f32, f32, f32, f32> F32x4() const noexcept;
    [[nodiscard]] std::tuple<f64, f64> F64x2() const noexcept;
    [[nodiscard]] std::tuple<f64, f64, f64> F64x3() const noexcept;
    [[nodiscard]] std::tuple<f64, f64, f64, f64> F64x4() const noexcept;

    ImmValue& operator=(const ImmValue& value) noexcept = default;

    [[nodiscard]] bool operator==(const ImmValue& other) const noexcept;
    [[nodiscard]] bool operator!=(const ImmValue& other) const noexcept;

    [[nodiscard]] static ImmValue Extract(const ImmValue& vec, const ImmValue& index) noexcept;
    [[nodiscard]] static ImmValue Insert(const ImmValue& vec, const ImmValue& value,
                                         const ImmValue& index) noexcept;

    template <IR::Type NewType, bool NewSigned, IR::Type OldType, bool OldSigned>
    [[nodiscard]] static ImmValue Convert(const ImmValue& in) noexcept;

    template <IR::Type Type, bool IsSigned>
    [[nodiscard]] static ImmValue Add(const ImmValue& a, const ImmValue& b) noexcept;

    template <IR::Type Type, bool IsSigned>
    [[nodiscard]] static ImmValue AddCarry(const ImmValue& a, const ImmValue& b) noexcept;

    template <IR::Type Type, bool IsSigned>
    [[nodiscard]] static ImmValue Sub(const ImmValue& a, const ImmValue& b) noexcept;

    template <IR::Type Type, bool IsSigned>
    [[nodiscard]] static ImmValue Mul(const ImmValue& a, const ImmValue& b) noexcept;

    template <IR::Type Type, bool IsSigned>
    [[nodiscard]] static ImmValue Div(const ImmValue& a, const ImmValue& b);

    template <IR::Type Type, bool IsSigned>
    [[nodiscard]] static ImmValue Mod(const ImmValue& a, const ImmValue& b) noexcept;

    template <IR::Type Type>
    [[nodiscard]] static ImmValue And(const ImmValue& a, const ImmValue& b) noexcept;

    template <IR::Type Type>
    [[nodiscard]] static ImmValue Or(const ImmValue& a, const ImmValue& b) noexcept;

    template <IR::Type Type>
    [[nodiscard]] static ImmValue Xor(const ImmValue& a, const ImmValue& b) noexcept;

    template <IR::Type Type>
    [[nodiscard]] static ImmValue LShift(const ImmValue& a, const ImmValue& shift) noexcept;

    template <IR::Type Type, bool IsSigned>
    [[nodiscard]] static ImmValue RShift(const ImmValue& a, const ImmValue& shift) noexcept;

    template <IR::Type Type>
    [[nodiscard]] static ImmValue Not(const ImmValue& in) noexcept;

    template <IR::Type Type>
    [[nodiscard]] static ImmValue Neg(const ImmValue& in) noexcept;

    template <IR::Type Type>
    [[nodiscard]] static ImmValue Abs(const ImmValue& in) noexcept;

    template <IR::Type Type>
    [[nodiscard]] static ImmValue Recip(const ImmValue& in) noexcept;

    template <IR::Type Type>
    [[nodiscard]] static ImmValue Sqrt(const ImmValue& in) noexcept;

    template <IR::Type Type>
    [[nodiscard]] static ImmValue Rsqrt(const ImmValue& in) noexcept;

    template <IR::Type Type>
    [[nodiscard]] static ImmValue Sin(const ImmValue& in) noexcept;

    template <IR::Type Type>
    [[nodiscard]] static ImmValue Cos(const ImmValue& in) noexcept;

    template <IR::Type Type>
    [[nodiscard]] static ImmValue Exp2(const ImmValue& in) noexcept;

    template <IR::Type Type>
    [[nodiscard]] static ImmValue Ldexp(const ImmValue& in, const ImmValue& exp) noexcept;

    template <IR::Type Type>
    [[nodiscard]] static ImmValue Log2(const ImmValue& in) noexcept;

    template <IR::Type Type, bool IsSigned>
    [[nodiscard]] static ImmValue Min(const ImmValue& a, const ImmValue& b) noexcept;

    template <IR::Type Type, bool IsSigned>
    [[nodiscard]] static ImmValue Max(const ImmValue& a, const ImmValue& b) noexcept;

    template <IR::Type Type, bool IsSigned>
    [[nodiscard]] static ImmValue MinTri(const ImmValue& a, const ImmValue& b,
                                         const ImmValue& c) noexcept;

    template <IR::Type Type, bool IsSigned>
    [[nodiscard]] static ImmValue MaxTri(const ImmValue& a, const ImmValue& b,
                                         const ImmValue& c) noexcept;

    template <IR::Type Type, bool IsSigned>
    [[nodiscard]] static ImmValue MedTri(const ImmValue& a, const ImmValue& b,
                                         const ImmValue& c) noexcept;

    template <IR::Type Type, bool IsSigned>
    [[nodiscard]] static ImmValue Clamp(const ImmValue& in, const ImmValue& min,
                                        const ImmValue& max) noexcept;

    template <IR::Type Type>
    [[nodiscard]] static ImmValue Floor(const ImmValue& in) noexcept;

    template <IR::Type Type>
    [[nodiscard]] static ImmValue Ceil(const ImmValue& in) noexcept;

    template <IR::Type Type>
    [[nodiscard]] static ImmValue Round(const ImmValue& in) noexcept;

    template <IR::Type Type>
    [[nodiscard]] static ImmValue Trunc(const ImmValue& in) noexcept;

    template <IR::Type Type>
    [[nodiscard]] static ImmValue Fract(const ImmValue& in) noexcept;

    template <IR::Type Type>
    [[nodiscard]] static ImmValue Fma(const ImmValue& a, const ImmValue& b,
                                      const ImmValue& c) noexcept;

    template <IR::Type Type>
    [[nodiscard]] static bool IsNan(const ImmValue& in) noexcept;

    [[nodiscard]] static bool IsSupportedValue(const IR::Value& value) noexcept;

private:
    union Value {
        bool imm_u1;
        u8 imm_u8;
        s8 imm_s8;
        u16 imm_u16;
        s16 imm_s16;
        u32 imm_u32;
        s32 imm_s32;
        f32 imm_f32;
        u64 imm_u64;
        s64 imm_s64;
        f64 imm_f64;
    };

    std::array<Value, 4> imm_values;

    friend class std::hash<ImmValue>;
};
static_assert(std::is_trivially_copyable_v<ImmValue>);

inline bool ImmValue::U1() const noexcept {
    return imm_values[0].imm_u1;
}

inline u8 ImmValue::U8() const noexcept {
    return imm_values[0].imm_u8;
}

inline s8 ImmValue::S8() const noexcept {
    return imm_values[0].imm_s8;
}

inline u16 ImmValue::U16() const noexcept {
    return imm_values[0].imm_u16;
}

inline s16 ImmValue::S16() const noexcept {
    return imm_values[0].imm_s16;
}

inline u32 ImmValue::U32() const noexcept {
    return imm_values[0].imm_u32;
}

inline s32 ImmValue::S32() const noexcept {
    return imm_values[0].imm_s32;
}

inline f32 ImmValue::F32() const noexcept {
    return imm_values[0].imm_f32;
}

inline u64 ImmValue::U64() const noexcept {
    return imm_values[0].imm_u64;
}

inline s64 ImmValue::S64() const noexcept {
    return imm_values[0].imm_s64;
}

inline f64 ImmValue::F64() const noexcept {
    return imm_values[0].imm_f64;
}

inline std::tuple<u32, u32> ImmValue::U32x2() const noexcept {
    return {imm_values[0].imm_u32, imm_values[1].imm_u32};
}

inline std::tuple<u32, u32, u32> ImmValue::U32x3() const noexcept {
    return {imm_values[0].imm_u32, imm_values[1].imm_u32, imm_values[2].imm_u32};
}

inline std::tuple<u32, u32, u32, u32> ImmValue::U32x4() const noexcept {
    return {imm_values[0].imm_u32, imm_values[1].imm_u32, imm_values[2].imm_u32,
            imm_values[3].imm_u32};
}

inline std::tuple<s32, s32> ImmValue::S32x2() const noexcept {
    return {imm_values[0].imm_s32, imm_values[1].imm_s32};
}

inline std::tuple<s32, s32, s32> ImmValue::S32x3() const noexcept {
    return {imm_values[0].imm_s32, imm_values[1].imm_s32, imm_values[2].imm_s32};
}

inline std::tuple<s32, s32, s32, s32> ImmValue::S32x4() const noexcept {
    return {imm_values[0].imm_s32, imm_values[1].imm_s32, imm_values[2].imm_s32,
            imm_values[3].imm_s32};
}

inline std::tuple<f32, f32> ImmValue::F32x2() const noexcept {
    return {imm_values[0].imm_f32, imm_values[1].imm_f32};
}

inline std::tuple<f32, f32, f32> ImmValue::F32x3() const noexcept {
    return {imm_values[0].imm_f32, imm_values[1].imm_f32, imm_values[2].imm_f32};
}

inline std::tuple<f32, f32, f32, f32> ImmValue::F32x4() const noexcept {
    return {imm_values[0].imm_f32, imm_values[1].imm_f32, imm_values[2].imm_f32,
            imm_values[3].imm_f32};
}

inline std::tuple<f64, f64> ImmValue::F64x2() const noexcept {
    return {imm_values[0].imm_f64, imm_values[1].imm_f64};
}

inline std::tuple<f64, f64, f64> ImmValue::F64x3() const noexcept {
    return {imm_values[0].imm_f64, imm_values[1].imm_f64, imm_values[2].imm_f64};
}

inline std::tuple<f64, f64, f64, f64> ImmValue::F64x4() const noexcept {
    return {imm_values[0].imm_f64, imm_values[1].imm_f64, imm_values[2].imm_f64,
            imm_values[3].imm_f64};
}

} // namespace Shader::IR::ComputeValue

namespace std {
template <>
struct hash<Shader::IR::ComputeValue::ImmValue> {
    std::size_t operator()(const Shader::IR::ComputeValue::ImmValue& value) const;
};
} // namespace std
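Taken together, ImmValue behaves like a small constant-folding scalar/vector: construct it from a concrete value, combine with the static template helpers, and read the result back through a typed accessor. A minimal usage sketch follows (hypothetical caller; the operator definitions live in imm_value.cpp, whose diff is suppressed above, and u32/s32 come from common/types.h):

#include "shader_recompiler/ir/compute_value/imm_value.h"

using Shader::IR::Type;
using Shader::IR::ComputeValue::ImmValue;

static u32 FoldExample() {
    const ImmValue a{u32{6}};
    const ImmValue b{u32{7}};
    // Unsigned 32-bit add: expected sum.U32() == 13.
    const ImmValue sum = ImmValue::Add<Type::U32, false>(a, b);
    // Signed 32-bit min, as used by DoSMin32: expected lo.S32() == 6.
    const ImmValue lo = ImmValue::Min<Type::U32, true>(a, b);
    return sum.U32() + static_cast<u32>(lo.S32());
}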
65 src/shader_recompiler/ir/conditional_tree.cpp Normal file
@@ -0,0 +1,65 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include <algorithm>
#include <span>

#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/conditional_tree.h"

// This can be used to get, for a given block, the list of conditions that
// must be true for the block to be executed. It can also be useful for
// determining the maximum number of times a block is executed.

namespace Shader::IR {

static void AddConditionalTree(std::span<AbstractSyntaxNode> asl_span,
                               Block::ConditionalData* parent) {
    const auto get_span = [&asl_span](AbstractSyntaxNode& node,
                                      Block* merge_block) -> std::span<AbstractSyntaxNode> {
        auto it = std::find_if(asl_span.begin(), asl_span.end(),
                               [&node, &merge_block](const AbstractSyntaxNode& n) {
                                   return n.data.block == merge_block;
                               });
        ASSERT(it != asl_span.end());
        std::ptrdiff_t merge_index = std::distance(asl_span.begin(), it);
        return std::span<AbstractSyntaxNode>(&node + 1, asl_span.data() + merge_index);
    };
    const Block::ConditionalData* copied_parent = nullptr;
    for (auto it = asl_span.begin(); it < asl_span.end(); ++it) {
        AbstractSyntaxNode& node = *it;
        if (node.type == AbstractSyntaxNode::Type::If ||
            node.type == AbstractSyntaxNode::Type::Loop) {
            ASSERT(copied_parent);
            Block* merge_block;
            switch (node.type) {
            case AbstractSyntaxNode::Type::If:
                merge_block = node.data.if_node.merge;
                break;
            case AbstractSyntaxNode::Type::Loop:
                merge_block = node.data.loop.merge;
                break;
            default:
                UNREACHABLE();
            }
            auto subspan = get_span(node, merge_block);
            Block::ConditionalData cond{copied_parent->depth + 1, copied_parent, &node};
            AddConditionalTree(subspan, &cond);
            it += subspan.size();
        } else if (node.type == AbstractSyntaxNode::Type::Block) {
            Block* block = node.data.block;
            if (!copied_parent) {
                block->SetConditionalData(*parent);
                copied_parent = &block->CondData();
            } else {
                block->SetConditionalData(*copied_parent);
            }
        }
    }
}

void AddConditionalTreeFromASL(AbstractSyntaxList& syntax_list) {
    Block::ConditionalData cond{0, nullptr, nullptr};
    AddConditionalTree(syntax_list, &cond);
}

} // namespace Shader::IR
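Once AddConditionalTreeFromASL has run, every block carries a ConditionalData whose parent chain walks outward through the guarding if/loop nodes; GetNumExecutions further down relies on exactly this walk. A small hypothetical helper to illustrate the traversal (not part of the diff):

#include <vector>
#include "shader_recompiler/ir/basic_block.h"

namespace Shader::IR {

// Hypothetical helper: collect the if/loop nodes guarding a block,
// innermost first, by following the ConditionalData parent chain.
inline std::vector<const AbstractSyntaxNode*> CollectGuards(const Block& block) {
    std::vector<const AbstractSyntaxNode*> guards;
    for (const Block::ConditionalData* cond = &block.CondData(); cond != nullptr;
         cond = cond->parent) {
        if (cond->asl_node != nullptr) {
            guards.push_back(cond->asl_node);
        }
    }
    return guards;
}

} // namespace Shader::IR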
12 src/shader_recompiler/ir/conditional_tree.h Normal file
@@ -0,0 +1,12 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include "shader_recompiler/ir/abstract_syntax_list.h"

namespace Shader::IR {

void AddConditionalTreeFromASL(AbstractSyntaxList& syntax_list);

} // namespace Shader::IR
@@ -102,6 +102,10 @@ void IREmitter::Reference(const Value& value) {
    Inst(Opcode::Reference, value);
}

Value IREmitter::Phi(IR::Type type) {
    return Inst(Opcode::Phi, Flags(type));
}

void IREmitter::PhiMove(IR::Inst& phi, const Value& value) {
    Inst(Opcode::PhiMove, Value{&phi}, value);
}

@@ -139,6 +143,10 @@ U32 IREmitter::GetUserData(IR::ScalarReg reg) {
    return Inst<U32>(Opcode::GetUserData, reg);
}

void IREmitter::SetUserData(const U32& offset, const U32& data) {
    Inst(Opcode::SetUserData, offset, data);
}

U1 IREmitter::GetThreadBitScalarReg(IR::ScalarReg reg) {
    ASSERT(static_cast<u32>(reg) < IR::NumScalarRegs);
    return Inst<U1>(Opcode::GetThreadBitScalarReg, reg);

@@ -18,6 +18,8 @@ namespace Shader::IR {
class IREmitter {
public:
    explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {}
    explicit IREmitter(IR::Inst& inst)
        : block{inst.GetParent()}, insertion_point{Block::InstructionList::s_iterator_to(inst)} {}
    explicit IREmitter(Block& block_, Block::iterator insertion_point_)
        : block{&block_}, insertion_point{insertion_point_} {}

@@ -39,6 +41,7 @@ public:
    U1 ConditionRef(const U1& value);
    void Reference(const Value& value);

    [[nodiscard]] Value Phi(IR::Type type);
    void PhiMove(IR::Inst& phi, const Value& value);

    void Prologue();

@@ -52,6 +55,7 @@ public:
    void DeviceMemoryBarrier();

    [[nodiscard]] U32 GetUserData(IR::ScalarReg reg);
    void SetUserData(const U32& offset, const U32& data);
    [[nodiscard]] U1 GetThreadBitScalarReg(IR::ScalarReg reg);
    void SetThreadBitScalarReg(IR::ScalarReg reg, const U1& value);

@@ -103,6 +103,7 @@ bool Inst::MayHaveSideEffects() const noexcept {
    case Opcode::DebugPrint:
    case Opcode::EmitVertex:
    case Opcode::EmitPrimitive:
    case Opcode::SetUserData:
        return true;
    default:
        return false;
64 src/shader_recompiler/ir/num_executions.cpp Normal file
@@ -0,0 +1,64 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include <algorithm>
#include <iterator>
#include <unordered_set>

#include "common/cartesian_invoke.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/compute_value/compute.h"
#include "shader_recompiler/ir/num_executions.h"

namespace Shader::IR {

static bool Is64BitCondition(const Inst* inst) {
    switch (inst->GetOpcode()) {
    case Opcode::SLessThan64:
    case Opcode::ULessThan64:
    case Opcode::IEqual64:
    case Opcode::INotEqual64:
        return true;
    default:
        return false;
    }
}

static u64 GetDistance32(const ComputeValue::ImmValue& a, const ComputeValue::ImmValue& b) {
    return a.U32() < b.U32() ? b.U32() - a.U32() : a.U32() - b.U32();
}

static u64 GetDistance64(const ComputeValue::ImmValue& a, const ComputeValue::ImmValue& b) {
    return a.U64() < b.U64() ? b.U64() - a.U64() : a.U64() - b.U64();
}

u64 GetNumExecutions(const Inst* inst) {
    u64 num_executions = 1;
    const auto* cond_data = &inst->GetParent()->CondData();
    while (cond_data->asl_node) {
        if (cond_data->asl_node->type == AbstractSyntaxNode::Type::Loop) {
            ComputeValue::ImmValueList cond_arg0, cond_arg1;
            ComputeValue::Cache cache;
            Block* cont_block = cond_data->asl_node->data.loop.continue_block;
            Inst* cond_inst = cont_block->back().Arg(0).InstRecursive();
            ASSERT(cond_inst);
            ComputeValue::Compute(cond_inst->Arg(0), cond_arg0, cache);
            ComputeValue::Compute(cond_inst->Arg(1), cond_arg1, cache);
            std::unordered_set<u64> distances;
            if (Is64BitCondition(cond_inst)) {
                Common::CartesianInvoke(GetDistance64,
                                        std::insert_iterator(distances, distances.end()),
                                        cond_arg0, cond_arg1);
            } else {
                Common::CartesianInvoke(GetDistance32,
                                        std::insert_iterator(distances, distances.end()),
                                        cond_arg0, cond_arg1);
            }
            if (!distances.empty()) {
                // We assume that the iterator changes by 1 each loop iteration.
                num_executions *=
                    std::max<u64>(1, *std::max_element(distances.begin(), distances.end())) + 1;
            }
        }
        cond_data = cond_data->parent;
    }
    return num_executions;
}

} // namespace Shader::IR
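The estimate deserves one worked example: for each enclosing loop, the pass collects every possible distance between the two operands of the loop's exit comparison and, assuming the induction variable moves by 1 per iteration, bounds the trip count by the largest distance plus one; nested loops multiply their bounds. A standalone arithmetic sketch of that computation, not emulator code:

#include <algorithm>
#include <cstdint>
#include <unordered_set>

static std::uint64_t Distance(std::uint32_t a, std::uint32_t b) {
    return a < b ? b - a : a - b;
}

int main() {
    // Possible constant values of the exit compare's operands, e.g. an
    // induction variable known to start at 0 or 4, compared against 16.
    const std::uint32_t starts[] = {0, 4};
    const std::uint32_t bound = 16;
    std::unordered_set<std::uint64_t> distances;
    for (std::uint32_t s : starts) {
        distances.insert(Distance(s, bound)); // inserts 16 and 12
    }
    // max(1, largest distance) + 1 == 17 executions of the loop body at most.
    const std::uint64_t trip =
        std::max<std::uint64_t>(1, *std::max_element(distances.begin(), distances.end())) + 1;
    return static_cast<int>(trip);
}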
16 src/shader_recompiler/ir/num_executions.h Normal file
@@ -0,0 +1,16 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include "common/types.h"
#include "shader_recompiler/ir/type.h"

// Get the number of times an instruction will be executed.
// 0 if it cannot be determined statically.

namespace Shader::IR {

class Inst;

u64 GetNumExecutions(const Inst* inst);

} // namespace Shader::IR
@@ -50,6 +50,8 @@ OPCODE(SharedAtomicIDecrement32, U32, U32,

// Context getters/setters
OPCODE(GetUserData, U32, ScalarReg, )
// We don't use ScalarReg since we do arithmetic on the register index
OPCODE(SetUserData, Void, U32, U32 )
OPCODE(GetThreadBitScalarReg, U1, ScalarReg, )
OPCODE(SetThreadBitScalarReg, Void, ScalarReg, U1, )
OPCODE(GetScalarRegister, U32, ScalarReg, )
@@ -4,19 +4,25 @@

#include <unordered_map>
#include <boost/container/flat_map.hpp>
#include <xbyak/xbyak.h>
#include <xbyak/xbyak_util.h>
#include "common/arch.h"
#include "common/config.h"
#include "common/io_file.h"
#include "common/logging/log.h"
#include "common/path_util.h"
#include "shader_recompiler/info.h"
#ifdef ARCH_X86_64
#include "shader_recompiler/backend/asm_x64/emit_x64.h"
#endif
#include "shader_recompiler/ir/breadth_first_search.h"
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/ir/num_executions.h"
#include "shader_recompiler/ir/opcodes.h"
#include "shader_recompiler/ir/passes/ir_passes.h"
#include "shader_recompiler/ir/passes/srt.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/srt_gvn_table.h"
#include "shader_recompiler/ir/subprogram.h"
#include "shader_recompiler/ir/value.h"
#include "src/common/arch.h"
#include "src/common/decoder.h"
@@ -57,28 +63,23 @@ static void DumpSrtProgram(const Shader::Info& info, const u8* code, size_t code
using namespace Shader;

struct PassInfo {
    // map offset to inst
    using PtrUserList = boost::container::flat_map<u32, Shader::IR::Inst*>;
    struct ReadConstData {
        u32 offset_dw;
        u32 count_dw;
        IR::Inst* unique_inst;
        IR::Inst* original_inst;
    };

    Optimization::SrtGvnTable gvn_table;
    // keys are GetUserData or ReadConst instructions that are used as pointers
    std::unordered_map<IR::Inst*, PtrUserList> pointer_uses;
    // GetUserData instructions corresponding to sgpr_base of SRT roots
    boost::container::small_flat_map<IR::ScalarReg, IR::Inst*, 1> srt_roots;

    // pick a single inst for a given value number
    std::unordered_map<u32, IR::Inst*> vn_to_inst;
    // map of all readconsts to their subprogram insts
    boost::container::small_flat_map<IR::Inst*, IR::Inst*, 32> all_readconsts;
    // subprogram insts mapped to their readconst data
    boost::container::small_flat_map<IR::Inst*, ReadConstData, 32> readconst_data;

    // Bumped during codegen to assign offsets to readconsts
    u32 dst_off_dw;

    PtrUserList* GetUsesAsPointer(IR::Inst* inst) {
        auto it = pointer_uses.find(inst);
        if (it != pointer_uses.end()) {
            return &it->second;
        }
        return nullptr;
    }
    // Incremented during SRT program generation
    u32 dst_off_dw = 0;

    // Return a single instruction that this instruction is identical to, according
    // to value number
@@ -105,39 +106,84 @@ static inline void PopPtr(Xbyak::CodeGenerator& c) {
    c.pop(rdi);
};

static void VisitPointer(u32 off_dw, IR::Inst* subtree, PassInfo& pass_info,
                         Xbyak::CodeGenerator& c) {
    PushPtr(c, off_dw);
    PassInfo::PtrUserList* use_list = pass_info.GetUsesAsPointer(subtree);
    ASSERT(use_list);

    // First copy all the src data from this tree level
    // That way, all data that was contiguous in the guest SRT is also contiguous in the
    // flattened buffer.
    // TODO src and dst are contiguous. Optimize with wider loads/stores
    // TODO if this subtree is dynamically indexed, don't compact it (keep it sparse)
    for (auto [src_off_dw, use] : *use_list) {
        c.mov(r10d, ptr[rdi + (src_off_dw << 2)]);
        c.mov(ptr[rsi + (pass_info.dst_off_dw << 2)], r10d);

        use->SetFlags<u32>(pass_info.dst_off_dw);
        pass_info.dst_off_dw++;
static IR::U32 WrapInstWithCounter(IR::Inst* inst, u32 initial_value, IR::Block* first_block) {
    const IR::Block::ConditionalData* loop_data = &inst->GetParent()->CondData();
    while (loop_data != nullptr &&
           loop_data->asl_node->type != IR::AbstractSyntaxNode::Type::Loop) {
        loop_data = loop_data->parent;
    }

    // Then visit any children used as pointers
    for (const auto [src_off_dw, use] : *use_list) {
        if (pass_info.GetUsesAsPointer(use)) {
            VisitPointer(src_off_dw, use, pass_info, c);
        }
    }

    PopPtr(c);
    ASSERT(loop_data != nullptr);
    IR::Block* loop_body = loop_data->asl_node->data.loop.body;
    // We are putting the Phi node in the loop header so that the counter is
    // incremented each time the loop is executed. We point the Phi node to the
    // first block so that the counter is not reset each time the loop is
    // executed (nested loops).
    IR::IREmitter ir_inst(*inst->GetParent(), ++IR::Block::InstructionList::s_iterator_to(*inst));
    IR::IREmitter ir_loop_header(*loop_body->ImmPredecessors().front());
    IR::Inst* phi = ir_loop_header.Phi(IR::Type::U32).Inst();
    IR::U32 inc = ir_inst.IAdd(IR::U32(phi), ir_inst.Imm32(1));
    phi->AddPhiOperand(first_block, ir_loop_header.Imm32(initial_value));
    phi->AddPhiOperand(inst->GetParent(), inc);
    return IR::U32(phi);
}

static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
    Xbyak::CodeGenerator& c = g_srt_codegen;
static IR::Program GenerateSrtReadConstsSubProgram(IR::Program& program, PassInfo& pass_info,
                                                   Pools& pools) {
    IR::SubProgram sub_gen(&program, pools);
    for (auto& [inst, sub_inst] : pass_info.all_readconsts) {
        sub_inst = sub_gen.AddInst(inst);
        pass_info.readconst_data[sub_inst] = {0, 0, pass_info.DeduplicateInstruction(sub_inst),
                                              inst};
    }
    IR::Program sub_program = sub_gen.GetSubProgram();
    IR::Block* original_first_block = program.blocks.front();
    IR::Block* sub_first_block = sub_program.blocks.front();
    for (auto& [inst, data] : pass_info.readconst_data) {
        if (inst != data.unique_inst) {
            PassInfo::ReadConstData& unique_data = pass_info.readconst_data[data.unique_inst];
            data.offset_dw = unique_data.offset_dw;
            // In this context, count_dw is always the same as unique_data.count_dw
            // There are no duplicate instructions in different loops
            data.count_dw = unique_data.count_dw;
        } else {
            u32 count = static_cast<u32>(IR::GetNumExecutions(inst));
            ASSERT_MSG(count > 0, "Dynamic loop range not supported yet");
            data.count_dw = count;
            data.offset_dw = pass_info.dst_off_dw;
            pass_info.dst_off_dw += count;
            IR::U32 save_offset;
            if (data.count_dw > 1) {
                save_offset = WrapInstWithCounter(inst, data.offset_dw, sub_first_block);
            } else {
                IR::IREmitter ir(*inst);
                save_offset = ir.Imm32(data.offset_dw);
            }
            IR::IREmitter ir(*inst->GetParent(),
                             ++IR::Block::InstructionList::s_iterator_to(*inst));
            ir.SetUserData(save_offset, IR::U32(inst));
        }
        data.original_inst->SetFlags<u32>(1);
        IR::IREmitter ir(*data.original_inst);
        data.original_inst->SetArg(0, ir.Imm32(0));
        if (data.count_dw > 1) {
            IR::U32 counter =
                WrapInstWithCounter(data.original_inst, data.offset_dw, original_first_block);
            data.original_inst->SetArg(1, counter);
        } else {
            data.original_inst->SetArg(1, ir.Imm32(data.offset_dw));
        }
    }
    DeadCodeEliminationPass(sub_program);
    IR::DumpProgram(sub_program, sub_program.info, "srt");
    return sub_program;
}

    if (info.srt_info.srt_reservations.empty() && pass_info.srt_roots.empty()) {
static void GenerateSrtProgram(IR::Program& program, PassInfo& pass_info, Pools& pools) {
#ifdef ARCH_X86_64
    Xbyak::CodeGenerator& c = g_srt_codegen;
    Shader::Info& info = program.info;

    if (info.srt_info.srt_reservations.empty() && pass_info.all_readconsts.empty()) {
        return;
    }
@ -167,10 +213,13 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
|
|||
|
||||
ASSERT(pass_info.dst_off_dw == info.srt_info.flattened_bufsize_dw);
|
||||
|
||||
for (const auto& [sgpr_base, root] : pass_info.srt_roots) {
|
||||
VisitPointer(static_cast<u32>(sgpr_base), root, pass_info, c);
|
||||
if (!pass_info.all_readconsts.empty()) {
|
||||
IR::Program sub_program = GenerateSrtReadConstsSubProgram(program, pass_info, pools);
|
||||
Backend::X64::EmitX64(sub_program, c);
|
||||
}
|
||||
|
||||
info.srt_info.flattened_bufsize_dw = pass_info.dst_off_dw;
|
||||
|
||||
c.ret();
|
||||
c.ready();
|
||||
|
||||
|
@ -178,75 +227,30 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
|
|||
size_t codesize = c.getCurr() - reinterpret_cast<const u8*>(info.srt_info.walker_func);
|
||||
DumpSrtProgram(info, reinterpret_cast<const u8*>(info.srt_info.walker_func), codesize);
|
||||
}
|
||||
|
||||
info.srt_info.flattened_bufsize_dw = pass_info.dst_off_dw;
|
||||
#elif
|
||||
if (info.srt_info.srt_reservations.empty() && pass_info.all_readconsts.empty()) {
|
||||
UNREACHABLE_MSG("SRT program generation only supported on x86_64");
|
||||
}
|
||||
#endif
|
||||
}
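Conceptually, the code emitted into g_srt_codegen behaves like the following hand-written walker. This is a sketch under assumed semantics, not the generated x86; FlatCopy and WalkSrt are hypothetical names.

    #include <cstdint>
    #include <vector>

    // Hypothetical record mirroring the pass bookkeeping: where a constant's
    // dwords come from, and where they land in the flat buffer.
    struct FlatCopy {
        const uint32_t* src;
        uint32_t offset_dw;
        uint32_t count_dw;
    };

    // What the generated walker conceptually does before the shader runs.
    void WalkSrt(const std::vector<FlatCopy>& copies, uint32_t* flat_dst) {
        for (const FlatCopy& c : copies) {
            for (uint32_t i = 0; i < c.count_dw; ++i) {
                flat_dst[c.offset_dw + i] = c.src[i];
            }
        }
    }

    int main() {
        const uint32_t constants[2] = {7, 9};
        uint32_t flat[4] = {};
        WalkSrt({{constants, 0, 2}}, flat);
        return flat[1] == 9 ? 0 : 1;
    }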

} // namespace

void FlattenExtendedUserdataPass(IR::Program& program) {
void FlattenExtendedUserdataPass(IR::Program& program, Pools& pools) {
    Shader::Info& info = program.info;
    PassInfo pass_info;

    // Traverse at the end and assign offsets to duplicate readconsts, using
    // vn_to_inst as the source.
    boost::container::small_vector<IR::Inst*, 32> all_readconsts;

    for (auto r_it = program.post_order_blocks.rbegin(); r_it != program.post_order_blocks.rend();
         r_it++) {
        IR::Block* block = *r_it;
        for (IR::Inst& inst : *block) {
    for (auto it = program.post_order_blocks.rbegin(); it != program.post_order_blocks.rend();
         ++it) {
        IR::Block* block = *it;
        for (auto& inst : block->Instructions()) {
            if (inst.GetOpcode() == IR::Opcode::ReadConst) {
                if (!inst.Arg(1).IsImmediate()) {
                    LOG_WARNING(Render_Recompiler, "ReadConst has non-immediate offset");
                    continue;
                }

                all_readconsts.push_back(&inst);
                if (pass_info.DeduplicateInstruction(&inst) != &inst) {
                    // This is a duplicate of a readconst we've already visited
                    continue;
                }

                IR::Inst* ptr_composite = inst.Arg(0).InstRecursive();

                const auto pred = [](IR::Inst* inst) -> std::optional<IR::Inst*> {
                    if (inst->GetOpcode() == IR::Opcode::GetUserData ||
                        inst->GetOpcode() == IR::Opcode::ReadConst) {
                        return inst;
                    }
                    return std::nullopt;
                };
                auto base0 = IR::BreadthFirstSearch(ptr_composite->Arg(0), pred);
                auto base1 = IR::BreadthFirstSearch(ptr_composite->Arg(1), pred);
                ASSERT_MSG(base0 && base1, "ReadConst not from constant memory");

                IR::Inst* ptr_lo = base0.value();
                ptr_lo = pass_info.DeduplicateInstruction(ptr_lo);

                auto ptr_uses_kv =
                    pass_info.pointer_uses.try_emplace(ptr_lo, PassInfo::PtrUserList{});
                PassInfo::PtrUserList& user_list = ptr_uses_kv.first->second;

                user_list[inst.Arg(1).U32()] = &inst;

                if (ptr_lo->GetOpcode() == IR::Opcode::GetUserData) {
                    IR::ScalarReg ud_reg = ptr_lo->Arg(0).ScalarReg();
                    pass_info.srt_roots[ud_reg] = ptr_lo;
                }
                pass_info.all_readconsts[&inst] = nullptr;
            }
        }
    }

    GenerateSrtProgram(info, pass_info);

    // Assign offsets to duplicate readconsts
    for (IR::Inst* readconst : all_readconsts) {
        ASSERT(pass_info.vn_to_inst.contains(pass_info.gvn_table.GetValueNumber(readconst)));
        IR::Inst* original = pass_info.DeduplicateInstruction(readconst);
        readconst->SetFlags<u32>(original->Flags<u32>());
    }

    GenerateSrtProgram(program, pass_info, pools);
    info.RefreshFlatBuf();
}
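IR::BreadthFirstSearch above walks operand chains outward until a predicate accepts an instruction. A self-contained model of that shape, where Node stands in for IR::Inst; the real search also tracks visited instructions, which this DAG-only sketch omits:

    #include <cstdio>
    #include <deque>
    #include <optional>
    #include <vector>

    struct Node {
        int id;
        bool is_base; // stand-in for "GetUserData or ReadConst"
        std::vector<Node*> operands;
    };

    template <typename Pred>
    std::optional<Node*> BreadthFirstSearch(Node* start, Pred pred) {
        std::deque<Node*> queue{start};
        while (!queue.empty()) {
            Node* n = queue.front();
            queue.pop_front();
            if (auto hit = pred(n)) {
                return hit; // first node the predicate accepts
            }
            for (Node* op : n->operands) {
                queue.push_back(op);
            }
        }
        return std::nullopt;
    }

    int main() {
        Node base{0, true, {}};
        Node mid{1, false, {&base}};
        Node top{2, false, {&mid}};
        auto hit = BreadthFirstSearch(&top, [](Node* n) -> std::optional<Node*> {
            return n->is_base ? std::optional<Node*>{n} : std::nullopt;
        });
        std::printf("found base node %d\n", hit.value()->id);
        return 0;
    }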

@@ -5,6 +5,7 @@

#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/pools.h"

namespace Shader {
struct Profile;

@@ -16,7 +17,7 @@ void SsaRewritePass(IR::BlockList& program);
void IdentityRemovalPass(IR::BlockList& program);
void DeadCodeEliminationPass(IR::Program& program);
void ConstantPropagationPass(IR::BlockList& program);
void FlattenExtendedUserdataPass(IR::Program& program);
void FlattenExtendedUserdataPass(IR::Program& program, Pools& pools);
void ReadLaneEliminationPass(IR::Program& program);
void ResourceTrackingPass(IR::Program& program);
void CollectShaderInfoPass(IR::Program& program);

@@ -247,17 +247,22 @@ SharpLocation AttemptTrackSharp(const IR::Inst* inst, auto& visited_insts) {
        }
        return std::nullopt;
    };
    // The value may be modified between the ReadConst/GetUserData and inst;
    // we don't take that into account.
    const auto result = IR::BreadthFirstSearch(inst, pred);
    ASSERT_MSG(result, "Unable to track sharp source");
    inst = result.value();
    visited_insts.emplace_back(inst);
    if (inst->GetOpcode() == IR::Opcode::GetUserData) {
        return static_cast<u32>(inst->Arg(0).ScalarReg());
    } else {
        ASSERT_MSG(inst->GetOpcode() == IR::Opcode::ReadConst,
                   "Sharp load not from constant memory");
        return inst->Flags<u32>();
    } else if (inst->GetOpcode() == IR::Opcode::ReadConst) {
        // The sharp is stored in the offset argument. The value is not immediate if
        // the ReadConst sits inside a loop and the base or offset differs between
        // iterations; we don't support that.
        ASSERT(inst->Arg(1).IsImmediate());
        return inst->Arg(1).U32();
    }
    UNREACHABLE_MSG("Sharp load not from constant memory or user data");
}

/// Tracks a sharp with validation of the chosen data type.

@@ -9,7 +9,7 @@

namespace Shader {

using PFN_SrtWalker = void PS4_SYSV_ABI (*)(const u32* /*user_data*/, u32* /*flat_dst*/);
using PFN_SrtWalker = void PS4_SYSV_ABI (*)(u32* /*flat_dst*/);

struct PersistentSrtInfo {
    // Special case when fetch shader uses step rates.

@@ -6,13 +6,30 @@

#include <fmt/format.h>

#include "common/config.h"
#include "common/io_file.h"
#include "common/path_util.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/ir/value.h"

namespace Shader::IR {

std::string DumpProgram(const Program& program) {
void DumpProgram(const Program& program, const Info& info, const std::string& type) {
    using namespace Common::FS;

    if (!Config::dumpShaders()) {
        return;
    }

    const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
    if (!std::filesystem::exists(dump_dir)) {
        std::filesystem::create_directories(dump_dir);
    }
    const auto ir_filename =
        fmt::format("{}_{:#018x}.{}irprogram.txt", info.stage, info.pgm_hash, type);
    const auto ir_file = IOFile{dump_dir / ir_filename, FileAccessMode::Write, FileType::TextFile};

    size_t index{0};
    std::map<const IR::Inst*, size_t> inst_to_index;
    std::map<const IR::Block*, size_t> block_to_index;

@@ -21,11 +38,20 @@ std::string DumpProgram(const Program& program) {
        block_to_index.emplace(block, index);
        ++index;
    }
    std::string ret;

    for (const auto& block : program.blocks) {
        ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n';
        std::string s = IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n';
        ir_file.WriteString(s);
    }

    const auto asl_filename = fmt::format("{}_{:#018x}.{}asl.txt", info.stage, info.pgm_hash, type);
    const auto asl_file =
        IOFile{dump_dir / asl_filename, FileAccessMode::Write, FileType::TextFile};

    for (const auto& node : program.syntax_list) {
        std::string s = IR::DumpASLNode(node, block_to_index, inst_to_index) + '\n';
        asl_file.WriteString(s);
    }
    return ret;
}

} // namespace Shader::IR
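As a worked example of the naming scheme: assuming a pixel-shader stage that formats as fs and a program hash of 0x12345678, the pass-level dump above (type == "srt") would write fs_0x0000000012345678.srtirprogram.txt and fs_0x0000000012345678.srtasl.txt, while the final dump from TranslateProgram uses the default empty type and would write fs_0x0000000012345678.irprogram.txt. The stage spelling is an assumption here; it depends on the fmt formatter for info.stage.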

@@ -21,6 +21,6 @@ struct Program {
    Info& info;
};

[[nodiscard]] std::string DumpProgram(const Program& program);
void DumpProgram(const Program& program, const Info& info, const std::string& type = "");

} // namespace Shader::IR

@@ -51,20 +51,6 @@ private:
        u32 vn;

        switch (inst->GetOpcode()) {
        case IR::Opcode::Phi: {
            const auto pred = [](IR::Inst* inst) -> std::optional<IR::Inst*> {
                if (inst->GetOpcode() == IR::Opcode::GetUserData ||
                    inst->GetOpcode() == IR::Opcode::CompositeConstructU32x2 ||
                    inst->GetOpcode() == IR::Opcode::ReadConst) {
                    return inst;
                }
                return std::nullopt;
            };
            IR::Inst* source = IR::BreadthFirstSearch(inst, pred).value();
            vn = GetValueNumber(source);
            value_numbers[IR::Value(inst)] = vn;
            break;
        }
        case IR::Opcode::GetUserData:
        case IR::Opcode::CompositeConstructU32x2:
        case IR::Opcode::ReadConst: {

300
src/shader_recompiler/ir/subprogram.cpp
Normal file

@@ -0,0 +1,300 @@

// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include <algorithm>
#include <boost/container/flat_set.hpp>
#include "shader_recompiler/ir/conditional_tree.h"
#include "shader_recompiler/ir/post_order.h"
#include "shader_recompiler/ir/subprogram.h"

namespace Shader::IR {

SubProgram::SubProgram(Program* super_program, Pools& pools)
    : super_program(super_program), pools(pools) {}

Block* SubProgram::AddBlock(Block* orig_block) {
    auto it = orig_block_to_block.find(orig_block);
    if (it != orig_block_to_block.end()) {
        return it->second;
    }
    auto block = pools.block_pool.Create(pools.inst_pool);
    orig_block_to_block[orig_block] = block;
    return block;
}

Inst* SubProgram::AddInst(Inst* orig_inst) {
    return AddInst(orig_inst, std::nullopt);
}

Block* SubProgram::GetBlock(Block* orig_block) {
    auto it = orig_block_to_block.find(orig_block);
    if (it != orig_block_to_block.end()) {
        return it->second;
    }
    return nullptr;
}

Inst* SubProgram::GetInst(Inst* orig_inst) {
    auto it = orig_inst_to_inst.find(orig_inst);
    if (it != orig_inst_to_inst.end()) {
        return it->second;
    }
    return nullptr;
}

Program SubProgram::GetSubProgram() {
    ASSERT_MSG(!completed, "SubProgram already completed");
    completed = true;
    Program sub_program(super_program->info);
    BuildBlockListAndASL(sub_program);
    AddPrologueAndEpilogue(sub_program);
    sub_program.post_order_blocks = PostOrder(sub_program.syntax_list.front());
    AddConditionalTreeFromASL(sub_program.syntax_list);
    for (Block* block : sub_program.blocks) {
        block->SsaSeal();
    }
    return sub_program;
}

void SubProgram::AddPrologueAndEpilogue(Program& sub_program) {
    // We may need to handle this better.
    Block* epilogue_block = pools.block_pool.Create(pools.inst_pool);
    Block* front_block = sub_program.blocks.front();
    sub_program.blocks.back()->AddBranch(epilogue_block);
    sub_program.blocks.push_back(epilogue_block);
    sub_program.syntax_list.push_back(AbstractSyntaxNode{.data = {.block = epilogue_block},
                                                         .type = AbstractSyntaxNode::Type::Block});
    sub_program.syntax_list.push_back(AbstractSyntaxNode{.type = AbstractSyntaxNode::Type::Return});
    epilogue_block->AppendNewInst(Opcode::Epilogue, {});
    front_block->PrependNewInst(front_block->begin(), Opcode::Prologue);
    epilogue_block->SsaSeal();
}

Inst* SubProgram::AddInst(Inst* orig_inst,
                          std::optional<Block::InstructionList::iterator> insertion_point) {
    auto it = orig_inst_to_inst.find(orig_inst);
    if (it != orig_inst_to_inst.end()) {
        return it->second;
    }
    Block* block = AddBlock(orig_inst->GetParent());
    if (!insertion_point) {
        if (block->back().GetOpcode() == Opcode::ConditionRef) {
            insertion_point = --block->end();
        } else {
            insertion_point = block->end();
        }
    }
    Inst* inst = &(
        *block->PrependNewInst(*insertion_point, orig_inst->GetOpcode(), orig_inst->Flags<u32>()));
    orig_inst_to_inst[orig_inst] = inst;
    if (orig_inst->GetOpcode() == Opcode::Phi) {
        AddPhi(orig_inst, inst);
    } else {
        for (size_t i = 0; i < orig_inst->NumArgs(); ++i) {
            SetArg(inst, orig_inst, i);
        }
    }
    return inst;
}

void SubProgram::AddPhi(Inst* orig_phi, Inst* phi) {
    // Current IR only has Phis with 2 arguments.
    ASSERT(orig_phi->NumArgs() == 2);
    Block* orig_block0 = orig_phi->PhiBlock(0);
    Block* orig_block1 = orig_phi->PhiBlock(1);
    Block* block0 = AddBlock(orig_block0);
    Block* block1 = AddBlock(orig_block1);
    const Value& arg0 = orig_phi->Arg(0);
    const Value& arg1 = orig_phi->Arg(1);
    AddPhiOperand(phi, block0, arg0);
    AddPhiOperand(phi, block1, arg1);
    const auto get_conds =
        [orig_block0,
         orig_block1]() -> std::pair<const Block::ConditionalData&, const Block::ConditionalData&> {
        const Block::ConditionalData& cond0 = orig_block0->CondData();
        const Block::ConditionalData& cond1 = orig_block1->CondData();
        if (cond0.depth > cond1.depth) {
            return {cond0, cond1};
        }
        return {cond1, cond0};
    };
    const auto is_negated_cond = [](Inst* ref1, Inst* ref2) {
        IR::Value cond1 = ref1->Arg(0);
        IR::Value cond2 = ref2->Arg(0);
        if (cond1.IsImmediate() || cond2.IsImmediate()) {
            if (!cond1.IsImmediate() || !cond2.IsImmediate()) {
                return false;
            }
            return cond1.U1() != cond2.U1();
        }
        Inst* cond1_inst = cond1.InstRecursive();
        Inst* cond2_inst = cond2.InstRecursive();
        if (cond1_inst->GetOpcode() == Opcode::LogicalNot) {
            return cond1_inst->Arg(0) == cond2;
        }
        if (cond2_inst->GetOpcode() == Opcode::LogicalNot) {
            return cond2_inst->Arg(0) == cond1;
        }
        return false;
    };
    const auto& [start_cond, target_cond] = get_conds();
    const Block::ConditionalData* cond = &start_cond;
    while (cond->depth > target_cond.depth) {
        if (cond->asl_node->type == AbstractSyntaxNode::Type::If) {
            Inst* cond_ref_inst = cond->asl_node->data.if_node.cond.InstRecursive();
            AddInst(cond_ref_inst);
            // Check if the condition has an else branch, and add it.
            Block* merge_block = cond->asl_node->data.if_node.merge;
            Inst* else_cond_ref_inst = &merge_block->back();
            if (else_cond_ref_inst->GetOpcode() == Opcode::ConditionRef) {
                // Check if one condition is the negation of the other.
                if (is_negated_cond(cond_ref_inst, else_cond_ref_inst)) {
                    AddInst(else_cond_ref_inst);
                }
            }
        } else if (cond->asl_node->type == AbstractSyntaxNode::Type::Loop) {
            // In the case of a loop, we need to add the loop itself and also
            // its break conditions.
            Block* loop_merge = cond->asl_node->data.loop.merge;
            for (Block* pred : loop_merge->ImmPredecessors()) {
                if (pred->CondData().asl_node == cond->asl_node) {
                    ASSERT(pred->back().GetOpcode() == IR::Opcode::ConditionRef);
                    AddInst(&pred->back());
                }
            }
        }
        if (orig_phi->GetParent()->CondData().asl_node == cond->asl_node) {
            break;
        }
        cond = cond->parent;
    }
}

void SubProgram::SetArg(Inst* inst, Inst* orig_inst, size_t index) {
    const Value& arg = orig_inst->Arg(index);
    if (arg.IsImmediate()) {
        inst->SetArg(index, arg);
    } else {
        Inst* arg_inst = arg.InstRecursive();
        if (orig_inst->GetParent() == arg_inst->GetParent()) {
            inst->SetArg(index,
                         Value(AddInst(arg_inst, Block::InstructionList::s_iterator_to(*inst))));
        } else {
            inst->SetArg(index, Value(AddInst(arg_inst, std::nullopt)));
        }
    }
}

void SubProgram::AddPhiOperand(Inst* phi, Block* block, const Value& arg) {
    if (arg.IsImmediate()) {
        phi->AddPhiOperand(block, arg);
    } else {
        phi->AddPhiOperand(block, Value(AddInst(arg.InstRecursive())));
    }
}

void SubProgram::BuildBlockListAndASL(Program& sub_program) {
    boost::container::flat_set<Block*> filter_blocks;
    for (const AbstractSyntaxNode& orig_asl_node : super_program->syntax_list) {
        AbstractSyntaxNode asl_node;
        asl_node.type = orig_asl_node.type;
        Block* orig_block = orig_asl_node.data.block;
        switch (orig_asl_node.type) {
        case AbstractSyntaxNode::Type::Block: {
            Block* block = GetBlock(orig_block);
            if (!block) {
                continue;
            }
            if (!sub_program.syntax_list.empty()) {
                Block* last_block = sub_program.blocks.back();
                if (!last_block->HasImmSuccessor(block)) {
                    last_block->AddBranch(block);
                }
            }
            asl_node.data.block = block;
            sub_program.blocks.push_back(block);
            break;
        }
        case AbstractSyntaxNode::Type::If: {
            Inst* cond = GetInst(orig_asl_node.data.if_node.cond.InstRecursive());
            if (!cond) {
                continue;
            }
            Block* block = cond->GetParent();
            Block* merge_block = AddBlock(orig_asl_node.data.if_node.merge);
            Block* body_block = AddBlock(orig_asl_node.data.if_node.body);
            asl_node.data.if_node.cond = U1(cond);
            asl_node.data.if_node.body = body_block;
            asl_node.data.if_node.merge = merge_block;
            block->AddBranch(body_block);
            block->AddBranch(merge_block);
            filter_blocks.insert(merge_block);
            break;
        }
        case AbstractSyntaxNode::Type::EndIf: {
            Block* merge_block = GetBlock(orig_asl_node.data.end_if.merge);
            if (!filter_blocks.contains(merge_block)) {
                continue;
            }
            asl_node.data.end_if.merge = merge_block;
            break;
        }
        case AbstractSyntaxNode::Type::Loop: {
            Block* continue_block = GetBlock(orig_asl_node.data.loop.continue_block);
            if (!continue_block) {
                continue;
            }
            if (continue_block->back().GetOpcode() != Opcode::ConditionRef) {
                continue;
            }
            Block* merge_block = AddBlock(orig_asl_node.data.loop.merge);
            asl_node.data.loop.body = AddBlock(orig_asl_node.data.loop.body);
            asl_node.data.loop.continue_block = continue_block;
            asl_node.data.loop.merge = merge_block;
            filter_blocks.insert(merge_block);
            break;
        }
        case AbstractSyntaxNode::Type::Repeat: {
            Inst* cond = GetInst(orig_asl_node.data.repeat.cond.InstRecursive());
            if (!cond) {
                continue;
            }
            Block* block = cond->GetParent();
            Block* merge_block = AddBlock(orig_asl_node.data.repeat.merge);
            Block* loop_header_block = AddBlock(orig_asl_node.data.repeat.loop_header);
            asl_node.data.repeat.cond = U1(cond);
            asl_node.data.repeat.loop_header = loop_header_block;
            asl_node.data.repeat.merge = merge_block;
            block->AddBranch(loop_header_block);
            block->AddBranch(merge_block);
            break;
        }
        case AbstractSyntaxNode::Type::Break: {
            Inst* cond = GetInst(orig_asl_node.data.break_node.cond.InstRecursive());
            if (!cond) {
                continue;
            }
            Block* block = cond->GetParent();
            Block* merge_block = AddBlock(orig_asl_node.data.break_node.merge);
            Block* skip_block = AddBlock(orig_asl_node.data.break_node.skip);
            asl_node.data.break_node.cond = U1(&block->back());
            asl_node.data.break_node.merge = merge_block;
            asl_node.data.break_node.skip = skip_block;
            block->AddBranch(skip_block);
            block->AddBranch(merge_block);
            break;
        }
        case AbstractSyntaxNode::Type::Unreachable:
        case AbstractSyntaxNode::Type::Return:
            continue;
        default:
            break;
        }
        sub_program.syntax_list.push_back(asl_node);
    }
    for (Block* block : sub_program.blocks) {
        block->has_multiple_predecessors = block->ImmPredecessors().size() > 1;
    }
}

} // namespace Shader::IR

46
src/shader_recompiler/ir/subprogram.h
Normal file

@@ -0,0 +1,46 @@

// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <optional>
#include <boost/container/flat_map.hpp>
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/pools.h"

namespace Shader::IR {

// Given an IR program, this class creates a subprogram that contains only the
// blocks and instructions that are relevant to a given group of instructions:
// the instructions themselves, the instructions they use, and the conditions
// under which they execute.

struct SubProgram {
    SubProgram(Program* super_program, Pools& pools);

    Block* AddBlock(Block* orig_block);
    Inst* AddInst(Inst* orig_inst);

    Block* GetBlock(Block* orig_block);
    Inst* GetInst(Inst* orig_inst);

    Program GetSubProgram();

private:
    Inst* AddInst(Inst* orig_inst, std::optional<Block::InstructionList::iterator> insertion_point);
    void AddPhi(Inst* orig_phi, Inst* phi);

    void SetArg(Inst* inst, Inst* orig_inst, size_t index);
    void AddPhiOperand(Inst* phi, Block* block, const Value& arg);

    void BuildBlockListAndASL(Program& sub_program);
    void AddPrologueAndEpilogue(Program& sub_program);

    bool completed = false;
    Program* super_program;
    Pools& pools;
    boost::container::flat_map<Block*, Block*> orig_block_to_block;
    boost::container::flat_map<Inst*, Inst*> orig_inst_to_inst;
};

} // namespace Shader::IR
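Stripped of the IR classes, the core idea is transitive-dependency slicing. A minimal, self-contained model of that idea; the real SubProgram additionally pulls in control-flow conditions and rebuilds the abstract syntax list:

    #include <cstdio>
    #include <map>
    #include <vector>

    using Graph = std::map<int, std::vector<int>>; // node -> operands

    // Keep exactly the nodes a root transitively uses (the "slice").
    void Collect(const Graph& g, int node, std::map<int, bool>& kept) {
        if (kept[node]) {
            return;
        }
        kept[node] = true;
        for (int dep : g.at(node)) {
            Collect(g, dep, kept);
        }
    }

    int main() {
        // 4 depends on 2; 2 depends on 1; 3 is unrelated and gets filtered out.
        Graph g{{1, {}}, {2, {1}}, {3, {1}}, {4, {2}}};
        std::map<int, bool> kept;
        Collect(g, 4, kept); // root: the instruction of interest
        for (const auto& [node, in] : kept) {
            if (in) {
                std::printf("kept node %d\n", node);
            }
        }
        return 0;
    }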

26
src/shader_recompiler/pools.h
Normal file

@@ -0,0 +1,26 @@

// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include "common/object_pool.h"
#include "shader_recompiler/ir/basic_block.h"

namespace Shader {

struct Pools {
    static constexpr u32 InstPoolSize = 8192;
    static constexpr u32 BlockPoolSize = 32;

    Common::ObjectPool<IR::Inst> inst_pool;
    Common::ObjectPool<IR::Block> block_pool;

    explicit Pools() : inst_pool{InstPoolSize}, block_pool{BlockPoolSize} {}

    void ReleaseContents() {
        inst_pool.ReleaseContents();
        block_pool.ReleaseContents();
    }
};

} // namespace Shader
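Common::ObjectPool is the codebase's existing allocator; for readers outside it, here is a rough model of the contract Pools relies on. SimplePool is illustrative only, not the real implementation, which avoids per-object heap allocations by carving objects out of chunks:

    #include <cstddef>
    #include <memory>
    #include <vector>

    // Bump-style pool sketch: create objects on demand, drop them all at once.
    template <typename T>
    class SimplePool {
    public:
        explicit SimplePool(size_t chunk_hint) { storage.reserve(chunk_hint); }

        template <typename... Args>
        T* Create(Args&&... args) {
            storage.push_back(std::make_unique<T>(std::forward<Args>(args)...));
            return storage.back().get();
        }

        void ReleaseContents() { storage.clear(); } // destroys every pooled object

    private:
        std::vector<std::unique_ptr<T>> storage;
    };

    int main() {
        SimplePool<int> pool(8);
        int* value = pool.Create(42); // valid until ReleaseContents()
        (void)value;
        pool.ReleaseContents();
        return 0;
    }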

@@ -4,6 +4,7 @@

#include "shader_recompiler/frontend/control_flow_graph.h"
#include "shader_recompiler/frontend/decode.h"
#include "shader_recompiler/frontend/structured_control_flow.h"
#include "shader_recompiler/ir/conditional_tree.h"
#include "shader_recompiler/ir/passes/ir_passes.h"
#include "shader_recompiler/ir/post_order.h"
#include "shader_recompiler/recompiler.h"

@@ -59,6 +60,8 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
    program.blocks = GenerateBlocks(program.syntax_list);
    program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());

    Shader::IR::AddConditionalTreeFromASL(program.syntax_list);

    // Run optimization passes
    Shader::Optimization::SsaRewritePass(program.post_order_blocks);
    Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);

@@ -72,7 +75,7 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
    }
    Shader::Optimization::RingAccessElimination(program, runtime_info);
    Shader::Optimization::ReadLaneEliminationPass(program);
    Shader::Optimization::FlattenExtendedUserdataPass(program);
    Shader::Optimization::FlattenExtendedUserdataPass(program, pools);
    Shader::Optimization::ResourceTrackingPass(program);
    Shader::Optimization::LowerBufferFormatToRaw(program);
    Shader::Optimization::SharedMemoryToStoragePass(program, runtime_info, profile);

@@ -82,6 +85,8 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
    Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
    Shader::Optimization::CollectShaderInfoPass(program);

    Shader::IR::DumpProgram(program, info);

    return program;
}


@@ -3,30 +3,15 @@

#pragma once

#include "common/object_pool.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/pools.h"

namespace Shader {

struct Profile;
struct RuntimeInfo;

struct Pools {
    static constexpr u32 InstPoolSize = 8192;
    static constexpr u32 BlockPoolSize = 32;

    Common::ObjectPool<IR::Inst> inst_pool;
    Common::ObjectPool<IR::Block> block_pool;

    explicit Pools() : inst_pool{InstPoolSize}, block_pool{BlockPoolSize} {}

    void ReleaseContents() {
        inst_pool.ReleaseContents();
        block_pool.ReleaseContents();
    }
};

[[nodiscard]] IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info,
                                           RuntimeInfo& runtime_info, const Profile& profile);
