Reformat all the things. Have fun with merge conflicts.

This commit is contained in:
Pierre Bourdon 2016-06-24 10:43:46 +02:00
commit 3570c7f03a
1116 changed files with 187405 additions and 180344 deletions

View file

@ -16,14 +16,14 @@ extern "C" {
}
#include "Common/FileUtil.h"
#include "Common/MsgHandler.h"
#include "Common/Logging/Log.h"
#include "Common/MsgHandler.h"
#include "Core/ConfigManager.h"
#include "Core/CoreTiming.h"
#include "Core/Movie.h"
#include "Core/HW/SystemTimers.h"
#include "Core/HW/VideoInterface.h" //for TargetRefreshRate
#include "Core/HW/VideoInterface.h" //for TargetRefreshRate
#include "Core/Movie.h"
#include "VideoCommon/AVIDump.h"
#include "VideoCommon/VideoConfig.h"
@ -48,227 +48,228 @@ static u64 s_last_pts;
// One-time libav registration: the first call runs av_register_all(); a
// function-local static flag makes every later call do nothing.
// NOTE(review): the body is listed twice in this diff view (two copies of the
// same statements).
static void InitAVCodec()
{
static bool first_run = true;
if (first_run)
{
av_register_all();
first_run = false;
}
static bool first_run = true;
if (first_run)
{
av_register_all();
first_run = false;
}
}
// Begins a frame dump.
//   w, h   - stored in s_width / s_height (the size frames are scaled to in AddFrame()).
//   format - selects the source pixel format: FORMAT_BGR -> AV_PIX_FMT_BGR24,
//            anything else -> AV_PIX_FMT_RGBA.
// Resets the timing state (s_last_frame = current CoreTiming ticks, s_last_pts = 0),
// runs InitAVCodec() and then CreateFile(). If CreateFile() fails, CloseFile() is
// called so partially created state is released.
// Returns the result of CreateFile().
// NOTE(review): the body statements are listed twice in this diff view.
bool AVIDump::Start(int w, int h, DumpFormat format)
{
if (format == DumpFormat::FORMAT_BGR)
s_pix_fmt = AV_PIX_FMT_BGR24;
else
s_pix_fmt = AV_PIX_FMT_RGBA;
if (format == DumpFormat::FORMAT_BGR)
s_pix_fmt = AV_PIX_FMT_BGR24;
else
s_pix_fmt = AV_PIX_FMT_RGBA;
s_width = w;
s_height = h;
s_width = w;
s_height = h;
s_last_frame = CoreTiming::GetTicks();
s_last_pts = 0;
s_last_frame = CoreTiming::GetTicks();
s_last_pts = 0;
InitAVCodec();
bool success = CreateFile();
if (!success)
CloseFile();
return success;
InitAVCodec();
bool success = CreateFile();
if (!success)
CloseFile();
return success;
}
// Creates the output container and encoder for a dump session.
// Steps, in order:
//   1. Allocate s_format_context and set its filename to
//      <D_DUMPFRAMES_IDX user path> + "framedump0.avi", creating the directory path.
//   2. If that file already exists, delete it when m_DumpFramesSilent is set or the
//      user confirms the prompt; otherwise return false (dump cancelled).
//   3. Select the "avi" output format and create the video stream.
//   4. Fill in the codec parameters (FFV1 + BGRA when g_Config.bUseFFV1, otherwise the
//      container's default video codec with an XVID tag + YUV420P), then find and
//      open the encoder.
//   5. Allocate the source/scaled frames and the buffer backing s_scaled_frame.
//   6. Open the file for writing and write the container header.
// Returns true on success and false at the first failing step. Nothing is released
// here on failure; AVIDump::Start() calls CloseFile() when this returns false.
// NOTE(review): the return value of avformat_write_header() is not checked.
// NOTE(review): the body statements are listed twice in this diff view.
bool AVIDump::CreateFile()
{
AVCodec* codec = nullptr;
AVCodec* codec = nullptr;
s_format_context = avformat_alloc_context();
snprintf(s_format_context->filename, sizeof(s_format_context->filename), "%s",
(File::GetUserPath(D_DUMPFRAMES_IDX) + "framedump0.avi").c_str());
File::CreateFullPath(s_format_context->filename);
s_format_context = avformat_alloc_context();
snprintf(s_format_context->filename, sizeof(s_format_context->filename), "%s",
(File::GetUserPath(D_DUMPFRAMES_IDX) + "framedump0.avi").c_str());
File::CreateFullPath(s_format_context->filename);
// Ask to delete file
if (File::Exists(s_format_context->filename))
{
if (SConfig::GetInstance().m_DumpFramesSilent ||
AskYesNoT("Delete the existing file '%s'?", s_format_context->filename))
{
File::Delete(s_format_context->filename);
}
else
{
// Stop and cancel dumping the video
return false;
}
}
// Ask to delete file
if (File::Exists(s_format_context->filename))
{
if (SConfig::GetInstance().m_DumpFramesSilent ||
AskYesNoT("Delete the existing file '%s'?", s_format_context->filename))
{
File::Delete(s_format_context->filename);
}
else
{
// Stop and cancel dumping the video
return false;
}
}
if (!(s_format_context->oformat = av_guess_format("avi", nullptr, nullptr)) ||
!(s_stream = avformat_new_stream(s_format_context, codec)))
{
return false;
}
if (!(s_format_context->oformat = av_guess_format("avi", nullptr, nullptr)) ||
!(s_stream = avformat_new_stream(s_format_context, codec)))
{
return false;
}
s_stream->codec->codec_id = g_Config.bUseFFV1 ? AV_CODEC_ID_FFV1
: s_format_context->oformat->video_codec;
if (!g_Config.bUseFFV1)
s_stream->codec->codec_tag = MKTAG('X', 'V', 'I', 'D'); // Force XVID FourCC for better compatibility
s_stream->codec->codec_type = AVMEDIA_TYPE_VIDEO;
s_stream->codec->bit_rate = 400000;
s_stream->codec->width = s_width;
s_stream->codec->height = s_height;
s_stream->codec->time_base.num = 1;
s_stream->codec->time_base.den = VideoInterface::GetTargetRefreshRate();
s_stream->codec->gop_size = 12;
s_stream->codec->pix_fmt = g_Config.bUseFFV1 ? AV_PIX_FMT_BGRA : AV_PIX_FMT_YUV420P;
s_stream->codec->codec_id =
g_Config.bUseFFV1 ? AV_CODEC_ID_FFV1 : s_format_context->oformat->video_codec;
if (!g_Config.bUseFFV1)
s_stream->codec->codec_tag =
MKTAG('X', 'V', 'I', 'D'); // Force XVID FourCC for better compatibility
s_stream->codec->codec_type = AVMEDIA_TYPE_VIDEO;
s_stream->codec->bit_rate = 400000;
s_stream->codec->width = s_width;
s_stream->codec->height = s_height;
s_stream->codec->time_base.num = 1;
s_stream->codec->time_base.den = VideoInterface::GetTargetRefreshRate();
s_stream->codec->gop_size = 12;
s_stream->codec->pix_fmt = g_Config.bUseFFV1 ? AV_PIX_FMT_BGRA : AV_PIX_FMT_YUV420P;
if (!(codec = avcodec_find_encoder(s_stream->codec->codec_id)) ||
(avcodec_open2(s_stream->codec, codec, nullptr) < 0))
{
return false;
}
if (!(codec = avcodec_find_encoder(s_stream->codec->codec_id)) ||
(avcodec_open2(s_stream->codec, codec, nullptr) < 0))
{
return false;
}
s_src_frame = av_frame_alloc();
s_scaled_frame = av_frame_alloc();
s_src_frame = av_frame_alloc();
s_scaled_frame = av_frame_alloc();
s_size = avpicture_get_size(s_stream->codec->pix_fmt, s_width, s_height);
s_size = avpicture_get_size(s_stream->codec->pix_fmt, s_width, s_height);
s_yuv_buffer = new uint8_t[s_size];
avpicture_fill((AVPicture*)s_scaled_frame, s_yuv_buffer, s_stream->codec->pix_fmt, s_width, s_height);
s_yuv_buffer = new uint8_t[s_size];
avpicture_fill((AVPicture*)s_scaled_frame, s_yuv_buffer, s_stream->codec->pix_fmt, s_width,
s_height);
NOTICE_LOG(VIDEO, "Opening file %s for dumping", s_format_context->filename);
if (avio_open(&s_format_context->pb, s_format_context->filename, AVIO_FLAG_WRITE) < 0)
{
WARN_LOG(VIDEO, "Could not open %s", s_format_context->filename);
return false;
}
NOTICE_LOG(VIDEO, "Opening file %s for dumping", s_format_context->filename);
if (avio_open(&s_format_context->pb, s_format_context->filename, AVIO_FLAG_WRITE) < 0)
{
WARN_LOG(VIDEO, "Could not open %s", s_format_context->filename);
return false;
}
avformat_write_header(s_format_context, nullptr);
avformat_write_header(s_format_context, nullptr);
return true;
return true;
}
// Resets a packet before it is handed to the encoder: av_init_packet() followed by
// clearing the data pointer and size.
// NOTE(review): the body is listed twice in this diff view.
static void PreparePacket(AVPacket* pkt)
{
av_init_packet(pkt);
pkt->data = nullptr;
pkt->size = 0;
av_init_packet(pkt);
pkt->data = nullptr;
pkt->size = 0;
}
// Converts one captured frame and feeds it to the encoder.
//   data          - source pixels, interpreted in s_pix_fmt (chosen in Start()).
//   width, height - dimensions of the source image.
// The source is wrapped in s_src_frame, then converted/scaled with swscale to the
// stream's pixel format at s_width x s_height into s_scaled_frame.
//
// Timestamps: s_last_pts is kept in emulator ticks. The frame pts is
//   (s_last_pts + delta) * time_base.den / SystemTimers::GetTicksPerSecond()
// where delta is the ticks elapsed since s_last_frame. The frame is encoded only when
// that pts differs from the pts derived from s_last_pts alone; otherwise it is dropped.
// For the very first dumped frame while Movie::g_currentFrame < 1, delta is the
// absolute tick count and last_pts is AV_NOPTS_VALUE.
//
// Every packet the encoder returns has its pts/dts rescaled to the stream time base
// and is written with av_interleaved_write_frame(); the encoder is then polled with a
// null frame for delayed packets until none remain or an error occurs. A non-zero
// encoder error is logged.
// NOTE(review): the body statements are listed twice in this diff view.
void AVIDump::AddFrame(const u8* data, int width, int height)
{
avpicture_fill((AVPicture*)s_src_frame, const_cast<u8*>(data), s_pix_fmt, width, height);
avpicture_fill((AVPicture*)s_src_frame, const_cast<u8*>(data), s_pix_fmt, width, height);
// Convert image from {BGR24, RGBA} to desired pixel format, and scale to initial
// width and height
if ((s_sws_context = sws_getCachedContext(s_sws_context,
width, height, s_pix_fmt,
s_width, s_height, s_stream->codec->pix_fmt,
SWS_BICUBIC, nullptr, nullptr, nullptr)))
{
sws_scale(s_sws_context, s_src_frame->data, s_src_frame->linesize, 0,
height, s_scaled_frame->data, s_scaled_frame->linesize);
}
// Convert image from {BGR24, RGBA} to desired pixel format, and scale to initial
// width and height
if ((s_sws_context =
sws_getCachedContext(s_sws_context, width, height, s_pix_fmt, s_width, s_height,
s_stream->codec->pix_fmt, SWS_BICUBIC, nullptr, nullptr, nullptr)))
{
sws_scale(s_sws_context, s_src_frame->data, s_src_frame->linesize, 0, height,
s_scaled_frame->data, s_scaled_frame->linesize);
}
s_scaled_frame->format = s_stream->codec->pix_fmt;
s_scaled_frame->width = s_width;
s_scaled_frame->height = s_height;
s_scaled_frame->format = s_stream->codec->pix_fmt;
s_scaled_frame->width = s_width;
s_scaled_frame->height = s_height;
// Encode and write the image.
AVPacket pkt;
PreparePacket(&pkt);
int got_packet = 0;
int error = 0;
u64 delta;
s64 last_pts;
// Check to see if the first frame being dumped is the first frame of output from the emulator.
// This prevents an issue with starting dumping later in emulation from placing the frames incorrectly.
if (!s_start_dumping && Movie::g_currentFrame < 1)
{
delta = CoreTiming::GetTicks();
last_pts = AV_NOPTS_VALUE;
s_start_dumping = true;
}
else
{
delta = CoreTiming::GetTicks() - s_last_frame;
last_pts = (s_last_pts * s_stream->codec->time_base.den) / SystemTimers::GetTicksPerSecond();
}
u64 pts_in_ticks = s_last_pts + delta;
s_scaled_frame->pts = (pts_in_ticks * s_stream->codec->time_base.den) / SystemTimers::GetTicksPerSecond();
if (s_scaled_frame->pts != last_pts)
{
s_last_frame = CoreTiming::GetTicks();
s_last_pts = pts_in_ticks;
error = avcodec_encode_video2(s_stream->codec, &pkt, s_scaled_frame, &got_packet);
}
while (!error && got_packet)
{
// Write the compressed frame in the media file.
if (pkt.pts != (s64)AV_NOPTS_VALUE)
{
pkt.pts = av_rescale_q(pkt.pts,
s_stream->codec->time_base, s_stream->time_base);
}
if (pkt.dts != (s64)AV_NOPTS_VALUE)
{
pkt.dts = av_rescale_q(pkt.dts,
s_stream->codec->time_base, s_stream->time_base);
}
// Encode and write the image.
AVPacket pkt;
PreparePacket(&pkt);
int got_packet = 0;
int error = 0;
u64 delta;
s64 last_pts;
// Check to see if the first frame being dumped is the first frame of output from the emulator.
// This prevents an issue with starting dumping later in emulation from placing the frames
// incorrectly.
if (!s_start_dumping && Movie::g_currentFrame < 1)
{
delta = CoreTiming::GetTicks();
last_pts = AV_NOPTS_VALUE;
s_start_dumping = true;
}
else
{
delta = CoreTiming::GetTicks() - s_last_frame;
last_pts = (s_last_pts * s_stream->codec->time_base.den) / SystemTimers::GetTicksPerSecond();
}
u64 pts_in_ticks = s_last_pts + delta;
s_scaled_frame->pts =
(pts_in_ticks * s_stream->codec->time_base.den) / SystemTimers::GetTicksPerSecond();
if (s_scaled_frame->pts != last_pts)
{
s_last_frame = CoreTiming::GetTicks();
s_last_pts = pts_in_ticks;
error = avcodec_encode_video2(s_stream->codec, &pkt, s_scaled_frame, &got_packet);
}
while (!error && got_packet)
{
// Write the compressed frame in the media file.
if (pkt.pts != (s64)AV_NOPTS_VALUE)
{
pkt.pts = av_rescale_q(pkt.pts, s_stream->codec->time_base, s_stream->time_base);
}
if (pkt.dts != (s64)AV_NOPTS_VALUE)
{
pkt.dts = av_rescale_q(pkt.dts, s_stream->codec->time_base, s_stream->time_base);
}
#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(56, 60, 100)
if (s_stream->codec->coded_frame->key_frame)
pkt.flags |= AV_PKT_FLAG_KEY;
if (s_stream->codec->coded_frame->key_frame)
pkt.flags |= AV_PKT_FLAG_KEY;
#endif
pkt.stream_index = s_stream->index;
av_interleaved_write_frame(s_format_context, &pkt);
pkt.stream_index = s_stream->index;
av_interleaved_write_frame(s_format_context, &pkt);
// Handle delayed frames.
PreparePacket(&pkt);
error = avcodec_encode_video2(s_stream->codec, &pkt, nullptr, &got_packet);
}
if (error)
ERROR_LOG(VIDEO, "Error while encoding video: %d", error);
// Handle delayed frames.
PreparePacket(&pkt);
error = avcodec_encode_video2(s_stream->codec, &pkt, nullptr, &got_packet);
}
if (error)
ERROR_LOG(VIDEO, "Error while encoding video: %d", error);
}
// Ends the dump: writes the container trailer, releases everything through
// CloseFile(), then logs that dumping stopped.
// NOTE(review): s_format_context is passed to av_write_trailer() without a null
// check - confirm callers only invoke Stop() after a successful Start().
// NOTE(review): the body is listed twice in this diff view.
void AVIDump::Stop()
{
av_write_trailer(s_format_context);
CloseFile();
NOTICE_LOG(VIDEO, "Stopping frame dump");
av_write_trailer(s_format_context);
CloseFile();
NOTICE_LOG(VIDEO, "Stopping frame dump");
}
// Releases every resource a dump session may hold. Each resource is checked for
// null first and its pointer is reset afterwards, so the function can run after a
// partially failed CreateFile():
//   - the stream's codec is closed and the stream freed,
//   - the buffer backing the scaled frame is deleted,
//   - both AVFrames are freed with av_frame_free(),
//   - the output IO context is closed and the format context freed,
//   - the swscale context is freed.
// NOTE(review): the stream and format context are released with av_free(); confirm
// against the libav documentation whether avformat_free_context() is required here.
// NOTE(review): the body statements are listed twice in this diff view.
void AVIDump::CloseFile()
{
if (s_stream)
{
if (s_stream->codec)
avcodec_close(s_stream->codec);
av_free(s_stream);
s_stream = nullptr;
}
if (s_stream)
{
if (s_stream->codec)
avcodec_close(s_stream->codec);
av_free(s_stream);
s_stream = nullptr;
}
if (s_yuv_buffer)
{
delete[] s_yuv_buffer;
s_yuv_buffer = nullptr;
}
if (s_yuv_buffer)
{
delete[] s_yuv_buffer;
s_yuv_buffer = nullptr;
}
av_frame_free(&s_src_frame);
av_frame_free(&s_scaled_frame);
av_frame_free(&s_src_frame);
av_frame_free(&s_scaled_frame);
if (s_format_context)
{
if (s_format_context->pb)
avio_close(s_format_context->pb);
av_free(s_format_context);
s_format_context = nullptr;
}
if (s_format_context)
{
if (s_format_context->pb)
avio_close(s_format_context->pb);
av_free(s_format_context);
s_format_context = nullptr;
}
if (s_sws_context)
{
sws_freeContext(s_sws_context);
s_sws_context = nullptr;
}
if (s_sws_context)
{
sws_freeContext(s_sws_context);
s_sws_context = nullptr;
}
}
// Re-bases the frame timing reference: s_last_frame is set to the current
// CoreTiming tick count, so the next AddFrame() measures its delta from this point.
// NOTE(review): the statement is listed twice in this diff view.
void AVIDump::DoState()
{
s_last_frame = CoreTiming::GetTicks();
s_last_frame = CoreTiming::GetTicks();
}

View file

@ -9,18 +9,18 @@
// Static-only interface for dumping rendered frames to a video file.
//   Start()    - begin a dump at the given output size and source pixel format.
//   AddFrame() - submit one frame of pixel data with its own width/height.
//   Stop()     - finish the dump.
//   DoState()  - reset the frame timing reference.
// CreateFile()/CloseFile() are private helpers for setting up and tearing down the
// output.
// NOTE(review): the member declarations are listed twice in this diff view.
class AVIDump
{
private:
static bool CreateFile();
static void CloseFile();
static bool CreateFile();
static void CloseFile();
public:
// Layout of the pixel data passed to AddFrame().
enum class DumpFormat
{
FORMAT_BGR,
FORMAT_RGBA
};
enum class DumpFormat
{
FORMAT_BGR,
FORMAT_RGBA
};
static bool Start(int w, int h, DumpFormat format);
static void AddFrame(const u8* data, int width, int height);
static void Stop();
static void DoState();
static bool Start(int w, int h, DumpFormat format);
static void AddFrame(const u8* data, int width, int height);
static void Stop();
static void DoState();
};

View file

@ -12,146 +12,144 @@
// Storage for the single AsyncRequests instance returned by GetInstance().
AsyncRequests AsyncRequests::s_singleton;
// Starts with queuing disabled (m_enable = false) and passthrough on
// (m_passthrough = true), so PushEvent() handles events immediately until
// SetPassthrough(false) is called.
// NOTE(review): m_empty and m_wake_me_up_again are not set in this initializer list.
// NOTE(review): the constructor header is listed twice in this diff view (two
// formattings of the same initializer list).
AsyncRequests::AsyncRequests()
: m_enable(false), m_passthrough(true)
AsyncRequests::AsyncRequests() : m_enable(false), m_passthrough(true)
{
}
// Drains the event queue. Called from PullEvents() when m_empty is false.
// Holds m_mutex while inspecting/modifying the queue, but releases it around the
// actual renderer work (PokeEFB / HandleEvent) and re-acquires it afterwards.
//
// Consecutive EFB poke events of the same type (color or Z) are popped together,
// collected in m_merged_efb_pokes and submitted in a single g_renderer->PokeEFB()
// call. Any other event is handled on its own via HandleEvent() and popped only
// after it has been handled.
//
// Once the queue is empty, a pending wake-up request (m_wake_me_up_again) is cleared
// and all waiters on m_cond are notified.
// NOTE(review): the body statements are listed twice in this diff view.
void AsyncRequests::PullEventsInternal()
{
std::unique_lock<std::mutex> lock(m_mutex);
m_empty.store(true);
std::unique_lock<std::mutex> lock(m_mutex);
m_empty.store(true);
while (!m_queue.empty())
{
Event e = m_queue.front();
while (!m_queue.empty())
{
Event e = m_queue.front();
// try to merge as many efb pokes as possible
// it's a bit hacky, but some games render a complete frame in this way
if ((e.type == Event::EFB_POKE_COLOR || e.type == Event::EFB_POKE_Z))
{
m_merged_efb_pokes.clear();
Event first_event = m_queue.front();
EFBAccessType t = first_event.type == Event::EFB_POKE_COLOR ? POKE_COLOR : POKE_Z;
// try to merge as many efb pokes as possible
// it's a bit hacky, but some games render a complete frame in this way
if ((e.type == Event::EFB_POKE_COLOR || e.type == Event::EFB_POKE_Z))
{
m_merged_efb_pokes.clear();
Event first_event = m_queue.front();
EFBAccessType t = first_event.type == Event::EFB_POKE_COLOR ? POKE_COLOR : POKE_Z;
do
{
e = m_queue.front();
do
{
e = m_queue.front();
EfbPokeData d;
d.data = e.efb_poke.data;
d.x = e.efb_poke.x;
d.y = e.efb_poke.y;
m_merged_efb_pokes.push_back(d);
EfbPokeData d;
d.data = e.efb_poke.data;
d.x = e.efb_poke.x;
d.y = e.efb_poke.y;
m_merged_efb_pokes.push_back(d);
m_queue.pop();
} while(!m_queue.empty() && m_queue.front().type == first_event.type);
m_queue.pop();
} while (!m_queue.empty() && m_queue.front().type == first_event.type);
lock.unlock();
g_renderer->PokeEFB(t, m_merged_efb_pokes.data(), m_merged_efb_pokes.size());
lock.lock();
continue;
}
lock.unlock();
g_renderer->PokeEFB(t, m_merged_efb_pokes.data(), m_merged_efb_pokes.size());
lock.lock();
continue;
}
lock.unlock();
HandleEvent(e);
lock.lock();
lock.unlock();
HandleEvent(e);
lock.lock();
m_queue.pop();
}
m_queue.pop();
}
if (m_wake_me_up_again)
{
m_wake_me_up_again = false;
m_cond.notify_all();
}
if (m_wake_me_up_again)
{
m_wake_me_up_again = false;
m_cond.notify_all();
}
}
// Submits an event.
//   event    - the request to perform.
//   blocking - when true, do not return until the queue has been drained.
// Behavior, all under m_mutex:
//   - Passthrough mode: the event is handled immediately via HandleEvent() (with the
//     mutex still held) and nothing is queued.
//   - Otherwise m_empty is cleared and a blocking request is recorded in
//     m_wake_me_up_again. If queuing is disabled (m_enable == false) the event is
//     dropped at this point.
//   - Otherwise the event is queued, Fifo::RunGpu() is called, and a blocking caller
//     waits on m_cond until m_queue is empty.
// NOTE(review): the body statements are listed twice in this diff view.
void AsyncRequests::PushEvent(const AsyncRequests::Event& event, bool blocking)
{
std::unique_lock<std::mutex> lock(m_mutex);
std::unique_lock<std::mutex> lock(m_mutex);
if (m_passthrough)
{
HandleEvent(event);
return;
}
if (m_passthrough)
{
HandleEvent(event);
return;
}
m_empty.store(false);
m_wake_me_up_again |= blocking;
m_empty.store(false);
m_wake_me_up_again |= blocking;
if (!m_enable)
return;
if (!m_enable)
return;
m_queue.push(event);
m_queue.push(event);
Fifo::RunGpu();
if (blocking)
{
m_cond.wait(lock, [this]{return m_queue.empty();});
}
Fifo::RunGpu();
if (blocking)
{
m_cond.wait(lock, [this] { return m_queue.empty(); });
}
}
// Turns event queuing on or off (under m_mutex). When disabling, every queued
// event is discarded without being handled, and waiters on m_cond are notified if a
// wake-up was requested so a blocked PushEvent() can observe the empty queue.
// NOTE(review): m_wake_me_up_again is not cleared after the notification here,
// unlike in PullEventsInternal() - confirm this is intended.
// NOTE(review): the body statements are listed twice in this diff view.
void AsyncRequests::SetEnable(bool enable)
{
std::unique_lock<std::mutex> lock(m_mutex);
m_enable = enable;
std::unique_lock<std::mutex> lock(m_mutex);
m_enable = enable;
if (!enable)
{
// flush the queue on disabling
while (!m_queue.empty())
m_queue.pop();
if (m_wake_me_up_again)
m_cond.notify_all();
}
if (!enable)
{
// flush the queue on disabling
while (!m_queue.empty())
m_queue.pop();
if (m_wake_me_up_again)
m_cond.notify_all();
}
}
// Executes a single event by dispatching on e.type:
//   EFB_POKE_COLOR / EFB_POKE_Z - one-element g_renderer->PokeEFB() call.
//   EFB_PEEK_COLOR / EFB_PEEK_Z - g_renderer->AccessEFB() result is written through
//                                 the event's data pointer.
//   SWAP_EVENT                  - Renderer::Swap() with the event's XFB parameters and
//                                 a default-constructed EFBRectangle.
//   BBOX_READ                   - g_renderer->BBoxRead() result is written through the
//                                 event's data pointer.
//   PERF_QUERY                  - g_perf_query->FlushResults().
// NOTE(review): the body statements are listed twice in this diff view.
void AsyncRequests::HandleEvent(const AsyncRequests::Event& e)
{
EFBRectangle rc;
switch (e.type)
{
case Event::EFB_POKE_COLOR:
{
EfbPokeData poke = { e.efb_poke.x, e.efb_poke.y, e.efb_poke.data };
g_renderer->PokeEFB(POKE_COLOR, &poke, 1);
}
break;
EFBRectangle rc;
switch (e.type)
{
case Event::EFB_POKE_COLOR:
{
EfbPokeData poke = {e.efb_poke.x, e.efb_poke.y, e.efb_poke.data};
g_renderer->PokeEFB(POKE_COLOR, &poke, 1);
}
break;
case Event::EFB_POKE_Z:
{
EfbPokeData poke = { e.efb_poke.x, e.efb_poke.y, e.efb_poke.data };
g_renderer->PokeEFB(POKE_Z, &poke, 1);
}
break;
case Event::EFB_POKE_Z:
{
EfbPokeData poke = {e.efb_poke.x, e.efb_poke.y, e.efb_poke.data};
g_renderer->PokeEFB(POKE_Z, &poke, 1);
}
break;
case Event::EFB_PEEK_COLOR:
*e.efb_peek.data = g_renderer->AccessEFB(PEEK_COLOR, e.efb_peek.x, e.efb_peek.y, 0);
break;
case Event::EFB_PEEK_COLOR:
*e.efb_peek.data = g_renderer->AccessEFB(PEEK_COLOR, e.efb_peek.x, e.efb_peek.y, 0);
break;
case Event::EFB_PEEK_Z:
*e.efb_peek.data = g_renderer->AccessEFB(PEEK_Z, e.efb_peek.x, e.efb_peek.y, 0);
break;
case Event::EFB_PEEK_Z:
*e.efb_peek.data = g_renderer->AccessEFB(PEEK_Z, e.efb_peek.x, e.efb_peek.y, 0);
break;
case Event::SWAP_EVENT:
Renderer::Swap(e.swap_event.xfbAddr, e.swap_event.fbWidth, e.swap_event.fbStride, e.swap_event.fbHeight, rc);
break;
case Event::SWAP_EVENT:
Renderer::Swap(e.swap_event.xfbAddr, e.swap_event.fbWidth, e.swap_event.fbStride,
e.swap_event.fbHeight, rc);
break;
case Event::BBOX_READ:
*e.bbox.data = g_renderer->BBoxRead(e.bbox.index);
break;
case Event::BBOX_READ:
*e.bbox.data = g_renderer->BBoxRead(e.bbox.index);
break;
case Event::PERF_QUERY:
g_perf_query->FlushResults();
break;
}
case Event::PERF_QUERY:
g_perf_query->FlushResults();
break;
}
}
// Sets passthrough mode under m_mutex. While enabled, PushEvent() handles events
// immediately instead of queuing them.
// NOTE(review): the body is listed twice in this diff view.
void AsyncRequests::SetPassthrough(bool enable)
{
std::unique_lock<std::mutex> lock(m_mutex);
m_passthrough = enable;
std::unique_lock<std::mutex> lock(m_mutex);
m_passthrough = enable;
}

View file

@ -17,83 +17,81 @@ struct EfbPokeData;
// Queue of renderer requests (EFB pokes/peeks, swaps, bounding-box reads, perf-query
// flushes) with a singleton accessor.
//   PushEvent()      - submit a request, optionally blocking until the queue drains.
//   PullEvents()     - cheap check of the atomic m_empty flag; drains the queue via
//                      PullEventsInternal() only when something is pending.
//   SetEnable()      - enable/disable queuing.
//   SetPassthrough() - handle requests immediately instead of queuing.
// m_mutex guards the queue and the bool flags; m_cond is used to wake blocking
// pushers; m_merged_efb_pokes is scratch storage for batching consecutive EFB pokes.
// NOTE(review): the member declarations are listed twice in this diff view, and the
// two copies of the Event union are interleaved.
class AsyncRequests
{
public:
// One queued request: a type tag plus a union holding that type's parameters.
struct Event
{
enum Type
{
EFB_POKE_COLOR,
EFB_POKE_Z,
EFB_PEEK_COLOR,
EFB_PEEK_Z,
SWAP_EVENT,
BBOX_READ,
PERF_QUERY,
} type;
u64 time;
struct Event
{
enum Type
{
EFB_POKE_COLOR,
EFB_POKE_Z,
EFB_PEEK_COLOR,
EFB_PEEK_Z,
SWAP_EVENT,
BBOX_READ,
PERF_QUERY,
} type;
u64 time;
union
{
struct
{
u16 x;
u16 y;
u32 data;
} efb_poke;
union {
struct
{
u16 x;
u16 y;
u32 data;
} efb_poke;
struct
{
u16 x;
u16 y;
u32* data;
} efb_peek;
struct
{
u16 x;
u16 y;
u32* data;
} efb_peek;
struct
{
u32 xfbAddr;
u32 fbWidth;
u32 fbStride;
u32 fbHeight;
} swap_event;
struct
{
u32 xfbAddr;
u32 fbWidth;
u32 fbStride;
u32 fbHeight;
} swap_event;
struct
{
int index;
u16* data;
} bbox;
struct
{
int index;
u16* data;
} bbox;
struct
{
} perf_query;
};
};
struct
{
} perf_query;
};
};
AsyncRequests();
AsyncRequests();
// Fast path: only takes the lock (inside PullEventsInternal) when m_empty is false.
void PullEvents()
{
if (!m_empty.load())
PullEventsInternal();
}
void PushEvent(const Event& event, bool blocking = false);
void SetEnable(bool enable);
void SetPassthrough(bool enable);
static AsyncRequests* GetInstance() { return &s_singleton; }
void PullEvents()
{
if (!m_empty.load())
PullEventsInternal();
}
void PushEvent(const Event& event, bool blocking = false);
void SetEnable(bool enable);
void SetPassthrough(bool enable);
static AsyncRequests* GetInstance() { return &s_singleton; }
private:
void PullEventsInternal();
void HandleEvent(const Event& e);
void PullEventsInternal();
void HandleEvent(const Event& e);
static AsyncRequests s_singleton;
static AsyncRequests s_singleton;
std::atomic<bool> m_empty;
std::queue<Event> m_queue;
std::mutex m_mutex;
std::condition_variable m_cond;
std::atomic<bool> m_empty;
std::queue<Event> m_queue;
std::mutex m_mutex;
std::condition_variable m_cond;
bool m_wake_me_up_again;
bool m_enable;
bool m_passthrough;
bool m_wake_me_up_again;
bool m_enable;
bool m_passthrough;
std::vector<EfbPokeData> m_merged_efb_pokes;
std::vector<EfbPokeData> m_merged_efb_pokes;
};

View file

@ -21,219 +21,224 @@ namespace BPFunctions
// Reference: Yet Another GameCube Documentation
// ----------------------------------------------
// Forwards to VertexManagerBase::Flush().
// NOTE(review): the call is listed twice in this diff view.
void FlushPipeline()
{
VertexManagerBase::Flush();
VertexManagerBase::Flush();
}
// Forwards to the active renderer's SetGenerationMode().
// NOTE(review): the call is listed twice in this diff view.
void SetGenerationMode()
{
g_renderer->SetGenerationMode();
g_renderer->SetGenerationMode();
}
// Computes the scissor rectangle from the BP registers and hands it to the renderer.
// The offset registers are doubled, subtracted from the top-left/bottom-right
// registers, and 1 is added to right/bottom. The result is clamped to
// [0, EFB_WIDTH] x [0, EFB_HEIGHT] and then fixed up so that right >= left and
// bottom >= top before g_renderer->SetScissorRect() is called.
// NOTE(review): the body statements are listed twice in this diff view.
void SetScissor()
{
/* NOTE: the minimum value here for the scissor rect and offset is -342.
* GX internally adds on an offset of 342 to both the offset and scissor
* coords to ensure that the register was always unsigned.
*
* The code that was here before tried to "undo" this offset, but
* since we always take the difference, the +342 added to both
* sides cancels out. */
/* NOTE: the minimum value here for the scissor rect and offset is -342.
* GX internally adds on an offset of 342 to both the offset and scissor
* coords to ensure that the register was always unsigned.
*
* The code that was here before tried to "undo" this offset, but
* since we always take the difference, the +342 added to both
* sides cancels out. */
/* The scissor offset is always even, so to save space, the scissor offset
* register is scaled down by 2. So, if somebody calls
* GX_SetScissorBoxOffset(20, 20); the registers will be set to 10, 10. */
const int xoff = bpmem.scissorOffset.x * 2;
const int yoff = bpmem.scissorOffset.y * 2;
/* The scissor offset is always even, so to save space, the scissor offset
* register is scaled down by 2. So, if somebody calls
* GX_SetScissorBoxOffset(20, 20); the registers will be set to 10, 10. */
const int xoff = bpmem.scissorOffset.x * 2;
const int yoff = bpmem.scissorOffset.y * 2;
EFBRectangle rc (bpmem.scissorTL.x - xoff, bpmem.scissorTL.y - yoff,
bpmem.scissorBR.x - xoff + 1, bpmem.scissorBR.y - yoff + 1);
EFBRectangle rc(bpmem.scissorTL.x - xoff, bpmem.scissorTL.y - yoff, bpmem.scissorBR.x - xoff + 1,
bpmem.scissorBR.y - yoff + 1);
if (rc.left < 0) rc.left = 0;
if (rc.top < 0) rc.top = 0;
if (rc.right > EFB_WIDTH) rc.right = EFB_WIDTH;
if (rc.bottom > EFB_HEIGHT) rc.bottom = EFB_HEIGHT;
if (rc.left < 0)
rc.left = 0;
if (rc.top < 0)
rc.top = 0;
if (rc.right > EFB_WIDTH)
rc.right = EFB_WIDTH;
if (rc.bottom > EFB_HEIGHT)
rc.bottom = EFB_HEIGHT;
if (rc.left > rc.right) rc.right = rc.left;
if (rc.top > rc.bottom) rc.bottom = rc.top;
if (rc.left > rc.right)
rc.right = rc.left;
if (rc.top > rc.bottom)
rc.bottom = rc.top;
g_renderer->SetScissorRect(rc);
g_renderer->SetScissorRect(rc);
}
// Forwards to the active renderer's SetDepthMode().
// NOTE(review): the call is listed twice in this diff view.
void SetDepthMode()
{
g_renderer->SetDepthMode();
g_renderer->SetDepthMode();
}
// Forwards to the active renderer's SetBlendMode(), always passing false.
// NOTE(review): the meaning of the bool argument is not visible here - confirm
// against Renderer::SetBlendMode.
// NOTE(review): the call is listed twice in this diff view.
void SetBlendMode()
{
g_renderer->SetBlendMode(false);
g_renderer->SetBlendMode(false);
}
// Forwards to the active renderer's SetDitherMode().
// NOTE(review): the call is listed twice in this diff view.
void SetDitherMode()
{
g_renderer->SetDitherMode();
g_renderer->SetDitherMode();
}
// Forwards to the active renderer's SetLogicOpMode().
// NOTE(review): the call is listed twice in this diff view.
void SetLogicOpMode()
{
g_renderer->SetLogicOpMode();
g_renderer->SetLogicOpMode();
}
// Forwards to the active renderer's SetColorMask().
// NOTE(review): the call is listed twice in this diff view.
void SetColorMask()
{
g_renderer->SetColorMask();
g_renderer->SetColorMask();
}
/* Explanation of the magic behind ClearScreen:
There's numerous possible formats for the pixel data in the EFB.
However, in the HW accelerated backends we're always using RGBA8
for the EFB format, which causes some problems:
- We're using an alpha channel although the game doesn't
- If the actual EFB format is RGBA6_Z24 or R5G6B5_Z16, we are using more bits per channel than the native HW
There's numerous possible formats for the pixel data in the EFB.
However, in the HW accelerated backends we're always using RGBA8
for the EFB format, which causes some problems:
- We're using an alpha channel although the game doesn't
- If the actual EFB format is RGBA6_Z24 or R5G6B5_Z16, we are using more bits per channel than the
native HW
To properly emulate the above points, we're doing the following:
(1)
- disable alpha channel writing of any kind of rendering if the actual EFB format doesn't use an alpha channel
- NOTE: Always make sure that the EFB has been cleared to an alpha value of 0xFF in this case!
- Same for color channels, these need to be cleared to 0x00 though.
(2)
- convert the RGBA8 color to RGBA6/RGB8/RGB565 and convert it to RGBA8 again
- convert the Z24 depth value to Z16 and back to Z24
To properly emulate the above points, we're doing the following:
(1)
- disable alpha channel writing of any kind of rendering if the actual EFB format doesn't use an
alpha channel
- NOTE: Always make sure that the EFB has been cleared to an alpha value of 0xFF in this case!
- Same for color channels, these need to be cleared to 0x00 though.
(2)
- convert the RGBA8 color to RGBA6/RGB8/RGB565 and convert it to RGBA8 again
- convert the Z24 depth value to Z16 and back to Z24
*/
// Clears the given EFB rectangle according to the current BP state.
//   rc - region to clear, passed through to the renderer unchanged.
// Color/alpha/Z writes are taken from the blend-mode and Z-mode update bits; alpha is
// forced off for pixel formats without an alpha channel (RGB8_Z24, RGB565_Z16, Z24).
// Nothing is done when all three are disabled. The clear color is assembled from
// clearcolorAR (upper 16 bits) and clearcolorGB (lower 16 bits). For RGBA6_Z24 and
// RGB565_Z16 the color (and, for RGB565_Z16, the Z value) is round-tripped through
// the native precision before g_renderer->ClearScreen() is called.
// NOTE(review): the signature and body are listed twice in this diff view.
void ClearScreen(const EFBRectangle &rc)
void ClearScreen(const EFBRectangle& rc)
{
bool colorEnable = (bpmem.blendmode.colorupdate != 0);
bool alphaEnable = (bpmem.blendmode.alphaupdate != 0);
bool zEnable = (bpmem.zmode.updateenable != 0);
auto pixel_format = bpmem.zcontrol.pixel_format;
bool colorEnable = (bpmem.blendmode.colorupdate != 0);
bool alphaEnable = (bpmem.blendmode.alphaupdate != 0);
bool zEnable = (bpmem.zmode.updateenable != 0);
auto pixel_format = bpmem.zcontrol.pixel_format;
// (1): Disable unused color channels
if (pixel_format == PEControl::RGB8_Z24 ||
pixel_format == PEControl::RGB565_Z16 ||
pixel_format == PEControl::Z24)
{
alphaEnable = false;
}
// (1): Disable unused color channels
if (pixel_format == PEControl::RGB8_Z24 || pixel_format == PEControl::RGB565_Z16 ||
pixel_format == PEControl::Z24)
{
alphaEnable = false;
}
if (colorEnable || alphaEnable || zEnable)
{
u32 color = (bpmem.clearcolorAR << 16) | bpmem.clearcolorGB;
u32 z = bpmem.clearZValue;
if (colorEnable || alphaEnable || zEnable)
{
u32 color = (bpmem.clearcolorAR << 16) | bpmem.clearcolorGB;
u32 z = bpmem.clearZValue;
// (2) drop additional accuracy
if (pixel_format == PEControl::RGBA6_Z24)
{
color = RGBA8ToRGBA6ToRGBA8(color);
}
else if (pixel_format == PEControl::RGB565_Z16)
{
color = RGBA8ToRGB565ToRGBA8(color);
z = Z24ToZ16ToZ24(z);
}
g_renderer->ClearScreen(rc, colorEnable, alphaEnable, zEnable, color, z);
}
// (2) drop additional accuracy
if (pixel_format == PEControl::RGBA6_Z24)
{
color = RGBA8ToRGBA6ToRGBA8(color);
}
else if (pixel_format == PEControl::RGB565_Z16)
{
color = RGBA8ToRGB565ToRGBA8(color);
z = Z24ToZ16ToZ24(z);
}
g_renderer->ClearScreen(rc, colorEnable, alphaEnable, zEnable, color, z);
}
}
// Reacts to a change of the EFB pixel format register.
// Returns immediately when g_ActiveConfig.bEFBEmulateFormatChanges is off (in that
// case the new format is NOT stored). Otherwise the previous format
// (Renderer::GetPrevPixelFormat()) is compared with the new one and a conversion
// type is chosen:
//   RGB8_Z24/Z24 -> RGBA6_Z24    : 0      RGB8_Z24/Z24 -> RGB565_Z16 : 1
//   RGBA6_Z24    -> RGB8_Z24/Z24 : 2      RGBA6_Z24    -> RGB565_Z16 : 3
//   RGB565_Z16   -> RGB8_Z24/Z24 : 4      RGB565_Z16   -> RGBA6_Z24  : 5
// No reinterpretation happens when the format is unchanged, the old format is
// INVALID_FMT, or both are in the RGB8_Z24/Z24 group. Unhandled combinations are
// logged as errors. Every path that reaches the `skip` label logs the new format and
// stores it with Renderer::StorePixelFormat().
// NOTE(review): the body statements are listed twice in this diff view.
void OnPixelFormatChange()
{
int convtype = -1;
int convtype = -1;
// TODO : Check for Z compression format change
// When using 16bit Z, the game may enable a special compression format which we need to handle
// If we don't, Z values will be completely screwed up, currently only Star Wars:RS2 uses that.
// TODO : Check for Z compression format change
// When using 16bit Z, the game may enable a special compression format which we need to handle
// If we don't, Z values will be completely screwed up, currently only Star Wars:RS2 uses that.
/*
* When changing the EFB format, the pixel data won't get converted to the new format but stays the same.
* Since we are always using an RGBA8 buffer though, this causes issues in some games.
* Thus, we reinterpret the old EFB data with the new format here.
*/
if (!g_ActiveConfig.bEFBEmulateFormatChanges)
return;
/*
* When changing the EFB format, the pixel data won't get converted to the new format but stays
* the same.
* Since we are always using an RGBA8 buffer though, this causes issues in some games.
* Thus, we reinterpret the old EFB data with the new format here.
*/
if (!g_ActiveConfig.bEFBEmulateFormatChanges)
return;
auto old_format = Renderer::GetPrevPixelFormat();
auto new_format = bpmem.zcontrol.pixel_format;
auto old_format = Renderer::GetPrevPixelFormat();
auto new_format = bpmem.zcontrol.pixel_format;
// no need to reinterpret pixel data in these cases
if (new_format == old_format || old_format == PEControl::INVALID_FMT)
goto skip;
// no need to reinterpret pixel data in these cases
if (new_format == old_format || old_format == PEControl::INVALID_FMT)
goto skip;
// Check for pixel format changes
switch (old_format)
{
case PEControl::RGB8_Z24:
case PEControl::Z24:
// Z24 and RGB8_Z24 are treated equal, so just return in this case
if (new_format == PEControl::RGB8_Z24 || new_format == PEControl::Z24)
goto skip;
// Check for pixel format changes
switch (old_format)
{
case PEControl::RGB8_Z24:
case PEControl::Z24:
// Z24 and RGB8_Z24 are treated equal, so just return in this case
if (new_format == PEControl::RGB8_Z24 || new_format == PEControl::Z24)
goto skip;
if (new_format == PEControl::RGBA6_Z24)
convtype = 0;
else if (new_format == PEControl::RGB565_Z16)
convtype = 1;
break;
if (new_format == PEControl::RGBA6_Z24)
convtype = 0;
else if (new_format == PEControl::RGB565_Z16)
convtype = 1;
break;
case PEControl::RGBA6_Z24:
if (new_format == PEControl::RGB8_Z24 ||
new_format == PEControl::Z24)
convtype = 2;
else if (new_format == PEControl::RGB565_Z16)
convtype = 3;
break;
case PEControl::RGBA6_Z24:
if (new_format == PEControl::RGB8_Z24 || new_format == PEControl::Z24)
convtype = 2;
else if (new_format == PEControl::RGB565_Z16)
convtype = 3;
break;
case PEControl::RGB565_Z16:
if (new_format == PEControl::RGB8_Z24 ||
new_format == PEControl::Z24)
convtype = 4;
else if (new_format == PEControl::RGBA6_Z24)
convtype = 5;
break;
case PEControl::RGB565_Z16:
if (new_format == PEControl::RGB8_Z24 || new_format == PEControl::Z24)
convtype = 4;
else if (new_format == PEControl::RGBA6_Z24)
convtype = 5;
break;
default:
break;
}
default:
break;
}
if (convtype == -1)
{
ERROR_LOG(VIDEO, "Unhandled EFB format change: %d to %d\n", static_cast<int>(old_format), static_cast<int>(new_format));
goto skip;
}
if (convtype == -1)
{
ERROR_LOG(VIDEO, "Unhandled EFB format change: %d to %d\n", static_cast<int>(old_format),
static_cast<int>(new_format));
goto skip;
}
g_renderer->ReinterpretPixelData(convtype);
g_renderer->ReinterpretPixelData(convtype);
skip:
DEBUG_LOG(VIDEO, "pixelfmt: pixel=%d, zc=%d", static_cast<int>(new_format), static_cast<int>(bpmem.zcontrol.zformat));
DEBUG_LOG(VIDEO, "pixelfmt: pixel=%d, zc=%d", static_cast<int>(new_format),
static_cast<int>(bpmem.zcontrol.zformat));
Renderer::StorePixelFormat(new_format);
Renderer::StorePixelFormat(new_format);
}
// Logging-only handler for the interlacing-related BP registers (the actual mode
// handling is still marked TODO).
//   bp - the BP command; only bp.address is inspected.
// BPMEM_FIELDMODE and BPMEM_FIELDMASK log the current register bits as text; any
// other address is reported as an error.
// NOTE(review): the register fields index two-element string tables; this assumes
// each field is a single bit - confirm against the bpmem definitions.
// NOTE(review): the signature and body are listed twice in this diff view.
void SetInterlacingMode(const BPCmd &bp)
void SetInterlacingMode(const BPCmd& bp)
{
// TODO
switch (bp.address)
{
case BPMEM_FIELDMODE:
{
// SDK always sets bpmem.lineptwidth.lineaspect via BPMEM_LINEPTWIDTH
// just before this cmd
const char *action[] = { "don't adjust", "adjust" };
DEBUG_LOG(VIDEO, "BPMEM_FIELDMODE texLOD:%s lineaspect:%s",
action[bpmem.fieldmode.texLOD],
action[bpmem.lineptwidth.lineaspect]);
}
break;
case BPMEM_FIELDMASK:
{
// Determines if fields will be written to EFB (always computed)
const char *action[] = { "skip", "write" };
DEBUG_LOG(VIDEO, "BPMEM_FIELDMASK even:%s odd:%s",
action[bpmem.fieldmask.even], action[bpmem.fieldmask.odd]);
}
break;
default:
ERROR_LOG(VIDEO, "SetInterlacingMode default");
break;
}
// TODO
switch (bp.address)
{
case BPMEM_FIELDMODE:
{
// SDK always sets bpmem.lineptwidth.lineaspect via BPMEM_LINEPTWIDTH
// just before this cmd
const char* action[] = {"don't adjust", "adjust"};
DEBUG_LOG(VIDEO, "BPMEM_FIELDMODE texLOD:%s lineaspect:%s", action[bpmem.fieldmode.texLOD],
action[bpmem.lineptwidth.lineaspect]);
}
break;
case BPMEM_FIELDMASK:
{
// Determines if fields will be written to EFB (always computed)
const char* action[] = {"skip", "write"};
DEBUG_LOG(VIDEO, "BPMEM_FIELDMASK even:%s odd:%s", action[bpmem.fieldmask.even],
action[bpmem.fieldmask.odd]);
}
break;
default:
ERROR_LOG(VIDEO, "SetInterlacingMode default");
break;
}
}
};

View file

@ -2,7 +2,6 @@
// Licensed under GPLv2+
// Refer to the license.txt file included.
// ------------------------------------------
// Video backend must define these functions
// ------------------------------------------
@ -15,7 +14,6 @@ struct BPCmd;
namespace BPFunctions
{
void FlushPipeline();
void SetGenerationMode();
void SetScissor();
@ -24,7 +22,7 @@ void SetBlendMode();
void SetDitherMode();
void SetLogicOpMode();
void SetColorMask();
void ClearScreen(const EFBRectangle &rc);
void ClearScreen(const EFBRectangle& rc);
void OnPixelFormatChange();
void SetInterlacingMode(const BPCmd &bp);
void SetInterlacingMode(const BPCmd& bp);
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -2,22 +2,21 @@
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "VideoCommon/BoundingBox.h"
#include "Common/ChunkFile.h"
#include "Common/CommonTypes.h"
#include "VideoCommon/BoundingBox.h"
namespace BoundingBox
{
// External vars
bool active = false;
u16 coords[4] = { 0x80, 0xA0, 0x80, 0xA0 };
u16 coords[4] = {0x80, 0xA0, 0x80, 0xA0};
// Save state: passes the bounding-box globals `active` and `coords` through
// the PointerWrap, in that order.
// NOTE(review): both calls appear twice in this listing; this looks like the
// old and new sides of a whitespace-only reformat diff shown together — confirm
// against the real file before treating the repetition as intentional.
void DoState(PointerWrap& p)
{
p.Do(active);
p.Do(coords);
p.Do(active);
p.Do(coords);
}
} // namespace BoundingBox
} // namespace BoundingBox

View file

@ -11,7 +11,6 @@ class PointerWrap;
// Bounding Box manager
namespace BoundingBox
{
// Determines if bounding box is active
extern bool active;
@ -20,13 +19,13 @@ extern u16 coords[4];
// Names for the values 0..3 (LEFT, RIGHT, TOP, BOTTOM).
// presumably these are indices into coords[4] — TODO confirm against callers.
// NOTE(review): the enumerators are listed twice here (old/new sides of a
// reformat diff, it seems) — confirm.
enum
{
LEFT = 0,
RIGHT = 1,
TOP = 2,
BOTTOM = 3
LEFT = 0,
RIGHT = 1,
TOP = 2,
BOTTOM = 3
};
// Save state
void DoState(PointerWrap& p);
}; // end of namespace BoundingBox
}; // end of namespace BoundingBox

View file

@ -2,8 +2,8 @@
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Common/ChunkFile.h"
#include "VideoCommon/CPMemory.h"
#include "Common/ChunkFile.h"
// CP state
CPState g_main_cp_state;
@ -11,23 +11,23 @@ CPState g_preprocess_cp_state;
// Passes the saved part of g_main_cp_state through the PointerWrap: array
// bases and strides, both matrix index registers, the raw vertex descriptor
// (vtx_desc.Hex) and the vtx_attr array, followed by a "CP Memory" marker.
// When p is in MODE_READ, the preprocess state is then overwritten with a copy
// of the main state and bases_dirty is set on the main state.
// NOTE(review): the whole statement sequence appears twice in this listing
// (old/new sides of a reformat diff, it seems) — confirm.
void DoCPState(PointerWrap& p)
{
// We don't save g_preprocess_cp_state separately because the GPU should be
// synced around state save/load.
p.DoArray(g_main_cp_state.array_bases);
p.DoArray(g_main_cp_state.array_strides);
p.Do(g_main_cp_state.matrix_index_a);
p.Do(g_main_cp_state.matrix_index_b);
p.Do(g_main_cp_state.vtx_desc.Hex);
p.DoArray(g_main_cp_state.vtx_attr);
p.DoMarker("CP Memory");
if (p.mode == PointerWrap::MODE_READ)
{
CopyPreprocessCPStateFromMain();
g_main_cp_state.bases_dirty = true;
}
// We don't save g_preprocess_cp_state separately because the GPU should be
// synced around state save/load.
p.DoArray(g_main_cp_state.array_bases);
p.DoArray(g_main_cp_state.array_strides);
p.Do(g_main_cp_state.matrix_index_a);
p.Do(g_main_cp_state.matrix_index_b);
p.Do(g_main_cp_state.vtx_desc.Hex);
p.DoArray(g_main_cp_state.vtx_attr);
p.DoMarker("CP Memory");
if (p.mode == PointerWrap::MODE_READ)
{
CopyPreprocessCPStateFromMain();
g_main_cp_state.bases_dirty = true;
}
}
// Overwrites g_preprocess_cp_state with a byte-for-byte copy (memcpy of
// sizeof(CPState)) of g_main_cp_state.
// NOTE(review): the memcpy line appears twice in this listing (old/new sides
// of a reformat diff, it seems) — confirm.
void CopyPreprocessCPStateFromMain()
{
memcpy(&g_preprocess_cp_state, &g_main_cp_state, sizeof(CPState));
memcpy(&g_preprocess_cp_state, &g_main_cp_state, sizeof(CPState));
}

View file

@ -10,234 +10,225 @@
// Vertex array numbers (ARRAY_POSITION = 0 through ARRAY_TEXCOORD0 = 4).
// presumably indices into per-array tables such as CPState::array_bases /
// array_strides — TODO confirm against users of these constants.
// NOTE(review): enumerators are listed twice here (old/new sides of a reformat
// diff, it seems) — confirm.
enum
{
ARRAY_POSITION = 0,
ARRAY_NORMAL = 1,
ARRAY_COLOR = 2,
ARRAY_COLOR2 = 3,
ARRAY_TEXCOORD0 = 4,
ARRAY_POSITION = 0,
ARRAY_NORMAL = 1,
ARRAY_COLOR = 2,
ARRAY_COLOR2 = 3,
ARRAY_TEXCOORD0 = 4,
};
// Vertex components: how a vertex attribute is supplied (not present, direct,
// 8-bit index, 16-bit index). The values match the 2-bit encoding documented
// on the Position..Tex7Coord fields of TVtxDesc in this file.
// MASK_INDEXED (2) is the bit that is set in INDEX8 (2) and INDEX16 (3) and
// clear in NOT_PRESENT (0) and DIRECT (1).
// NOTE(review): enumerators are listed twice here (old/new sides of a reformat
// diff, it seems) — confirm.
enum
{
NOT_PRESENT = 0,
DIRECT = 1,
INDEX8 = 2,
INDEX16 = 3,
NOT_PRESENT = 0,
DIRECT = 1,
INDEX8 = 2,
INDEX16 = 3,
MASK_INDEXED = 2,
MASK_INDEXED = 2,
};
// Numeric codes FORMAT_UBYTE (0) .. FORMAT_FLOAT (4).
// presumably the values stored in the 3-bit *Format fields of the UVAT_group*
// unions — TODO confirm.
// NOTE(review): enumerators are listed twice here (old/new sides of a reformat
// diff, it seems) — confirm.
enum
{
FORMAT_UBYTE = 0, // 2 Cmp
FORMAT_BYTE = 1, // 3 Cmp
FORMAT_USHORT = 2,
FORMAT_SHORT = 3,
FORMAT_FLOAT = 4,
FORMAT_UBYTE = 0, // 2 Cmp
FORMAT_BYTE = 1, // 3 Cmp
FORMAT_USHORT = 2,
FORMAT_SHORT = 3,
FORMAT_FLOAT = 4,
};
// Numeric codes FORMAT_16B_565 (0) .. FORMAT_32B_8888 (5).
// presumably the values stored in the 3-bit Color0Comp / Color1Comp fields of
// UVAT_group0 — TODO confirm.
// NOTE(review): enumerators are listed twice here (old/new sides of a reformat
// diff, it seems) — confirm.
enum
{
FORMAT_16B_565 = 0, // NA
FORMAT_24B_888 = 1,
FORMAT_32B_888x = 2,
FORMAT_16B_4444 = 3,
FORMAT_24B_6666 = 4,
FORMAT_32B_8888 = 5,
FORMAT_16B_565 = 0, // NA
FORMAT_24B_888 = 1,
FORMAT_32B_888x = 2,
FORMAT_16B_4444 = 3,
FORMAT_24B_6666 = 4,
FORMAT_32B_8888 = 5,
};
#pragma pack(4)
union TVtxDesc
{
u64 Hex;
struct
{
// 0: not present
// 1: present
u64 PosMatIdx : 1;
u64 Tex0MatIdx : 1;
u64 Tex1MatIdx : 1;
u64 Tex2MatIdx : 1;
u64 Tex3MatIdx : 1;
u64 Tex4MatIdx : 1;
u64 Tex5MatIdx : 1;
u64 Tex6MatIdx : 1;
u64 Tex7MatIdx : 1;
union TVtxDesc {
u64 Hex;
struct
{
// 0: not present
// 1: present
u64 PosMatIdx : 1;
u64 Tex0MatIdx : 1;
u64 Tex1MatIdx : 1;
u64 Tex2MatIdx : 1;
u64 Tex3MatIdx : 1;
u64 Tex4MatIdx : 1;
u64 Tex5MatIdx : 1;
u64 Tex6MatIdx : 1;
u64 Tex7MatIdx : 1;
// 00: not present
// 01: direct
// 10: 8 bit index
// 11: 16 bit index
u64 Position : 2;
u64 Normal : 2;
u64 Color0 : 2;
u64 Color1 : 2;
u64 Tex0Coord : 2;
u64 Tex1Coord : 2;
u64 Tex2Coord : 2;
u64 Tex3Coord : 2;
u64 Tex4Coord : 2;
u64 Tex5Coord : 2;
u64 Tex6Coord : 2;
u64 Tex7Coord : 2;
u64 :31;
};
// 00: not present
// 01: direct
// 10: 8 bit index
// 11: 16 bit index
u64 Position : 2;
u64 Normal : 2;
u64 Color0 : 2;
u64 Color1 : 2;
u64 Tex0Coord : 2;
u64 Tex1Coord : 2;
u64 Tex2Coord : 2;
u64 Tex3Coord : 2;
u64 Tex4Coord : 2;
u64 Tex5Coord : 2;
u64 Tex6Coord : 2;
u64 Tex7Coord : 2;
u64 : 31;
};
struct
{
u32 Hex0, Hex1;
};
struct
{
u32 Hex0, Hex1;
};
// Easily index into the Position..Tex7Coord fields.
u32 GetVertexArrayStatus(int idx)
{
return (Hex >> (9 + idx * 2)) & 0x3;
}
// Easily index into the Position..Tex7Coord fields.
u32 GetVertexArrayStatus(int idx) { return (Hex >> (9 + idx * 2)) & 0x3; }
};
union UVAT_group0
{
u32 Hex;
struct
{
// 0:8
u32 PosElements : 1;
u32 PosFormat : 3;
u32 PosFrac : 5;
// 9:12
u32 NormalElements : 1;
u32 NormalFormat : 3;
// 13:16
u32 Color0Elements : 1;
u32 Color0Comp : 3;
// 17:20
u32 Color1Elements : 1;
u32 Color1Comp : 3;
// 21:29
u32 Tex0CoordElements : 1;
u32 Tex0CoordFormat : 3;
u32 Tex0Frac : 5;
// 30:31
u32 ByteDequant : 1;
u32 NormalIndex3 : 1;
};
union UVAT_group0 {
u32 Hex;
struct
{
// 0:8
u32 PosElements : 1;
u32 PosFormat : 3;
u32 PosFrac : 5;
// 9:12
u32 NormalElements : 1;
u32 NormalFormat : 3;
// 13:16
u32 Color0Elements : 1;
u32 Color0Comp : 3;
// 17:20
u32 Color1Elements : 1;
u32 Color1Comp : 3;
// 21:29
u32 Tex0CoordElements : 1;
u32 Tex0CoordFormat : 3;
u32 Tex0Frac : 5;
// 30:31
u32 ByteDequant : 1;
u32 NormalIndex3 : 1;
};
};
union UVAT_group1
{
u32 Hex;
struct
{
// 0:8
u32 Tex1CoordElements : 1;
u32 Tex1CoordFormat : 3;
u32 Tex1Frac : 5;
// 9:17
u32 Tex2CoordElements : 1;
u32 Tex2CoordFormat : 3;
u32 Tex2Frac : 5;
// 18:26
u32 Tex3CoordElements : 1;
u32 Tex3CoordFormat : 3;
u32 Tex3Frac : 5;
// 27:30
u32 Tex4CoordElements : 1;
u32 Tex4CoordFormat : 3;
//
u32 : 1;
};
union UVAT_group1 {
u32 Hex;
struct
{
// 0:8
u32 Tex1CoordElements : 1;
u32 Tex1CoordFormat : 3;
u32 Tex1Frac : 5;
// 9:17
u32 Tex2CoordElements : 1;
u32 Tex2CoordFormat : 3;
u32 Tex2Frac : 5;
// 18:26
u32 Tex3CoordElements : 1;
u32 Tex3CoordFormat : 3;
u32 Tex3Frac : 5;
// 27:30
u32 Tex4CoordElements : 1;
u32 Tex4CoordFormat : 3;
//
u32 : 1;
};
};
union UVAT_group2
{
u32 Hex;
struct
{
// 0:4
u32 Tex4Frac : 5;
// 5:13
u32 Tex5CoordElements : 1;
u32 Tex5CoordFormat : 3;
u32 Tex5Frac : 5;
// 14:22
u32 Tex6CoordElements : 1;
u32 Tex6CoordFormat : 3;
u32 Tex6Frac : 5;
// 23:31
u32 Tex7CoordElements : 1;
u32 Tex7CoordFormat : 3;
u32 Tex7Frac : 5;
};
union UVAT_group2 {
u32 Hex;
struct
{
// 0:4
u32 Tex4Frac : 5;
// 5:13
u32 Tex5CoordElements : 1;
u32 Tex5CoordFormat : 3;
u32 Tex5Frac : 5;
// 14:22
u32 Tex6CoordElements : 1;
u32 Tex6CoordFormat : 3;
u32 Tex6Frac : 5;
// 23:31
u32 Tex7CoordElements : 1;
u32 Tex7CoordFormat : 3;
u32 Tex7Frac : 5;
};
};
// Two byte-sized fields describing one color attribute.
// presumably an unpacked form of the Color*Elements / Color*Comp bitfields of
// UVAT_group0 — TODO confirm.
// NOTE(review): members are listed twice here (old/new sides of a reformat
// diff, it seems) — confirm.
struct ColorAttr
{
u8 Elements;
u8 Comp;
u8 Elements;
u8 Comp;
};
// Three byte-sized fields describing one texture-coordinate attribute.
// presumably an unpacked form of the Tex*CoordElements / Tex*CoordFormat /
// Tex*Frac bitfields of the UVAT_group* unions — TODO confirm.
// NOTE(review): members are listed twice here (old/new sides of a reformat
// diff, it seems) — confirm.
struct TexAttr
{
u8 Elements;
u8 Format;
u8 Frac;
u8 Elements;
u8 Format;
u8 Frac;
};
// Vertex attribute description with one plain member per field: position and
// normal settings, two ColorAttr entries, eight TexAttr entries, plus the
// ByteDequant and NormalIndex3 flags.
// presumably the unpacked counterpart of the UVAT_group0/1/2 bitfields — TODO
// confirm.
// NOTE(review): members are listed twice here (old/new sides of a reformat
// diff, it seems) — confirm.
struct TVtxAttr
{
u8 PosElements;
u8 PosFormat;
u8 PosFrac;
u8 NormalElements;
u8 NormalFormat;
ColorAttr color[2];
TexAttr texCoord[8];
bool ByteDequant;
u8 NormalIndex3;
u8 PosElements;
u8 PosFormat;
u8 PosFrac;
u8 NormalElements;
u8 NormalFormat;
ColorAttr color[2];
TexAttr texCoord[8];
bool ByteDequant;
u8 NormalIndex3;
};
// Matrix indices
union TMatrixIndexA
{
struct
{
u32 PosNormalMtxIdx : 6;
u32 Tex0MtxIdx : 6;
u32 Tex1MtxIdx : 6;
u32 Tex2MtxIdx : 6;
u32 Tex3MtxIdx : 6;
};
struct
{
u32 Hex : 30;
u32 unused : 2;
};
union TMatrixIndexA {
struct
{
u32 PosNormalMtxIdx : 6;
u32 Tex0MtxIdx : 6;
u32 Tex1MtxIdx : 6;
u32 Tex2MtxIdx : 6;
u32 Tex3MtxIdx : 6;
};
struct
{
u32 Hex : 30;
u32 unused : 2;
};
};
union TMatrixIndexB
{
struct
{
u32 Tex4MtxIdx : 6;
u32 Tex5MtxIdx : 6;
u32 Tex6MtxIdx : 6;
u32 Tex7MtxIdx : 6;
};
struct
{
u32 Hex : 24;
u32 unused : 8;
};
union TMatrixIndexB {
struct
{
u32 Tex4MtxIdx : 6;
u32 Tex5MtxIdx : 6;
u32 Tex6MtxIdx : 6;
u32 Tex7MtxIdx : 6;
};
struct
{
u32 Hex : 24;
u32 unused : 8;
};
};
#pragma pack()
// Bundles the three packed vertex-attribute register groups (UVAT_group0,
// UVAT_group1, UVAT_group2) into a single entry; CPState holds eight of these
// in vtx_attr.
// NOTE(review): members are listed twice here (old/new sides of a reformat
// diff, it seems) — confirm.
struct VAT
{
UVAT_group0 g0;
UVAT_group1 g1;
UVAT_group2 g2;
UVAT_group0 g0;
UVAT_group1 g1;
UVAT_group2 g2;
};
class VertexLoaderBase;
@ -245,19 +236,19 @@ class VertexLoaderBase;
// STATE_TO_SAVE
// Command-processor register state. DoCPState() (CPMemory.cpp) passes
// array_bases, array_strides, both matrix index registers, vtx_desc.Hex and
// vtx_attr through PointerWrap; the trailing VertexLoaderManager fields are
// not serialized there (bases_dirty is instead set to true after a load).
// NOTE(review): members are listed twice here (old/new sides of a reformat
// diff, it seems) — confirm.
struct CPState final
{
u32 array_bases[16];
u32 array_strides[16];
TMatrixIndexA matrix_index_a;
TMatrixIndexB matrix_index_b;
TVtxDesc vtx_desc;
// Most games only use the first VtxAttr and simply reconfigure it all the time as needed.
VAT vtx_attr[8];
u32 array_bases[16];
u32 array_strides[16];
TMatrixIndexA matrix_index_a;
TMatrixIndexB matrix_index_b;
TVtxDesc vtx_desc;
// Most games only use the first VtxAttr and simply reconfigure it all the time as needed.
VAT vtx_attr[8];
// Attributes that actually belong to VertexLoaderManager:
BitSet32 attr_dirty;
bool bases_dirty;
VertexLoaderBase* vertex_loaders[8];
int last_id;
// Attributes that actually belong to VertexLoaderManager:
BitSet32 attr_dirty;
bool bases_dirty;
VertexLoaderBase* vertex_loaders[8];
int last_id;
};
class PointerWrap;
@ -269,7 +260,7 @@ extern CPState g_preprocess_cp_state;
void LoadCPReg(u32 SubCmd, u32 Value, bool is_preprocess = false);
// Fills memory with data from CP regs
void FillCPMemoryArray(u32 *memory);
void FillCPMemoryArray(u32* memory);
void DoCPState(PointerWrap& p);

View file

@ -20,7 +20,6 @@
namespace CommandProcessor
{
static int et_UpdateInterrupts;
// TODO(ector): Warn on bbox read/write
@ -28,7 +27,7 @@ static int et_UpdateInterrupts;
// STATE_TO_SAVE
SCPFifoStruct fifo;
static UCPStatusReg m_CPStatusReg;
static UCPCtrlReg m_CPCtrlReg;
static UCPCtrlReg m_CPCtrlReg;
static UCPClearReg m_CPClearReg;
static u16 m_bboxleft;
@ -44,493 +43,469 @@ static std::atomic<bool> s_interrupt_finish_waiting;
// Returns the bCPUThread flag from the global SConfig instance. The MMIO
// registration code in this file uses it to choose between the "single core"
// and "dual core" handlers.
// NOTE(review): the return statement appears twice in this listing (old/new
// sides of a reformat diff, it seems) — confirm.
static bool IsOnThread()
{
return SConfig::GetInstance().bCPUThread;
return SConfig::GetInstance().bCPUThread;
}
// Adapter with a (userdata, cyclesLate) signature that forwards userdata to
// UpdateInterrupts() and ignores cyclesLate. Init() registers it with
// CoreTiming as the "CPInterrupt" event callback.
// NOTE(review): the call appears twice in this listing (old/new sides of a
// reformat diff, it seems) — confirm.
static void UpdateInterrupts_Wrapper(u64 userdata, s64 cyclesLate)
{
UpdateInterrupts(userdata);
UpdateInterrupts(userdata);
}
// Passes the command processor's state through the PointerWrap: the status,
// control and clear registers (via DoPOD), the four bounding-box registers,
// the token register, the whole fifo struct, and the four interrupt flags.
// NOTE(review): the signature and every statement appear twice in this listing
// (old/new sides of a reformat diff, it seems) — confirm.
void DoState(PointerWrap &p)
void DoState(PointerWrap& p)
{
p.DoPOD(m_CPStatusReg);
p.DoPOD(m_CPCtrlReg);
p.DoPOD(m_CPClearReg);
p.Do(m_bboxleft);
p.Do(m_bboxtop);
p.Do(m_bboxright);
p.Do(m_bboxbottom);
p.Do(m_tokenReg);
p.Do(fifo);
p.DoPOD(m_CPStatusReg);
p.DoPOD(m_CPCtrlReg);
p.DoPOD(m_CPClearReg);
p.Do(m_bboxleft);
p.Do(m_bboxtop);
p.Do(m_bboxright);
p.Do(m_bboxbottom);
p.Do(m_tokenReg);
p.Do(fifo);
p.Do(s_interrupt_set);
p.Do(s_interrupt_waiting);
p.Do(s_interrupt_token_waiting);
p.Do(s_interrupt_finish_waiting);
p.Do(s_interrupt_set);
p.Do(s_interrupt_waiting);
p.Do(s_interrupt_token_waiting);
p.Do(s_interrupt_finish_waiting);
}
// Replaces the low 16 bits of _reg with lowbits, keeping the high 16 bits.
// Only the final store goes through Common::AtomicStore; the read of _reg used
// to build the new value is a plain read.
// NOTE(review): the store appears twice in this listing (old/new sides of a
// reformat diff, it seems) — confirm.
static inline void WriteLow(volatile u32& _reg, u16 lowbits)
{
Common::AtomicStore(_reg, (_reg & 0xFFFF0000) | lowbits);
Common::AtomicStore(_reg, (_reg & 0xFFFF0000) | lowbits);
}
// Replaces the high 16 bits of _reg with highbits, keeping the low 16 bits.
// Only the final store goes through Common::AtomicStore; the read of _reg used
// to build the new value is a plain read.
// NOTE(review): the store appears twice in this listing (old/new sides of a
// reformat diff, it seems) — confirm.
static inline void WriteHigh(volatile u32& _reg, u16 highbits)
{
Common::AtomicStore(_reg, (_reg & 0x0000FFFF) | ((u32)highbits << 16));
Common::AtomicStore(_reg, (_reg & 0x0000FFFF) | ((u32)highbits << 16));
}
// Returns the low 16 bits of _reg.
// NOTE(review): the return statement appears twice in this listing (old/new
// sides of a reformat diff, it seems) — confirm.
static inline u16 ReadLow(u32 _reg)
{
return (u16)(_reg & 0xFFFF);
return (u16)(_reg & 0xFFFF);
}
// Returns the high 16 bits of _reg.
// NOTE(review): the return statement appears twice in this listing (old/new
// sides of a reformat diff, it seems) — confirm.
static inline u16 ReadHigh(u32 _reg)
{
return (u16)(_reg >> 16);
return (u16)(_reg >> 16);
}
// Resets the command processor to its initial state:
//  - status register cleared, then CommandIdle and ReadIdle set to 1;
//  - control and clear registers zeroed;
//  - bounding-box registers set to left/top 0, right 640, bottom 480;
//  - token register zeroed;
//  - the whole fifo struct zero-filled (the explicit flag assignments that
//    follow write 0 to fields the memset already cleared);
//  - all four interrupt flags set to false;
//  - the "CPInterrupt" CoreTiming event registered, with its id stored in
//    et_UpdateInterrupts.
// NOTE(review): every statement, including the RegisterEvent call, appears
// twice in this listing (old/new sides of a reformat diff, it seems) — confirm.
void Init()
{
m_CPStatusReg.Hex = 0;
m_CPStatusReg.CommandIdle = 1;
m_CPStatusReg.ReadIdle = 1;
m_CPStatusReg.Hex = 0;
m_CPStatusReg.CommandIdle = 1;
m_CPStatusReg.ReadIdle = 1;
m_CPCtrlReg.Hex = 0;
m_CPCtrlReg.Hex = 0;
m_CPClearReg.Hex = 0;
m_CPClearReg.Hex = 0;
m_bboxleft = 0;
m_bboxtop = 0;
m_bboxright = 640;
m_bboxbottom = 480;
m_bboxleft = 0;
m_bboxtop = 0;
m_bboxright = 640;
m_bboxbottom = 480;
m_tokenReg = 0;
m_tokenReg = 0;
memset(&fifo,0,sizeof(fifo));
fifo.bFF_Breakpoint = 0;
fifo.bFF_HiWatermark = 0;
fifo.bFF_HiWatermarkInt = 0;
fifo.bFF_LoWatermark = 0;
fifo.bFF_LoWatermarkInt = 0;
memset(&fifo, 0, sizeof(fifo));
fifo.bFF_Breakpoint = 0;
fifo.bFF_HiWatermark = 0;
fifo.bFF_HiWatermarkInt = 0;
fifo.bFF_LoWatermark = 0;
fifo.bFF_LoWatermarkInt = 0;
s_interrupt_set.store(false);
s_interrupt_waiting.store(false);
s_interrupt_finish_waiting.store(false);
s_interrupt_token_waiting.store(false);
s_interrupt_set.store(false);
s_interrupt_waiting.store(false);
s_interrupt_finish_waiting.store(false);
s_interrupt_token_waiting.store(false);
et_UpdateInterrupts = CoreTiming::RegisterEvent("CPInterrupt", UpdateInterrupts_Wrapper);
et_UpdateInterrupts = CoreTiming::RegisterEvent("CPInterrupt", UpdateInterrupts_Wrapper);
}
void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
{
struct {
u32 addr;
u16* ptr;
bool readonly;
bool writes_align_to_32_bytes;
} directly_mapped_vars[] = {
{ FIFO_TOKEN_REGISTER, &m_tokenReg },
struct
{
u32 addr;
u16* ptr;
bool readonly;
bool writes_align_to_32_bytes;
} directly_mapped_vars[] = {
{FIFO_TOKEN_REGISTER, &m_tokenReg},
// Bounding box registers are read only.
{ FIFO_BOUNDING_BOX_LEFT, &m_bboxleft, true },
{ FIFO_BOUNDING_BOX_RIGHT, &m_bboxright, true },
{ FIFO_BOUNDING_BOX_TOP, &m_bboxtop, true },
{ FIFO_BOUNDING_BOX_BOTTOM, &m_bboxbottom, true },
// Bounding box registers are read only.
{FIFO_BOUNDING_BOX_LEFT, &m_bboxleft, true},
{FIFO_BOUNDING_BOX_RIGHT, &m_bboxright, true},
{FIFO_BOUNDING_BOX_TOP, &m_bboxtop, true},
{FIFO_BOUNDING_BOX_BOTTOM, &m_bboxbottom, true},
// Some FIFO addresses need to be aligned on 32 bytes on write - only
// the high part can be written directly without a mask.
{ FIFO_BASE_LO, MMIO::Utils::LowPart(&fifo.CPBase), false, true },
{ FIFO_BASE_HI, MMIO::Utils::HighPart(&fifo.CPBase) },
{ FIFO_END_LO, MMIO::Utils::LowPart(&fifo.CPEnd), false, true },
{ FIFO_END_HI, MMIO::Utils::HighPart(&fifo.CPEnd) },
{ FIFO_HI_WATERMARK_LO, MMIO::Utils::LowPart(&fifo.CPHiWatermark) },
{ FIFO_HI_WATERMARK_HI, MMIO::Utils::HighPart(&fifo.CPHiWatermark) },
{ FIFO_LO_WATERMARK_LO, MMIO::Utils::LowPart(&fifo.CPLoWatermark) },
{ FIFO_LO_WATERMARK_HI, MMIO::Utils::HighPart(&fifo.CPLoWatermark) },
// FIFO_RW_DISTANCE has some complex read code different for
// single/dual core.
{ FIFO_WRITE_POINTER_LO, MMIO::Utils::LowPart(&fifo.CPWritePointer), false, true },
{ FIFO_WRITE_POINTER_HI, MMIO::Utils::HighPart(&fifo.CPWritePointer) },
// FIFO_READ_POINTER has different code for single/dual core.
};
// Some FIFO addresses need to be aligned on 32 bytes on write - only
// the high part can be written directly without a mask.
{FIFO_BASE_LO, MMIO::Utils::LowPart(&fifo.CPBase), false, true},
{FIFO_BASE_HI, MMIO::Utils::HighPart(&fifo.CPBase)},
{FIFO_END_LO, MMIO::Utils::LowPart(&fifo.CPEnd), false, true},
{FIFO_END_HI, MMIO::Utils::HighPart(&fifo.CPEnd)},
{FIFO_HI_WATERMARK_LO, MMIO::Utils::LowPart(&fifo.CPHiWatermark)},
{FIFO_HI_WATERMARK_HI, MMIO::Utils::HighPart(&fifo.CPHiWatermark)},
{FIFO_LO_WATERMARK_LO, MMIO::Utils::LowPart(&fifo.CPLoWatermark)},
{FIFO_LO_WATERMARK_HI, MMIO::Utils::HighPart(&fifo.CPLoWatermark)},
// FIFO_RW_DISTANCE has some complex read code different for
// single/dual core.
{FIFO_WRITE_POINTER_LO, MMIO::Utils::LowPart(&fifo.CPWritePointer), false, true},
{FIFO_WRITE_POINTER_HI, MMIO::Utils::HighPart(&fifo.CPWritePointer)},
// FIFO_READ_POINTER has different code for single/dual core.
};
for (auto& mapped_var : directly_mapped_vars)
{
u16 wmask = mapped_var.writes_align_to_32_bytes ? 0xFFE0 : 0xFFFF;
mmio->Register(base | mapped_var.addr,
MMIO::DirectRead<u16>(mapped_var.ptr),
mapped_var.readonly
? MMIO::InvalidWrite<u16>()
: MMIO::DirectWrite<u16>(mapped_var.ptr, wmask)
);
}
for (auto& mapped_var : directly_mapped_vars)
{
u16 wmask = mapped_var.writes_align_to_32_bytes ? 0xFFE0 : 0xFFFF;
mmio->Register(base | mapped_var.addr, MMIO::DirectRead<u16>(mapped_var.ptr),
mapped_var.readonly ? MMIO::InvalidWrite<u16>() :
MMIO::DirectWrite<u16>(mapped_var.ptr, wmask));
}
mmio->Register(base | FIFO_BP_LO,
MMIO::DirectRead<u16>(MMIO::Utils::LowPart(&fifo.CPBreakpoint)),
MMIO::ComplexWrite<u16>([](u32, u16 val) {
WriteLow(fifo.CPBreakpoint, val & 0xffe0);
})
);
mmio->Register(base | FIFO_BP_HI,
MMIO::DirectRead<u16>(MMIO::Utils::HighPart(&fifo.CPBreakpoint)),
MMIO::ComplexWrite<u16>([](u32, u16 val) {
WriteHigh(fifo.CPBreakpoint, val);
})
);
mmio->Register(
base | FIFO_BP_LO, MMIO::DirectRead<u16>(MMIO::Utils::LowPart(&fifo.CPBreakpoint)),
MMIO::ComplexWrite<u16>([](u32, u16 val) { WriteLow(fifo.CPBreakpoint, val & 0xffe0); }));
mmio->Register(base | FIFO_BP_HI,
MMIO::DirectRead<u16>(MMIO::Utils::HighPart(&fifo.CPBreakpoint)),
MMIO::ComplexWrite<u16>([](u32, u16 val) { WriteHigh(fifo.CPBreakpoint, val); }));
// Timing and metrics MMIOs are stubbed with fixed values.
struct {
u32 addr;
u16 value;
} metrics_mmios[] = {
{ XF_RASBUSY_L, 0 },
{ XF_RASBUSY_H, 0 },
{ XF_CLKS_L, 0 },
{ XF_CLKS_H, 0 },
{ XF_WAIT_IN_L, 0 },
{ XF_WAIT_IN_H, 0 },
{ XF_WAIT_OUT_L, 0 },
{ XF_WAIT_OUT_H, 0 },
{ VCACHE_METRIC_CHECK_L, 0 },
{ VCACHE_METRIC_CHECK_H, 0 },
{ VCACHE_METRIC_MISS_L, 0 },
{ VCACHE_METRIC_MISS_H, 0 },
{ VCACHE_METRIC_STALL_L, 0 },
{ VCACHE_METRIC_STALL_H, 0 },
{ CLKS_PER_VTX_OUT, 4 },
};
for (auto& metrics_mmio : metrics_mmios)
{
mmio->Register(base | metrics_mmio.addr,
MMIO::Constant<u16>(metrics_mmio.value),
MMIO::InvalidWrite<u16>()
);
}
// Timing and metrics MMIOs are stubbed with fixed values.
struct
{
u32 addr;
u16 value;
} metrics_mmios[] = {
{XF_RASBUSY_L, 0},
{XF_RASBUSY_H, 0},
{XF_CLKS_L, 0},
{XF_CLKS_H, 0},
{XF_WAIT_IN_L, 0},
{XF_WAIT_IN_H, 0},
{XF_WAIT_OUT_L, 0},
{XF_WAIT_OUT_H, 0},
{VCACHE_METRIC_CHECK_L, 0},
{VCACHE_METRIC_CHECK_H, 0},
{VCACHE_METRIC_MISS_L, 0},
{VCACHE_METRIC_MISS_H, 0},
{VCACHE_METRIC_STALL_L, 0},
{VCACHE_METRIC_STALL_H, 0},
{CLKS_PER_VTX_OUT, 4},
};
for (auto& metrics_mmio : metrics_mmios)
{
mmio->Register(base | metrics_mmio.addr, MMIO::Constant<u16>(metrics_mmio.value),
MMIO::InvalidWrite<u16>());
}
mmio->Register(base | STATUS_REGISTER,
MMIO::ComplexRead<u16>([](u32) {
SetCpStatusRegister();
return m_CPStatusReg.Hex;
}),
MMIO::InvalidWrite<u16>()
);
mmio->Register(base | STATUS_REGISTER, MMIO::ComplexRead<u16>([](u32) {
SetCpStatusRegister();
return m_CPStatusReg.Hex;
}),
MMIO::InvalidWrite<u16>());
mmio->Register(base | CTRL_REGISTER,
MMIO::DirectRead<u16>(&m_CPCtrlReg.Hex),
MMIO::ComplexWrite<u16>([](u32, u16 val) {
UCPCtrlReg tmp(val);
m_CPCtrlReg.Hex = tmp.Hex;
SetCpControlRegister();
Fifo::RunGpu();
})
);
mmio->Register(base | CTRL_REGISTER, MMIO::DirectRead<u16>(&m_CPCtrlReg.Hex),
MMIO::ComplexWrite<u16>([](u32, u16 val) {
UCPCtrlReg tmp(val);
m_CPCtrlReg.Hex = tmp.Hex;
SetCpControlRegister();
Fifo::RunGpu();
}));
mmio->Register(base | CLEAR_REGISTER,
MMIO::DirectRead<u16>(&m_CPClearReg.Hex),
MMIO::ComplexWrite<u16>([](u32, u16 val) {
UCPClearReg tmp(val);
m_CPClearReg.Hex = tmp.Hex;
SetCpClearRegister();
Fifo::RunGpu();
})
);
mmio->Register(base | CLEAR_REGISTER, MMIO::DirectRead<u16>(&m_CPClearReg.Hex),
MMIO::ComplexWrite<u16>([](u32, u16 val) {
UCPClearReg tmp(val);
m_CPClearReg.Hex = tmp.Hex;
SetCpClearRegister();
Fifo::RunGpu();
}));
mmio->Register(base | PERF_SELECT,
MMIO::InvalidRead<u16>(),
MMIO::Nop<u16>()
);
mmio->Register(base | PERF_SELECT, MMIO::InvalidRead<u16>(), MMIO::Nop<u16>());
// Some MMIOs have different handlers for single core vs. dual core mode.
mmio->Register(base | FIFO_RW_DISTANCE_LO,
IsOnThread()
? MMIO::ComplexRead<u16>([](u32) {
if (fifo.CPWritePointer >= fifo.SafeCPReadPointer)
return ReadLow(fifo.CPWritePointer - fifo.SafeCPReadPointer);
else
return ReadLow(fifo.CPEnd - fifo.SafeCPReadPointer + fifo.CPWritePointer - fifo.CPBase + 32);
})
: MMIO::DirectRead<u16>(MMIO::Utils::LowPart(&fifo.CPReadWriteDistance)),
MMIO::DirectWrite<u16>(MMIO::Utils::LowPart(&fifo.CPReadWriteDistance), 0xFFE0)
);
mmio->Register(base | FIFO_RW_DISTANCE_HI,
IsOnThread()
? MMIO::ComplexRead<u16>([](u32) {
if (fifo.CPWritePointer >= fifo.SafeCPReadPointer)
return ReadHigh(fifo.CPWritePointer - fifo.SafeCPReadPointer);
else
return ReadHigh(fifo.CPEnd - fifo.SafeCPReadPointer + fifo.CPWritePointer - fifo.CPBase + 32);
})
: MMIO::DirectRead<u16>(MMIO::Utils::HighPart(&fifo.CPReadWriteDistance)),
MMIO::ComplexWrite<u16>([](u32, u16 val) {
WriteHigh(fifo.CPReadWriteDistance, val);
Fifo::SyncGPU(Fifo::SYNC_GPU_OTHER);
if (fifo.CPReadWriteDistance == 0)
{
GPFifo::ResetGatherPipe();
Fifo::ResetVideoBuffer();
}
else
{
Fifo::ResetVideoBuffer();
}
Fifo::RunGpu();
})
);
mmio->Register(base | FIFO_READ_POINTER_LO,
IsOnThread()
? MMIO::DirectRead<u16>(MMIO::Utils::LowPart(&fifo.SafeCPReadPointer))
: MMIO::DirectRead<u16>(MMIO::Utils::LowPart(&fifo.CPReadPointer)),
MMIO::DirectWrite<u16>(MMIO::Utils::LowPart(&fifo.CPReadPointer), 0xFFE0)
);
mmio->Register(base | FIFO_READ_POINTER_HI,
IsOnThread()
? MMIO::DirectRead<u16>(MMIO::Utils::HighPart(&fifo.SafeCPReadPointer))
: MMIO::DirectRead<u16>(MMIO::Utils::HighPart(&fifo.CPReadPointer)),
IsOnThread()
? MMIO::ComplexWrite<u16>([](u32, u16 val) {
WriteHigh(fifo.CPReadPointer, val);
fifo.SafeCPReadPointer = fifo.CPReadPointer;
})
: MMIO::DirectWrite<u16>(MMIO::Utils::HighPart(&fifo.CPReadPointer))
);
// Some MMIOs have different handlers for single core vs. dual core mode.
mmio->Register(base | FIFO_RW_DISTANCE_LO,
IsOnThread() ?
MMIO::ComplexRead<u16>([](u32) {
if (fifo.CPWritePointer >= fifo.SafeCPReadPointer)
return ReadLow(fifo.CPWritePointer - fifo.SafeCPReadPointer);
else
return ReadLow(fifo.CPEnd - fifo.SafeCPReadPointer + fifo.CPWritePointer -
fifo.CPBase + 32);
}) :
MMIO::DirectRead<u16>(MMIO::Utils::LowPart(&fifo.CPReadWriteDistance)),
MMIO::DirectWrite<u16>(MMIO::Utils::LowPart(&fifo.CPReadWriteDistance), 0xFFE0));
mmio->Register(base | FIFO_RW_DISTANCE_HI,
IsOnThread() ?
MMIO::ComplexRead<u16>([](u32) {
if (fifo.CPWritePointer >= fifo.SafeCPReadPointer)
return ReadHigh(fifo.CPWritePointer - fifo.SafeCPReadPointer);
else
return ReadHigh(fifo.CPEnd - fifo.SafeCPReadPointer + fifo.CPWritePointer -
fifo.CPBase + 32);
}) :
MMIO::DirectRead<u16>(MMIO::Utils::HighPart(&fifo.CPReadWriteDistance)),
MMIO::ComplexWrite<u16>([](u32, u16 val) {
WriteHigh(fifo.CPReadWriteDistance, val);
Fifo::SyncGPU(Fifo::SYNC_GPU_OTHER);
if (fifo.CPReadWriteDistance == 0)
{
GPFifo::ResetGatherPipe();
Fifo::ResetVideoBuffer();
}
else
{
Fifo::ResetVideoBuffer();
}
Fifo::RunGpu();
}));
mmio->Register(base | FIFO_READ_POINTER_LO,
IsOnThread() ?
MMIO::DirectRead<u16>(MMIO::Utils::LowPart(&fifo.SafeCPReadPointer)) :
MMIO::DirectRead<u16>(MMIO::Utils::LowPart(&fifo.CPReadPointer)),
MMIO::DirectWrite<u16>(MMIO::Utils::LowPart(&fifo.CPReadPointer), 0xFFE0));
mmio->Register(base | FIFO_READ_POINTER_HI,
IsOnThread() ?
MMIO::DirectRead<u16>(MMIO::Utils::HighPart(&fifo.SafeCPReadPointer)) :
MMIO::DirectRead<u16>(MMIO::Utils::HighPart(&fifo.CPReadPointer)),
IsOnThread() ? MMIO::ComplexWrite<u16>([](u32, u16 val) {
WriteHigh(fifo.CPReadPointer, val);
fifo.SafeCPReadPointer = fifo.CPReadPointer;
}) :
MMIO::DirectWrite<u16>(MMIO::Utils::HighPart(&fifo.CPReadPointer)));
}
// Handles one gather-pipe burst.
//  1. When IsOnThread() is true, refreshes the watermark/interrupt status via
//     SetCPStatusFromCPU(); then runs ProcessFifoEvents().
//  2. If the control register's GPLinkEnable bit is clear, the burst does not
//     advance the CP fifo: the GPU is optionally flushed (see the Pokemon XD
//     note below), Fifo::RunGpu() is called, and the function returns.
//  3. Otherwise the write pointer advances by GATHER_PIPE_SIZE (wrapping to
//     CPBase when it equals CPEnd), the ProcessorInterface copies of the fifo
//     pointers are updated when both GPReadEnable and GPLinkEnable are set,
//     an exception check is forced if the high watermark flag is set,
//     CPReadWriteDistance grows by GATHER_PIPE_SIZE, and the GPU is run.
//  4. Assertions then check that the fifo has not overflowed and that the CP
//     and ProcessorInterface pointers agree.
// NOTE(review): every statement group appears twice in this listing (old/new
// sides of a reformat diff, it seems) — confirm against the real file.
void GatherPipeBursted()
{
if (IsOnThread())
SetCPStatusFromCPU();
if (IsOnThread())
SetCPStatusFromCPU();
ProcessFifoEvents();
// if we aren't linked, we don't care about gather pipe data
if (!m_CPCtrlReg.GPLinkEnable)
{
if (IsOnThread() && !Fifo::UseDeterministicGPUThread())
{
// In multibuffer mode it is not allowed to write to the same FIFO that is attached to the GPU.
// Fix Pokemon XD in DC mode.
if ((ProcessorInterface::Fifo_CPUEnd == fifo.CPEnd) &&
(ProcessorInterface::Fifo_CPUBase == fifo.CPBase) &&
fifo.CPReadWriteDistance > 0)
{
Fifo::FlushGpu();
}
}
Fifo::RunGpu();
return;
}
ProcessFifoEvents();
// if we aren't linked, we don't care about gather pipe data
if (!m_CPCtrlReg.GPLinkEnable)
{
if (IsOnThread() && !Fifo::UseDeterministicGPUThread())
{
// In multibuffer mode it is not allowed to write to the same FIFO that is attached to the GPU.
// Fix Pokemon XD in DC mode.
if ((ProcessorInterface::Fifo_CPUEnd == fifo.CPEnd) &&
(ProcessorInterface::Fifo_CPUBase == fifo.CPBase) && fifo.CPReadWriteDistance > 0)
{
Fifo::FlushGpu();
}
}
Fifo::RunGpu();
return;
}
// update the fifo pointer
if (fifo.CPWritePointer == fifo.CPEnd)
fifo.CPWritePointer = fifo.CPBase;
else
fifo.CPWritePointer += GATHER_PIPE_SIZE;
// update the fifo pointer
if (fifo.CPWritePointer == fifo.CPEnd)
fifo.CPWritePointer = fifo.CPBase;
else
fifo.CPWritePointer += GATHER_PIPE_SIZE;
if (m_CPCtrlReg.GPReadEnable && m_CPCtrlReg.GPLinkEnable)
{
ProcessorInterface::Fifo_CPUWritePointer = fifo.CPWritePointer;
ProcessorInterface::Fifo_CPUBase = fifo.CPBase;
ProcessorInterface::Fifo_CPUEnd = fifo.CPEnd;
}
if (m_CPCtrlReg.GPReadEnable && m_CPCtrlReg.GPLinkEnable)
{
ProcessorInterface::Fifo_CPUWritePointer = fifo.CPWritePointer;
ProcessorInterface::Fifo_CPUBase = fifo.CPBase;
ProcessorInterface::Fifo_CPUEnd = fifo.CPEnd;
}
// If the game is running close to overflowing, make the exception checking more frequent.
if (fifo.bFF_HiWatermark)
CoreTiming::ForceExceptionCheck(0);
// If the game is running close to overflowing, make the exception checking more frequent.
if (fifo.bFF_HiWatermark)
CoreTiming::ForceExceptionCheck(0);
Common::AtomicAdd(fifo.CPReadWriteDistance, GATHER_PIPE_SIZE);
Common::AtomicAdd(fifo.CPReadWriteDistance, GATHER_PIPE_SIZE);
Fifo::RunGpu();
Fifo::RunGpu();
_assert_msg_(COMMANDPROCESSOR, fifo.CPReadWriteDistance <= fifo.CPEnd - fifo.CPBase,
"FIFO is overflowed by GatherPipe !\nCPU thread is too fast!");
_assert_msg_(COMMANDPROCESSOR, fifo.CPReadWriteDistance <= fifo.CPEnd - fifo.CPBase,
"FIFO is overflowed by GatherPipe !\nCPU thread is too fast!");
// check if we are in sync
_assert_msg_(COMMANDPROCESSOR, fifo.CPWritePointer == ProcessorInterface::Fifo_CPUWritePointer, "FIFOs linked but out of sync");
_assert_msg_(COMMANDPROCESSOR, fifo.CPBase == ProcessorInterface::Fifo_CPUBase, "FIFOs linked but out of sync");
_assert_msg_(COMMANDPROCESSOR, fifo.CPEnd == ProcessorInterface::Fifo_CPUEnd, "FIFOs linked but out of sync");
// check if we are in sync
_assert_msg_(COMMANDPROCESSOR, fifo.CPWritePointer == ProcessorInterface::Fifo_CPUWritePointer,
"FIFOs linked but out of sync");
_assert_msg_(COMMANDPROCESSOR, fifo.CPBase == ProcessorInterface::Fifo_CPUBase,
"FIFOs linked but out of sync");
_assert_msg_(COMMANDPROCESSOR, fifo.CPEnd == ProcessorInterface::Fifo_CPUEnd,
"FIFOs linked but out of sync");
}
// Applies a CP interrupt state change. A non-zero userdata raises the
// interrupt, zero clears it: s_interrupt_set is updated, the change is logged,
// and INT_CAUSE_CP is set or cleared through ProcessorInterface::SetInterrupt.
// Afterwards an exception check is forced, s_interrupt_waiting is cleared and
// Fifo::RunGpu() is called.
// NOTE(review): every statement group appears twice in this listing (old/new
// sides of a reformat diff, it seems) — confirm.
void UpdateInterrupts(u64 userdata)
{
if (userdata)
{
s_interrupt_set.store(true);
INFO_LOG(COMMANDPROCESSOR,"Interrupt set");
ProcessorInterface::SetInterrupt(INT_CAUSE_CP, true);
}
else
{
s_interrupt_set.store(false);
INFO_LOG(COMMANDPROCESSOR,"Interrupt cleared");
ProcessorInterface::SetInterrupt(INT_CAUSE_CP, false);
}
CoreTiming::ForceExceptionCheck(0);
s_interrupt_waiting.store(false);
Fifo::RunGpu();
if (userdata)
{
s_interrupt_set.store(true);
INFO_LOG(COMMANDPROCESSOR, "Interrupt set");
ProcessorInterface::SetInterrupt(INT_CAUSE_CP, true);
}
else
{
s_interrupt_set.store(false);
INFO_LOG(COMMANDPROCESSOR, "Interrupt cleared");
ProcessorInterface::SetInterrupt(INT_CAUSE_CP, false);
}
CoreTiming::ForceExceptionCheck(0);
s_interrupt_waiting.store(false);
Fifo::RunGpu();
}
// Schedules the et_UpdateInterrupts event (0 cycles into the future) with the
// given userdata through CoreTiming::ScheduleEvent_Threadsafe. Nothing is
// scheduled when Fifo::UseDeterministicGPUThread() returns true.
// NOTE(review): the statement appears twice in this listing (old/new sides of
// a reformat diff, it seems) — confirm.
void UpdateInterruptsFromVideoBackend(u64 userdata)
{
if (!Fifo::UseDeterministicGPUThread())
CoreTiming::ScheduleEvent_Threadsafe(0, et_UpdateInterrupts, userdata);
if (!Fifo::UseDeterministicGPUThread())
CoreTiming::ScheduleEvent_Threadsafe(0, et_UpdateInterrupts, userdata);
}
// Returns the current value of the s_interrupt_waiting flag.
// NOTE(review): the return statement appears twice in this listing (old/new
// sides of a reformat diff, it seems) — confirm.
bool IsInterruptWaiting()
{
return s_interrupt_waiting.load();
return s_interrupt_waiting.load();
}
// Stores `waiting` into the s_interrupt_token_waiting flag, which
// ProcessFifoEvents() consults.
// NOTE(review): the store appears twice in this listing (old/new sides of a
// reformat diff, it seems) — confirm.
void SetInterruptTokenWaiting(bool waiting)
{
s_interrupt_token_waiting.store(waiting);
s_interrupt_token_waiting.store(waiting);
}
// Stores `waiting` into the s_interrupt_finish_waiting flag, which
// ProcessFifoEvents() consults.
// NOTE(review): the store appears twice in this listing (old/new sides of a
// reformat diff, it seems) — confirm.
void SetInterruptFinishWaiting(bool waiting)
{
s_interrupt_finish_waiting.store(waiting);
s_interrupt_finish_waiting.store(waiting);
}
// Recomputes the fifo status flags and requests a CP interrupt change if
// needed.
//  - Breakpoint flag: set when breakpoints are enabled and CPBreakpoint equals
//    CPReadPointer; cleared otherwise (transitions are logged).
//  - Watermark flags: high when CPReadWriteDistance > CPHiWatermark, low when
//    it is < CPLoWatermark.
//  - An interrupt is wanted when any flag is set together with its enable bit
//    and the control register's GPReadEnable is set.
//  - If the wanted state differs from s_interrupt_set and no change is already
//    pending: with IsOnThread() the change is scheduled asynchronously through
//    UpdateInterruptsFromVideoBackend (after marking s_interrupt_waiting),
//    otherwise UpdateInterrupts is called directly.
// Note: when `interrupt` is true at least one of bpInt/ovfInt/undfInt is true,
// so the inner `!interrupt || bpInt || undfInt || ovfInt` test always passes.
// NOTE(review): every statement group appears twice in this listing (old/new
// sides of a reformat diff, it seems) — confirm.
void SetCPStatusFromGPU()
{
// breakpoint
if (fifo.bFF_BPEnable)
{
if (fifo.CPBreakpoint == fifo.CPReadPointer)
{
if (!fifo.bFF_Breakpoint)
{
INFO_LOG(COMMANDPROCESSOR, "Hit breakpoint at %i", fifo.CPReadPointer);
fifo.bFF_Breakpoint = true;
}
}
else
{
if (fifo.bFF_Breakpoint)
INFO_LOG(COMMANDPROCESSOR, "Cleared breakpoint at %i", fifo.CPReadPointer);
fifo.bFF_Breakpoint = false;
}
}
else
{
if (fifo.bFF_Breakpoint)
INFO_LOG(COMMANDPROCESSOR, "Cleared breakpoint at %i", fifo.CPReadPointer);
fifo.bFF_Breakpoint = false;
}
// breakpoint
if (fifo.bFF_BPEnable)
{
if (fifo.CPBreakpoint == fifo.CPReadPointer)
{
if (!fifo.bFF_Breakpoint)
{
INFO_LOG(COMMANDPROCESSOR, "Hit breakpoint at %i", fifo.CPReadPointer);
fifo.bFF_Breakpoint = true;
}
}
else
{
if (fifo.bFF_Breakpoint)
INFO_LOG(COMMANDPROCESSOR, "Cleared breakpoint at %i", fifo.CPReadPointer);
fifo.bFF_Breakpoint = false;
}
}
else
{
if (fifo.bFF_Breakpoint)
INFO_LOG(COMMANDPROCESSOR, "Cleared breakpoint at %i", fifo.CPReadPointer);
fifo.bFF_Breakpoint = false;
}
// overflow & underflow check
fifo.bFF_HiWatermark = (fifo.CPReadWriteDistance > fifo.CPHiWatermark);
fifo.bFF_LoWatermark = (fifo.CPReadWriteDistance < fifo.CPLoWatermark);
// overflow & underflow check
fifo.bFF_HiWatermark = (fifo.CPReadWriteDistance > fifo.CPHiWatermark);
fifo.bFF_LoWatermark = (fifo.CPReadWriteDistance < fifo.CPLoWatermark);
bool bpInt = fifo.bFF_Breakpoint && fifo.bFF_BPInt;
bool ovfInt = fifo.bFF_HiWatermark && fifo.bFF_HiWatermarkInt;
bool undfInt = fifo.bFF_LoWatermark && fifo.bFF_LoWatermarkInt;
bool bpInt = fifo.bFF_Breakpoint && fifo.bFF_BPInt;
bool ovfInt = fifo.bFF_HiWatermark && fifo.bFF_HiWatermarkInt;
bool undfInt = fifo.bFF_LoWatermark && fifo.bFF_LoWatermarkInt;
bool interrupt = (bpInt || ovfInt || undfInt) && m_CPCtrlReg.GPReadEnable;
bool interrupt = (bpInt || ovfInt || undfInt) && m_CPCtrlReg.GPReadEnable;
if (interrupt != s_interrupt_set.load() && !s_interrupt_waiting.load())
{
u64 userdata = interrupt ? 1 : 0;
if (IsOnThread())
{
if (!interrupt || bpInt || undfInt || ovfInt)
{
// Schedule the interrupt asynchronously
s_interrupt_waiting.store(true);
CommandProcessor::UpdateInterruptsFromVideoBackend(userdata);
}
}
else
{
CommandProcessor::UpdateInterrupts(userdata);
}
}
if (interrupt != s_interrupt_set.load() && !s_interrupt_waiting.load())
{
u64 userdata = interrupt ? 1 : 0;
if (IsOnThread())
{
if (!interrupt || bpInt || undfInt || ovfInt)
{
// Schedule the interrupt asynchronously
s_interrupt_waiting.store(true);
CommandProcessor::UpdateInterruptsFromVideoBackend(userdata);
}
}
else
{
CommandProcessor::UpdateInterrupts(userdata);
}
}
}
// Recomputes the fifo watermark flags and applies a CP interrupt change if
// needed. Unlike SetCPStatusFromGPU(), the breakpoint flag is only read here,
// not recomputed.
//  - Watermark flags: high when CPReadWriteDistance > CPHiWatermark, low when
//    it is < CPLoWatermark.
//  - An interrupt is wanted when any flag is set together with its enable bit
//    and the control register's GPReadEnable is set.
//  - If the wanted state differs from s_interrupt_set and no change is
//    pending: with IsOnThread() the new state is stored and passed to
//    ProcessorInterface::SetInterrupt directly, otherwise UpdateInterrupts is
//    called.
// NOTE(review): in the IsOnThread() branch the log text is "Interrupt set"
// even when `interrupt` is false (i.e. the interrupt is being cleared).
// Note: when `interrupt` is true at least one of bpInt/ovfInt/undfInt is true,
// so the inner `!interrupt || bpInt || undfInt || ovfInt` test always passes.
// NOTE(review): every statement group appears twice in this listing (old/new
// sides of a reformat diff, it seems) — confirm.
void SetCPStatusFromCPU()
{
// overflow & underflow check
fifo.bFF_HiWatermark = (fifo.CPReadWriteDistance > fifo.CPHiWatermark);
fifo.bFF_LoWatermark = (fifo.CPReadWriteDistance < fifo.CPLoWatermark);
// overflow & underflow check
fifo.bFF_HiWatermark = (fifo.CPReadWriteDistance > fifo.CPHiWatermark);
fifo.bFF_LoWatermark = (fifo.CPReadWriteDistance < fifo.CPLoWatermark);
bool bpInt = fifo.bFF_Breakpoint && fifo.bFF_BPInt;
bool ovfInt = fifo.bFF_HiWatermark && fifo.bFF_HiWatermarkInt;
bool undfInt = fifo.bFF_LoWatermark && fifo.bFF_LoWatermarkInt;
bool bpInt = fifo.bFF_Breakpoint && fifo.bFF_BPInt;
bool ovfInt = fifo.bFF_HiWatermark && fifo.bFF_HiWatermarkInt;
bool undfInt = fifo.bFF_LoWatermark && fifo.bFF_LoWatermarkInt;
bool interrupt = (bpInt || ovfInt || undfInt) && m_CPCtrlReg.GPReadEnable;
bool interrupt = (bpInt || ovfInt || undfInt) && m_CPCtrlReg.GPReadEnable;
if (interrupt != s_interrupt_set.load() && !s_interrupt_waiting.load())
{
u64 userdata = interrupt ? 1 : 0;
if (IsOnThread())
{
if (!interrupt || bpInt || undfInt || ovfInt)
{
s_interrupt_set.store(interrupt);
INFO_LOG(COMMANDPROCESSOR,"Interrupt set");
ProcessorInterface::SetInterrupt(INT_CAUSE_CP, interrupt);
}
}
else
{
CommandProcessor::UpdateInterrupts(userdata);
}
}
if (interrupt != s_interrupt_set.load() && !s_interrupt_waiting.load())
{
u64 userdata = interrupt ? 1 : 0;
if (IsOnThread())
{
if (!interrupt || bpInt || undfInt || ovfInt)
{
s_interrupt_set.store(interrupt);
INFO_LOG(COMMANDPROCESSOR, "Interrupt set");
ProcessorInterface::SetInterrupt(INT_CAUSE_CP, interrupt);
}
}
else
{
CommandProcessor::UpdateInterrupts(userdata);
}
}
}
// Calls CoreTiming::ProcessFifoWaitEvents() when IsOnThread() is true and at
// least one of the three "waiting" flags (interrupt, finish, token) is set;
// otherwise does nothing.
// NOTE(review): the statement appears twice in this listing (old/new sides of
// a reformat diff, it seems) — confirm.
void ProcessFifoEvents()
{
if (IsOnThread() && (s_interrupt_waiting.load() || s_interrupt_finish_waiting.load() || s_interrupt_token_waiting.load()))
CoreTiming::ProcessFifoWaitEvents();
if (IsOnThread() && (s_interrupt_waiting.load() || s_interrupt_finish_waiting.load() ||
s_interrupt_token_waiting.load()))
CoreTiming::ProcessFifoWaitEvents();
}
// Intentionally empty: this function performs no teardown work.
void Shutdown()
{
}
// Mirrors the current FIFO state into m_CPStatusReg and logs the resulting value.
// NOTE(review): the assignments and the log calls appear twice (old and
// reformatted copy of the same diff hunk); the comments describe one copy.
void SetCpStatusRegister()
{
// Here always there is one fifo attached to the GPU
m_CPStatusReg.Breakpoint = fifo.bFF_Breakpoint;
// ReadIdle: the read/write distance is zero or both pointers are equal.
m_CPStatusReg.ReadIdle = !fifo.CPReadWriteDistance || (fifo.CPReadPointer == fifo.CPWritePointer);
// CommandIdle: nothing to read, Fifo::AtBreakpoint() is true, or GP reads are disabled.
m_CPStatusReg.CommandIdle = !fifo.CPReadWriteDistance || Fifo::AtBreakpoint() || !fifo.bFF_GPReadEnable;
m_CPStatusReg.UnderflowLoWatermark = fifo.bFF_LoWatermark;
m_CPStatusReg.OverflowHiWatermark = fifo.bFF_HiWatermark;
// Here always there is one fifo attached to the GPU
m_CPStatusReg.Breakpoint = fifo.bFF_Breakpoint;
m_CPStatusReg.ReadIdle = !fifo.CPReadWriteDistance || (fifo.CPReadPointer == fifo.CPWritePointer);
m_CPStatusReg.CommandIdle =
!fifo.CPReadWriteDistance || Fifo::AtBreakpoint() || !fifo.bFF_GPReadEnable;
m_CPStatusReg.UnderflowLoWatermark = fifo.bFF_LoWatermark;
m_CPStatusReg.OverflowHiWatermark = fifo.bFF_HiWatermark;
INFO_LOG(COMMANDPROCESSOR,"\t Read from STATUS_REGISTER : %04x", m_CPStatusReg.Hex);
DEBUG_LOG(COMMANDPROCESSOR, "(r) status: iBP %s | fReadIdle %s | fCmdIdle %s | iOvF %s | iUndF %s"
, m_CPStatusReg.Breakpoint ? "ON" : "OFF"
, m_CPStatusReg.ReadIdle ? "ON" : "OFF"
, m_CPStatusReg.CommandIdle ? "ON" : "OFF"
, m_CPStatusReg.OverflowHiWatermark ? "ON" : "OFF"
, m_CPStatusReg.UnderflowLoWatermark ? "ON" : "OFF"
);
INFO_LOG(COMMANDPROCESSOR, "\t Read from STATUS_REGISTER : %04x", m_CPStatusReg.Hex);
DEBUG_LOG(
COMMANDPROCESSOR, "(r) status: iBP %s | fReadIdle %s | fCmdIdle %s | iOvF %s | iUndF %s",
m_CPStatusReg.Breakpoint ? "ON" : "OFF", m_CPStatusReg.ReadIdle ? "ON" : "OFF",
m_CPStatusReg.CommandIdle ? "ON" : "OFF", m_CPStatusReg.OverflowHiWatermark ? "ON" : "OFF",
m_CPStatusReg.UnderflowLoWatermark ? "ON" : "OFF");
}
// Copies the bits of m_CPCtrlReg into the matching `fifo` flags and logs them.
// When GP reads go from enabled to disabled, Fifo::FlushGpu() is called right
// after the flag has been cleared.
// NOTE(review): every statement group below appears twice (old and reformatted
// copy of the same diff hunk); the comments describe one copy.
void SetCpControlRegister()
{
fifo.bFF_BPInt = m_CPCtrlReg.BPInt;
fifo.bFF_BPEnable = m_CPCtrlReg.BPEnable;
fifo.bFF_HiWatermarkInt = m_CPCtrlReg.FifoOverflowIntEnable;
fifo.bFF_LoWatermarkInt = m_CPCtrlReg.FifoUnderflowIntEnable;
fifo.bFF_GPLinkEnable = m_CPCtrlReg.GPLinkEnable;
fifo.bFF_BPInt = m_CPCtrlReg.BPInt;
fifo.bFF_BPEnable = m_CPCtrlReg.BPEnable;
fifo.bFF_HiWatermarkInt = m_CPCtrlReg.FifoOverflowIntEnable;
fifo.bFF_LoWatermarkInt = m_CPCtrlReg.FifoUnderflowIntEnable;
fifo.bFF_GPLinkEnable = m_CPCtrlReg.GPLinkEnable;
// Enabled -> disabled transition: update the flag first, then flush.
if (fifo.bFF_GPReadEnable && !m_CPCtrlReg.GPReadEnable)
{
fifo.bFF_GPReadEnable = m_CPCtrlReg.GPReadEnable;
Fifo::FlushGpu();
}
else
{
// Any other case only copies the bit.
fifo.bFF_GPReadEnable = m_CPCtrlReg.GPReadEnable;
}
DEBUG_LOG(COMMANDPROCESSOR, "\t GPREAD %s | BP %s | Int %s | OvF %s | UndF %s | LINK %s"
, fifo.bFF_GPReadEnable ? "ON" : "OFF"
, fifo.bFF_BPEnable ? "ON" : "OFF"
, fifo.bFF_BPInt ? "ON" : "OFF"
, m_CPCtrlReg.FifoOverflowIntEnable ? "ON" : "OFF"
, m_CPCtrlReg.FifoUnderflowIntEnable ? "ON" : "OFF"
, m_CPCtrlReg.GPLinkEnable ? "ON" : "OFF"
);
if (fifo.bFF_GPReadEnable && !m_CPCtrlReg.GPReadEnable)
{
fifo.bFF_GPReadEnable = m_CPCtrlReg.GPReadEnable;
Fifo::FlushGpu();
}
else
{
fifo.bFF_GPReadEnable = m_CPCtrlReg.GPReadEnable;
}
DEBUG_LOG(COMMANDPROCESSOR, "\t GPREAD %s | BP %s | Int %s | OvF %s | UndF %s | LINK %s",
fifo.bFF_GPReadEnable ? "ON" : "OFF", fifo.bFF_BPEnable ? "ON" : "OFF",
fifo.bFF_BPInt ? "ON" : "OFF", m_CPCtrlReg.FifoOverflowIntEnable ? "ON" : "OFF",
m_CPCtrlReg.FifoUnderflowIntEnable ? "ON" : "OFF",
m_CPCtrlReg.GPLinkEnable ? "ON" : "OFF");
}
// NOTE: We intentionally don't emulate this function at the moment.
@ -539,4 +514,4 @@ void SetCpClearRegister()
{
}
} // end of namespace CommandProcessor
} // end of namespace CommandProcessor

View file

@ -8,120 +8,119 @@
#include "VideoCommon/VideoBackendBase.h"
class PointerWrap;
namespace MMIO { class Mapping; }
namespace MMIO
{
class Mapping;
}
namespace CommandProcessor
{
extern SCPFifoStruct fifo; //This one is shared between gfx thread and emulator thread.
extern SCPFifoStruct fifo; // This one is shared between gfx thread and emulator thread.
// internal hardware addresses
// Register offsets of the command processor. All values are even; each
// *_LO/*_HI (or *_L/*_H) pair differs by 2.
// NOTE(review): presumably these are offsets relative to the `base` passed to
// RegisterMMIO() -- confirm against its implementation.
// NOTE(review): the enumerator list appears twice (old and reformatted copy of
// the same diff hunk).
enum
{
STATUS_REGISTER = 0x00,
CTRL_REGISTER = 0x02,
CLEAR_REGISTER = 0x04,
PERF_SELECT = 0x06,
FIFO_TOKEN_REGISTER = 0x0E,
FIFO_BOUNDING_BOX_LEFT = 0x10,
FIFO_BOUNDING_BOX_RIGHT = 0x12,
FIFO_BOUNDING_BOX_TOP = 0x14,
FIFO_BOUNDING_BOX_BOTTOM = 0x16,
FIFO_BASE_LO = 0x20,
FIFO_BASE_HI = 0x22,
FIFO_END_LO = 0x24,
FIFO_END_HI = 0x26,
FIFO_HI_WATERMARK_LO = 0x28,
FIFO_HI_WATERMARK_HI = 0x2a,
FIFO_LO_WATERMARK_LO = 0x2c,
FIFO_LO_WATERMARK_HI = 0x2e,
FIFO_RW_DISTANCE_LO = 0x30,
FIFO_RW_DISTANCE_HI = 0x32,
FIFO_WRITE_POINTER_LO = 0x34,
FIFO_WRITE_POINTER_HI = 0x36,
FIFO_READ_POINTER_LO = 0x38,
FIFO_READ_POINTER_HI = 0x3A,
FIFO_BP_LO = 0x3C,
FIFO_BP_HI = 0x3E,
XF_RASBUSY_L = 0x40,
XF_RASBUSY_H = 0x42,
XF_CLKS_L = 0x44,
XF_CLKS_H = 0x46,
XF_WAIT_IN_L = 0x48,
XF_WAIT_IN_H = 0x4a,
XF_WAIT_OUT_L = 0x4c,
XF_WAIT_OUT_H = 0x4e,
VCACHE_METRIC_CHECK_L = 0x50,
VCACHE_METRIC_CHECK_H = 0x52,
VCACHE_METRIC_MISS_L = 0x54,
VCACHE_METRIC_MISS_H = 0x56,
VCACHE_METRIC_STALL_L = 0x58,
VCACHE_METRIC_STALL_H = 0x5A,
CLKS_PER_VTX_IN_L = 0x60,
CLKS_PER_VTX_IN_H = 0x62,
CLKS_PER_VTX_OUT = 0x64,
STATUS_REGISTER = 0x00,
CTRL_REGISTER = 0x02,
CLEAR_REGISTER = 0x04,
PERF_SELECT = 0x06,
FIFO_TOKEN_REGISTER = 0x0E,
FIFO_BOUNDING_BOX_LEFT = 0x10,
FIFO_BOUNDING_BOX_RIGHT = 0x12,
FIFO_BOUNDING_BOX_TOP = 0x14,
FIFO_BOUNDING_BOX_BOTTOM = 0x16,
FIFO_BASE_LO = 0x20,
FIFO_BASE_HI = 0x22,
FIFO_END_LO = 0x24,
FIFO_END_HI = 0x26,
FIFO_HI_WATERMARK_LO = 0x28,
FIFO_HI_WATERMARK_HI = 0x2a,
FIFO_LO_WATERMARK_LO = 0x2c,
FIFO_LO_WATERMARK_HI = 0x2e,
FIFO_RW_DISTANCE_LO = 0x30,
FIFO_RW_DISTANCE_HI = 0x32,
FIFO_WRITE_POINTER_LO = 0x34,
FIFO_WRITE_POINTER_HI = 0x36,
FIFO_READ_POINTER_LO = 0x38,
FIFO_READ_POINTER_HI = 0x3A,
FIFO_BP_LO = 0x3C,
FIFO_BP_HI = 0x3E,
XF_RASBUSY_L = 0x40,
XF_RASBUSY_H = 0x42,
XF_CLKS_L = 0x44,
XF_CLKS_H = 0x46,
XF_WAIT_IN_L = 0x48,
XF_WAIT_IN_H = 0x4a,
XF_WAIT_OUT_L = 0x4c,
XF_WAIT_OUT_H = 0x4e,
VCACHE_METRIC_CHECK_L = 0x50,
VCACHE_METRIC_CHECK_H = 0x52,
VCACHE_METRIC_MISS_L = 0x54,
VCACHE_METRIC_MISS_H = 0x56,
VCACHE_METRIC_STALL_L = 0x58,
VCACHE_METRIC_STALL_H = 0x5A,
CLKS_PER_VTX_IN_L = 0x60,
CLKS_PER_VTX_IN_H = 0x62,
CLKS_PER_VTX_OUT = 0x64,
};
// Miscellaneous constants of the command processor.
// NOTE(review): both enumerators appear twice (old and reformatted copy of the
// same diff hunk).
enum
{
GATHER_PIPE_SIZE = 32,
INT_CAUSE_CP = 0x800
GATHER_PIPE_SIZE = 32,
INT_CAUSE_CP = 0x800
};
// Fifo Status Register
// 16-bit status register: five 1-bit flags plus 11 unnamed padding bits,
// overlaid on the raw value `Hex`. Both constructors initialise through `Hex`.
// NOTE(review): which bit of `Hex` each flag maps to depends on the compiler's
// bit-field layout -- confirm for the supported toolchains.
// NOTE(review): the union header and members appear twice (old and reformatted
// copy of the same diff hunk).
union UCPStatusReg
{
struct
{
u16 OverflowHiWatermark : 1;
u16 UnderflowLoWatermark : 1;
u16 ReadIdle : 1;
u16 CommandIdle : 1;
u16 Breakpoint : 1;
u16 : 11;
};
u16 Hex;
UCPStatusReg() {Hex = 0; }
UCPStatusReg(u16 _hex) {Hex = _hex; }
union UCPStatusReg {
struct
{
u16 OverflowHiWatermark : 1;
u16 UnderflowLoWatermark : 1;
u16 ReadIdle : 1;
u16 CommandIdle : 1;
u16 Breakpoint : 1;
u16 : 11;
};
u16 Hex;
UCPStatusReg() { Hex = 0; }
UCPStatusReg(u16 _hex) { Hex = _hex; }
};
// Fifo Control Register
// 16-bit control register: six 1-bit enables plus 10 unnamed padding bits,
// overlaid on the raw value `Hex`. Both constructors initialise through `Hex`.
// NOTE(review): which bit of `Hex` each flag maps to depends on the compiler's
// bit-field layout -- confirm for the supported toolchains.
// NOTE(review): the union header and members appear twice (old and reformatted
// copy of the same diff hunk).
union UCPCtrlReg
{
struct
{
u16 GPReadEnable : 1;
u16 BPEnable : 1;
u16 FifoOverflowIntEnable : 1;
u16 FifoUnderflowIntEnable : 1;
u16 GPLinkEnable : 1;
u16 BPInt : 1;
u16 : 10;
};
u16 Hex;
UCPCtrlReg() {Hex = 0; }
UCPCtrlReg(u16 _hex) {Hex = _hex; }
union UCPCtrlReg {
struct
{
u16 GPReadEnable : 1;
u16 BPEnable : 1;
u16 FifoOverflowIntEnable : 1;
u16 FifoUnderflowIntEnable : 1;
u16 GPLinkEnable : 1;
u16 BPInt : 1;
u16 : 10;
};
u16 Hex;
UCPCtrlReg() { Hex = 0; }
UCPCtrlReg(u16 _hex) { Hex = _hex; }
};
// Fifo Clear Register
// 16-bit clear register: three 1-bit flags plus 13 unnamed padding bits,
// overlaid on the raw value `Hex`. Both constructors initialise through `Hex`.
// NOTE(review): which bit of `Hex` each flag maps to depends on the compiler's
// bit-field layout -- confirm for the supported toolchains.
// NOTE(review): the union header and members appear twice (old and reformatted
// copy of the same diff hunk).
union UCPClearReg
{
struct
{
u16 ClearFifoOverflow : 1;
u16 ClearFifoUnderflow : 1;
u16 ClearMetrices : 1;
u16 : 13;
};
u16 Hex;
UCPClearReg() {Hex = 0; }
UCPClearReg(u16 _hex) {Hex = _hex; }
union UCPClearReg {
struct
{
u16 ClearFifoOverflow : 1;
u16 ClearFifoUnderflow : 1;
u16 ClearMetrices : 1;
u16 : 13;
};
u16 Hex;
UCPClearReg() { Hex = 0; }
UCPClearReg(u16 _hex) { Hex = _hex; }
};
// Init
void Init();
void Shutdown();
void DoState(PointerWrap &p);
void DoState(PointerWrap& p);
void RegisterMMIO(MMIO::Mapping* mmio, u32 base);
@ -140,4 +139,4 @@ void SetCpControlRegister();
void SetCpStatusRegister();
void ProcessFifoEvents();
} // namespace CommandProcessor
} // namespace CommandProcessor

View file

@ -13,43 +13,43 @@ typedef s32 int4[4];
// Plain aggregate of int4/float4 values and arrays used as pixel shader constants.
// NOTE(review): presumably the member order and sizes mirror a shader-side
// constant block -- confirm before reordering or resizing anything.
// NOTE(review): the member list appears twice (old and reformatted copy of the
// same diff hunk).
struct PixelShaderConstants
{
int4 colors[4];
int4 kcolors[4];
int4 alpha;
float4 texdims[8];
int4 zbias[2];
int4 indtexscale[2];
int4 indtexmtx[6];
int4 fogcolor;
int4 fogi;
float4 fogf[2];
float4 zslope;
float4 efbscale;
int4 colors[4];
int4 kcolors[4];
int4 alpha;
float4 texdims[8];
int4 zbias[2];
int4 indtexscale[2];
int4 indtexmtx[6];
int4 fogcolor;
int4 fogi;
float4 fogf[2];
float4 zslope;
float4 efbscale;
};
// Plain aggregate of int4/float4 values and arrays used as vertex shader
// constants, including eight `Light` records.
// NOTE(review): presumably the member order and sizes mirror a shader-side
// constant block -- confirm before reordering or resizing anything.
// NOTE(review): the member list appears twice (old and reformatted copy of the
// same diff hunk).
struct VertexShaderConstants
{
float4 posnormalmatrix[6];
float4 projection[4];
int4 materials[4];
struct Light
{
int4 color;
float4 cosatt;
float4 distatt;
float4 pos;
float4 dir;
} lights [8];
float4 texmatrices[24];
float4 transformmatrices[64];
float4 normalmatrices[32];
float4 posttransformmatrices[64];
float4 pixelcentercorrection;
float4 posnormalmatrix[6];
float4 projection[4];
int4 materials[4];
struct Light
{
int4 color;
float4 cosatt;
float4 distatt;
float4 pos;
float4 dir;
} lights[8];
float4 texmatrices[24];
float4 transformmatrices[64];
float4 normalmatrices[32];
float4 posttransformmatrices[64];
float4 pixelcentercorrection;
};
// Plain aggregate of the three int4/float4 values used as geometry shader constants.
// NOTE(review): presumably the member order mirrors a shader-side constant
// block -- confirm before reordering.
// NOTE(review): the member list appears twice (old and reformatted copy of the
// same diff hunk).
struct GeometryShaderConstants
{
float4 stereoparams;
float4 lineptparams;
int4 texoffset;
float4 stereoparams;
float4 lineptparams;
int4 texoffset;
};

View file

@ -12,61 +12,53 @@
// Cursor over a raw byte range described by `buffer` (current position) and `end`.
//  - Peek<T>() copies a T from buffer + offset with memcpy and, when `swapped`
//    is true (the default), passes it through Common::FromBigEndian.
//  - Read<T>() is Peek<T>() followed by advancing `buffer` by sizeof(T).
//  - Write<T>() stores a T at `buffer` and advances; `swapped` defaults to
//    false here and also goes through Common::FromBigEndian.
//  - Skip<T>(n) advances `buffer` by n * sizeof(T).
//  - size() returns end - buffer.
// None of the accessors compare against `end`; staying in range is left to the
// caller.
// NOTE(review): old and reformatted lines of this diff hunk are interleaved
// below, so members appear twice and not always as contiguous copies.
class DataReader
{
public:
__forceinline DataReader()
: buffer(nullptr), end(nullptr) {}
__forceinline DataReader() : buffer(nullptr), end(nullptr) {}
__forceinline DataReader(u8* src, u8* _end) : buffer(src), end(_end) {}
__forceinline u8* GetPointer() { return buffer; }
__forceinline u8* operator=(u8* src)
{
buffer = src;
return src;
}
__forceinline DataReader(u8* src, u8* _end)
: buffer(src), end(_end) {}
__forceinline size_t size() { return end - buffer; }
template <typename T, bool swapped = true>
__forceinline T Peek(int offset = 0)
{
T data;
std::memcpy(&data, &buffer[offset], sizeof(T));
__forceinline u8* GetPointer()
{
return buffer;
}
if (swapped)
data = Common::FromBigEndian(data);
__forceinline u8* operator=(u8* src)
{
buffer = src;
return src;
}
return data;
}
__forceinline size_t size()
{
return end - buffer;
}
template <typename T, bool swapped = true>
__forceinline T Read()
{
const T result = Peek<T, swapped>();
buffer += sizeof(T);
return result;
}
template <typename T, bool swapped = true> __forceinline T Peek(int offset = 0)
{
T data;
std::memcpy(&data, &buffer[offset], sizeof(T));
template <typename T, bool swapped = false>
__forceinline void Write(T data)
{
if (swapped)
data = Common::FromBigEndian(data);
if (swapped)
data = Common::FromBigEndian(data);
std::memcpy(buffer, &data, sizeof(T));
buffer += sizeof(T);
}
return data;
}
template <typename T, bool swapped = true> __forceinline T Read()
{
const T result = Peek<T, swapped>();
buffer += sizeof(T);
return result;
}
template <typename T, bool swapped = false> __forceinline void Write(T data)
{
if (swapped)
data = Common::FromBigEndian(data);
std::memcpy(buffer, &data, sizeof(T));
buffer += sizeof(T);
}
template <typename T = u8> __forceinline void Skip(size_t data = 1)
{
buffer += sizeof(T) * data;
}
template <typename T = u8>
__forceinline void Skip(size_t data = 1)
{
buffer += sizeof(T) * data;
}
private:
u8* __restrict buffer;
u8* end;
u8* __restrict buffer;
u8* end;
};

View file

@ -12,143 +12,152 @@
#include "VideoCommon/Debugger.h"
#include "VideoCommon/VideoConfig.h"
// Global debugger state: the active debugger instance plus the pause flag,
// the event mask to pause at, and the event countdown.
// NOTE(review): these flags are plain `volatile`, which gives no inter-thread
// synchronization guarantees in C++ -- confirm whether they are written from a
// thread other than the GFX thread.
// NOTE(review): each definition appears twice (old and reformatted copy of the
// same diff hunk).
GFXDebuggerBase *g_pdebugger = nullptr;
volatile bool GFXDebuggerPauseFlag = false; // if true, the GFX thread will be spin locked until it's false again
volatile PauseEvent GFXDebuggerToPauseAtNext = NOT_PAUSE; // Event which will trigger spin locking the GFX thread
volatile int GFXDebuggerEventToPauseCount = 0; // Number of events to wait for until GFX thread will be paused
GFXDebuggerBase* g_pdebugger = nullptr;
volatile bool GFXDebuggerPauseFlag =
false; // if true, the GFX thread will be spin locked until it's false again
volatile PauseEvent GFXDebuggerToPauseAtNext =
NOT_PAUSE; // Event which will trigger spin locking the GFX thread
volatile int GFXDebuggerEventToPauseCount =
0; // Number of events to wait for until GFX thread will be paused
// Placeholder for refreshing the screen while paused. The body consists only
// of a TODO and a commented-out D3D-specific implementation.
// NOTE(review): old and reformatted lines of this diff hunk are interleaved
// below, so the commented-out code appears twice.
void GFXDebuggerUpdateScreen()
{
// TODO: Implement this in a backend-independent way
/* // update screen
if (D3D::bFrameInProgress)
{
D3D::dev->SetRenderTarget(0, D3D::GetBackBufferSurface());
D3D::dev->SetDepthStencilSurface(nullptr);
// TODO: Implement this in a backend-independent way
/* // update screen
if (D3D::bFrameInProgress)
{
D3D::dev->SetRenderTarget(0, D3D::GetBackBufferSurface());
D3D::dev->SetDepthStencilSurface(nullptr);
D3D::dev->StretchRect(FramebufferManager::GetEFBColorRTSurface(), nullptr,
D3D::GetBackBufferSurface(), nullptr,
D3DTEXF_LINEAR);
D3D::dev->StretchRect(FramebufferManager::GetEFBColorRTSurface(), nullptr,
D3D::GetBackBufferSurface(), nullptr,
D3DTEXF_LINEAR);
D3D::dev->EndScene();
D3D::dev->Present(nullptr, nullptr, nullptr, nullptr);
D3D::dev->EndScene();
D3D::dev->Present(nullptr, nullptr, nullptr, nullptr);
D3D::dev->SetRenderTarget(0, FramebufferManager::GetEFBColorRTSurface());
D3D::dev->SetDepthStencilSurface(FramebufferManager::GetEFBDepthRTSurface());
D3D::dev->BeginScene();
}
else
{
D3D::dev->EndScene();
D3D::dev->Present(nullptr, nullptr, nullptr, nullptr);
D3D::dev->BeginScene();
}*/
D3D::dev->SetRenderTarget(0, FramebufferManager::GetEFBColorRTSurface());
D3D::dev->SetDepthStencilSurface(FramebufferManager::GetEFBDepthRTSurface());
D3D::dev->BeginScene();
}
else
{
D3D::dev->EndScene();
D3D::dev->Present(nullptr, nullptr, nullptr, nullptr);
D3D::dev->BeginScene();
}*/
}
// GFX thread
// If GFXDebuggerPauseFlag is set: calls g_pdebugger->OnPause(), then loops,
// sleeping 5 per iteration via Common::SleepCurrentThread (and calling
// GFXDebuggerUpdateScreen() first when `update` is true), until the flag is
// cleared, and finally calls g_pdebugger->OnContinue().
// g_pdebugger is dereferenced without a null check.
// NOTE(review): the if-block appears twice (old and reformatted copy of the
// same diff hunk).
void GFXDebuggerCheckAndPause(bool update)
{
if (GFXDebuggerPauseFlag)
{
g_pdebugger->OnPause();
while ( GFXDebuggerPauseFlag )
{
if (update) GFXDebuggerUpdateScreen();
Common::SleepCurrentThread(5);
}
g_pdebugger->OnContinue();
}
if (GFXDebuggerPauseFlag)
{
g_pdebugger->OnPause();
while (GFXDebuggerPauseFlag)
{
if (update)
GFXDebuggerUpdateScreen();
Common::SleepCurrentThread(5);
}
g_pdebugger->OnContinue();
}
}
// GFX thread
// Clears the pending pause-event mask, sets the pause flag, and then blocks in
// GFXDebuggerCheckAndPause(update) until the flag is cleared again.
// NOTE(review): the three statements appear twice (old and reformatted copy of
// the same diff hunk).
void GFXDebuggerToPause(bool update)
{
GFXDebuggerToPauseAtNext = NOT_PAUSE;
GFXDebuggerPauseFlag = true;
GFXDebuggerCheckAndPause(update);
GFXDebuggerToPauseAtNext = NOT_PAUSE;
GFXDebuggerPauseFlag = true;
GFXDebuggerCheckAndPause(update);
}
// Clears GFXDebuggerPauseFlag, which ends the wait loop in
// GFXDebuggerCheckAndPause().
// NOTE(review): the assignment appears twice (old and reformatted copy of the
// same diff hunk).
void ContinueGFXDebugger()
{
GFXDebuggerPauseFlag = false;
GFXDebuggerPauseFlag = false;
}
// Writes "<path>dump_ps.txt". `path` is used as a plain prefix, so it needs its
// own trailing separator. Because every GeneratePixelShaderCode() call is
// commented out, the file currently receives only the heading line(s) that
// describe the destination-alpha mode, not any shader source.
// NOTE(review): old and reformatted lines of this diff hunk are interleaved
// below, so each statement appears twice.
void GFXDebuggerBase::DumpPixelShader(const std::string& path)
{
const std::string filename = StringFromFormat("%sdump_ps.txt", path.c_str());
const std::string filename = StringFromFormat("%sdump_ps.txt", path.c_str());
std::string output;
bool useDstAlpha = bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate && bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24;
if (!useDstAlpha)
{
output = "Destination alpha disabled:\n";
/// output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components);
}
else
{
if (g_ActiveConfig.backend_info.bSupportsDualSourceBlend)
{
output = "Using dual source blending for destination alpha:\n";
/// output += GeneratePixelShaderCode(DSTALPHA_DUAL_SOURCE_BLEND, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components);
}
else
{
output = "Using two passes for emulating destination alpha:\n";
/// output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components);
output += "\n\nDestination alpha pass shader:\n";
/// output += GeneratePixelShaderCode(DSTALPHA_ALPHA_PASS, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components);
}
}
std::string output;
bool useDstAlpha = bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate &&
bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24;
if (!useDstAlpha)
{
output = "Destination alpha disabled:\n";
/// output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType,
///g_nativeVertexFmt->m_components);
}
else
{
if (g_ActiveConfig.backend_info.bSupportsDualSourceBlend)
{
output = "Using dual source blending for destination alpha:\n";
/// output += GeneratePixelShaderCode(DSTALPHA_DUAL_SOURCE_BLEND,
///g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components);
}
else
{
output = "Using two passes for emulating destination alpha:\n";
/// output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType,
///g_nativeVertexFmt->m_components);
output += "\n\nDestination alpha pass shader:\n";
/// output += GeneratePixelShaderCode(DSTALPHA_ALPHA_PASS,
///g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components);
}
}
File::CreateEmptyFile(filename);
File::WriteStringToFile(output, filename);
File::CreateEmptyFile(filename);
File::WriteStringToFile(output, filename);
}
// Creates an empty "<path>dump_vs.txt". `path` is used as a plain prefix. The
// call that would write the generated vertex shader is commented out, so the
// file stays empty.
// NOTE(review): each statement appears twice (old and reformatted copy of the
// same diff hunk).
void GFXDebuggerBase::DumpVertexShader(const std::string& path)
{
const std::string filename = StringFromFormat("%sdump_vs.txt", path.c_str());
const std::string filename = StringFromFormat("%sdump_vs.txt", path.c_str());
File::CreateEmptyFile(filename);
/// File::WriteStringToFile(GenerateVertexShaderCode(g_nativeVertexFmt->m_components, g_ActiveConfig.backend_info.APIType), filename);
File::CreateEmptyFile(filename);
/// File::WriteStringToFile(GenerateVertexShaderCode(g_nativeVertexFmt->m_components,
///g_ActiveConfig.backend_info.APIType), filename);
}
// The eight dump functions below are unimplemented stubs: each ignores `path`
// and does nothing.
// NOTE(review): the TODO line of each stub appears twice (old and reformatted
// copy of the same diff hunk).
void GFXDebuggerBase::DumpPixelShaderConstants(const std::string& path)
{
// TODO
// TODO
}
void GFXDebuggerBase::DumpVertexShaderConstants(const std::string& path)
{
// TODO
// TODO
}
void GFXDebuggerBase::DumpTextures(const std::string& path)
{
// TODO
// TODO
}
void GFXDebuggerBase::DumpFrameBuffer(const std::string& path)
{
// TODO
// TODO
}
void GFXDebuggerBase::DumpGeometry(const std::string& path)
{
// TODO
// TODO
}
void GFXDebuggerBase::DumpVertexDecl(const std::string& path)
{
// TODO
// TODO
}
void GFXDebuggerBase::DumpMatrices(const std::string& path)
{
// TODO
// TODO
}
void GFXDebuggerBase::DumpStats(const std::string& path)
{
// TODO
// TODO
}

View file

@ -9,50 +9,48 @@
// Base class for a graphics debugger front end. OnPause()/OnContinue() are
// virtual hooks with empty default bodies; the Dump* members are non-virtual
// and each takes the output path as a string.
// NOTE(review): the member list appears twice (old and reformatted copy of the
// same diff hunk).
class GFXDebuggerBase
{
public:
virtual ~GFXDebuggerBase() {}
// if paused, debugging functions can be enabled
virtual void OnPause() {}
virtual void OnContinue() {}
void DumpPixelShader(const std::string& path);
void DumpVertexShader(const std::string& path);
void DumpPixelShaderConstants(const std::string& path);
void DumpVertexShaderConstants(const std::string& path);
void DumpTextures(const std::string& path);
void DumpFrameBuffer(const std::string& path);
void DumpGeometry(const std::string& path);
void DumpVertexDecl(const std::string& path);
void DumpMatrices(const std::string& path);
void DumpStats(const std::string& path);
virtual ~GFXDebuggerBase() {}
// if paused, debugging functions can be enabled
virtual void OnPause() {}
virtual void OnContinue() {}
void DumpPixelShader(const std::string& path);
void DumpVertexShader(const std::string& path);
void DumpPixelShaderConstants(const std::string& path);
void DumpVertexShaderConstants(const std::string& path);
void DumpTextures(const std::string& path);
void DumpFrameBuffer(const std::string& path);
void DumpGeometry(const std::string& path);
void DumpVertexDecl(const std::string& path);
void DumpMatrices(const std::string& path);
void DumpStats(const std::string& path);
};
// Events the debugger can pause at. Every enumerator other than NOT_PAUSE is a
// distinct single bit (1 << 0 .. 1 << 14), so values can be combined into a
// mask and tested with `&`.
// NOTE(review): old and reformatted lines of this diff hunk are interleaved
// below, so each enumerator appears twice.
enum PauseEvent
{
NOT_PAUSE = 0,
NEXT_FRAME = 1<<0,
NEXT_FLUSH = 1<<1,
NOT_PAUSE = 0,
NEXT_FRAME = 1 << 0,
NEXT_FLUSH = 1 << 1,
NEXT_PIXEL_SHADER_CHANGE = 1<<2,
NEXT_VERTEX_SHADER_CHANGE = 1<<3,
NEXT_TEXTURE_CHANGE = 1<<4,
NEXT_NEW_TEXTURE = 1<<5,
NEXT_PIXEL_SHADER_CHANGE = 1 << 2,
NEXT_VERTEX_SHADER_CHANGE = 1 << 3,
NEXT_TEXTURE_CHANGE = 1 << 4,
NEXT_NEW_TEXTURE = 1 << 5,
NEXT_XFB_CMD = 1<<6, // TODO
NEXT_EFB_CMD = 1<<7, // TODO
NEXT_XFB_CMD = 1 << 6, // TODO
NEXT_EFB_CMD = 1 << 7, // TODO
NEXT_MATRIX_CMD = 1<<8, // TODO
NEXT_VERTEX_CMD = 1<<9, // TODO
NEXT_TEXTURE_CMD = 1<<10, // TODO
NEXT_LIGHT_CMD = 1<<11, // TODO
NEXT_FOG_CMD = 1<<12, // TODO
NEXT_MATRIX_CMD = 1 << 8, // TODO
NEXT_VERTEX_CMD = 1 << 9, // TODO
NEXT_TEXTURE_CMD = 1 << 10, // TODO
NEXT_LIGHT_CMD = 1 << 11, // TODO
NEXT_FOG_CMD = 1 << 12, // TODO
NEXT_SET_TLUT = 1<<13, // TODO
NEXT_SET_TLUT = 1 << 13, // TODO
NEXT_ERROR = 1<<14, // TODO
NEXT_ERROR = 1 << 14, // TODO
};
extern GFXDebuggerBase *g_pdebugger;
extern GFXDebuggerBase* g_pdebugger;
extern volatile bool GFXDebuggerPauseFlag;
extern volatile PauseEvent GFXDebuggerToPauseAtNext;
extern volatile int GFXDebuggerEventToPauseCount;
@ -61,6 +59,25 @@ void GFXDebuggerCheckAndPause(bool update);
void GFXDebuggerToPause(bool update);
void GFXDebuggerUpdateScreen();
// Debugger hook macros.
//  - GFX_DEBUGGER_PAUSE_AT: pauses when `event` is in the pending mask and the
//    event countdown reaches <= 0, or when the pause flag is already set.
//  - GFX_DEBUGGER_PAUSE_LOG_AT: same condition, but runs `dumpfunc` before pausing.
//  - GFX_DEBUGGER_LOG_AT: runs `dumpfunc` whenever `event` is in the pending mask.
// The countdown is decremented only when the mask test succeeds (short-circuit &&).
// `event` is expanded without parentheses: passing `A | B` parses as
// `(mask & A) | B`, which is non-zero for any non-zero B. Pass a single
// enumerator or parenthesise the argument.
// Each macro expands to a bare `{ ... }` block rather than `do { } while (0)`,
// so `MACRO(...);` directly before an `else` does not compile.
// NOTE(review): each macro is defined twice below (old single-line and
// reformatted multi-line copy of the same diff hunk).
#define GFX_DEBUGGER_PAUSE_AT(event,update) {if (((GFXDebuggerToPauseAtNext & event) && --GFXDebuggerEventToPauseCount<=0) || GFXDebuggerPauseFlag) GFXDebuggerToPause(update);}
#define GFX_DEBUGGER_PAUSE_LOG_AT(event,update,dumpfunc) {if (((GFXDebuggerToPauseAtNext & event) && --GFXDebuggerEventToPauseCount<=0) || GFXDebuggerPauseFlag) {{dumpfunc};GFXDebuggerToPause(update);}}
#define GFX_DEBUGGER_LOG_AT(event,dumpfunc) {if (( GFXDebuggerToPauseAtNext & event ) ) {{dumpfunc};}}
#define GFX_DEBUGGER_PAUSE_AT(event, update) \
{ \
if (((GFXDebuggerToPauseAtNext & event) && --GFXDebuggerEventToPauseCount <= 0) || \
GFXDebuggerPauseFlag) \
GFXDebuggerToPause(update); \
}
#define GFX_DEBUGGER_PAUSE_LOG_AT(event, update, dumpfunc) \
{ \
if (((GFXDebuggerToPauseAtNext & event) && --GFXDebuggerEventToPauseCount <= 0) || \
GFXDebuggerPauseFlag) \
{ \
{dumpfunc}; \
GFXDebuggerToPause(update); \
} \
}
#define GFX_DEBUGGER_LOG_AT(event, dumpfunc) \
{ \
if ((GFXDebuggerToPauseAtNext & event)) \
{ \
{dumpfunc}; \
} \
}

View file

@ -9,110 +9,119 @@
// Driver bug database: Init() records the detected vendor/driver/version/family
// and selects the matching rows of m_known_bugs into m_bugs; HasBug() queries
// that selection.
// NOTE(review): every declaration and function in this namespace appears twice
// (old and reformatted copy of the same diff hunk); the comments describe one
// copy.
namespace DriverDetails
{
struct BugInfo
{
u32 m_os; // Which OS has the issue
Vendor m_vendor; // Which vendor has the error
Driver m_driver; // Which driver has the error
Family m_family; // Which family of hardware has the issue
Bug m_bug; // Which bug it is
double m_versionstart; // When it started
double m_versionend; // When it ended
bool m_hasbug; // Does it have it?
};
struct BugInfo
{
u32 m_os; // Which OS has the issue
Vendor m_vendor; // Which vendor has the error
Driver m_driver; // Which driver has the error
Family m_family; // Which family of hardware has the issue
Bug m_bug; // Which bug it is
double m_versionstart; // When it started
double m_versionend; // When it ended
bool m_hasbug; // Does it have it?
};
// Local members
// Local members
// OS mask of the build target; OS_ALL is always included so that rows tagged
// OS_ALL match on every platform. There is no #else branch, so m_os is not
// declared when none of the listed platform macros is defined.
#ifdef _WIN32
const u32 m_os = OS_ALL | OS_WINDOWS;
const u32 m_os = OS_ALL | OS_WINDOWS;
#elif ANDROID
const u32 m_os = OS_ALL | OS_ANDROID;
const u32 m_os = OS_ALL | OS_ANDROID;
#elif __APPLE__
const u32 m_os = OS_ALL | OS_OSX;
const u32 m_os = OS_ALL | OS_OSX;
#elif __linux__
const u32 m_os = OS_ALL | OS_LINUX;
const u32 m_os = OS_ALL | OS_LINUX;
#elif __FreeBSD__
const u32 m_os = OS_ALL | OS_FREEBSD;
const u32 m_os = OS_ALL | OS_FREEBSD;
#endif
static Vendor m_vendor = VENDOR_UNKNOWN;
static Driver m_driver = DRIVER_UNKNOWN;
static Family m_family = Family::UNKNOWN;
static double m_version = 0.0;
static Vendor m_vendor = VENDOR_UNKNOWN;
static Driver m_driver = DRIVER_UNKNOWN;
static Family m_family = Family::UNKNOWN;
static double m_version = 0.0;
// This is a list of all known bugs for each vendor
// We use this to check if the device and driver has an issue
// Columns: OS mask, vendor, driver, family, bug, version start, version end, has-bug.
// A version bound of -1.0 means "unbounded" (see the matching loop in Init()).
// NOTE(review): the numeric version encoding differs per driver and is not
// defined in this file -- confirm against the code that calls Init().
static BugInfo m_known_bugs[] = {
{OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, Family::UNKNOWN, BUG_BROKENBUFFERSTREAM, -1.0, -1.0, true},
{OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, Family::UNKNOWN, BUG_BROKENNEGATEDBOOLEAN,-1.0, -1.0, true},
{OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, Family::UNKNOWN, BUG_BROKENEXPLICITFLUSH, -1.0, -1.0, true},
{OS_ALL, VENDOR_ARM, DRIVER_ARM, Family::UNKNOWN, BUG_BROKENBUFFERSTREAM, -1.0, -1.0, true},
{OS_ALL, VENDOR_ARM, DRIVER_ARM, Family::UNKNOWN, BUG_BROKENVSYNC, -1.0, -1.0, true},
{OS_ALL, VENDOR_IMGTEC, DRIVER_IMGTEC, Family::UNKNOWN, BUG_BROKENBUFFERSTREAM, -1.0, -1.0, true},
{OS_ALL, VENDOR_MESA, DRIVER_NOUVEAU, Family::UNKNOWN, BUG_BROKENUBO, 900, 916, true},
{OS_ALL, VENDOR_MESA, DRIVER_R600, Family::UNKNOWN, BUG_BROKENUBO, 900, 913, true},
{OS_ALL, VENDOR_MESA, DRIVER_R600, Family::UNKNOWN, BUG_BROKENGEOMETRYSHADERS, -1.0, 1112.0, true},
{OS_ALL, VENDOR_MESA, DRIVER_I965, Family::INTEL_SANDY, BUG_BROKENGEOMETRYSHADERS, -1.0, 1120.0, true},
{OS_ALL, VENDOR_MESA, DRIVER_I965, Family::UNKNOWN, BUG_BROKENUBO, 900, 920, true},
{OS_ALL, VENDOR_MESA, DRIVER_ALL, Family::UNKNOWN, BUG_BROKENCOPYIMAGE, -1.0, 1064.0, true},
{OS_LINUX, VENDOR_ATI, DRIVER_ATI, Family::UNKNOWN, BUG_BROKENPINNEDMEMORY, -1.0, -1.0, true},
{OS_LINUX, VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKENBUFFERSTORAGE, -1.0, 33138.0, true},
{OS_OSX, VENDOR_INTEL, DRIVER_INTEL, Family::INTEL_SANDY, BUG_PRIMITIVERESTART, -1.0, -1.0, true},
{OS_WINDOWS,VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKENUNSYNCMAPPING, -1.0, -1.0, true},
{OS_LINUX, VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKENUNSYNCMAPPING, -1.0, -1.0, true},
{OS_WINDOWS,VENDOR_INTEL, DRIVER_INTEL, Family::UNKNOWN, BUG_INTELBROKENBUFFERSTORAGE, 101810.3907, 101810.3960, true},
{OS_ALL, VENDOR_ATI, DRIVER_ATI, Family::UNKNOWN, BUG_SLOWGETBUFFERSUBDATA, -1.0, -1.0, true},
};
// This is a list of all known bugs for each vendor
// We use this to check if the device and driver has an issue
static BugInfo m_known_bugs[] = {
{OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, Family::UNKNOWN, BUG_BROKENBUFFERSTREAM, -1.0, -1.0,
true},
{OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, Family::UNKNOWN, BUG_BROKENNEGATEDBOOLEAN, -1.0,
-1.0, true},
{OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, Family::UNKNOWN, BUG_BROKENEXPLICITFLUSH, -1.0, -1.0,
true},
{OS_ALL, VENDOR_ARM, DRIVER_ARM, Family::UNKNOWN, BUG_BROKENBUFFERSTREAM, -1.0, -1.0, true},
{OS_ALL, VENDOR_ARM, DRIVER_ARM, Family::UNKNOWN, BUG_BROKENVSYNC, -1.0, -1.0, true},
{OS_ALL, VENDOR_IMGTEC, DRIVER_IMGTEC, Family::UNKNOWN, BUG_BROKENBUFFERSTREAM, -1.0, -1.0,
true},
{OS_ALL, VENDOR_MESA, DRIVER_NOUVEAU, Family::UNKNOWN, BUG_BROKENUBO, 900, 916, true},
{OS_ALL, VENDOR_MESA, DRIVER_R600, Family::UNKNOWN, BUG_BROKENUBO, 900, 913, true},
{OS_ALL, VENDOR_MESA, DRIVER_R600, Family::UNKNOWN, BUG_BROKENGEOMETRYSHADERS, -1.0, 1112.0,
true},
{OS_ALL, VENDOR_MESA, DRIVER_I965, Family::INTEL_SANDY, BUG_BROKENGEOMETRYSHADERS, -1.0, 1120.0,
true},
{OS_ALL, VENDOR_MESA, DRIVER_I965, Family::UNKNOWN, BUG_BROKENUBO, 900, 920, true},
{OS_ALL, VENDOR_MESA, DRIVER_ALL, Family::UNKNOWN, BUG_BROKENCOPYIMAGE, -1.0, 1064.0, true},
{OS_LINUX, VENDOR_ATI, DRIVER_ATI, Family::UNKNOWN, BUG_BROKENPINNEDMEMORY, -1.0, -1.0, true},
{OS_LINUX, VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKENBUFFERSTORAGE, -1.0,
33138.0, true},
{OS_OSX, VENDOR_INTEL, DRIVER_INTEL, Family::INTEL_SANDY, BUG_PRIMITIVERESTART, -1.0, -1.0,
true},
{OS_WINDOWS, VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKENUNSYNCMAPPING, -1.0, -1.0,
true},
{OS_LINUX, VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKENUNSYNCMAPPING, -1.0, -1.0,
true},
{OS_WINDOWS, VENDOR_INTEL, DRIVER_INTEL, Family::UNKNOWN, BUG_INTELBROKENBUFFERSTORAGE,
101810.3907, 101810.3960, true},
{OS_ALL, VENDOR_ATI, DRIVER_ATI, Family::UNKNOWN, BUG_SLOWGETBUFFERSUBDATA, -1.0, -1.0, true},
};
// Bugs selected for the current system by Init(), keyed by bug id.
static std::map<Bug, BugInfo> m_bugs;
static std::map<Bug, BugInfo> m_bugs;
// Records the detected vendor/driver/version/family and fills m_bugs with
// every row of m_known_bugs that matches them.
// m_bugs is not cleared first, and emplace() never overwrites, so the first
// matching row per bug wins and entries from an earlier Init() call persist.
void Init(Vendor vendor, Driver driver, const double version, const Family family)
{
m_vendor = vendor;
m_driver = driver;
m_version = version;
m_family = family;
void Init(Vendor vendor, Driver driver, const double version, const Family family)
{
m_vendor = vendor;
m_driver = driver;
m_version = version;
m_family = family;
// No driver given: fall back to the vendor's own driver. Vendors without a
// case keep DRIVER_UNKNOWN.
if (driver == DRIVER_UNKNOWN)
switch (vendor)
{
case VENDOR_NVIDIA:
case VENDOR_TEGRA:
m_driver = DRIVER_NVIDIA;
break;
case VENDOR_ATI:
m_driver = DRIVER_ATI;
break;
case VENDOR_INTEL:
m_driver = DRIVER_INTEL;
break;
case VENDOR_IMGTEC:
m_driver = DRIVER_IMGTEC;
break;
case VENDOR_VIVANTE:
m_driver = DRIVER_VIVANTE;
break;
default:
break;
}
if (driver == DRIVER_UNKNOWN)
switch (vendor)
{
case VENDOR_NVIDIA:
case VENDOR_TEGRA:
m_driver = DRIVER_NVIDIA;
break;
case VENDOR_ATI:
m_driver = DRIVER_ATI;
break;
case VENDOR_INTEL:
m_driver = DRIVER_INTEL;
break;
case VENDOR_IMGTEC:
m_driver = DRIVER_IMGTEC;
break;
case VENDOR_VIVANTE:
m_driver = DRIVER_VIVANTE;
break;
default:
break;
}
// A row matches when the OS masks overlap, vendor/driver/family are equal or
// the row holds the wildcard (VENDOR_ALL / DRIVER_ALL / Family::UNKNOWN), and
// the version lies in [m_versionstart, m_versionend), where -1 disables that
// bound.
for (auto& bug : m_known_bugs)
{
if (( bug.m_os & m_os ) &&
( bug.m_vendor == m_vendor || bug.m_vendor == VENDOR_ALL ) &&
( bug.m_driver == m_driver || bug.m_driver == DRIVER_ALL ) &&
( bug.m_family == m_family || bug.m_family == Family::UNKNOWN) &&
( bug.m_versionstart <= m_version || bug.m_versionstart == -1 ) &&
( bug.m_versionend > m_version || bug.m_versionend == -1 )
)
m_bugs.emplace(bug.m_bug, bug);
}
}
// Returns the m_hasbug flag of the row selected for `bug` by Init(), or false
// when no row was selected.
bool HasBug(Bug bug)
{
auto it = m_bugs.find(bug);
if (it == m_bugs.end())
return false;
return it->second.m_hasbug;
}
for (auto& bug : m_known_bugs)
{
if ((bug.m_os & m_os) && (bug.m_vendor == m_vendor || bug.m_vendor == VENDOR_ALL) &&
(bug.m_driver == m_driver || bug.m_driver == DRIVER_ALL) &&
(bug.m_family == m_family || bug.m_family == Family::UNKNOWN) &&
(bug.m_versionstart <= m_version || bug.m_versionstart == -1) &&
(bug.m_versionend > m_version || bug.m_versionend == -1))
m_bugs.emplace(bug.m_bug, bug);
}
}
bool HasBug(Bug bug)
{
auto it = m_bugs.find(bug);
if (it == m_bugs.end())
return false;
return it->second.m_hasbug;
}
}

View file

@ -8,197 +8,207 @@
namespace DriverDetails
{
// Enum of supported operating systems
// One bit per operating system so values can be OR-ed into a mask; OS_ALL is
// its own bit (1 << 0), not the union of the other bits.
enum OS
{
OS_ALL = (1 << 0),
OS_WINDOWS = (1 << 1),
OS_LINUX = (1 << 2),
OS_OSX = (1 << 3),
OS_ANDROID = (1 << 4),
OS_FREEBSD = (1 << 5),
};
// Enum of known vendors
// Tegra and Nvidia are separated out due to such substantial differences
// Sequential vendor ids; VENDOR_ALL (0) is the first value and VENDOR_UNKNOWN
// the last.
enum Vendor
{
VENDOR_ALL = 0,
VENDOR_NVIDIA,
VENDOR_ATI,
VENDOR_INTEL,
VENDOR_ARM,
VENDOR_QUALCOMM,
VENDOR_IMGTEC,
VENDOR_TEGRA,
VENDOR_VIVANTE,
VENDOR_MESA,
VENDOR_UNKNOWN
};
// Enum of supported operating systems
// One bit per operating system so values can be OR-ed into a mask; OS_ALL is
// its own bit (1 << 0), not the union of the other bits.
enum OS
{
OS_ALL = (1 << 0),
OS_WINDOWS = (1 << 1),
OS_LINUX = (1 << 2),
OS_OSX = (1 << 3),
OS_ANDROID = (1 << 4),
OS_FREEBSD = (1 << 5),
};
// Enum of known vendors
// Tegra and Nvidia are separated out due to such substantial differences
// Sequential vendor ids; VENDOR_ALL (0) is the first value and VENDOR_UNKNOWN
// the last.
enum Vendor
{
VENDOR_ALL = 0,
VENDOR_NVIDIA,
VENDOR_ATI,
VENDOR_INTEL,
VENDOR_ARM,
VENDOR_QUALCOMM,
VENDOR_IMGTEC,
VENDOR_TEGRA,
VENDOR_VIVANTE,
VENDOR_MESA,
VENDOR_UNKNOWN
};
// Enum of known drivers
// Sequential driver ids; DRIVER_ALL (0) is the first value and DRIVER_UNKNOWN
// the last. Official and open-source (OSS) drivers for the same hardware have
// separate ids.
enum Driver
{
DRIVER_ALL = 0,
DRIVER_NVIDIA, // Official Nvidia, including mobile GPU
DRIVER_NOUVEAU, // OSS nouveau
DRIVER_ATI, // Official ATI
DRIVER_R600, // OSS Radeon
DRIVER_INTEL, // Official Intel
DRIVER_I965, // OSS Intel
DRIVER_ARM, // Official Mali driver
DRIVER_LIMA, // OSS Mali driver
DRIVER_QUALCOMM, // Official Adreno driver
DRIVER_FREEDRENO, // OSS Adreno driver
DRIVER_IMGTEC, // Official PowerVR driver
DRIVER_VIVANTE, // Official Vivante driver
DRIVER_UNKNOWN // Unknown driver, default to official hardware driver
};
// Enum of known drivers
// Sequential driver ids; DRIVER_ALL (0) is the first value and DRIVER_UNKNOWN
// the last. Official and open-source (OSS) drivers for the same hardware have
// separate ids.
enum Driver
{
DRIVER_ALL = 0,
DRIVER_NVIDIA, // Official Nvidia, including mobile GPU
DRIVER_NOUVEAU, // OSS nouveau
DRIVER_ATI, // Official ATI
DRIVER_R600, // OSS Radeon
DRIVER_INTEL, // Official Intel
DRIVER_I965, // OSS Intel
DRIVER_ARM, // Official Mali driver
DRIVER_LIMA, // OSS Mali driver
DRIVER_QUALCOMM, // Official Adreno driver
DRIVER_FREEDRENO, // OSS Adreno driver
DRIVER_IMGTEC, // Official PowerVR driver
DRIVER_VIVANTE, // Official Vivante driver
DRIVER_UNKNOWN // Unknown driver, default to official hardware driver
};
// Hardware family, scoped enum. UNKNOWN is the first value; the other two name
// Intel families.
enum class Family
{
UNKNOWN,
INTEL_SANDY,
INTEL_IVY,
};
// Hardware family, scoped enum. UNKNOWN is the first value; the other two name
// Intel families.
enum class Family
{
UNKNOWN,
INTEL_SANDY,
INTEL_IVY,
};
// Enum of known bugs
// These can be vendor specific, but we put them all in here
// For putting a new bug in here, make sure to put a detailed comment above the enum
// This'll ensure we know exactly what the issue is.
// Enumerators are numbered implicitly from 0 in declaration order; append new
// entries at the end so existing values do not shift.
enum Bug
{
  // Bug: UBO buffer offset broken
  // Affected devices: all mesa drivers
  // Started Version: 9.0 (mesa doesn't support ubo before)
  // Ended Version: up to 9.2
  // The offset of glBindBufferRange was ignored on all Mesa Gallium3D drivers until 9.1.3
  // Nouveau stored the offset as u16 which isn't enough for all cases with range until 9.1.6
  // I965 has broken data fetches from uniform buffers which results in a dithering until 9.2.0
  BUG_BROKENUBO,

  // Bug: The pinned memory extension isn't working for index buffers
  // Affected devices: AMD as they are the only vendor providing this extension
  // Started Version: ?
  // Ended Version: 13.9 working for me (neobrain).
  // Affected OS: Linux
  // Pinned memory is disabled for index buffer as the AMD driver (the only one with pinned
  // memory support) seems to be broken. We just get flickering/black rendering when using
  // pinned memory here -- degasus - 2013/08/20
  // This bug only happens when paired with base_vertex.
  // Please see issue #6105. Let's hope buffer storage solves this issue.
  // TODO: Detect broken drivers.
  BUG_BROKENPINNEDMEMORY,

  // Bug: glBufferSubData/glMapBufferRange stalls + OOM
  // Affected devices: Adreno a3xx/Mali-t6xx
  // Started Version: -1
  // Ended Version: -1
  // Both Adreno and Mali have issues when you call glBufferSubData or glMapBufferRange
  // The driver stalls in each instance no matter what you do
  // Apparently Mali and Adreno share code in this regard since it was written by the same
  // person.
  BUG_BROKENBUFFERSTREAM,

  // Bug: ARB_buffer_storage doesn't work with ARRAY_BUFFER type streams
  // Affected devices: GeForce 4xx+
  // Started Version: -1
  // Ended Version: 332.21
  // The buffer_storage streaming method is required for greater speed gains in our buffer
  // streaming
  // It reduces what is needed for streaming to basically a memcpy call
  // It seems to work for all buffer types except GL_ARRAY_BUFFER
  BUG_BROKENBUFFERSTORAGE,

  // Bug: Intel HD 3000 on OS X has broken primitive restart
  // Affected devices: Intel HD 3000
  // Affected OS: OS X
  // Started Version: -1
  // Ended Version: -1
  // The drivers on OS X have broken primitive restart.
  // Intel HD 4000 series isn't affected by the bug
  BUG_PRIMITIVERESTART,

  // Bug: unsync mapping doesn't work fine
  // Affected devices: Nvidia driver
  // Started Version: -1
  // Ended Version: -1
  // The Nvidia driver (both Windows + Linux) doesn't like unsync mapping performance wise.
  // Because of their threaded behavior, they seem not to handle unsync mapping complete unsync,
  // in fact, they serialize the driver which adds a much bigger overhead.
  // Workaround: Use BufferSubData
  // TODO: some Windows AMD driver/GPU combination seems also affected
  //       but as they all support pinned memory, it doesn't matter
  BUG_BROKENUNSYNCMAPPING,

  // Bug: Intel's Windows driver broke buffer_storage with GL_ELEMENT_ARRAY_BUFFER
  // Affected devices: Intel (Windows)
  // Started Version: 15.36.3.64.3907 (10.18.10.3907)
  // Ended Version: 15.36.7.64.3960 (10.18.10.3960)
  // Intel implemented buffer_storage in their GL 4.3 driver.
  // It works for all the buffer types we use except GL_ELEMENT_ARRAY_BUFFER.
  // Causes complete blackscreen issues.
  BUG_INTELBROKENBUFFERSTORAGE,

  // Bug: Qualcomm has broken boolean negation
  // Affected devices: Adreno
  // Started Version: -1
  // Ended Version: -1
  // Qualcomm has the boolean negation broken in their shader compiler
  // Instead of inverting the boolean value it does a binary negation on the full 32bit register
  // This causes a compare against zero to fail in their shader since it is no longer a 0 or 1
  // value but 0xFFFFFFFF or 0xFFFFFFFE depending on what the boolean value was before the
  // negation.
  //
  // This bug has a secondary issue tied to it unlike other bugs.
  // The correction of this bug is to check the boolean value against false which results in us
  // not doing a negation of the source but instead checking against the boolean value we want.
  // The issue with this is that Intel's Windows driver is broken when checking if a boolean
  // value is equal to true or false, so one has to do a boolean negation of the source
  //
  // eg.
  // Broken on Qualcomm
  // Works on Windows Intel
  // if (!cond)
  //
  // Works on Qualcomm
  // Broken on Windows Intel
  // if (cond == false)
  BUG_BROKENNEGATEDBOOLEAN,

  // Bug: glCopyImageSubData doesn't work on i965
  // Started Version: -1
  // Ended Version: 10.6.4
  // Mesa meta misses to disable the scissor test.
  BUG_BROKENCOPYIMAGE,

  // Bug: ARM Mali managed to break disabling vsync
  // Affected Devices: Mali
  // Started Version: r5p0-rev2
  // Ended Version: -1
  // If we disable vsync with eglSwapInterval(dpy, 0) then the screen will stop showing new
  // updates after a handful of swaps.
  // This was noticed on a Samsung Galaxy S6 with its Android 5.1.1 update.
  // The default Android 5.0 image didn't encounter this issue.
  // We can't actually detect what the driver version is on Android, so until the driver version
  // lands that displays the version in the GL_VERSION string, we will have to force vsync to be
  // enabled at all times.
  BUG_BROKENVSYNC,

  // Bug: Broken lines in geometry shaders
  // Affected Devices: Mesa r600/radeonsi, Mesa Sandy Bridge
  // Started Version: -1
  // Ended Version: 11.1.2 for radeon, -1 for Sandy
  // Mesa introduced geometry shader support for radeon and sandy bridge devices and failed to
  // test it with us.
  // Causes misrenderings on a large amount of things that draw lines.
  BUG_BROKENGEOMETRYSHADERS,

  // Bug: Explicit flush is very slow on Qualcomm
  // Started Version: -1
  // Ended Version: -1
  // Our ARB_buffer_storage code uses explicit flush to avoid coherent mapping.
  // Qualcomm seems to have lots of overhead on explicit flushing, but the coherent mapping path
  // is fine.
  // So let's use coherent mapping there.
  BUG_BROKENEXPLICITFLUSH,

  // Bug: glGetBufferSubData for bounding box reads is slow on AMD drivers
  // Started Version: -1
  // Ended Version: -1
  // Bounding box reads use glGetBufferSubData to read back the contents of the SSBO, but this
  // is slow on AMD drivers, compared to using glMapBufferRange. glMapBufferRange is slower on
  // Nvidia drivers, we suspect due to the first call moving the buffer from GPU memory to
  // system memory. Use glMapBufferRange for BBox reads on AMD, and glGetBufferSubData
  // everywhere else.
  BUG_SLOWGETBUFFERSUBDATA,
};
// Initializes our internal vendor, device family, and driver version.
// Must be called before HasBug() can give a meaningful answer.
void Init(Vendor vendor, Driver driver, const double version, const Family family);

// Once Vendor and driver version is set, this will return if it has the applicable bug passed
// to it.
bool HasBug(Bug bug);
}

View file

@ -14,33 +14,33 @@ static constexpr u64 FPS_REFRESH_INTERVAL = 1000;
// Starts both timers: m_update_time (compared against FPS_REFRESH_INTERVAL in Update())
// and m_render_time (used for the optional per-frame render time log).
FPSCounter::FPSCounter()
{
  m_update_time.Update();
  m_render_time.Update();
}
// Appends `val` as one line to "render_time.txt" under the D_LOGS_IDX user path.
// The file is opened lazily on the first call and kept open afterwards.
// Each value is written exactly once; std::endl also flushes the stream.
void FPSCounter::LogRenderTimeToFile(u64 val)
{
  if (!m_bench_file.is_open())
    m_bench_file.open(File::GetUserPath(D_LOGS_IDX) + "render_time.txt");

  m_bench_file << val << std::endl;
}
// Called once per rendered frame.
//  - When m_update_time reports at least FPS_REFRESH_INTERVAL elapsed, m_fps is set to the
//    number of frames counted since the previous refresh, the timer is restarted and the
//    benchmark file is flushed.
//  - When bLogRenderTimeToFile is enabled, the time since the previous call is logged and
//    m_render_time is restarted.
//  - The frame counter is advanced by exactly one per call; counting a frame twice would
//    double the reported FPS.
void FPSCounter::Update()
{
  if (m_update_time.GetTimeDifference() >= FPS_REFRESH_INTERVAL)
  {
    m_update_time.Update();
    m_fps = m_counter - m_fps_last_counter;
    m_fps_last_counter = m_counter;
    m_bench_file.flush();
  }

  if (g_ActiveConfig.bLogRenderTimeToFile)
  {
    LogRenderTimeToFile(m_render_time.GetTimeDifference());
    m_render_time.Update();
  }

  m_counter++;
}

View file

@ -11,22 +11,21 @@
// Counts rendered frames and exposes the most recently computed frames-per-second value.
// Can additionally write per-frame render times to a benchmark file.
class FPSCounter
{
public:
  // Initializes the FPS counter.
  FPSCounter();

  // Called when a frame is rendered (updated every second).
  void Update();

  // Returns the FPS value computed at the last refresh (0 until the first refresh).
  unsigned int GetFPS() const { return m_fps; }
private:
  unsigned int m_fps = 0;               // value returned by GetFPS()
  unsigned int m_counter = 0;           // total frames seen by Update()
  unsigned int m_fps_last_counter = 0;  // m_counter at the previous refresh
  Common::Timer m_update_time;          // time since the previous FPS refresh

  Common::Timer m_render_time;  // time since the previous logged frame
  std::ofstream m_bench_file;   // render time log, opened lazily

  void LogRenderTimeToFile(u64 val);
};

View file

@ -16,12 +16,12 @@
#include "Core/ConfigManager.h"
#include "Core/CoreTiming.h"
#include "Core/NetPlayProto.h"
#include "Core/HW/Memmap.h"
#include "Core/NetPlayProto.h"
#include "VideoCommon/AsyncRequests.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/OpcodeDecoding.h"
@ -30,7 +30,6 @@
namespace Fifo
{
static constexpr u32 FIFO_SIZE = 2 * 1024 * 1024;
static bool s_skip_current_frame = false;
@ -72,442 +71,450 @@ static u8* s_video_buffer_pp_read_ptr;
static std::atomic<int> s_sync_ticks;
static Common::Event s_sync_wakeup_event;
// Runs the FIFO state through the savestate wrapper `p`: the video buffer contents, the
// write and read pointers (handed to DoPointer together with the buffer base), the
// skip-frame flag and the last GPU sync tick.
// The write pointer goes through a local copy which is stored back afterwards.
void DoState(PointerWrap& p)
{
  p.DoArray(s_video_buffer, FIFO_SIZE);
  u8* write_ptr = s_video_buffer_write_ptr;
  p.DoPointer(write_ptr, s_video_buffer);
  s_video_buffer_write_ptr = write_ptr;
  p.DoPointer(s_video_buffer_read_ptr, s_video_buffer);
  if (p.mode == PointerWrap::MODE_READ && s_use_deterministic_gpu_thread)
  {
    // We're good and paused, right?
    // After loading, treat everything up to the read pointer as seen and preprocessed.
    s_video_buffer_seen_ptr = s_video_buffer_pp_read_ptr = s_video_buffer_read_ptr;
  }

  p.Do(s_skip_current_frame);
  p.Do(s_last_sync_gpu_tick);
}
// Locking (doLock == true): syncs the GPU, marks the emulator as not running and flushes
// outstanding GPU work.
// Unlocking (doLock == false): marks the emulator as running again only when
// unpauseOnUnlock is set; otherwise does nothing.
void PauseAndLock(bool doLock, bool unpauseOnUnlock)
{
  if (doLock)
  {
    SyncGPU(SYNC_GPU_OTHER);
    EmulatorState(false);
    FlushGpu();
  }
  else
  {
    if (unpauseOnUnlock)
      EmulatorState(true);
  }
}
// Allocates the video buffer, resets all FIFO pointers, prepares the GPU main loop when the
// CPU-thread (dual core) option is enabled, and clears the sync tick counter.
// The buffer is allocated exactly once here; Shutdown() frees it with the same size.
void Init()
{
  // Padded so that SIMD overreads in the vertex loader are safe
  s_video_buffer = (u8*)AllocateMemoryPages(FIFO_SIZE + 4);
  ResetVideoBuffer();
  if (SConfig::GetInstance().bCPUThread)
    s_gpu_mainloop.Prepare();
  s_sync_ticks.store(0);
}
// Frees the video buffer allocated in Init() and nulls every FIFO pointer.
// Raises a PanicAlert (but still proceeds) if the GPU main loop is still running.
void Shutdown()
{
  if (s_gpu_mainloop.IsRunning())
    PanicAlert("Fifo shutting down while active");

  // Size must match the allocation in Init(), including the 4 bytes of padding.
  FreeMemoryPages(s_video_buffer, FIFO_SIZE + 4);
  s_video_buffer = nullptr;
  s_video_buffer_write_ptr = nullptr;
  s_video_buffer_pp_read_ptr = nullptr;
  s_video_buffer_read_ptr = nullptr;
  s_video_buffer_seen_ptr = nullptr;
  s_fifo_aux_write_ptr = nullptr;
  s_fifo_aux_read_ptr = nullptr;
}
// Enables or disables rendering: the current frame is skipped while `enabled` is false.
void SetRendering(bool enabled)
{
  s_skip_current_frame = !enabled;
}
// Returns the skip flag last set through SetRendering() (true means rendering is disabled).
bool WillSkipCurrentFrame()
{
  return s_skip_current_frame;
}
// May be executed from any thread, even the graphics thread.
// Created to allow for self shutdown.
// Disables GP reads, flushes outstanding GPU work, then stops the GPU main loop.
void ExitGpuLoop()
{
  // This should break the wait loop in CPU thread
  CommandProcessor::fifo.bFF_GPReadEnable = false;
  FlushGpu();

  // Terminate GPU thread loop
  s_emu_running_state.store(true);
  s_gpu_mainloop.Stop(false);
}
// Records whether emulation is running and wakes the GPU main loop so it sees the new state.
void EmulatorState(bool running)
{
  s_emu_running_state.store(running);
  s_gpu_mainloop.Wakeup();
}
// Waits for the GPU thread to catch up and compacts the FIFO buffers.
// Only does anything in deterministic GPU thread mode; returns early if the GPU main loop
// is no longer running after the wait.
//   reason            - why the sync was requested; not read in this function body.
//   may_move_read_ptr - when true, unread video buffer data is also moved to the start of
//                       s_video_buffer and the read/write/seen pointers are rebased.
// The aux FIFO is always compacted to the start of s_fifo_aux_data.
void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr)
{
  if (s_use_deterministic_gpu_thread)
  {
    s_gpu_mainloop.Wait();
    if (!s_gpu_mainloop.IsRunning())
      return;

    // Opportunistically reset FIFOs so we don't wrap around.
    if (may_move_read_ptr && s_fifo_aux_write_ptr != s_fifo_aux_read_ptr)
      PanicAlert("aux fifo not synced (%p, %p)", s_fifo_aux_write_ptr, s_fifo_aux_read_ptr);

    memmove(s_fifo_aux_data, s_fifo_aux_read_ptr, s_fifo_aux_write_ptr - s_fifo_aux_read_ptr);
    s_fifo_aux_write_ptr -= (s_fifo_aux_read_ptr - s_fifo_aux_data);
    s_fifo_aux_read_ptr = s_fifo_aux_data;

    if (may_move_read_ptr)
    {
      u8* write_ptr = s_video_buffer_write_ptr;

      // what's left over in the buffer
      size_t size = write_ptr - s_video_buffer_pp_read_ptr;

      memmove(s_video_buffer, s_video_buffer_pp_read_ptr, size);
      // This change always decreases the pointers.  We write seen_ptr
      // after write_ptr here, and read it before in RunGpuLoop, so
      // 'write_ptr > seen_ptr' there cannot become spuriously true.
      s_video_buffer_write_ptr = write_ptr = s_video_buffer + size;
      s_video_buffer_pp_read_ptr = s_video_buffer;
      s_video_buffer_read_ptr = s_video_buffer;
      s_video_buffer_seen_ptr = write_ptr;
    }
  }
}
// Appends `size` bytes from `ptr` to the aux FIFO.
// If the data does not fit in the space left, the GPU is synced first (which compacts the
// aux FIFO). Nothing is written if the GPU is shutting down or the data still does not fit.
// The payload is copied once and the write pointer advances by `size`.
void PushFifoAuxBuffer(void* ptr, size_t size)
{
  if (size > (size_t)(s_fifo_aux_data + FIFO_SIZE - s_fifo_aux_write_ptr))
  {
    SyncGPU(SYNC_GPU_AUX_SPACE, /* may_move_read_ptr */ false);
    if (!s_gpu_mainloop.IsRunning())
    {
      // GPU is shutting down
      return;
    }
    if (size > (size_t)(s_fifo_aux_data + FIFO_SIZE - s_fifo_aux_write_ptr))
    {
      // That will sync us up to the last 32 bytes, so this short region
      // of FIFO would have to point to a 2MB display list or something.
      PanicAlert("absurdly large aux buffer");
      return;
    }
  }
  memcpy(s_fifo_aux_write_ptr, ptr, size);
  s_fifo_aux_write_ptr += size;
}
// Consumes `size` bytes from the aux FIFO: returns the current read position and advances
// the read pointer past the consumed bytes. No bounds check is performed here.
void* PopFifoAuxBuffer(size_t size)
{
  void* ret = s_fifo_aux_read_ptr;
  s_fifo_aux_read_ptr += size;
  return ret;
}
// Description: RunGpuLoop() sends data through this function.
// Copies one 32-byte chunk from emulated memory at `readPtr` to the write position of
// s_video_buffer. When fewer than 32 bytes remain at the tail of the buffer, the unread
// data is first moved to the start; if even then there is no room, a PanicAlert is raised
// and nothing is copied.
static void ReadDataFromFifo(u32 readPtr)
{
  size_t len = 32;
  if (len > (size_t)(s_video_buffer + FIFO_SIZE - s_video_buffer_write_ptr))
  {
    size_t existing_len = s_video_buffer_write_ptr - s_video_buffer_read_ptr;
    if (len > (size_t)(FIFO_SIZE - existing_len))
    {
      PanicAlert("FIFO out of bounds (existing %zu + new %zu > %lu)", existing_len, len,
                 (unsigned long)FIFO_SIZE);
      return;
    }
    memmove(s_video_buffer, s_video_buffer_read_ptr, existing_len);
    s_video_buffer_write_ptr = s_video_buffer + existing_len;
    s_video_buffer_read_ptr = s_video_buffer;
  }
  // Copy new video instructions to s_video_buffer for future use in rendering the new picture
  Memory::CopyFromEmu(s_video_buffer_write_ptr, readPtr, len);
  s_video_buffer_write_ptr += len;
}
// The deterministic_gpu_thread version.
// Copies one 32-byte chunk from emulated memory at `readPtr` into s_video_buffer, runs the
// preprocessing opcode decoder (OpcodeDecoder::Run<true>) over the not yet preprocessed
// data, and only then publishes the new write pointer.
// When the tail of the buffer is full, SyncGPU is used to compact it instead of wrapping.
static void ReadDataFromFifoOnCPU(u32 readPtr)
{
  size_t len = 32;
  u8* write_ptr = s_video_buffer_write_ptr;
  if (len > (size_t)(s_video_buffer + FIFO_SIZE - write_ptr))
  {
    // We can't wrap around while the GPU is working on the data.
    // This should be very rare due to the reset in SyncGPU.
    SyncGPU(SYNC_GPU_WRAPAROUND);
    if (!s_gpu_mainloop.IsRunning())
    {
      // GPU is shutting down, so the next asserts may fail
      return;
    }

    if (s_video_buffer_pp_read_ptr != s_video_buffer_read_ptr)
    {
      PanicAlert("desynced read pointers");
      return;
    }
    // SyncGPU may have moved the buffer contents, so reload the write pointer.
    write_ptr = s_video_buffer_write_ptr;
    size_t existing_len = write_ptr - s_video_buffer_pp_read_ptr;
    if (len > (size_t)(FIFO_SIZE - existing_len))
    {
      PanicAlert("FIFO out of bounds (existing %zu + new %zu > %lu)", existing_len, len,
                 (unsigned long)FIFO_SIZE);
      return;
    }
  }
  Memory::CopyFromEmu(s_video_buffer_write_ptr, readPtr, len);
  s_video_buffer_pp_read_ptr = OpcodeDecoder::Run<true>(
      DataReader(s_video_buffer_pp_read_ptr, write_ptr + len), nullptr, false);
  // This would have to be locked if the GPU thread didn't spin.
  s_video_buffer_write_ptr = write_ptr + len;
}
// Marks both FIFOs as empty: every video buffer pointer is reset to the start of
// s_video_buffer and both aux pointers to the start of s_fifo_aux_data.
void ResetVideoBuffer()
{
  s_video_buffer_read_ptr = s_video_buffer;
  s_video_buffer_write_ptr = s_video_buffer;
  s_video_buffer_seen_ptr = s_video_buffer;
  s_video_buffer_pp_read_ptr = s_video_buffer;
  s_fifo_aux_write_ptr = s_fifo_aux_data;
  s_fifo_aux_read_ptr = s_fifo_aux_data;
}
// Description: Main FIFO update loop
// Purpose: Keep the Core HW updated about the CPU-GPU distance
void RunGpuLoop()
{
AsyncRequests::GetInstance()->SetEnable(true);
AsyncRequests::GetInstance()->SetPassthrough(false);
AsyncRequests::GetInstance()->SetEnable(true);
AsyncRequests::GetInstance()->SetPassthrough(false);
s_gpu_mainloop.Run(
[] {
const SConfig& param = SConfig::GetInstance();
s_gpu_mainloop.Run(
[] {
const SConfig& param = SConfig::GetInstance();
g_video_backend->PeekMessages();
g_video_backend->PeekMessages();
// Do nothing while paused
if (!s_emu_running_state.load())
return;
// Do nothing while paused
if (!s_emu_running_state.load())
return;
if (s_use_deterministic_gpu_thread)
{
AsyncRequests::GetInstance()->PullEvents();
if (s_use_deterministic_gpu_thread)
{
AsyncRequests::GetInstance()->PullEvents();
// All the fifo/CP stuff is on the CPU. We just need to run the opcode decoder.
u8* seen_ptr = s_video_buffer_seen_ptr;
u8* write_ptr = s_video_buffer_write_ptr;
// See comment in SyncGPU
if (write_ptr > seen_ptr)
{
s_video_buffer_read_ptr =
OpcodeDecoder::Run(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr, false);
s_video_buffer_seen_ptr = write_ptr;
}
}
else
{
SCPFifoStruct& fifo = CommandProcessor::fifo;
// All the fifo/CP stuff is on the CPU. We just need to run the opcode decoder.
u8* seen_ptr = s_video_buffer_seen_ptr;
u8* write_ptr = s_video_buffer_write_ptr;
// See comment in SyncGPU
if (write_ptr > seen_ptr)
{
s_video_buffer_read_ptr = OpcodeDecoder::Run(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr, false);
s_video_buffer_seen_ptr = write_ptr;
}
}
else
{
SCPFifoStruct &fifo = CommandProcessor::fifo;
AsyncRequests::GetInstance()->PullEvents();
AsyncRequests::GetInstance()->PullEvents();
CommandProcessor::SetCPStatusFromGPU();
CommandProcessor::SetCPStatusFromGPU();
// check if we are able to run this buffer
while (!CommandProcessor::IsInterruptWaiting() && fifo.bFF_GPReadEnable &&
fifo.CPReadWriteDistance && !AtBreakpoint())
{
if (param.bSyncGPU && s_sync_ticks.load() < param.iSyncGpuMinDistance)
break;
// check if we are able to run this buffer
while (!CommandProcessor::IsInterruptWaiting() && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
{
if (param.bSyncGPU && s_sync_ticks.load() < param.iSyncGpuMinDistance)
break;
u32 cyclesExecuted = 0;
u32 readPtr = fifo.CPReadPointer;
ReadDataFromFifo(readPtr);
u32 cyclesExecuted = 0;
u32 readPtr = fifo.CPReadPointer;
ReadDataFromFifo(readPtr);
if (readPtr == fifo.CPEnd)
readPtr = fifo.CPBase;
else
readPtr += 32;
if (readPtr == fifo.CPEnd)
readPtr = fifo.CPBase;
else
readPtr += 32;
_assert_msg_(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0,
"Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce "
"instability in the game. Please report it.",
fifo.CPReadWriteDistance - 32);
_assert_msg_(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0 ,
"Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce instability in the game. Please report it.", fifo.CPReadWriteDistance - 32);
u8* write_ptr = s_video_buffer_write_ptr;
s_video_buffer_read_ptr = OpcodeDecoder::Run(
DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false);
u8* write_ptr = s_video_buffer_write_ptr;
s_video_buffer_read_ptr = OpcodeDecoder::Run(DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false);
Common::AtomicStore(fifo.CPReadPointer, readPtr);
Common::AtomicAdd(fifo.CPReadWriteDistance, -32);
if ((write_ptr - s_video_buffer_read_ptr) == 0)
Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer);
Common::AtomicStore(fifo.CPReadPointer, readPtr);
Common::AtomicAdd(fifo.CPReadWriteDistance, -32);
if ((write_ptr - s_video_buffer_read_ptr) == 0)
Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer);
CommandProcessor::SetCPStatusFromGPU();
CommandProcessor::SetCPStatusFromGPU();
if (param.bSyncGPU)
{
cyclesExecuted = (int)(cyclesExecuted / param.fSyncGpuOverclock);
int old = s_sync_ticks.fetch_sub(cyclesExecuted);
if (old > 0 && old - (int)cyclesExecuted <= 0)
s_sync_wakeup_event.Set();
}
if (param.bSyncGPU)
{
cyclesExecuted = (int)(cyclesExecuted / param.fSyncGpuOverclock);
int old = s_sync_ticks.fetch_sub(cyclesExecuted);
if (old > 0 && old - (int)cyclesExecuted <= 0)
s_sync_wakeup_event.Set();
}
// This call is pretty important in DualCore mode and must be called in the FIFO Loop.
// If we don't, s_swapRequested or s_efbAccessRequested won't be set to false
// leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing
// things down.
AsyncRequests::GetInstance()->PullEvents();
}
// This call is pretty important in DualCore mode and must be called in the FIFO Loop.
// If we don't, s_swapRequested or s_efbAccessRequested won't be set to false
// leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down.
AsyncRequests::GetInstance()->PullEvents();
}
// fast skip remaining GPU time if fifo is empty
if (s_sync_ticks.load() > 0)
{
int old = s_sync_ticks.exchange(0);
if (old > 0)
s_sync_wakeup_event.Set();
}
// fast skip remaining GPU time if fifo is empty
if (s_sync_ticks.load() > 0)
{
int old = s_sync_ticks.exchange(0);
if (old > 0)
s_sync_wakeup_event.Set();
}
// The fifo is empty and it's unlikely we will get any more work in the near future.
// Make sure VertexManager finishes drawing any primitives it has stored in it's buffer.
VertexManagerBase::Flush();
}
},
100);
// The fifo is empty and it's unlikely we will get any more work in the near future.
// Make sure VertexManager finishes drawing any primitives it has stored in it's buffer.
VertexManagerBase::Flush();
}
}, 100);
AsyncRequests::GetInstance()->SetEnable(false);
AsyncRequests::GetInstance()->SetPassthrough(true);
AsyncRequests::GetInstance()->SetEnable(false);
AsyncRequests::GetInstance()->SetPassthrough(true);
}
// Waits on the GPU main loop (s_gpu_mainloop.Wait()).
// Returns immediately when bCPUThread is off or when the deterministic GPU
// thread mode is active.
void FlushGpu()
{
  const SConfig& param = SConfig::GetInstance();

  if (!param.bCPUThread || s_use_deterministic_gpu_thread)
    return;

  s_gpu_mainloop.Wait();
}
// Forwards to s_gpu_mainloop.AllowSleep().
void GpuMaySleep()
{
  s_gpu_mainloop.AllowSleep();
}
// Returns true when the FIFO breakpoint is enabled (bFF_BPEnable) and the
// read pointer is exactly at the breakpoint address.
bool AtBreakpoint()
{
  SCPFifoStruct& fifo = CommandProcessor::fifo;
  return fifo.bFF_BPEnable && (fifo.CPReadPointer == fifo.CPBreakpoint);
}
// Processes pending FIFO data on the calling thread when there is no separate
// GPU thread (bCPUThread off) or when the deterministic GPU thread mode is on,
// then wakes the GPU main loop if bCPUThread is set.
void RunGpu()
{
  SCPFifoStruct& fifo = CommandProcessor::fifo;
  const SConfig& param = SConfig::GetInstance();

  // execute GPU
  if (!param.bCPUThread || s_use_deterministic_gpu_thread)
  {
    // Tracks whether the SIMD state was swapped so it is restored exactly once.
    bool reset_simd_state = false;
    while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
    {
      if (s_use_deterministic_gpu_thread)
      {
        ReadDataFromFifoOnCPU(fifo.CPReadPointer);
        s_gpu_mainloop.Wakeup();
      }
      else
      {
        if (!reset_simd_state)
        {
          FPURoundMode::SaveSIMDState();
          FPURoundMode::LoadDefaultSIMDState();
          reset_simd_state = true;
        }
        ReadDataFromFifo(fifo.CPReadPointer);
        s_video_buffer_read_ptr = OpcodeDecoder::Run(
            DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), nullptr, false);
      }

      // DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base");

      // Advance by one 32-byte step, wrapping from CPEnd back to CPBase.
      if (fifo.CPReadPointer == fifo.CPEnd)
        fifo.CPReadPointer = fifo.CPBase;
      else
        fifo.CPReadPointer += 32;

      fifo.CPReadWriteDistance -= 32;
    }
    CommandProcessor::SetCPStatusFromGPU();

    if (reset_simd_state)
    {
      FPURoundMode::LoadSIMDState();
    }
  }

  // wake up GPU thread
  if (param.bCPUThread)
  {
    s_gpu_mainloop.Wakeup();
  }
}
// Recomputes whether the deterministic GPU thread mode should be used, based
// on `want`, the configured m_GPUDeterminismMode and bCPUThread, and
// re-synchronises the preprocessing state when the mode gets switched on.
void UpdateWantDeterminism(bool want)
{
  // We are paused (or not running at all yet), so
  // it should be safe to change this.
  const SConfig& param = SConfig::GetInstance();
  bool gpu_thread = false;
  switch (param.m_GPUDeterminismMode)
  {
  case GPU_DETERMINISM_AUTO:
    gpu_thread = want;

    // Hack: For now movies are an exception to this being on (but not
    // to wanting determinism in general).  Once vertex arrays are
    // fixed, there should be no reason to want this off for movies by
    // default, so this can be removed.
    if (!NetPlay::IsNetPlayRunning())
      gpu_thread = false;

    break;
  case GPU_DETERMINISM_NONE:
    gpu_thread = false;
    break;
  case GPU_DETERMINISM_FAKE_COMPLETION:
    gpu_thread = true;
    break;
  }

  // The mode is only ever enabled together with bCPUThread.
  gpu_thread = gpu_thread && param.bCPUThread;

  if (s_use_deterministic_gpu_thread != gpu_thread)
  {
    s_use_deterministic_gpu_thread = gpu_thread;
    if (gpu_thread)
    {
      // These haven't been updated in non-deterministic mode.
      s_video_buffer_seen_ptr = s_video_buffer_pp_read_ptr = s_video_buffer_read_ptr;
      CopyPreprocessCPStateFromMain();
      VertexLoaderManager::MarkAllDirty();
    }
  }
}
// Returns the flag last computed by UpdateWantDeterminism().
bool UseDeterministicGPUThread()
{
  return s_use_deterministic_gpu_thread;
}
/* This function checks the emulated CPU - GPU distance and may wake up the GPU,
@ -517,56 +524,55 @@ bool UseDeterministicGPUThread()
*/
// Adds `ticks` to s_sync_ticks, runs the GPU when the counter crosses
// iSyncGpuMinDistance, and waits while the counter is at or above
// iSyncGpuMaxDistance. Returns the tick count the caller uses to schedule
// the next call.
static int Update(int ticks)
{
  const SConfig& param = SConfig::GetInstance();

  // GPU is sleeping, so no need for synchronization
  if (s_gpu_mainloop.IsDone() || s_use_deterministic_gpu_thread)
  {
    if (s_sync_ticks.load() < 0)
    {
      int old = s_sync_ticks.fetch_add(ticks);
      if (old < param.iSyncGpuMinDistance && old + ticks >= param.iSyncGpuMinDistance)
        RunGpu();
    }
    return param.iSyncGpuMaxDistance;
  }

  // Wakeup GPU
  int old = s_sync_ticks.fetch_add(ticks);
  if (old < param.iSyncGpuMinDistance && old + ticks >= param.iSyncGpuMinDistance)
    RunGpu();

  // Wait for GPU
  if (s_sync_ticks.load() >= param.iSyncGpuMaxDistance)
  {
    while (s_sync_ticks.load() > 0)
    {
      s_sync_wakeup_event.Wait();
    }
  }

  return param.iSyncGpuMaxDistance - s_sync_ticks.load();
}
// CoreTiming event callback: feeds the ticks elapsed since the previous call
// into Fifo::Update() and reschedules itself when Update() returns a positive
// delay. `userdata` and `cyclesLate` are unused.
static void SyncGPUCallback(u64 userdata, s64 cyclesLate)
{
  u64 now = CoreTiming::GetTicks();
  int next = Fifo::Update((int)(now - s_last_sync_gpu_tick));
  s_last_sync_gpu_tick = now;

  if (next > 0)
    CoreTiming::ScheduleEvent(next, s_event_sync_gpu);
}
// Initialize GPU - CPU thread syncing, this gives us a deterministic way to start the GPU thread.
// Only does anything when both bCPUThread and bSyncGPU are set: registers the
// SyncGPUCallback event, schedules it immediately and records the current tick.
void Prepare()
{
  if (SConfig::GetInstance().bCPUThread && SConfig::GetInstance().bSyncGPU)
  {
    s_event_sync_gpu = CoreTiming::RegisterEvent("SyncGPUCallback", SyncGPUCallback);
    CoreTiming::ScheduleEvent(0, s_event_sync_gpu);
    s_last_sync_gpu_tick = CoreTiming::GetTicks();
  }
}
}

View file

@ -11,11 +11,10 @@ class PointerWrap;
namespace Fifo
{
void Init();
void Shutdown();
void Prepare();  // Must be called from the CPU thread.
void DoState(PointerWrap& f);
void PauseAndLock(bool doLock, bool unpauseOnUnlock);
void UpdateWantDeterminism(bool want);
bool UseDeterministicGPUThread();
@ -23,13 +22,13 @@ bool UseDeterministicGPUThread();
// Used for diagnostics.
// Reason codes passed to SyncGPU(); values are implicit and start at 0.
enum SyncGPUReason
{
  SYNC_GPU_OTHER,
  SYNC_GPU_WRAPAROUND,
  SYNC_GPU_EFB_POKE,
  SYNC_GPU_PERFQUERY,
  SYNC_GPU_BBOX,
  SYNC_GPU_SWAP,
  SYNC_GPU_AUX_SPACE,
};
// In deterministic GPU thread mode this waits for the GPU to be done with pending work.
void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr = true);
@ -48,4 +47,4 @@ void ResetVideoBuffer();
void SetRendering(bool bEnabled);
bool WillSkipCurrentFrame();
} // namespace Fifo
} // namespace Fifo

View file

@ -2,18 +2,21 @@
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "VideoCommon/FramebufferManagerBase.h"
#include <algorithm>
#include <array>
#include <memory>
#include "VideoCommon/FramebufferManagerBase.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/VideoConfig.h"
// The single global framebuffer manager instance.
std::unique_ptr<FramebufferManagerBase> g_framebuffer_manager;

// Out-of-class definitions of FramebufferManagerBase's static data members.
std::unique_ptr<XFBSourceBase>
    FramebufferManagerBase::m_realXFBSource;  // Only used in Real XFB mode
FramebufferManagerBase::VirtualXFBListType
    FramebufferManagerBase::m_virtualXFBList;  // Only used in Virtual XFB mode
std::array<const XFBSourceBase*, FramebufferManagerBase::MAX_VIRTUAL_XFB>
    FramebufferManagerBase::m_overlappingXFBArray;

// Start at 1 rather than 0; ScaleToVirtualXfbWidth/Height divide by these.
unsigned int FramebufferManagerBase::s_last_xfb_width = 1;
unsigned int FramebufferManagerBase::s_last_xfb_height = 1;
@ -22,229 +25,240 @@ unsigned int FramebufferManagerBase::m_EFBLayers = 1;
// Clears the static overlapping-XFB scratch array.
FramebufferManagerBase::FramebufferManagerBase()
{
  // Can't hurt
  m_overlappingXFBArray.fill(nullptr);
}
// Releases the XFB sources held by the static members.
FramebufferManagerBase::~FramebufferManagerBase()
{
  // Necessary, as these are static members
  // (they really shouldn't be and should be refactored at some point).
  m_virtualXFBList.clear();
  m_realXFBSource.reset();
}
// Dispatches to the real or virtual XFB lookup depending on bUseRealXFB.
// Returns nullptr (leaving *xfbCountP untouched) when XFB is disabled.
const XFBSourceBase* const* FramebufferManagerBase::GetXFBSource(u32 xfbAddr, u32 fbWidth,
                                                                 u32 fbHeight, u32* xfbCountP)
{
  if (!g_ActiveConfig.bUseXFB)
    return nullptr;

  if (g_ActiveConfig.bUseRealXFB)
    return GetRealXFBSource(xfbAddr, fbWidth, fbHeight, xfbCountP);
  else
    return GetVirtualXFBSource(xfbAddr, fbWidth, fbHeight, xfbCountP);
}
// Real XFB mode: (re)creates the single real XFB source at fbWidth x fbHeight
// if needed, decodes the XFB at xfbAddr into it and returns a one-element
// list. *xfbCountP is set to 1 up front, including on the nullptr path taken
// when no source could be created.
const XFBSourceBase* const* FramebufferManagerBase::GetRealXFBSource(u32 xfbAddr, u32 fbWidth,
                                                                     u32 fbHeight, u32* xfbCountP)
{
  *xfbCountP = 1;

  // recreate if needed
  if (m_realXFBSource &&
      (m_realXFBSource->texWidth != fbWidth || m_realXFBSource->texHeight != fbHeight))
    m_realXFBSource.reset();

  if (!m_realXFBSource && g_framebuffer_manager)
    m_realXFBSource = g_framebuffer_manager->CreateXFBSource(fbWidth, fbHeight, 1);

  if (!m_realXFBSource)
    return nullptr;

  m_realXFBSource->srcAddr = xfbAddr;

  m_realXFBSource->srcWidth = MAX_XFB_WIDTH;
  m_realXFBSource->srcHeight = MAX_XFB_HEIGHT;

  m_realXFBSource->texWidth = fbWidth;
  m_realXFBSource->texHeight = fbHeight;

  m_realXFBSource->sourceRc.left = 0;
  m_realXFBSource->sourceRc.top = 0;
  m_realXFBSource->sourceRc.right = fbWidth;
  m_realXFBSource->sourceRc.bottom = fbHeight;

  // Decode YUYV data from GameCube RAM
  m_realXFBSource->DecodeToTexture(xfbAddr, fbWidth, fbHeight);

  m_overlappingXFBArray[0] = m_realXFBSource.get();
  return &m_overlappingXFBArray[0];
}
// Virtual XFB mode: collects every virtual XFB whose address range overlaps
// [xfbAddr, xfbAddr + 2 * fbWidth * fbHeight) into m_overlappingXFBArray,
// walking the list back to front. Returns nullptr (leaving *xfbCountP
// untouched) when the list is empty.
const XFBSourceBase* const*
FramebufferManagerBase::GetVirtualXFBSource(u32 xfbAddr, u32 fbWidth, u32 fbHeight, u32* xfbCountP)
{
  u32 xfbCount = 0;

  if (m_virtualXFBList.empty())  // no Virtual XFBs available
    return nullptr;

  u32 srcLower = xfbAddr;
  u32 srcUpper = xfbAddr + 2 * fbWidth * fbHeight;

  VirtualXFBListType::reverse_iterator it = m_virtualXFBList.rbegin(),
                                       vlend = m_virtualXFBList.rend();
  for (; it != vlend; ++it)
  {
    VirtualXFB* vxfb = &*it;

    u32 dstLower = vxfb->xfbAddr;
    u32 dstUpper = vxfb->xfbAddr + 2 * vxfb->xfbWidth * vxfb->xfbHeight;

    if (AddressRangesOverlap(srcLower, srcUpper, dstLower, dstUpper))
    {
      m_overlappingXFBArray[xfbCount] = vxfb->xfbSource.get();
      ++xfbCount;
    }
  }

  *xfbCountP = xfbCount;
  return &m_overlappingXFBArray[0];
}
// Copies the EFB region sourceRc to the XFB at xfbAddr, using the backend's
// CopyToRealXFB in Real XFB mode (a no-op when no manager exists) and
// CopyToVirtualXFB otherwise.
void FramebufferManagerBase::CopyToXFB(u32 xfbAddr, u32 fbStride, u32 fbHeight,
                                       const EFBRectangle& sourceRc, float Gamma)
{
  if (g_ActiveConfig.bUseRealXFB)
  {
    if (g_framebuffer_manager)
      g_framebuffer_manager->CopyToRealXFB(xfbAddr, fbStride, fbHeight, sourceRc, Gamma);
  }
  else
  {
    CopyToVirtualXFB(xfbAddr, fbStride, fbHeight, sourceRc, Gamma);
  }
}
// Finds (or creates, or recycles) the virtual XFB entry for xfbAddr, moves it
// to the front of the list, makes sure it has a source matching the current
// target size, and copies the EFB into it. Does nothing without a
// g_framebuffer_manager.
void FramebufferManagerBase::CopyToVirtualXFB(u32 xfbAddr, u32 fbStride, u32 fbHeight,
                                              const EFBRectangle& sourceRc, float Gamma)
{
  if (!g_framebuffer_manager)
    return;

  VirtualXFBListType::iterator vxfb = FindVirtualXFB(xfbAddr, sourceRc.GetWidth(), fbHeight);

  if (m_virtualXFBList.end() == vxfb)
  {
    if (m_virtualXFBList.size() < MAX_VIRTUAL_XFB)
    {
      // create a new Virtual XFB and place it at the front of the list
      m_virtualXFBList.emplace_front();
      vxfb = m_virtualXFBList.begin();
    }
    else
    {
      // Replace the last virtual XFB
      --vxfb;
    }
  }
  // else // replace existing virtual XFB

  // move this Virtual XFB to the front of the list.
  if (m_virtualXFBList.begin() != vxfb)
    m_virtualXFBList.splice(m_virtualXFBList.begin(), m_virtualXFBList, vxfb);

  unsigned int target_width, target_height;
  g_framebuffer_manager->GetTargetSize(&target_width, &target_height);

  // recreate if needed
  if (vxfb->xfbSource &&
      (vxfb->xfbSource->texWidth != target_width || vxfb->xfbSource->texHeight != target_height))
    vxfb->xfbSource.reset();

  if (!vxfb->xfbSource)
  {
    vxfb->xfbSource =
        g_framebuffer_manager->CreateXFBSource(target_width, target_height, m_EFBLayers);
    if (!vxfb->xfbSource)
      return;

    vxfb->xfbSource->texWidth = target_width;
    vxfb->xfbSource->texHeight = target_height;
  }

  vxfb->xfbSource->srcAddr = vxfb->xfbAddr = xfbAddr;
  vxfb->xfbSource->srcWidth = vxfb->xfbWidth = sourceRc.GetWidth();
  vxfb->xfbSource->srcHeight = vxfb->xfbHeight = fbHeight;

  vxfb->xfbSource->sourceRc = g_renderer->ConvertEFBRectangle(sourceRc);

  // keep stale XFB data from being used
  ReplaceVirtualXFB();

  // Copy EFB data to XFB and restore render target again
  vxfb->xfbSource->CopyEFB(Gamma);
}
// Returns the first virtual XFB whose address range lies entirely inside
// [xfbAddr, xfbAddr + 2 * width * height], or end() if there is none.
FramebufferManagerBase::VirtualXFBListType::iterator
FramebufferManagerBase::FindVirtualXFB(u32 xfbAddr, u32 width, u32 height)
{
  const u32 srcLower = xfbAddr;
  const u32 srcUpper = xfbAddr + 2 * width * height;

  return std::find_if(m_virtualXFBList.begin(), m_virtualXFBList.end(),
                      [srcLower, srcUpper](const VirtualXFB& xfb) {
                        const u32 dstLower = xfb.xfbAddr;
                        const u32 dstUpper = xfb.xfbAddr + 2 * xfb.xfbWidth * xfb.xfbHeight;

                        return dstLower >= srcLower && dstUpper <= srcUpper;
                      });
}
// Treats the front list entry as the freshly written XFB and invalidates or
// trims every other entry whose address range it covers or overlaps.
// Dereferences begin() unconditionally, so the list must not be empty.
void FramebufferManagerBase::ReplaceVirtualXFB()
{
  VirtualXFBListType::iterator it = m_virtualXFBList.begin();

  const s32 srcLower = it->xfbAddr;
  const s32 srcUpper = it->xfbAddr + 2 * it->xfbWidth * it->xfbHeight;
  const s32 lineSize = 2 * it->xfbWidth;

  ++it;

  for (; it != m_virtualXFBList.end(); ++it)
  {
    s32 dstLower = it->xfbAddr;
    s32 dstUpper = it->xfbAddr + 2 * it->xfbWidth * it->xfbHeight;

    if (dstLower >= srcLower && dstUpper <= srcUpper)
    {
      // Invalidate the data
      it->xfbAddr = 0;
      it->xfbHeight = 0;
      it->xfbWidth = 0;
    }
    else if (AddressRangesOverlap(srcLower, srcUpper, dstLower, dstUpper))
    {
      // Overlap measured in whole lines of the front entry.
      s32 upperOverlap = (srcUpper - dstLower) / lineSize;
      s32 lowerOverlap = (dstUpper - srcLower) / lineSize;

      if (upperOverlap > 0 && lowerOverlap < 0)
      {
        it->xfbAddr += lineSize * upperOverlap;
        it->xfbHeight -= upperOverlap;
      }
      else if (lowerOverlap > 0)
      {
        it->xfbHeight -= lowerOverlap;
      }
    }
  }
}
// Returns x unchanged when RealXFBEnabled(); otherwise scales it by
// (target rectangle width / last XFB width) using integer arithmetic.
int FramebufferManagerBase::ScaleToVirtualXfbWidth(int x)
{
  if (g_ActiveConfig.RealXFBEnabled())
    return x;

  return x * (int)Renderer::GetTargetRectangle().GetWidth() /
         (int)FramebufferManagerBase::LastXfbWidth();
}
// Returns y unchanged when RealXFBEnabled(); otherwise scales it by
// (target rectangle height / last XFB height) using integer arithmetic.
int FramebufferManagerBase::ScaleToVirtualXfbHeight(int y)
{
  if (g_ActiveConfig.RealXFBEnabled())
    return y;

  return y * (int)Renderer::GetTargetRectangle().GetHeight() /
         (int)FramebufferManagerBase::LastXfbHeight();
}

View file

@ -13,96 +13,97 @@
// Returns true when [aLower, aUpper) and [bLower, bUpper) intersect.
// Ranges that merely touch (one ends where the other begins) do not overlap.
inline bool AddressRangesOverlap(u32 aLower, u32 aUpper, u32 bLower, u32 bUpper)
{
  return !((aLower >= bUpper) || (bLower >= aUpper));
}
// Interface for a single XFB source. Implementations provide DecodeToTexture
// and CopyEFB; the data members are filled in by FramebufferManagerBase.
struct XFBSourceBase
{
  virtual ~XFBSourceBase() {}
  virtual void DecodeToTexture(u32 xfbAddr, u32 fbWidth, u32 fbHeight) = 0;

  virtual void CopyEFB(float Gamma) = 0;

  u32 srcAddr;
  u32 srcWidth;
  u32 srcHeight;

  unsigned int texWidth;
  unsigned int texHeight;

  // TODO: only used by OGL
  TargetRectangle sourceRc;
};
class FramebufferManagerBase
{
public:
enum
{
// There may be multiple XFBs in GameCube RAM. This is the maximum number to
// virtualize.
MAX_VIRTUAL_XFB = 8
};
enum
{
// There may be multiple XFBs in GameCube RAM. This is the maximum number to
// virtualize.
MAX_VIRTUAL_XFB = 8
};
FramebufferManagerBase();
virtual ~FramebufferManagerBase();
FramebufferManagerBase();
virtual ~FramebufferManagerBase();
static void CopyToXFB(u32 xfbAddr, u32 fbStride, u32 fbHeight, const EFBRectangle& sourceRc,float Gamma);
static const XFBSourceBase* const* GetXFBSource(u32 xfbAddr, u32 fbWidth, u32 fbHeight, u32* xfbCount);
static void CopyToXFB(u32 xfbAddr, u32 fbStride, u32 fbHeight, const EFBRectangle& sourceRc,
float Gamma);
static const XFBSourceBase* const* GetXFBSource(u32 xfbAddr, u32 fbWidth, u32 fbHeight,
u32* xfbCount);
static void SetLastXfbWidth(unsigned int width) { s_last_xfb_width = width; }
static void SetLastXfbHeight(unsigned int height) { s_last_xfb_height = height; }
static unsigned int LastXfbWidth() { return s_last_xfb_width; }
static unsigned int LastXfbHeight() { return s_last_xfb_height; }
static int ScaleToVirtualXfbWidth(int x);
static int ScaleToVirtualXfbHeight(int y);
static unsigned int GetEFBLayers() { return m_EFBLayers; }
static void SetLastXfbWidth(unsigned int width) { s_last_xfb_width = width; }
static void SetLastXfbHeight(unsigned int height) { s_last_xfb_height = height; }
static unsigned int LastXfbWidth() { return s_last_xfb_width; }
static unsigned int LastXfbHeight() { return s_last_xfb_height; }
static int ScaleToVirtualXfbWidth(int x);
static int ScaleToVirtualXfbHeight(int y);
static unsigned int GetEFBLayers() { return m_EFBLayers; }
protected:
struct VirtualXFB
{
VirtualXFB()
{
}
struct VirtualXFB
{
VirtualXFB() {}
// Address and size in GameCube RAM
u32 xfbAddr = 0;
u32 xfbWidth = 0;
u32 xfbHeight = 0;
// Address and size in GameCube RAM
u32 xfbAddr = 0;
u32 xfbWidth = 0;
u32 xfbHeight = 0;
std::unique_ptr<XFBSourceBase> xfbSource;
};
std::unique_ptr<XFBSourceBase> xfbSource;
};
typedef std::list<VirtualXFB> VirtualXFBListType;
typedef std::list<VirtualXFB> VirtualXFBListType;
static unsigned int m_EFBLayers;
static unsigned int m_EFBLayers;
private:
virtual std::unique_ptr<XFBSourceBase> CreateXFBSource(unsigned int target_width, unsigned int target_height, unsigned int layers) = 0;
// TODO: figure out why OGL is different for this guy
virtual void GetTargetSize(unsigned int *width, unsigned int *height) = 0;
virtual std::unique_ptr<XFBSourceBase>
CreateXFBSource(unsigned int target_width, unsigned int target_height, unsigned int layers) = 0;
// TODO: figure out why OGL is different for this guy
virtual void GetTargetSize(unsigned int* width, unsigned int* height) = 0;
static VirtualXFBListType::iterator FindVirtualXFB(u32 xfbAddr, u32 width, u32 height);
static VirtualXFBListType::iterator FindVirtualXFB(u32 xfbAddr, u32 width, u32 height);
static void ReplaceVirtualXFB();
static void ReplaceVirtualXFB();
// TODO: merge these virtual funcs, they are nearly all the same
virtual void CopyToRealXFB(u32 xfbAddr, u32 fbStride, u32 fbHeight, const EFBRectangle& sourceRc,float Gamma = 1.0f) = 0;
static void CopyToVirtualXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc,float Gamma = 1.0f);
// TODO: merge these virtual funcs, they are nearly all the same
virtual void CopyToRealXFB(u32 xfbAddr, u32 fbStride, u32 fbHeight, const EFBRectangle& sourceRc,
float Gamma = 1.0f) = 0;
static void CopyToVirtualXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc,
float Gamma = 1.0f);
static const XFBSourceBase* const* GetRealXFBSource(u32 xfbAddr, u32 fbWidth, u32 fbHeight, u32* xfbCount);
static const XFBSourceBase* const* GetVirtualXFBSource(u32 xfbAddr, u32 fbWidth, u32 fbHeight, u32* xfbCount);
static const XFBSourceBase* const* GetRealXFBSource(u32 xfbAddr, u32 fbWidth, u32 fbHeight,
u32* xfbCount);
static const XFBSourceBase* const* GetVirtualXFBSource(u32 xfbAddr, u32 fbWidth, u32 fbHeight,
u32* xfbCount);
static std::unique_ptr<XFBSourceBase> m_realXFBSource; // Only used in Real XFB mode
static VirtualXFBListType m_virtualXFBList; // Only used in Virtual XFB mode
static std::unique_ptr<XFBSourceBase> m_realXFBSource; // Only used in Real XFB mode
static VirtualXFBListType m_virtualXFBList; // Only used in Virtual XFB mode
static std::array<const XFBSourceBase*, MAX_VIRTUAL_XFB> m_overlappingXFBArray;
static std::array<const XFBSourceBase*, MAX_VIRTUAL_XFB> m_overlappingXFBArray;
static unsigned int s_last_xfb_width;
static unsigned int s_last_xfb_height;
static unsigned int s_last_xfb_width;
static unsigned int s_last_xfb_height;
};
extern std::unique_ptr<FramebufferManagerBase> g_framebuffer_manager;

View file

@ -11,321 +11,324 @@
#include "VideoCommon/LightingShaderGen.h"
#include "VideoCommon/VideoConfig.h"
// Primitive names as spelled in GLSL layout qualifiers, indexed by primitive type.
static const char* primitives_ogl[] = {"points", "lines", "triangles"};

// Primitive names for the D3D backend, indexed by primitive type.
static const char* primitives_d3d[] = {"point", "line", "triangle"};
// Forward declarations of the shader-emission helpers defined later in this file.
template <class T>
static void EmitVertex(T& out, const char* vertex, API_TYPE ApiType, bool first_vertex = false);
template <class T>
static void EndPrimitive(T& out, API_TYPE ApiType);
template<class T>
template <class T>
static T GenerateGeometryShader(u32 primitive_type, API_TYPE ApiType)
{
T out;
// Non-uid template parameters will write to the dummy data (=> gets optimized out)
geometry_shader_uid_data dummy_data;
geometry_shader_uid_data* uid_data = out.template GetUidData<geometry_shader_uid_data>();
if (uid_data != nullptr)
memset(uid_data, 0, sizeof(*uid_data));
else
uid_data = &dummy_data;
T out;
// Non-uid template parameters will write to the dummy data (=> gets optimized out)
geometry_shader_uid_data dummy_data;
geometry_shader_uid_data* uid_data = out.template GetUidData<geometry_shader_uid_data>();
if (uid_data != nullptr)
memset(uid_data, 0, sizeof(*uid_data));
else
uid_data = &dummy_data;
uid_data->primitive_type = primitive_type;
const unsigned int vertex_in = primitive_type + 1;
unsigned int vertex_out = primitive_type == PRIMITIVE_TRIANGLES ? 3 : 4;
uid_data->primitive_type = primitive_type;
const unsigned int vertex_in = primitive_type + 1;
unsigned int vertex_out = primitive_type == PRIMITIVE_TRIANGLES ? 3 : 4;
uid_data->wireframe = g_ActiveConfig.bWireFrame;
if (g_ActiveConfig.bWireFrame)
vertex_out++;
uid_data->wireframe = g_ActiveConfig.bWireFrame;
if (g_ActiveConfig.bWireFrame)
vertex_out++;
uid_data->stereo = g_ActiveConfig.iStereoMode > 0;
if (ApiType == API_OPENGL)
{
// Insert layout parameters
if (g_ActiveConfig.backend_info.bSupportsGSInstancing)
{
out.Write("layout(%s, invocations = %d) in;\n", primitives_ogl[primitive_type], g_ActiveConfig.iStereoMode > 0 ? 2 : 1);
out.Write("layout(%s_strip, max_vertices = %d) out;\n", g_ActiveConfig.bWireFrame ? "line" : "triangle", vertex_out);
}
else
{
out.Write("layout(%s) in;\n", primitives_ogl[primitive_type]);
out.Write("layout(%s_strip, max_vertices = %d) out;\n", g_ActiveConfig.bWireFrame ? "line" : "triangle", g_ActiveConfig.iStereoMode > 0 ? vertex_out * 2 : vertex_out);
}
}
uid_data->stereo = g_ActiveConfig.iStereoMode > 0;
if (ApiType == API_OPENGL)
{
// Insert layout parameters
if (g_ActiveConfig.backend_info.bSupportsGSInstancing)
{
out.Write("layout(%s, invocations = %d) in;\n", primitives_ogl[primitive_type],
g_ActiveConfig.iStereoMode > 0 ? 2 : 1);
out.Write("layout(%s_strip, max_vertices = %d) out;\n",
g_ActiveConfig.bWireFrame ? "line" : "triangle", vertex_out);
}
else
{
out.Write("layout(%s) in;\n", primitives_ogl[primitive_type]);
out.Write("layout(%s_strip, max_vertices = %d) out;\n",
g_ActiveConfig.bWireFrame ? "line" : "triangle",
g_ActiveConfig.iStereoMode > 0 ? vertex_out * 2 : vertex_out);
}
}
out.Write("%s", s_lighting_struct);
out.Write("%s", s_lighting_struct);
// uniforms
if (ApiType == API_OPENGL)
out.Write("layout(std140%s) uniform GSBlock {\n", g_ActiveConfig.backend_info.bSupportsBindingLayout ? ", binding = 3" : "");
else
out.Write("cbuffer GSBlock {\n");
out.Write(
"\tfloat4 " I_STEREOPARAMS";\n"
"\tfloat4 " I_LINEPTPARAMS";\n"
"\tint4 " I_TEXOFFSET";\n"
"};\n");
// uniforms
if (ApiType == API_OPENGL)
out.Write("layout(std140%s) uniform GSBlock {\n",
g_ActiveConfig.backend_info.bSupportsBindingLayout ? ", binding = 3" : "");
else
out.Write("cbuffer GSBlock {\n");
out.Write("\tfloat4 " I_STEREOPARAMS ";\n"
"\tfloat4 " I_LINEPTPARAMS ";\n"
"\tint4 " I_TEXOFFSET ";\n"
"};\n");
uid_data->numTexGens = xfmem.numTexGen.numTexGens;
uid_data->pixel_lighting = g_ActiveConfig.bEnablePixelLighting;
uid_data->numTexGens = xfmem.numTexGen.numTexGens;
uid_data->pixel_lighting = g_ActiveConfig.bEnablePixelLighting;
out.Write("struct VS_OUTPUT {\n");
GenerateVSOutputMembers<T>(out, ApiType, "");
out.Write("};\n");
out.Write("struct VS_OUTPUT {\n");
GenerateVSOutputMembers<T>(out, ApiType, "");
out.Write("};\n");
if (ApiType == API_OPENGL)
{
if (g_ActiveConfig.backend_info.bSupportsGSInstancing)
out.Write("#define InstanceID gl_InvocationID\n");
if (ApiType == API_OPENGL)
{
if (g_ActiveConfig.backend_info.bSupportsGSInstancing)
out.Write("#define InstanceID gl_InvocationID\n");
out.Write("in VertexData {\n");
GenerateVSOutputMembers<T>(out, ApiType, GetInterpolationQualifier(true, true));
out.Write("} vs[%d];\n", vertex_in);
out.Write("in VertexData {\n");
GenerateVSOutputMembers<T>(out, ApiType, GetInterpolationQualifier(true, true));
out.Write("} vs[%d];\n", vertex_in);
out.Write("out VertexData {\n");
GenerateVSOutputMembers<T>(out, ApiType, GetInterpolationQualifier(true, false));
out.Write("out VertexData {\n");
GenerateVSOutputMembers<T>(out, ApiType, GetInterpolationQualifier(true, false));
if (g_ActiveConfig.iStereoMode > 0)
out.Write("\tflat int layer;\n");
if (g_ActiveConfig.iStereoMode > 0)
out.Write("\tflat int layer;\n");
out.Write("} ps;\n");
out.Write("} ps;\n");
out.Write("void main()\n{\n");
}
else // D3D
{
out.Write("struct VertexData {\n");
out.Write("\tVS_OUTPUT o;\n");
out.Write("void main()\n{\n");
}
else // D3D
{
out.Write("struct VertexData {\n");
out.Write("\tVS_OUTPUT o;\n");
if (g_ActiveConfig.iStereoMode > 0)
out.Write("\tuint layer : SV_RenderTargetArrayIndex;\n");
if (g_ActiveConfig.iStereoMode > 0)
out.Write("\tuint layer : SV_RenderTargetArrayIndex;\n");
out.Write("};\n");
out.Write("};\n");
if (g_ActiveConfig.backend_info.bSupportsGSInstancing)
{
out.Write("[maxvertexcount(%d)]\n[instance(%d)]\n", vertex_out, g_ActiveConfig.iStereoMode > 0 ? 2 : 1);
out.Write("void main(%s VS_OUTPUT o[%d], inout %sStream<VertexData> output, in uint InstanceID : SV_GSInstanceID)\n{\n", primitives_d3d[primitive_type], vertex_in, g_ActiveConfig.bWireFrame ? "Line" : "Triangle");
}
else
{
out.Write("[maxvertexcount(%d)]\n", g_ActiveConfig.iStereoMode > 0 ? vertex_out * 2 : vertex_out);
out.Write("void main(%s VS_OUTPUT o[%d], inout %sStream<VertexData> output)\n{\n", primitives_d3d[primitive_type], vertex_in, g_ActiveConfig.bWireFrame ? "Line" : "Triangle");
}
if (g_ActiveConfig.backend_info.bSupportsGSInstancing)
{
out.Write("[maxvertexcount(%d)]\n[instance(%d)]\n", vertex_out,
g_ActiveConfig.iStereoMode > 0 ? 2 : 1);
out.Write("void main(%s VS_OUTPUT o[%d], inout %sStream<VertexData> output, in uint "
"InstanceID : SV_GSInstanceID)\n{\n",
primitives_d3d[primitive_type], vertex_in,
g_ActiveConfig.bWireFrame ? "Line" : "Triangle");
}
else
{
out.Write("[maxvertexcount(%d)]\n",
g_ActiveConfig.iStereoMode > 0 ? vertex_out * 2 : vertex_out);
out.Write("void main(%s VS_OUTPUT o[%d], inout %sStream<VertexData> output)\n{\n",
primitives_d3d[primitive_type], vertex_in,
g_ActiveConfig.bWireFrame ? "Line" : "Triangle");
}
out.Write("\tVertexData ps;\n");
}
out.Write("\tVertexData ps;\n");
}
if (primitive_type == PRIMITIVE_LINES)
{
if (ApiType == API_OPENGL)
{
out.Write("\tVS_OUTPUT start, end;\n");
AssignVSOutputMembers(out, "start", "vs[0]");
AssignVSOutputMembers(out, "end", "vs[1]");
}
else
{
out.Write("\tVS_OUTPUT start = o[0];\n");
out.Write("\tVS_OUTPUT end = o[1];\n");
}
if (primitive_type == PRIMITIVE_LINES)
{
if (ApiType == API_OPENGL)
{
out.Write("\tVS_OUTPUT start, end;\n");
AssignVSOutputMembers(out, "start", "vs[0]");
AssignVSOutputMembers(out, "end", "vs[1]");
}
else
{
out.Write("\tVS_OUTPUT start = o[0];\n");
out.Write("\tVS_OUTPUT end = o[1];\n");
}
// GameCube/Wii's line drawing algorithm is a little quirky. It does not
// use the correct line caps. Instead, the line caps are vertical or
// horizontal depending the slope of the line.
out.Write(
"\tfloat2 offset;\n"
"\tfloat2 to = abs(end.pos.xy / end.pos.w - start.pos.xy / start.pos.w);\n"
// FIXME: What does real hardware do when line is at a 45-degree angle?
// FIXME: Lines aren't drawn at the correct width. See Twilight Princess map.
"\tif (" I_LINEPTPARAMS".y * to.y > " I_LINEPTPARAMS".x * to.x) {\n"
// Line is more tall. Extend geometry left and right.
// Lerp LineWidth/2 from [0..VpWidth] to [-1..1]
"\t\toffset = float2(" I_LINEPTPARAMS".z / " I_LINEPTPARAMS".x, 0);\n"
"\t} else {\n"
// Line is more wide. Extend geometry up and down.
// Lerp LineWidth/2 from [0..VpHeight] to [1..-1]
"\t\toffset = float2(0, -" I_LINEPTPARAMS".z / " I_LINEPTPARAMS".y);\n"
"\t}\n");
}
else if (primitive_type == PRIMITIVE_POINTS)
{
if (ApiType == API_OPENGL)
{
out.Write("\tVS_OUTPUT center;\n");
AssignVSOutputMembers(out, "center", "vs[0]");
}
else
{
out.Write("\tVS_OUTPUT center = o[0];\n");
}
// GameCube/Wii's line drawing algorithm is a little quirky. It does not
// use the correct line caps. Instead, the line caps are vertical or
// horizontal depending the slope of the line.
out.Write("\tfloat2 offset;\n"
"\tfloat2 to = abs(end.pos.xy / end.pos.w - start.pos.xy / start.pos.w);\n"
// FIXME: What does real hardware do when line is at a 45-degree angle?
// FIXME: Lines aren't drawn at the correct width. See Twilight Princess map.
"\tif (" I_LINEPTPARAMS ".y * to.y > " I_LINEPTPARAMS ".x * to.x) {\n"
// Line is more tall. Extend geometry left and right.
// Lerp LineWidth/2 from [0..VpWidth] to [-1..1]
"\t\toffset = float2(" I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".x, 0);\n"
"\t} else {\n"
// Line is more wide. Extend geometry up and down.
// Lerp LineWidth/2 from [0..VpHeight] to [1..-1]
"\t\toffset = float2(0, -" I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".y);\n"
"\t}\n");
}
else if (primitive_type == PRIMITIVE_POINTS)
{
if (ApiType == API_OPENGL)
{
out.Write("\tVS_OUTPUT center;\n");
AssignVSOutputMembers(out, "center", "vs[0]");
}
else
{
out.Write("\tVS_OUTPUT center = o[0];\n");
}
// Offset from center to upper right vertex
// Lerp PointSize/2 from [0,0..VpWidth,VpHeight] to [-1,1..1,-1]
out.Write("\tfloat2 offset = float2(" I_LINEPTPARAMS".w / " I_LINEPTPARAMS".x, -" I_LINEPTPARAMS".w / " I_LINEPTPARAMS".y) * center.pos.w;\n");
}
// Offset from center to upper right vertex
// Lerp PointSize/2 from [0,0..VpWidth,VpHeight] to [-1,1..1,-1]
out.Write("\tfloat2 offset = float2(" I_LINEPTPARAMS ".w / " I_LINEPTPARAMS
".x, -" I_LINEPTPARAMS ".w / " I_LINEPTPARAMS ".y) * center.pos.w;\n");
}
if (g_ActiveConfig.iStereoMode > 0)
{
// If the GPU supports invocation we don't need a for loop and can simply use the
// invocation identifier to determine which layer we're rendering.
if (g_ActiveConfig.backend_info.bSupportsGSInstancing)
out.Write("\tint eye = InstanceID;\n");
else
out.Write("\tfor (int eye = 0; eye < 2; ++eye) {\n");
}
if (g_ActiveConfig.iStereoMode > 0)
{
// If the GPU supports invocation we don't need a for loop and can simply use the
// invocation identifier to determine which layer we're rendering.
if (g_ActiveConfig.backend_info.bSupportsGSInstancing)
out.Write("\tint eye = InstanceID;\n");
else
out.Write("\tfor (int eye = 0; eye < 2; ++eye) {\n");
}
if (g_ActiveConfig.bWireFrame)
out.Write("\tVS_OUTPUT first;\n");
if (g_ActiveConfig.bWireFrame)
out.Write("\tVS_OUTPUT first;\n");
out.Write("\tfor (int i = 0; i < %d; ++i) {\n", vertex_in);
out.Write("\tfor (int i = 0; i < %d; ++i) {\n", vertex_in);
if (ApiType == API_OPENGL)
{
out.Write("\tVS_OUTPUT f;\n");
AssignVSOutputMembers(out, "f", "vs[i]");
}
else
{
out.Write("\tVS_OUTPUT f = o[i];\n");
}
if (ApiType == API_OPENGL)
{
out.Write("\tVS_OUTPUT f;\n");
AssignVSOutputMembers(out, "f", "vs[i]");
}
else
{
out.Write("\tVS_OUTPUT f = o[i];\n");
}
if (g_ActiveConfig.iStereoMode > 0)
{
// Select the output layer
out.Write("\tps.layer = eye;\n");
if (ApiType == API_OPENGL)
out.Write("\tgl_Layer = eye;\n");
if (g_ActiveConfig.iStereoMode > 0)
{
// Select the output layer
out.Write("\tps.layer = eye;\n");
if (ApiType == API_OPENGL)
out.Write("\tgl_Layer = eye;\n");
// For stereoscopy add a small horizontal offset in Normalized Device Coordinates proportional
// to the depth of the vertex. We retrieve the depth value from the w-component of the projected
// vertex which contains the negated z-component of the original vertex.
// For negative parallax (out-of-screen effects) we subtract a convergence value from
// the depth value. This results in objects at a distance smaller than the convergence
// distance to seemingly appear in front of the screen.
// This formula is based on page 13 of the "Nvidia 3D Vision Automatic, Best Practices Guide"
out.Write("\tf.pos.x += " I_STEREOPARAMS"[eye] * (f.pos.w - " I_STEREOPARAMS"[2]);\n");
}
// For stereoscopy add a small horizontal offset in Normalized Device Coordinates proportional
// to the depth of the vertex. We retrieve the depth value from the w-component of the projected
// vertex which contains the negated z-component of the original vertex.
// For negative parallax (out-of-screen effects) we subtract a convergence value from
// the depth value. This results in objects at a distance smaller than the convergence
// distance to seemingly appear in front of the screen.
// This formula is based on page 13 of the "Nvidia 3D Vision Automatic, Best Practices Guide"
out.Write("\tf.pos.x += " I_STEREOPARAMS "[eye] * (f.pos.w - " I_STEREOPARAMS "[2]);\n");
}
if (primitive_type == PRIMITIVE_LINES)
{
out.Write("\tVS_OUTPUT l = f;\n"
"\tVS_OUTPUT r = f;\n");
if (primitive_type == PRIMITIVE_LINES)
{
out.Write("\tVS_OUTPUT l = f;\n"
"\tVS_OUTPUT r = f;\n");
out.Write("\tl.pos.xy -= offset * l.pos.w;\n"
"\tr.pos.xy += offset * r.pos.w;\n");
out.Write("\tl.pos.xy -= offset * l.pos.w;\n"
"\tr.pos.xy += offset * r.pos.w;\n");
out.Write("\tif (" I_TEXOFFSET"[2] != 0) {\n");
out.Write("\tfloat texOffset = 1.0 / float(" I_TEXOFFSET"[2]);\n");
out.Write("\tif (" I_TEXOFFSET "[2] != 0) {\n");
out.Write("\tfloat texOffset = 1.0 / float(" I_TEXOFFSET "[2]);\n");
for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
{
out.Write("\tif (((" I_TEXOFFSET"[0] >> %d) & 0x1) != 0)\n", i);
out.Write("\t\tr.tex%d.x += texOffset;\n", i);
}
out.Write("\t}\n");
for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
{
out.Write("\tif (((" I_TEXOFFSET "[0] >> %d) & 0x1) != 0)\n", i);
out.Write("\t\tr.tex%d.x += texOffset;\n", i);
}
out.Write("\t}\n");
EmitVertex<T>(out, "l", ApiType, true);
EmitVertex<T>(out, "r", ApiType);
}
else if (primitive_type == PRIMITIVE_POINTS)
{
out.Write("\tVS_OUTPUT ll = f;\n"
"\tVS_OUTPUT lr = f;\n"
"\tVS_OUTPUT ul = f;\n"
"\tVS_OUTPUT ur = f;\n");
EmitVertex<T>(out, "l", ApiType, true);
EmitVertex<T>(out, "r", ApiType);
}
else if (primitive_type == PRIMITIVE_POINTS)
{
out.Write("\tVS_OUTPUT ll = f;\n"
"\tVS_OUTPUT lr = f;\n"
"\tVS_OUTPUT ul = f;\n"
"\tVS_OUTPUT ur = f;\n");
out.Write("\tll.pos.xy += float2(-1,-1) * offset;\n"
"\tlr.pos.xy += float2(1,-1) * offset;\n"
"\tul.pos.xy += float2(-1,1) * offset;\n"
"\tur.pos.xy += offset;\n");
out.Write("\tll.pos.xy += float2(-1,-1) * offset;\n"
"\tlr.pos.xy += float2(1,-1) * offset;\n"
"\tul.pos.xy += float2(-1,1) * offset;\n"
"\tur.pos.xy += offset;\n");
out.Write("\tif (" I_TEXOFFSET"[3] != 0) {\n");
out.Write("\tfloat2 texOffset = float2(1.0 / float(" I_TEXOFFSET"[3]), 1.0 / float(" I_TEXOFFSET"[3]));\n");
out.Write("\tif (" I_TEXOFFSET "[3] != 0) {\n");
out.Write("\tfloat2 texOffset = float2(1.0 / float(" I_TEXOFFSET
"[3]), 1.0 / float(" I_TEXOFFSET "[3]));\n");
for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
{
out.Write("\tif (((" I_TEXOFFSET"[1] >> %d) & 0x1) != 0) {\n", i);
out.Write("\t\tll.tex%d.xy += float2(0,1) * texOffset;\n", i);
out.Write("\t\tlr.tex%d.xy += texOffset;\n", i);
out.Write("\t\tur.tex%d.xy += float2(1,0) * texOffset;\n", i);
out.Write("\t}\n");
}
out.Write("\t}\n");
for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
{
out.Write("\tif (((" I_TEXOFFSET "[1] >> %d) & 0x1) != 0) {\n", i);
out.Write("\t\tll.tex%d.xy += float2(0,1) * texOffset;\n", i);
out.Write("\t\tlr.tex%d.xy += texOffset;\n", i);
out.Write("\t\tur.tex%d.xy += float2(1,0) * texOffset;\n", i);
out.Write("\t}\n");
}
out.Write("\t}\n");
EmitVertex<T>(out, "ll", ApiType, true);
EmitVertex<T>(out, "lr", ApiType);
EmitVertex<T>(out, "ul", ApiType);
EmitVertex<T>(out, "ur", ApiType);
}
else
{
EmitVertex<T>(out, "f", ApiType, true);
}
EmitVertex<T>(out, "ll", ApiType, true);
EmitVertex<T>(out, "lr", ApiType);
EmitVertex<T>(out, "ul", ApiType);
EmitVertex<T>(out, "ur", ApiType);
}
else
{
EmitVertex<T>(out, "f", ApiType, true);
}
out.Write("\t}\n");
out.Write("\t}\n");
EndPrimitive<T>(out, ApiType);
EndPrimitive<T>(out, ApiType);
if (g_ActiveConfig.iStereoMode > 0 && !g_ActiveConfig.backend_info.bSupportsGSInstancing)
out.Write("\t}\n");
if (g_ActiveConfig.iStereoMode > 0 && !g_ActiveConfig.backend_info.bSupportsGSInstancing)
out.Write("\t}\n");
out.Write("}\n");
out.Write("}\n");
return out;
return out;
}
template<class T>
template <class T>
static void EmitVertex(T& out, const char* vertex, API_TYPE ApiType, bool first_vertex)
{
if (g_ActiveConfig.bWireFrame && first_vertex)
out.Write("\tif (i == 0) first = %s;\n", vertex);
if (g_ActiveConfig.bWireFrame && first_vertex)
out.Write("\tif (i == 0) first = %s;\n", vertex);
if (ApiType == API_OPENGL)
{
out.Write("\tgl_Position = %s.pos;\n", vertex);
AssignVSOutputMembers(out, "ps", vertex);
}
else
{
out.Write("\tps.o = %s;\n", vertex);
}
if (ApiType == API_OPENGL)
{
out.Write("\tgl_Position = %s.pos;\n", vertex);
AssignVSOutputMembers(out, "ps", vertex);
}
else
{
out.Write("\tps.o = %s;\n", vertex);
}
if (ApiType == API_OPENGL)
out.Write("\tEmitVertex();\n");
else
out.Write("\toutput.Append(ps);\n");
if (ApiType == API_OPENGL)
out.Write("\tEmitVertex();\n");
else
out.Write("\toutput.Append(ps);\n");
}
template<class T>
template <class T>
static void EndPrimitive(T& out, API_TYPE ApiType)
{
if (g_ActiveConfig.bWireFrame)
EmitVertex<T>(out, "first", ApiType);
if (g_ActiveConfig.bWireFrame)
EmitVertex<T>(out, "first", ApiType);
if (ApiType == API_OPENGL)
out.Write("\tEndPrimitive();\n");
else
out.Write("\toutput.RestartStrip();\n");
if (ApiType == API_OPENGL)
out.Write("\tEndPrimitive();\n");
else
out.Write("\toutput.RestartStrip();\n");
}
// Runs the shared generator with a GeometryShaderUid writer and returns the resulting uid.
GeometryShaderUid GetGeometryShaderUid(u32 primitive_type, API_TYPE ApiType)
{
  return GenerateGeometryShader<GeometryShaderUid>(primitive_type, ApiType);
}
// Runs the shared generator with a ShaderCode writer and returns the generated source.
ShaderCode GenerateGeometryShaderCode(u32 primitive_type, API_TYPE ApiType)
{
  return GenerateGeometryShader<ShaderCode>(primitive_type, ApiType);
}

View file

@ -12,14 +12,17 @@
// Bit-packed set of settings recorded by the geometry shader generator.
struct geometry_shader_uid_data
{
  // Size of this struct in bytes.
  u32 NumValues() const { return sizeof(geometry_shader_uid_data); }

  // True when the input is plain triangles with neither stereo nor wireframe enabled.
  bool IsPassthrough() const
  {
    return primitive_type == PRIMITIVE_TRIANGLES && !stereo && !wireframe;
  }

  u32 stereo : 1;          // g_ActiveConfig.iStereoMode > 0
  u32 numTexGens : 4;      // xfmem.numTexGen.numTexGens
  u32 pixel_lighting : 1;  // g_ActiveConfig.bEnablePixelLighting
  u32 primitive_type : 2;  // primitive type passed to the generator
  u32 wireframe : 1;       // g_ActiveConfig.bWireFrame
};
#pragma pack()

View file

@ -11,9 +11,7 @@
#include "VideoCommon/VideoConfig.h"
#include "VideoCommon/XFMemory.h"
// Lookup table indexed by bpmem.lineptwidth.lineoff / pointoff; the selected value is stored in
// constants.texoffset[2] / [3] by SetLinePtWidthChanged().
static const int LINE_PT_TEX_OFFSETS[8] = {0, 16, 8, 4, 2, 1, 1, 1};
GeometryShaderConstants GeometryShaderManager::constants;
bool GeometryShaderManager::dirty;
@ -23,13 +21,13 @@ static bool s_viewport_changed;
// Zeroes the constant block and flags viewport and projection for recomputation.
void GeometryShaderManager::Init()
{
  memset(&constants, 0, sizeof(constants));

  // Init any initial constants which aren't zero when bpmem is zero.
  SetViewportChanged();
  SetProjectionChanged();

  dirty = true;
}
void GeometryShaderManager::Shutdown()
@ -38,87 +36,89 @@ void GeometryShaderManager::Shutdown()
// Flags the projection-derived constants and the constant block as stale.
void GeometryShaderManager::Dirty()
{
  // This function is called after a savestate is loaded.
  // Any constants that can change based on settings should be re-calculated.
  s_projection_changed = true;

  dirty = true;
}
// Recomputes the constants whose inputs were flagged as changed and marks the block dirty.
//   stereoparams[0..1] - per-eye horizontal offsets (zero for non-perspective projections)
//   stereoparams[2]    - convergence value
//   lineptparams[0..1] - 2 * viewport width and -2 * viewport height
void GeometryShaderManager::SetConstants()
{
  // The projection flag is deliberately left set while stereo is disabled, so the stereo
  // parameters are computed as soon as stereo gets enabled.
  if (s_projection_changed && g_ActiveConfig.iStereoMode > 0)
  {
    s_projection_changed = false;

    if (xfmem.projection.type == GX_PERSPECTIVE)
    {
      const float offset = (g_ActiveConfig.iStereoDepth / 1000.0f) *
                           (g_ActiveConfig.iStereoDepthPercentage / 100.0f);
      constants.stereoparams[0] = g_ActiveConfig.bStereoSwapEyes ? offset : -offset;
      constants.stereoparams[1] = g_ActiveConfig.bStereoSwapEyes ? -offset : offset;
    }
    else
    {
      constants.stereoparams[0] = constants.stereoparams[1] = 0;
    }

    constants.stereoparams[2] =
        static_cast<float>(g_ActiveConfig.iStereoConvergence *
                           (g_ActiveConfig.iStereoConvergencePercentage / 100.0f));

    dirty = true;
  }

  if (s_viewport_changed)
  {
    s_viewport_changed = false;

    constants.lineptparams[0] = 2.0f * xfmem.viewport.wd;
    constants.lineptparams[1] = -2.0f * xfmem.viewport.ht;

    dirty = true;
  }
}
// Requests that SetConstants() recompute the viewport-derived constants.
void GeometryShaderManager::SetViewportChanged()
{
  s_viewport_changed = true;
}
// Requests that SetConstants() recompute the projection-derived (stereo) constants.
void GeometryShaderManager::SetProjectionChanged()
{
  s_projection_changed = true;
}
// Refreshes the line/point size and texture-offset constants from bpmem.lineptwidth.
void GeometryShaderManager::SetLinePtWidthChanged()
{
  constants.lineptparams[2] = bpmem.lineptwidth.linesize / 6.f;
  constants.lineptparams[3] = bpmem.lineptwidth.pointsize / 6.f;
  // NOTE(review): assumes lineoff/pointoff never exceed 7 (the table has 8 entries) - confirm
  // against the register definition.
  constants.texoffset[2] = LINE_PT_TEX_OFFSETS[bpmem.lineptwidth.lineoff];
  constants.texoffset[3] = LINE_PT_TEX_OFFSETS[bpmem.lineptwidth.pointoff];
  dirty = true;
}
// Copies the line/point offset flags of texcoord `texmapid` into bit `texmapid` of
// texoffset[0] (line) and texoffset[1] (point).
void GeometryShaderManager::SetTexCoordChanged(u8 texmapid)
{
  TCoordInfo& tc = bpmem.texcoords[texmapid];
  const int bitmask = 1 << texmapid;

  // Clear the old bit, then merge in the current register value.
  // NOTE(review): presumably line_offset/point_offset are single-bit fields - confirm.
  constants.texoffset[0] &= ~bitmask;
  constants.texoffset[0] |= tc.s.line_offset << texmapid;
  constants.texoffset[1] &= ~bitmask;
  constants.texoffset[1] |= tc.s.point_offset << texmapid;
  dirty = true;
}
// Passes the change flags and the constant block through the savestate wrapper `p`.
void GeometryShaderManager::DoState(PointerWrap& p)
{
  p.Do(s_projection_changed);
  p.Do(s_viewport_changed);

  p.Do(constants);

  if (p.GetMode() == PointerWrap::MODE_READ)
  {
    // Fixup the current state from global GPU state
    // NOTE: This requires that all GPU memory has been loaded already.
    Dirty();
  }
}

View file

@ -13,17 +13,17 @@ class PointerWrap;
// Static-only holder for the geometry shader constant block and its change notifications.
class GeometryShaderManager
{
public:
  static void Init();
  static void Dirty();
  static void Shutdown();
  static void DoState(PointerWrap& p);

  // Recomputes the constants whose inputs were flagged as changed.
  static void SetConstants();
  static void SetViewportChanged();
  static void SetProjectionChanged();
  static void SetLinePtWidthChanged();
  static void SetTexCoordChanged(u8 texmapid);

  static GeometryShaderConstants constants;
  // Set to true whenever `constants` is modified.
  static bool dirty;
};

View file

@ -2,6 +2,7 @@
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <SOIL/SOIL.h>
#include <algorithm>
#include <cinttypes>
#include <cstring>
@ -13,17 +14,16 @@
#include <utility>
#include <vector>
#include <xxhash.h>
#include <SOIL/SOIL.h>
#include "Common/FileSearch.h"
#include "Common/FileUtil.h"
#include "Common/Flag.h"
#include "Common/Hash.h"
#include "Common/Logging/Log.h"
#include "Common/MemoryUtil.h"
#include "Common/StringUtil.h"
#include "Common/Thread.h"
#include "Common/Timer.h"
#include "Common/Logging/Log.h"
#include "Core/ConfigManager.h"
#include "VideoCommon/HiresTextures.h"
#include "VideoCommon/OnScreenDisplay.h"
@ -32,7 +32,7 @@
static std::unordered_map<std::string, std::string> s_textureMap;
static std::unordered_map<std::string, std::shared_ptr<HiresTexture>> s_textureCache;
static std::mutex s_textureCacheMutex;
static std::mutex s_textureCacheAquireMutex; // for high priority access
static std::mutex s_textureCacheAquireMutex; // for high priority access
static Common::Flag s_textureCacheAbortLoading;
static bool s_check_native_format;
static bool s_check_new_format;
@ -41,412 +41,442 @@ static std::thread s_prefetcher;
static const std::string s_format_prefix = "tex1_";
// Starts with no pixel data; the deleter is SOIL's free routine.
HiresTexture::Level::Level() : data(nullptr, SOIL_free_image_data)
{
}
// Resets the format-detection flags and performs an initial scan via Update().
void HiresTexture::Init()
{
  s_check_native_format = false;
  s_check_new_format = false;

  Update();
}
// Stops the prefetch thread (if one is running) and drops the file map and the texture cache.
void HiresTexture::Shutdown()
{
  if (s_prefetcher.joinable())
  {
    // Ask the prefetcher to stop, then wait for it to exit.
    s_textureCacheAbortLoading.Set();
    s_prefetcher.join();
  }

  s_textureMap.clear();
  s_textureCache.clear();
}
// Rescans the current game's texture directory according to the active configuration:
//  - stops a running prefetch thread first,
//  - clears the file map and the cache when hires textures are disabled,
//  - otherwise records every matching image file in s_textureMap and, when caching is enabled,
//    evicts cache entries that are not in the map and starts a new prefetch thread.
void HiresTexture::Update()
{
  if (s_prefetcher.joinable())
  {
    s_textureCacheAbortLoading.Set();
    s_prefetcher.join();
  }

  if (!g_ActiveConfig.bHiresTextures)
  {
    s_textureMap.clear();
    s_textureCache.clear();
    return;
  }

  if (!g_ActiveConfig.bCacheHiresTextures)
  {
    s_textureCache.clear();
  }

  const std::string& game_id = SConfig::GetInstance().m_strUniqueID;
  const std::string texture_directory = GetTextureDirectory(game_id);
  std::vector<std::string> extensions{
      ".png", ".bmp", ".tga", ".dds",
      ".jpg"  // Why not? Could be useful for large photo-like textures
  };

  std::vector<std::string> filenames =
      DoFileSearch(extensions, {texture_directory}, /*recursive*/ true);

  // Old-style names start with "<game id>_", new-style names with s_format_prefix.
  const std::string code = game_id + "_";

  for (auto& rFilename : filenames)
  {
    std::string FileName;
    SplitPath(rFilename, nullptr, &FileName, nullptr);

    if (FileName.substr(0, code.length()) == code)
    {
      s_textureMap[FileName] = rFilename;
      s_check_native_format = true;
    }

    if (FileName.substr(0, s_format_prefix.length()) == s_format_prefix)
    {
      s_textureMap[FileName] = rFilename;
      s_check_new_format = true;
    }
  }

  if (g_ActiveConfig.bCacheHiresTextures)
  {
    // remove cached but deleted textures
    auto iter = s_textureCache.begin();
    while (iter != s_textureCache.end())
    {
      if (s_textureMap.find(iter->first) == s_textureMap.end())
        iter = s_textureCache.erase(iter);
      else
        ++iter;
    }

    s_textureCacheAbortLoading.Clear();
    s_prefetcher = std::thread(Prefetch);
  }
}
// Thread entry point started by Update(): loads every base texture (file names without "_mip")
// listed in s_textureMap into s_textureCache. Stops early when the abort flag is set or when
// the summed size of the loaded levels exceeds the memory budget.
void HiresTexture::Prefetch()
{
  Common::SetCurrentThreadName("Prefetcher");

  size_t size_sum = 0;
  size_t sys_mem = MemPhysical();
  size_t recommended_min_mem = 2 * size_t(1024 * 1024 * 1024);
  // keep 2GB memory for system stability if system RAM is 4GB+ - use half of memory in other cases
  size_t max_mem =
      (sys_mem / 2 < recommended_min_mem) ? (sys_mem / 2) : (sys_mem - recommended_min_mem);
  u32 starttime = Common::Timer::GetTimeMs();
  for (const auto& entry : s_textureMap)
  {
    const std::string& base_filename = entry.first;

    if (base_filename.find("_mip") == std::string::npos)
    {
      {
        // try to get this mutex first, so the video thread is allowed to get the real mutex
        // faster
        std::unique_lock<std::mutex> lk(s_textureCacheAquireMutex);
      }
      std::unique_lock<std::mutex> lk(s_textureCacheMutex);

      auto iter = s_textureCache.find(base_filename);
      if (iter == s_textureCache.end())
      {
        // unlock while loading a texture. This may result in a race condition where we'll load a
        // texture twice, but it reduces the stuttering a lot.
        // Notice: The loading library _must_ be thread safe now.
        // But bad luck, SOIL isn't, so TODO: remove SOIL usage here and use libpng directly
        // Also TODO: remove s_textureCacheAquireMutex afterwards. It won't be needed as the main
        // mutex will be locked rarely
        // lk.unlock();
        std::unique_ptr<HiresTexture> texture = Load(base_filename, 0, 0);
        // lk.lock();
        if (texture)
        {
          std::shared_ptr<HiresTexture> ptr(std::move(texture));
          iter = s_textureCache.insert(iter, std::make_pair(base_filename, ptr));
        }
      }
      if (iter != s_textureCache.end())
      {
        // Count cached entries too, so the budget covers everything held in the cache.
        for (const Level& l : iter->second->m_levels)
        {
          size_sum += l.data_size;
        }
      }
    }

    if (s_textureCacheAbortLoading.IsSet())
    {
      return;
    }

    if (size_sum > max_mem)
    {
      g_Config.bCacheHiresTextures = false;

      OSD::AddMessage(
          StringFromFormat(
              "Custom Textures prefetching after %.1f MB aborted, not enough RAM available",
              size_sum / (1024.0 * 1024.0)),
          10000);
      return;
    }
  }
  u32 stoptime = Common::Timer::GetTimeMs();
  OSD::AddMessage(StringFromFormat("Custom Textures loaded, %.1f MB in %.1f s",
                                   size_sum / (1024.0 * 1024.0), (stoptime - starttime) / 1000.0),
                  10000);
}
std::string HiresTexture::GenBaseName(const u8* texture, size_t texture_size, const u8* tlut, size_t tlut_size, u32 width, u32 height, int format, bool has_mipmaps, bool dump)
std::string HiresTexture::GenBaseName(const u8* texture, size_t texture_size, const u8* tlut,
size_t tlut_size, u32 width, u32 height, int format,
bool has_mipmaps, bool dump)
{
std::string name = "";
bool convert = false;
if (!dump && s_check_native_format)
{
// try to load the old format first
u64 tex_hash = GetHashHiresTexture(texture, (int)texture_size, g_ActiveConfig.iSafeTextureCache_ColorSamples);
u64 tlut_hash = tlut_size ? GetHashHiresTexture(tlut, (int)tlut_size, g_ActiveConfig.iSafeTextureCache_ColorSamples) : 0;
name = StringFromFormat("%s_%08x_%i", SConfig::GetInstance().m_strUniqueID.c_str(), (u32)(tex_hash ^ tlut_hash), (u16)format);
if (s_textureMap.find(name) != s_textureMap.end())
{
if (g_ActiveConfig.bConvertHiresTextures)
convert = true;
else
return name;
}
}
std::string name = "";
bool convert = false;
if (!dump && s_check_native_format)
{
// try to load the old format first
u64 tex_hash = GetHashHiresTexture(texture, (int)texture_size,
g_ActiveConfig.iSafeTextureCache_ColorSamples);
u64 tlut_hash = tlut_size ? GetHashHiresTexture(tlut, (int)tlut_size,
g_ActiveConfig.iSafeTextureCache_ColorSamples) :
0;
name = StringFromFormat("%s_%08x_%i", SConfig::GetInstance().m_strUniqueID.c_str(),
(u32)(tex_hash ^ tlut_hash), (u16)format);
if (s_textureMap.find(name) != s_textureMap.end())
{
if (g_ActiveConfig.bConvertHiresTextures)
convert = true;
else
return name;
}
}
if (dump || s_check_new_format || convert)
{
// checking for min/max on paletted textures
u32 min = 0xffff;
u32 max = 0;
switch(tlut_size)
{
case 0: break;
case 16 * 2:
for (size_t i = 0; i < texture_size; i++)
{
min = std::min<u32>(min, texture[i] & 0xf);
min = std::min<u32>(min, texture[i] >> 4);
max = std::max<u32>(max, texture[i] & 0xf);
max = std::max<u32>(max, texture[i] >> 4);
}
break;
case 256 * 2:
for (size_t i = 0; i < texture_size; i++)
{
min = std::min<u32>(min, texture[i]);
max = std::max<u32>(max, texture[i]);
}
break;
case 16384 * 2:
for (size_t i = 0; i < texture_size/2; i++)
{
min = std::min<u32>(min, Common::swap16(((u16*)texture)[i]) & 0x3fff);
max = std::max<u32>(max, Common::swap16(((u16*)texture)[i]) & 0x3fff);
}
break;
}
if (tlut_size > 0)
{
tlut_size = 2 * (max + 1 - min);
tlut += 2 * min;
}
if (dump || s_check_new_format || convert)
{
// checking for min/max on paletted textures
u32 min = 0xffff;
u32 max = 0;
switch (tlut_size)
{
case 0:
break;
case 16 * 2:
for (size_t i = 0; i < texture_size; i++)
{
min = std::min<u32>(min, texture[i] & 0xf);
min = std::min<u32>(min, texture[i] >> 4);
max = std::max<u32>(max, texture[i] & 0xf);
max = std::max<u32>(max, texture[i] >> 4);
}
break;
case 256 * 2:
for (size_t i = 0; i < texture_size; i++)
{
min = std::min<u32>(min, texture[i]);
max = std::max<u32>(max, texture[i]);
}
break;
case 16384 * 2:
for (size_t i = 0; i < texture_size / 2; i++)
{
min = std::min<u32>(min, Common::swap16(((u16*)texture)[i]) & 0x3fff);
max = std::max<u32>(max, Common::swap16(((u16*)texture)[i]) & 0x3fff);
}
break;
}
if (tlut_size > 0)
{
tlut_size = 2 * (max + 1 - min);
tlut += 2 * min;
}
u64 tex_hash = XXH64(texture, texture_size, 0);
u64 tlut_hash = tlut_size ? XXH64(tlut, tlut_size, 0) : 0;
u64 tex_hash = XXH64(texture, texture_size, 0);
u64 tlut_hash = tlut_size ? XXH64(tlut, tlut_size, 0) : 0;
std::string basename = s_format_prefix + StringFromFormat("%dx%d%s_%016" PRIx64, width, height, has_mipmaps ? "_m" : "", tex_hash);
std::string tlutname = tlut_size ? StringFromFormat("_%016" PRIx64, tlut_hash) : "";
std::string formatname = StringFromFormat("_%d", format);
std::string fullname = basename + tlutname + formatname;
std::string basename = s_format_prefix + StringFromFormat("%dx%d%s_%016" PRIx64, width, height,
has_mipmaps ? "_m" : "", tex_hash);
std::string tlutname = tlut_size ? StringFromFormat("_%016" PRIx64, tlut_hash) : "";
std::string formatname = StringFromFormat("_%d", format);
std::string fullname = basename + tlutname + formatname;
for (int level = 0; level < 10 && convert; level++)
{
std::string oldname = name;
if (level)
oldname += StringFromFormat("_mip%d", level);
for (int level = 0; level < 10 && convert; level++)
{
std::string oldname = name;
if (level)
oldname += StringFromFormat("_mip%d", level);
// skip not existing levels
if (s_textureMap.find(oldname) == s_textureMap.end())
continue;
// skip not existing levels
if (s_textureMap.find(oldname) == s_textureMap.end())
continue;
for (int i = 0;; i++)
{
// for hash collisions, padd with an integer
std::string newname = fullname;
if (level)
newname += StringFromFormat("_mip%d", level);
if (i)
newname += StringFromFormat(".%d", i);
for (int i = 0;; i++)
{
// for hash collisions, padd with an integer
std::string newname = fullname;
if (level)
newname += StringFromFormat("_mip%d", level);
if (i)
newname += StringFromFormat(".%d", i);
// new texture
if (s_textureMap.find(newname) == s_textureMap.end())
{
std::string src = s_textureMap[oldname];
size_t postfix = src.find_last_of('.');
std::string dst = src.substr(0, postfix - oldname.length()) + newname + src.substr(postfix, src.length() - postfix);
if (File::Rename(src, dst))
{
s_textureMap.erase(oldname);
s_textureMap[newname] = dst;
s_check_new_format = true;
OSD::AddMessage(StringFromFormat("Rename custom texture %s to %s", oldname.c_str(), newname.c_str()), 5000);
}
else
{
ERROR_LOG(VIDEO, "rename failed");
}
break;
}
else
{
// dst fail already exist, compare content
std::string a, b;
File::ReadFileToString(s_textureMap[oldname], a);
File::ReadFileToString(s_textureMap[newname], b);
// new texture
if (s_textureMap.find(newname) == s_textureMap.end())
{
std::string src = s_textureMap[oldname];
size_t postfix = src.find_last_of('.');
std::string dst = src.substr(0, postfix - oldname.length()) + newname +
src.substr(postfix, src.length() - postfix);
if (File::Rename(src, dst))
{
s_textureMap.erase(oldname);
s_textureMap[newname] = dst;
s_check_new_format = true;
OSD::AddMessage(StringFromFormat("Rename custom texture %s to %s", oldname.c_str(),
newname.c_str()),
5000);
}
else
{
ERROR_LOG(VIDEO, "rename failed");
}
break;
}
else
{
// dst fail already exist, compare content
std::string a, b;
File::ReadFileToString(s_textureMap[oldname], a);
File::ReadFileToString(s_textureMap[newname], b);
if (a == b && a != "")
{
// equal, so remove
if (File::Delete(s_textureMap[oldname]))
{
s_textureMap.erase(oldname);
OSD::AddMessage(StringFromFormat("Delete double old custom texture %s", oldname.c_str()), 5000);
}
else
{
ERROR_LOG(VIDEO, "delete failed");
}
break;
}
if (a == b && a != "")
{
// equal, so remove
if (File::Delete(s_textureMap[oldname]))
{
s_textureMap.erase(oldname);
OSD::AddMessage(
StringFromFormat("Delete double old custom texture %s", oldname.c_str()), 5000);
}
else
{
ERROR_LOG(VIDEO, "delete failed");
}
break;
}
// else continue in this loop with the next higher padding variable
}
}
}
// else continue in this loop with the next higher padding variable
}
}
}
// try to match a wildcard template
if (!dump && s_textureMap.find(basename + "_*" + formatname) != s_textureMap.end())
return basename + "_*" + formatname;
// try to match a wildcard template
if (!dump && s_textureMap.find(basename + "_*" + formatname) != s_textureMap.end())
return basename + "_*" + formatname;
// else generate the complete texture
if (dump || s_textureMap.find(fullname) != s_textureMap.end())
return fullname;
}
// else generate the complete texture
if (dump || s_textureMap.find(fullname) != s_textureMap.end())
return fullname;
}
return name;
return name;
}
std::shared_ptr<HiresTexture> HiresTexture::Search(const u8* texture, size_t texture_size, const u8* tlut, size_t tlut_size, u32 width, u32 height, int format, bool has_mipmaps)
std::shared_ptr<HiresTexture> HiresTexture::Search(const u8* texture, size_t texture_size,
const u8* tlut, size_t tlut_size, u32 width,
u32 height, int format, bool has_mipmaps)
{
std::string base_filename = GenBaseName(texture, texture_size, tlut, tlut_size, width, height, format, has_mipmaps);
std::string base_filename =
GenBaseName(texture, texture_size, tlut, tlut_size, width, height, format, has_mipmaps);
std::lock_guard<std::mutex> lk2(s_textureCacheAquireMutex);
std::lock_guard<std::mutex> lk(s_textureCacheMutex);
std::lock_guard<std::mutex> lk2(s_textureCacheAquireMutex);
std::lock_guard<std::mutex> lk(s_textureCacheMutex);
auto iter = s_textureCache.find(base_filename);
if (iter != s_textureCache.end())
{
return iter->second;
}
auto iter = s_textureCache.find(base_filename);
if (iter != s_textureCache.end())
{
return iter->second;
}
std::shared_ptr<HiresTexture> ptr(Load(base_filename, width, height));
std::shared_ptr<HiresTexture> ptr(Load(base_filename, width, height));
if (ptr && g_ActiveConfig.bCacheHiresTextures)
{
s_textureCache[base_filename] = ptr;
}
if (ptr && g_ActiveConfig.bCacheHiresTextures)
{
s_textureCache[base_filename] = ptr;
}
return ptr;
return ptr;
}
std::unique_ptr<HiresTexture> HiresTexture::Load(const std::string& base_filename, u32 width, u32 height)
std::unique_ptr<HiresTexture> HiresTexture::Load(const std::string& base_filename, u32 width,
u32 height)
{
std::unique_ptr<HiresTexture> ret;
for (int level = 0;; level++)
{
std::string filename = base_filename;
if (level)
{
filename += StringFromFormat("_mip%u", level);
}
std::unique_ptr<HiresTexture> ret;
for (int level = 0;; level++)
{
std::string filename = base_filename;
if (level)
{
filename += StringFromFormat("_mip%u", level);
}
if (s_textureMap.find(filename) != s_textureMap.end())
{
Level l;
if (s_textureMap.find(filename) != s_textureMap.end())
{
Level l;
File::IOFile file;
file.Open(s_textureMap[filename], "rb");
std::vector<u8> buffer(file.GetSize());
file.ReadBytes(buffer.data(), file.GetSize());
File::IOFile file;
file.Open(s_textureMap[filename], "rb");
std::vector<u8> buffer(file.GetSize());
file.ReadBytes(buffer.data(), file.GetSize());
int channels;
l.data = SOILPointer(SOIL_load_image_from_memory(buffer.data(), (int)buffer.size(), (int*)&l.width, (int*)&l.height, &channels, SOIL_LOAD_RGBA), SOIL_free_image_data);
l.data_size = (size_t)l.width * l.height * 4;
int channels;
l.data =
SOILPointer(SOIL_load_image_from_memory(buffer.data(), (int)buffer.size(), (int*)&l.width,
(int*)&l.height, &channels, SOIL_LOAD_RGBA),
SOIL_free_image_data);
l.data_size = (size_t)l.width * l.height * 4;
if (l.data == nullptr)
{
ERROR_LOG(VIDEO, "Custom texture %s failed to load", filename.c_str());
break;
}
if (l.data == nullptr)
{
ERROR_LOG(VIDEO, "Custom texture %s failed to load", filename.c_str());
break;
}
if (!level)
{
if (l.width * height != l.height * width)
ERROR_LOG(VIDEO, "Invalid custom texture size %dx%d for texture %s. The aspect differs from the native size %dx%d.",
l.width, l.height, filename.c_str(), width, height);
if (width && height && (l.width % width || l.height % height))
WARN_LOG(VIDEO, "Invalid custom texture size %dx%d for texture %s. Please use an integer upscaling factor based on the native size %dx%d.",
l.width, l.height, filename.c_str(), width, height);
width = l.width;
height = l.height;
}
else if (width != l.width || height != l.height)
{
ERROR_LOG(VIDEO, "Invalid custom texture size %dx%d for texture %s. This mipmap layer _must_ be %dx%d.",
l.width, l.height, filename.c_str(), width, height);
l.data.reset();
break;
}
if (!level)
{
if (l.width * height != l.height * width)
ERROR_LOG(VIDEO, "Invalid custom texture size %dx%d for texture %s. The aspect differs "
"from the native size %dx%d.",
l.width, l.height, filename.c_str(), width, height);
if (width && height && (l.width % width || l.height % height))
WARN_LOG(VIDEO, "Invalid custom texture size %dx%d for texture %s. Please use an integer "
"upscaling factor based on the native size %dx%d.",
l.width, l.height, filename.c_str(), width, height);
width = l.width;
height = l.height;
}
else if (width != l.width || height != l.height)
{
ERROR_LOG(
VIDEO,
"Invalid custom texture size %dx%d for texture %s. This mipmap layer _must_ be %dx%d.",
l.width, l.height, filename.c_str(), width, height);
l.data.reset();
break;
}
if (!ret)
ret = std::unique_ptr<HiresTexture>(new HiresTexture);
ret->m_levels.push_back(std::move(l));
if (!ret)
ret = std::unique_ptr<HiresTexture>(new HiresTexture);
ret->m_levels.push_back(std::move(l));
// no more mipmaps available
if (width == 1 && height == 1)
break;
// no more mipmaps available
if (width == 1 && height == 1)
break;
// calculate the size of the next mipmap
width = std::max(1u, width >> 1);
height = std::max(1u, height >> 1);
}
else
{
break;
}
}
// calculate the size of the next mipmap
width = std::max(1u, width >> 1);
height = std::max(1u, height >> 1);
}
else
{
break;
}
}
return ret;
return ret;
}
// Returns the custom texture directory for the given game ID: the directory named
// after the full ID if it exists, otherwise one named after the ID's first three
// characters (the region-free form). The fallback is returned without checking
// that it exists.
std::string HiresTexture::GetTextureDirectory(const std::string& game_id)
{
  const std::string region_specific_dir = File::GetUserPath(D_HIRESTEXTURES_IDX) + game_id;
  if (File::Exists(region_specific_dir))
    return region_specific_dir;

  // If there's no directory with the region-specific ID, look for a 3-character region-free one
  return File::GetUserPath(D_HIRESTEXTURES_IDX) + game_id.substr(0, 3);
}
HiresTexture::~HiresTexture()

View file

@ -13,46 +13,39 @@
// A custom replacement texture: one decoded RGBA image per mip level.
class HiresTexture
{
public:
  // Pixel buffer together with the function that releases it.
  using SOILPointer = std::unique_ptr<u8, void (*)(unsigned char*)>;

  static void Init();
  static void Update();
  static void Shutdown();

  // Finds (and possibly caches) the custom texture for the given native texture data.
  static std::shared_ptr<HiresTexture> Search(
      const u8* texture, size_t texture_size,
      const u8* tlut, size_t tlut_size,
      u32 width, u32 height,
      int format, bool has_mipmaps);

  // Builds the name used to look up a custom texture, or to name a dump when `dump` is set.
  static std::string GenBaseName(
      const u8* texture, size_t texture_size,
      const u8* tlut, size_t tlut_size,
      u32 width, u32 height,
      int format, bool has_mipmaps,
      bool dump = false);

  ~HiresTexture();

  // One mip level of the texture.
  struct Level
  {
    Level();

    SOILPointer data;      // decoded pixels
    size_t data_size = 0;  // size of `data` in bytes
    u32 width = 0;
    u32 height = 0;
  };
  // Index 0 is the base level.
  std::vector<Level> m_levels;

private:
  static std::unique_ptr<HiresTexture> Load(const std::string& base_filename, u32 width,
                                            u32 height);
  static void Prefetch();

  static std::string GetTextureDirectory(const std::string& game_id);

  // Instances are only created by Load().
  HiresTexture() {}
};

View file

@ -5,22 +5,21 @@
#include <list>
#include <string>
#include "png.h"
#include "Common/CommonTypes.h"
#include "Common/FileUtil.h"
#include "Common/MsgHandler.h"
#include "VideoCommon/ImageWrite.h"
#include "png.h"
// Writes `data` verbatim (binary mode) to `filename`, replacing any existing content.
//
// Returns true if the file could be opened and the data was written and flushed
// without a stream error, false otherwise.
bool SaveData(const std::string& filename, const std::string& data)
{
  std::ofstream f;
  OpenFStream(f, filename, std::ios::binary);
  f << data;

  // Flush before checking so that buffered write failures are reported too.
  f.flush();
  return f.good();
}
/*
TextureToPng
@ -28,86 +27,89 @@ Inputs:
data : This is an array of RGBA with 8 bits per channel. 4 bytes for each pixel.
row_stride: Determines the amount of bytes per row of pixels.
*/
// Writes an RGBA image (8 bits per channel, 4 bytes per pixel) to a PNG file.
//
// data       : pixel data; row y starts at data + y * row_stride.
// row_stride : number of bytes per row of pixels.
// filename   : destination file, opened for binary writing.
// width      : image width in pixels, as written to the PNG header.
// height     : image height in pixels, also the number of rows written.
// saveAlpha  : when false, the alpha byte of every pixel in each row is forced to
//              0xff. This modifies the caller's buffer in place.
//
// Returns true on success. On failure a panic alert is raised and false is returned;
// a null `data` pointer returns false silently.
bool TextureToPng(u8* data, int row_stride, const std::string& filename, int width, int height,
                  bool saveAlpha)
{
  bool success = false;

  if (!data)
    return false;

  char title[] = "Dolphin Screenshot";
  char title_key[] = "Title";
  png_structp png_ptr = nullptr;
  png_infop info_ptr = nullptr;

  // Open file for writing (binary mode)
  File::IOFile fp(filename, "wb");
  if (!fp.IsOpen())
  {
    PanicAlertT("Screenshot failed: Could not open file \"%s\" (error %d)", filename.c_str(),
                errno);
    goto finalise;
  }

  // Initialize write structure
  png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
  if (png_ptr == nullptr)
  {
    PanicAlert("Screenshot failed: Could not allocate write struct");
    goto finalise;
  }

  // Initialize info structure
  info_ptr = png_create_info_struct(png_ptr);
  if (info_ptr == nullptr)
  {
    PanicAlert("Screenshot failed: Could not allocate info struct");
    goto finalise;
  }

  // Setup exception handling: libpng errors raised by the calls below jump back here.
  if (setjmp(png_jmpbuf(png_ptr)))
  {
    PanicAlert("Screenshot failed: Error during PNG creation");
    goto finalise;
  }

  png_init_io(png_ptr, fp.GetHandle());

  // Write header (8 bit color depth)
  png_set_IHDR(png_ptr, info_ptr, width, height, 8, PNG_COLOR_TYPE_RGB_ALPHA, PNG_INTERLACE_NONE,
               PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE);

  // NOTE: title_text must stay without an initializer, because the gotos above jump
  // over this declaration.
  png_text title_text;
  title_text.compression = PNG_TEXT_COMPRESSION_NONE;
  title_text.key = title_key;
  title_text.text = title;
  png_set_text(png_ptr, info_ptr, &title_text, 1);

  png_write_info(png_ptr, info_ptr);

  // Write image data
  for (int y = 0; y < height; ++y)
  {
    u8* row_ptr = data + y * row_stride;
    if (!saveAlpha)
    {
      // Make every pixel of the row opaque (alpha is the 4th byte of each pixel).
      u8* ptr = row_ptr;
      for (int x = 0; x < row_stride / 4; ++x)
      {
        ptr[3] = 0xff;
        ptr += 4;
      }
    }
    png_write_row(png_ptr, row_ptr);
  }

  // End write
  png_write_end(png_ptr, nullptr);

  success = true;

finalise:
  // Passing &info_ptr releases the info struct (and the data attached to it) together
  // with the write struct. Passing a null info pointer, as this code used to do, frees
  // only the write struct and leaks the info struct.
  if (png_ptr != nullptr)
    png_destroy_write_struct(&png_ptr, &info_ptr);

  return success;
}

View file

@ -8,4 +8,5 @@
#include "Common/CommonTypes.h"
// Writes `data` verbatim to `filename` in binary mode.
bool SaveData(const std::string& filename, const std::string& data);

// Writes an image to `filename` as a PNG.
// data is an array of RGBA with 8 bits per channel (4 bytes per pixel);
// row_stride is the number of bytes per row of pixels.
bool TextureToPng(u8* data, int row_stride, const std::string& filename, int width, int height,
                  bool saveAlpha = true);

View file

@ -11,9 +11,9 @@
#include "VideoCommon/OpcodeDecoding.h"
#include "VideoCommon/VideoConfig.h"
// Storage for IndexGenerator's static state.
u16* IndexGenerator::index_buffer_current;  // next index slot to be written
u16* IndexGenerator::BASEIptr;              // start of the index buffer passed to Start()
u32 IndexGenerator::base_index;             // number of vertices added since Start()

// Index value written after a primitive when primitive restart is in use
// (-1 converted to u16).
static const u16 s_primitive_restart = -1;
@ -22,85 +22,84 @@ static u16* (*primitive_table[8])(u16*, u32, u32);
// Fills the primitive dispatch table used by AddIndices(). The triangle-based
// generators are instantiated with or without primitive restart, depending on what
// the backend reports; line and point generators are the same in both cases.
void IndexGenerator::Init()
{
  primitive_table[GX_DRAW_LINES] = &IndexGenerator::AddLineList;
  primitive_table[GX_DRAW_LINE_STRIP] = &IndexGenerator::AddLineStrip;
  primitive_table[GX_DRAW_POINTS] = &IndexGenerator::AddPoints;

  if (!g_Config.backend_info.bSupportsPrimitiveRestart)
  {
    primitive_table[GX_DRAW_QUADS] = IndexGenerator::AddQuads<false>;
    primitive_table[GX_DRAW_QUADS_2] = IndexGenerator::AddQuads_nonstandard<false>;
    primitive_table[GX_DRAW_TRIANGLES] = IndexGenerator::AddList<false>;
    primitive_table[GX_DRAW_TRIANGLE_STRIP] = IndexGenerator::AddStrip<false>;
    primitive_table[GX_DRAW_TRIANGLE_FAN] = IndexGenerator::AddFan<false>;
    return;
  }

  primitive_table[GX_DRAW_QUADS] = IndexGenerator::AddQuads<true>;
  primitive_table[GX_DRAW_QUADS_2] = IndexGenerator::AddQuads_nonstandard<true>;
  primitive_table[GX_DRAW_TRIANGLES] = IndexGenerator::AddList<true>;
  primitive_table[GX_DRAW_TRIANGLE_STRIP] = IndexGenerator::AddStrip<true>;
  primitive_table[GX_DRAW_TRIANGLE_FAN] = IndexGenerator::AddFan<true>;
}
// Begins a new batch: indices are written starting at Indexptr and the vertex
// counter is reset to zero.
void IndexGenerator::Start(u16* Indexptr)
{
  BASEIptr = Indexptr;
  index_buffer_current = Indexptr;
  base_index = 0;
}
// Appends the indices for `numVerts` vertices of the given primitive type, using the
// generator registered in primitive_table, then advances the vertex counter.
void IndexGenerator::AddIndices(int primitive, u32 numVerts)
{
  const auto generate = primitive_table[primitive];
  index_buffer_current = generate(index_buffer_current, numVerts, base_index);
  base_index += numVerts;
}
// Triangles
// Writes the three indices of one triangle, followed by the restart index when `pr`
// is set. Returns the position after the last index written.
template <bool pr>
__forceinline u16* IndexGenerator::WriteTriangle(u16* Iptr, u32 index1, u32 index2, u32 index3)
{
  Iptr[0] = index1;
  Iptr[1] = index2;
  Iptr[2] = index3;
  Iptr += 3;

  if (pr)
  {
    *Iptr = s_primitive_restart;
    ++Iptr;
  }
  return Iptr;
}
// Triangle list: every three consecutive vertices form one triangle. Trailing
// vertices that do not complete a triangle are ignored.
template <bool pr>
u16* IndexGenerator::AddList(u16* Iptr, u32 const numVerts, u32 index)
{
  // `last` is the offset of the triangle's third vertex.
  u32 last = 2;
  while (last < numVerts)
  {
    Iptr = WriteTriangle<pr>(Iptr, index + last - 2, index + last - 1, index + last);
    last += 3;
  }
  return Iptr;
}
// Triangle strip. With primitive restart the vertices are emitted in order, followed
// by one restart index. Without it the strip is expanded into separate triangles,
// swapping the last two indices of every other triangle.
template <bool pr>
u16* IndexGenerator::AddStrip(u16* Iptr, u32 const numVerts, u32 index)
{
  if (!pr)
  {
    bool swapped = false;
    for (u32 last = 2; last < numVerts; ++last)
    {
      const u32 second = index + last - (swapped ? 0u : 1u);
      const u32 third = index + last - (swapped ? 1u : 0u);
      Iptr = WriteTriangle<pr>(Iptr, index + last - 2, second, third);
      swapped = !swapped;
    }
    return Iptr;
  }

  for (u32 offset = 0; offset < numVerts; ++offset)
  {
    *Iptr = index + offset;
    ++Iptr;
  }
  *Iptr = s_primitive_restart;
  ++Iptr;
  return Iptr;
}
/**
@ -122,37 +121,38 @@ template <bool pr> u16* IndexGenerator::AddStrip(u16 *Iptr, u32 const numVerts,
* so we use 6 indices for 3 triangles
*/
// Triangle fan around the vertex at `index`.
//
// With primitive restart the fan is emitted as short strips: first groups that use up
// three fan vertices each (five indices plus restart), then groups that use up two
// (four indices plus restart). Whatever is left, or the whole fan when `pr` is not
// set, is written as individual triangles.
template <bool pr>
u16* IndexGenerator::AddFan(u16* Iptr, u32 numVerts, u32 index)
{
  u32 next = 2;

  if (pr)
  {
    while (next + 3 <= numVerts)
    {
      Iptr[0] = index + next - 1;
      Iptr[1] = index + next + 0;
      Iptr[2] = index;
      Iptr[3] = index + next + 1;
      Iptr[4] = index + next + 2;
      Iptr[5] = s_primitive_restart;
      Iptr += 6;
      next += 3;
    }

    while (next + 2 <= numVerts)
    {
      Iptr[0] = index + next - 1;
      Iptr[1] = index + next + 0;
      Iptr[2] = index;
      Iptr[3] = index + next + 1;
      Iptr[4] = s_primitive_restart;
      Iptr += 5;
      next += 2;
    }
  }

  while (next < numVerts)
  {
    Iptr = WriteTriangle<pr>(Iptr, index, index + next - 1, index + next);
    ++next;
  }
  return Iptr;
}
/*
@ -172,77 +172,78 @@ template <bool pr> u16* IndexGenerator::AddFan(u16 *Iptr, u32 numVerts, u32 inde
* A simple triangle has to be rendered for three vertices.
* ZWW do this for sun rays
*/
// Quads: every four consecutive vertices form one quad, emitted either as a
// four-index strip plus restart (`pr`) or as two triangles. If exactly three vertices
// remain after the last full quad, they are rendered as a single triangle.
template <bool pr>
u16* IndexGenerator::AddQuads(u16* Iptr, u32 numVerts, u32 index)
{
  // `last` is the offset of the quad's fourth vertex.
  u32 last = 3;
  while (last < numVerts)
  {
    const u32 v0 = index + last - 3;
    const u32 v1 = index + last - 2;
    const u32 v2 = index + last - 1;
    const u32 v3 = index + last - 0;

    if (pr)
    {
      Iptr[0] = v1;
      Iptr[1] = v2;
      Iptr[2] = v0;
      Iptr[3] = v3;
      Iptr[4] = s_primitive_restart;
      Iptr += 5;
    }
    else
    {
      Iptr = WriteTriangle<pr>(Iptr, v0, v1, v2);
      Iptr = WriteTriangle<pr>(Iptr, v0, v2, v3);
    }
    last += 4;
  }

  // three vertices remaining, so render a triangle
  if (last == numVerts)
  {
    Iptr =
        WriteTriangle<pr>(Iptr, index + numVerts - 3, index + numVerts - 2, index + numVerts - 1);
  }
  return Iptr;
}
// Generator registered for GX_DRAW_QUADS_2: logs a warning about the non-standard
// primitive and then generates the same indices as AddQuads().
template <bool pr>
u16* IndexGenerator::AddQuads_nonstandard(u16* Iptr, u32 numVerts, u32 index)
{
  // The primitive is named GX_DRAW_QUADS_2; the message previously said "GL_".
  WARN_LOG(VIDEO, "Non-standard primitive drawing command GX_DRAW_QUADS_2");
  return AddQuads<pr>(Iptr, numVerts, index);
}
// Lines
// Line list: every two consecutive vertices form one line. A trailing unpaired
// vertex is ignored.
u16* IndexGenerator::AddLineList(u16* Iptr, u32 numVerts, u32 index)
{
  // `end` is the offset of the line's second vertex.
  u32 end = 1;
  while (end < numVerts)
  {
    Iptr[0] = index + end - 1;
    Iptr[1] = index + end;
    Iptr += 2;
    end += 2;
  }
  return Iptr;
}
// Line strips are not emitted as strips because line lists are much more common;
// each segment of the strip is written as a separate line (two indices).
u16* IndexGenerator::AddLineStrip(u16* Iptr, u32 numVerts, u32 index)
{
  // `end` is the offset of the segment's second vertex.
  u32 end = 1;
  while (end < numVerts)
  {
    Iptr[0] = index + end - 1;
    Iptr[1] = index + end;
    Iptr += 2;
    ++end;
  }
  return Iptr;
}
// Points
// Points: one index per vertex, in order.
u16* IndexGenerator::AddPoints(u16* Iptr, u32 numVerts, u32 index)
{
  u32 offset = 0;
  while (offset != numVerts)
  {
    *Iptr = index + offset;
    ++Iptr;
    ++offset;
  }
  return Iptr;
}
// Returns how many more vertices can be added before the vertex counter reaches the
// highest usable index value.
u32 IndexGenerator::GetRemainingIndices()
{
  // -1 (65535) is reserved for primitive restart (ogl + dx11)
  const u32 highest_usable_index = 65534;
  return highest_usable_index - base_index;
}

View file

@ -12,37 +12,41 @@
// Converts primitives (quads, triangle lists/strips/fans, lines, points) into 16-bit
// indices written to a caller-supplied buffer. All state is static.
class IndexGenerator
{
public:
  // Sets up the per-primitive generator table.
  static void Init();
  // Starts a new batch writing at Indexptr.
  static void Start(u16* Indexptr);

  // Appends the indices for numVertices vertices of the given primitive type.
  static void AddIndices(int primitive, u32 numVertices);

  // Number of vertices added since Start().
  static u32 GetNumVerts() { return base_index; }
  // Number of indices written since Start().
  static u32 GetIndexLen() { return (u32)(index_buffer_current - BASEIptr); }
  static u32 GetRemainingIndices();

private:
  // Triangles (pr selects the primitive-restart variant)
  template <bool pr>
  static u16* AddList(u16* Iptr, u32 numVerts, u32 index);
  template <bool pr>
  static u16* AddStrip(u16* Iptr, u32 numVerts, u32 index);
  template <bool pr>
  static u16* AddFan(u16* Iptr, u32 numVerts, u32 index);
  template <bool pr>
  static u16* AddQuads(u16* Iptr, u32 numVerts, u32 index);
  template <bool pr>
  static u16* AddQuads_nonstandard(u16* Iptr, u32 numVerts, u32 index);

  // Lines
  static u16* AddLineList(u16* Iptr, u32 numVerts, u32 index);
  static u16* AddLineStrip(u16* Iptr, u32 numVerts, u32 index);

  // Points
  static u16* AddPoints(u16* Iptr, u32 numVerts, u32 index);

  // Writes a single triangle, plus the restart index when pr is set.
  template <bool pr>
  static u16* WriteTriangle(u16* Iptr, u32 index1, u32 index2, u32 index3);

  static u16* index_buffer_current;  // next index slot to be written
  static u16* BASEIptr;              // start of the current index buffer
  static u32 base_index;             // vertices added since Start()
};

View file

@ -10,7 +10,6 @@
#include "VideoCommon/ShaderGenCommon.h"
#include "VideoCommon/XFMemory.h"
#define LIGHT_COL "%s[%d].color.%s"
#define LIGHT_COL_PARAMS(index, swizzle) (I_LIGHTS), (index), (swizzle)
@ -31,79 +30,88 @@
*/
// Bit-packed lighting configuration. The shader generator ORs the per-channel
// attenuation and diffuse functions into attnfunc/diffusefunc at bit offset
// 2 * channel index. The field order and widths are kept exactly as declared.
struct LightingUidData
{
  u32 matsource : 4;       // 4 channels x 1 bit
  u32 enablelighting : 4;  // 4 channels x 1 bit
  u32 ambsource : 4;       // 4 channels x 1 bit
  u32 diffusefunc : 8;     // 4 channels x 2 bits
  u32 attnfunc : 8;        // 4 channels x 2 bits
  u32 light_mask : 32;     // 4 channels x 8 bits
};
// Shader source text declaring the `Light` struct: an int4 color and four float4
// vectors (cosatt, distatt, pos, dir).
static const char s_lighting_struct[] =
    "struct Light {\n"
    "\tint4 color;\n"
    "\tfloat4 cosatt;\n"
    "\tfloat4 distatt;\n"
    "\tfloat4 pos;\n"
    "\tfloat4 dir;\n"
    "};\n";
template<class T>
static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, int litchan_index, int coloralpha)
template <class T>
static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, int litchan_index,
int coloralpha)
{
const LitChannel& chan = (litchan_index > 1) ? xfmem.alpha[litchan_index-2] : xfmem.color[litchan_index];
const char* swizzle = (coloralpha == 1) ? "xyz" : (coloralpha == 2) ? "w" : "xyzw";
const char* swizzle_components = (coloralpha == 1) ? "3" : (coloralpha == 2) ? "" : "4";
const LitChannel& chan =
(litchan_index > 1) ? xfmem.alpha[litchan_index - 2] : xfmem.color[litchan_index];
const char* swizzle = (coloralpha == 1) ? "xyz" : (coloralpha == 2) ? "w" : "xyzw";
const char* swizzle_components = (coloralpha == 1) ? "3" : (coloralpha == 2) ? "" : "4";
uid_data.attnfunc |= chan.attnfunc << (2*litchan_index);
uid_data.diffusefunc |= chan.diffusefunc << (2*litchan_index);
uid_data.attnfunc |= chan.attnfunc << (2 * litchan_index);
uid_data.diffusefunc |= chan.diffusefunc << (2 * litchan_index);
switch (chan.attnfunc)
{
case LIGHTATTN_NONE:
case LIGHTATTN_DIR:
object.Write("ldir = normalize(" LIGHT_POS".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(index));
object.Write("attn = 1.0;\n");
object.Write("if (length(ldir) == 0.0)\n\t ldir = _norm0;\n");
break;
case LIGHTATTN_SPEC:
object.Write("ldir = normalize(" LIGHT_POS".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(index));
object.Write("attn = (dot(_norm0, ldir) >= 0.0) ? max(0.0, dot(_norm0, " LIGHT_DIR".xyz)) : 0.0;\n", LIGHT_DIR_PARAMS(index));
object.Write("cosAttn = " LIGHT_COSATT".xyz;\n", LIGHT_COSATT_PARAMS(index));
object.Write("distAttn = %s(" LIGHT_DISTATT".xyz);\n", (chan.diffusefunc == LIGHTDIF_NONE) ? "" : "normalize", LIGHT_DISTATT_PARAMS(index));
object.Write("attn = max(0.0f, dot(cosAttn, float3(1.0, attn, attn*attn))) / dot(distAttn, float3(1.0, attn, attn*attn));\n");
break;
case LIGHTATTN_SPOT:
object.Write("ldir = " LIGHT_POS".xyz - pos.xyz;\n", LIGHT_POS_PARAMS(index));
object.Write("dist2 = dot(ldir, ldir);\n"
"dist = sqrt(dist2);\n"
"ldir = ldir / dist;\n"
"attn = max(0.0, dot(ldir, " LIGHT_DIR".xyz));\n", LIGHT_DIR_PARAMS(index));
// attn*attn may overflow
object.Write("attn = max(0.0, " LIGHT_COSATT".x + " LIGHT_COSATT".y*attn + " LIGHT_COSATT".z*attn*attn) / dot(" LIGHT_DISTATT".xyz, float3(1.0,dist,dist2));\n",
LIGHT_COSATT_PARAMS(index), LIGHT_COSATT_PARAMS(index), LIGHT_COSATT_PARAMS(index), LIGHT_DISTATT_PARAMS(index));
break;
default: _assert_(0);
}
switch (chan.attnfunc)
{
case LIGHTATTN_NONE:
case LIGHTATTN_DIR:
object.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(index));
object.Write("attn = 1.0;\n");
object.Write("if (length(ldir) == 0.0)\n\t ldir = _norm0;\n");
break;
case LIGHTATTN_SPEC:
object.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(index));
object.Write("attn = (dot(_norm0, ldir) >= 0.0) ? max(0.0, dot(_norm0, " LIGHT_DIR
".xyz)) : 0.0;\n",
LIGHT_DIR_PARAMS(index));
object.Write("cosAttn = " LIGHT_COSATT ".xyz;\n", LIGHT_COSATT_PARAMS(index));
object.Write("distAttn = %s(" LIGHT_DISTATT ".xyz);\n",
(chan.diffusefunc == LIGHTDIF_NONE) ? "" : "normalize",
LIGHT_DISTATT_PARAMS(index));
object.Write("attn = max(0.0f, dot(cosAttn, float3(1.0, attn, attn*attn))) / dot(distAttn, "
"float3(1.0, attn, attn*attn));\n");
break;
case LIGHTATTN_SPOT:
object.Write("ldir = " LIGHT_POS ".xyz - pos.xyz;\n", LIGHT_POS_PARAMS(index));
object.Write("dist2 = dot(ldir, ldir);\n"
"dist = sqrt(dist2);\n"
"ldir = ldir / dist;\n"
"attn = max(0.0, dot(ldir, " LIGHT_DIR ".xyz));\n",
LIGHT_DIR_PARAMS(index));
// attn*attn may overflow
object.Write("attn = max(0.0, " LIGHT_COSATT ".x + " LIGHT_COSATT ".y*attn + " LIGHT_COSATT
".z*attn*attn) / dot(" LIGHT_DISTATT ".xyz, float3(1.0,dist,dist2));\n",
LIGHT_COSATT_PARAMS(index), LIGHT_COSATT_PARAMS(index), LIGHT_COSATT_PARAMS(index),
LIGHT_DISTATT_PARAMS(index));
break;
default:
_assert_(0);
}
switch (chan.diffusefunc)
{
case LIGHTDIF_NONE:
object.Write("lacc.%s += int%s(round(attn * float%s(" LIGHT_COL")));\n",
swizzle, swizzle_components,
swizzle_components, LIGHT_COL_PARAMS(index, swizzle));
break;
case LIGHTDIF_SIGN:
case LIGHTDIF_CLAMP:
object.Write("lacc.%s += int%s(round(attn * %sdot(ldir, _norm0)) * float%s(" LIGHT_COL")));\n",
swizzle, swizzle_components,
chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0," :"(",
swizzle_components, LIGHT_COL_PARAMS(index, swizzle));
break;
default: _assert_(0);
}
switch (chan.diffusefunc)
{
case LIGHTDIF_NONE:
object.Write("lacc.%s += int%s(round(attn * float%s(" LIGHT_COL ")));\n", swizzle,
swizzle_components, swizzle_components, LIGHT_COL_PARAMS(index, swizzle));
break;
case LIGHTDIF_SIGN:
case LIGHTDIF_CLAMP:
object.Write("lacc.%s += int%s(round(attn * %sdot(ldir, _norm0)) * float%s(" LIGHT_COL ")));\n",
swizzle, swizzle_components, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0," : "(",
swizzle_components, LIGHT_COL_PARAMS(index, swizzle));
break;
default:
_assert_(0);
}
object.Write("\n");
object.Write("\n");
}
// vertex shader
@ -111,149 +119,151 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index,
// materials name is I_MATERIALS in vs and I_PMATERIALS in ps
// inColorName is color in vs and colors_ in ps
// dest is o.colors_ in vs and colors_ in ps
template<class T>
static void GenerateLightingShader(T& object, LightingUidData& uid_data, int components, const char* inColorName, const char* dest)
template <class T>
static void GenerateLightingShader(T& object, LightingUidData& uid_data, int components,
const char* inColorName, const char* dest)
{
for (unsigned int j = 0; j < xfmem.numChan.numColorChans; j++)
{
const LitChannel& color = xfmem.color[j];
const LitChannel& alpha = xfmem.alpha[j];
for (unsigned int j = 0; j < xfmem.numChan.numColorChans; j++)
{
const LitChannel& color = xfmem.color[j];
const LitChannel& alpha = xfmem.alpha[j];
object.Write("{\n");
object.Write("{\n");
uid_data.matsource |= xfmem.color[j].matsource << j;
if (color.matsource) // from vertex
{
if (components & (VB_HAS_COL0 << j))
object.Write("int4 mat = int4(round(%s%d * 255.0));\n", inColorName, j);
else if (components & VB_HAS_COL0)
object.Write("int4 mat = int4(round(%s0 * 255.0));\n", inColorName);
else
object.Write("int4 mat = int4(255, 255, 255, 255);\n");
}
else // from color
{
object.Write("int4 mat = %s[%d];\n", I_MATERIALS, j+2);
}
uid_data.matsource |= xfmem.color[j].matsource << j;
if (color.matsource) // from vertex
{
if (components & (VB_HAS_COL0 << j))
object.Write("int4 mat = int4(round(%s%d * 255.0));\n", inColorName, j);
else if (components & VB_HAS_COL0)
object.Write("int4 mat = int4(round(%s0 * 255.0));\n", inColorName);
else
object.Write("int4 mat = int4(255, 255, 255, 255);\n");
}
else // from color
{
object.Write("int4 mat = %s[%d];\n", I_MATERIALS, j + 2);
}
uid_data.enablelighting |= xfmem.color[j].enablelighting << j;
if (color.enablelighting)
{
uid_data.ambsource |= xfmem.color[j].ambsource << j;
if (color.ambsource) // from vertex
{
if (components & (VB_HAS_COL0<<j) )
object.Write("lacc = int4(round(%s%d * 255.0));\n", inColorName, j);
else if (components & VB_HAS_COL0 )
object.Write("lacc = int4(round(%s0 * 255.0));\n", inColorName);
else
// TODO: this isn't verified. Here we want to read the ambient from the vertex,
// but the vertex itself has no color. So we don't know which value to read.
// Returing 1.0 is the same as disabled lightning, so this could be fine
object.Write("lacc = int4(255, 255, 255, 255);\n");
}
else // from color
{
object.Write("lacc = %s[%d];\n", I_MATERIALS, j);
}
}
else
{
object.Write("lacc = int4(255, 255, 255, 255);\n");
}
uid_data.enablelighting |= xfmem.color[j].enablelighting << j;
if (color.enablelighting)
{
uid_data.ambsource |= xfmem.color[j].ambsource << j;
if (color.ambsource) // from vertex
{
if (components & (VB_HAS_COL0 << j))
object.Write("lacc = int4(round(%s%d * 255.0));\n", inColorName, j);
else if (components & VB_HAS_COL0)
object.Write("lacc = int4(round(%s0 * 255.0));\n", inColorName);
else
// TODO: this isn't verified. Here we want to read the ambient from the vertex,
// but the vertex itself has no color. So we don't know which value to read.
// Returing 1.0 is the same as disabled lightning, so this could be fine
object.Write("lacc = int4(255, 255, 255, 255);\n");
}
else // from color
{
object.Write("lacc = %s[%d];\n", I_MATERIALS, j);
}
}
else
{
object.Write("lacc = int4(255, 255, 255, 255);\n");
}
// check if alpha is different
uid_data.matsource |= xfmem.alpha[j].matsource << (j+2);
if (alpha.matsource != color.matsource)
{
if (alpha.matsource) // from vertex
{
if (components & (VB_HAS_COL0<<j))
object.Write("mat.w = int(round(%s%d.w * 255.0));\n", inColorName, j);
else if (components & VB_HAS_COL0)
object.Write("mat.w = int(round(%s0.w * 255.0));\n", inColorName);
else object.Write("mat.w = 255;\n");
}
else // from color
{
object.Write("mat.w = %s[%d].w;\n", I_MATERIALS, j+2);
}
}
// check if alpha is different
uid_data.matsource |= xfmem.alpha[j].matsource << (j + 2);
if (alpha.matsource != color.matsource)
{
if (alpha.matsource) // from vertex
{
if (components & (VB_HAS_COL0 << j))
object.Write("mat.w = int(round(%s%d.w * 255.0));\n", inColorName, j);
else if (components & VB_HAS_COL0)
object.Write("mat.w = int(round(%s0.w * 255.0));\n", inColorName);
else
object.Write("mat.w = 255;\n");
}
else // from color
{
object.Write("mat.w = %s[%d].w;\n", I_MATERIALS, j + 2);
}
}
uid_data.enablelighting |= xfmem.alpha[j].enablelighting << (j+2);
if (alpha.enablelighting)
{
uid_data.ambsource |= xfmem.alpha[j].ambsource << (j+2);
if (alpha.ambsource) // from vertex
{
if (components & (VB_HAS_COL0<<j) )
object.Write("lacc.w = int(round(%s%d.w * 255.0));\n", inColorName, j);
else if (components & VB_HAS_COL0 )
object.Write("lacc.w = int(round(%s0.w * 255.0));\n", inColorName);
else
// TODO: The same for alpha: We want to read from vertex, but the vertex has no color
object.Write("lacc.w = 255;\n");
}
else // from color
{
object.Write("lacc.w = %s[%d].w;\n", I_MATERIALS, j);
}
}
else
{
object.Write("lacc.w = 255;\n");
}
uid_data.enablelighting |= xfmem.alpha[j].enablelighting << (j + 2);
if (alpha.enablelighting)
{
uid_data.ambsource |= xfmem.alpha[j].ambsource << (j + 2);
if (alpha.ambsource) // from vertex
{
if (components & (VB_HAS_COL0 << j))
object.Write("lacc.w = int(round(%s%d.w * 255.0));\n", inColorName, j);
else if (components & VB_HAS_COL0)
object.Write("lacc.w = int(round(%s0.w * 255.0));\n", inColorName);
else
// TODO: The same for alpha: We want to read from vertex, but the vertex has no color
object.Write("lacc.w = 255;\n");
}
else // from color
{
object.Write("lacc.w = %s[%d].w;\n", I_MATERIALS, j);
}
}
else
{
object.Write("lacc.w = 255;\n");
}
if (color.enablelighting && alpha.enablelighting)
{
// both have lighting, test if they use the same lights
int mask = 0;
uid_data.attnfunc |= color.attnfunc << (2*j);
uid_data.attnfunc |= alpha.attnfunc << (2*(j+2));
uid_data.diffusefunc |= color.diffusefunc << (2*j);
uid_data.diffusefunc |= alpha.diffusefunc << (2*(j+2));
uid_data.light_mask |= color.GetFullLightMask() << (8*j);
uid_data.light_mask |= alpha.GetFullLightMask() << (8*(j+2));
if (color.lightparams == alpha.lightparams)
{
mask = color.GetFullLightMask() & alpha.GetFullLightMask();
if (mask)
{
for (int i = 0; i < 8; ++i)
{
if (mask & (1<<i))
{
GenerateLightShader<T>(object, uid_data, i, j, 3);
}
}
}
}
if (color.enablelighting && alpha.enablelighting)
{
// both have lighting, test if they use the same lights
int mask = 0;
uid_data.attnfunc |= color.attnfunc << (2 * j);
uid_data.attnfunc |= alpha.attnfunc << (2 * (j + 2));
uid_data.diffusefunc |= color.diffusefunc << (2 * j);
uid_data.diffusefunc |= alpha.diffusefunc << (2 * (j + 2));
uid_data.light_mask |= color.GetFullLightMask() << (8 * j);
uid_data.light_mask |= alpha.GetFullLightMask() << (8 * (j + 2));
if (color.lightparams == alpha.lightparams)
{
mask = color.GetFullLightMask() & alpha.GetFullLightMask();
if (mask)
{
for (int i = 0; i < 8; ++i)
{
if (mask & (1 << i))
{
GenerateLightShader<T>(object, uid_data, i, j, 3);
}
}
}
}
// no shared lights
for (int i = 0; i < 8; ++i)
{
if (!(mask&(1<<i)) && (color.GetFullLightMask() & (1<<i)))
GenerateLightShader<T>(object, uid_data, i, j, 1);
if (!(mask&(1<<i)) && (alpha.GetFullLightMask() & (1<<i)))
GenerateLightShader<T>(object, uid_data, i, j+2, 2);
}
}
else if (color.enablelighting || alpha.enablelighting)
{
// lights are disabled on one channel so process only the active ones
const LitChannel& workingchannel = color.enablelighting ? color : alpha;
const int lit_index = color.enablelighting ? j : (j+2);
int coloralpha = color.enablelighting ? 1 : 2;
// no shared lights
for (int i = 0; i < 8; ++i)
{
if (!(mask & (1 << i)) && (color.GetFullLightMask() & (1 << i)))
GenerateLightShader<T>(object, uid_data, i, j, 1);
if (!(mask & (1 << i)) && (alpha.GetFullLightMask() & (1 << i)))
GenerateLightShader<T>(object, uid_data, i, j + 2, 2);
}
}
else if (color.enablelighting || alpha.enablelighting)
{
// lights are disabled on one channel so process only the active ones
const LitChannel& workingchannel = color.enablelighting ? color : alpha;
const int lit_index = color.enablelighting ? j : (j + 2);
int coloralpha = color.enablelighting ? 1 : 2;
uid_data.light_mask |= workingchannel.GetFullLightMask() << (8*lit_index);
for (int i = 0; i < 8; ++i)
{
if (workingchannel.GetFullLightMask() & (1<<i))
GenerateLightShader<T>(object, uid_data, i, lit_index, coloralpha);
}
}
object.Write("lacc = clamp(lacc, 0, 255);\n");
object.Write("%s%d = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n", dest, j);
object.Write("}\n");
}
uid_data.light_mask |= workingchannel.GetFullLightMask() << (8 * lit_index);
for (int i = 0; i < 8; ++i)
{
if (workingchannel.GetFullLightMask() & (1 << i))
GenerateLightShader<T>(object, uid_data, i, lit_index, coloralpha);
}
}
object.Write("lacc = clamp(lacc, 0, 255);\n");
object.Write("%s%d = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n", dest, j);
object.Write("}\n");
}
}

View file

@ -8,24 +8,24 @@
// Expands a 3-bit value to 8 bits by repeating its bit pattern.
// The result is truncated to u8, so bits of `v` above the low three are not masked off first.
constexpr u8 Convert3To8(u8 v)
{
  // Swizzle bits: 00000123 -> 12312312
  return static_cast<u8>((v << 5) | (v << 2) | (v >> 1));
}
// Expands a 4-bit value to 8 bits by copying the nibble into both halves of the byte.
// The result is truncated to u8, so bits of `v` above the low four are not masked off first.
constexpr u8 Convert4To8(u8 v)
{
  // Swizzle bits: 00001234 -> 12341234
  return static_cast<u8>((v << 4) | v);
}
// Expands a 5-bit value to 8 bits: the five bits followed by a repeat of their top three.
// The result is truncated to u8, so bits of `v` above the low five are not masked off first.
constexpr u8 Convert5To8(u8 v)
{
  // Swizzle bits: 00012345 -> 12345123
  return static_cast<u8>((v << 3) | (v >> 2));
}
// Expands a 6-bit value to 8 bits: the six bits followed by a repeat of their top two.
// The result is truncated to u8, so bits of `v` above the low six are not masked off first.
constexpr u8 Convert6To8(u8 v)
{
  // Swizzle bits: 00123456 -> 12345612
  return static_cast<u8>((v << 2) | (v >> 4));
}

View file

@ -23,172 +23,175 @@ static Common::Flag s_FifoShuttingDown;
// Arguments of the most recent Video_BeginField call that was accepted; Video_EndField copies
// them into the swap event it queues.
static volatile struct
{
  u32 xfbAddr;
  u32 fbWidth;
  u32 fbStride;
  u32 fbHeight;
} s_beginFieldArgs;
// Asks the GPU loop to exit and then raises the "FIFO shutting down" flag.
void VideoBackendBase::Video_ExitLoop()
{
  Fifo::ExitGpuLoop();
  s_FifoShuttingDown.Set();
}
// Run from the CPU thread (from VideoInterface.cpp)
// Remembers the framebuffer description of the field that is starting so Video_EndField can
// hand it to the swap event. Does nothing unless the backend is initialized and XFB is enabled.
void VideoBackendBase::Video_BeginField(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight)
{
  if (m_initialized && g_ActiveConfig.bUseXFB)
  {
    s_beginFieldArgs.xfbAddr = xfbAddr;
    s_beginFieldArgs.fbWidth = fbWidth;
    s_beginFieldArgs.fbStride = fbStride;
    s_beginFieldArgs.fbHeight = fbHeight;
  }
}
// Run from the CPU thread (from VideoInterface.cpp)
// Syncs with the GPU and queues a SWAP_EVENT carrying the arguments saved by
// Video_BeginField. Does nothing unless the backend is initialized, XFB is enabled and a
// renderer exists.
void VideoBackendBase::Video_EndField()
{
  if (m_initialized && g_ActiveConfig.bUseXFB && g_renderer)
  {
    Fifo::SyncGPU(Fifo::SYNC_GPU_SWAP);

    AsyncRequests::Event e;
    e.time = 0;
    e.type = AsyncRequests::Event::SWAP_EVENT;

    e.swap_event.xfbAddr = s_beginFieldArgs.xfbAddr;
    e.swap_event.fbWidth = s_beginFieldArgs.fbWidth;
    e.swap_event.fbStride = s_beginFieldArgs.fbStride;
    e.swap_event.fbHeight = s_beginFieldArgs.fbHeight;
    AsyncRequests::GetInstance()->PushEvent(e, false);
  }
}
// Queues an EFB access request at pixel (x, y).
//
// type      - POKE_COLOR / POKE_Z write `InputData`; any other value is treated as a peek
//             (PEEK_COLOR selects the color peek, everything else the Z peek).
// InputData - value to write; only used for pokes.
// Returns 0 when EFB access is disabled in the active config and for every poke; for a peek,
// the value the request stored through `e.efb_peek.data`.
u32 VideoBackendBase::Video_AccessEFB(EFBAccessType type, u32 x, u32 y, u32 InputData)
{
  if (!g_ActiveConfig.bEFBAccessEnable)
  {
    return 0;
  }

  if (type == POKE_COLOR || type == POKE_Z)
  {
    AsyncRequests::Event e;
    e.type = type == POKE_COLOR ? AsyncRequests::Event::EFB_POKE_COLOR :
                                  AsyncRequests::Event::EFB_POKE_Z;
    e.time = 0;
    e.efb_poke.data = InputData;
    e.efb_poke.x = x;
    e.efb_poke.y = y;
    AsyncRequests::GetInstance()->PushEvent(e, false);
    return 0;
  }
  else
  {
    AsyncRequests::Event e;
    // Zero-initialized so a request that never writes back cannot yield an indeterminate value.
    u32 result = 0;
    e.type = type == PEEK_COLOR ? AsyncRequests::Event::EFB_PEEK_COLOR :
                                  AsyncRequests::Event::EFB_PEEK_Z;
    e.time = 0;
    e.efb_peek.x = x;
    e.efb_peek.y = y;
    e.efb_peek.data = &result;
    // NOTE(review): the `true` argument presumably makes PushEvent wait until the request has
    // been handled, which is what keeps &result valid -- confirm against AsyncRequests.
    AsyncRequests::GetInstance()->PushEvent(e, true);
    return result;
  }
}
// Returns g_perf_query's result for `type`, or 0 when g_perf_query->ShouldEmulate() is false.
// Syncs with the GPU first and, if the query object is not flushed yet, pushes a PERF_QUERY
// event before reading the result.
u32 VideoBackendBase::Video_GetQueryResult(PerfQueryType type)
{
  if (!g_perf_query->ShouldEmulate())
  {
    return 0;
  }

  Fifo::SyncGPU(Fifo::SYNC_GPU_PERFQUERY);

  AsyncRequests::Event e;
  e.time = 0;
  e.type = AsyncRequests::Event::PERF_QUERY;

  if (!g_perf_query->IsFlushed())
    AsyncRequests::GetInstance()->PushEvent(e, true);

  return g_perf_query->GetQueryResult(type);
}
// Reads bounding-box register `index` through a BBOX_READ request.
// Returns 0 when the backend does not support bounding boxes or when they are disabled in
// the active config; in the latter case an error is logged on the first call only.
u16 VideoBackendBase::Video_GetBoundingBox(int index)
{
  if (!g_ActiveConfig.backend_info.bSupportsBBox)
    return 0;

  if (!g_ActiveConfig.bBBoxEnable)
  {
    static bool warn_once = true;
    if (warn_once)
      ERROR_LOG(VIDEO, "BBox shall be used but it is disabled. Please use a gameini to enable it "
                       "for this game.");
    warn_once = false;
    return 0;
  }

  Fifo::SyncGPU(Fifo::SYNC_GPU_BBOX);

  AsyncRequests::Event e;
  // Zero-initialized so a request that never writes back cannot yield an indeterminate value.
  u16 result = 0;
  e.time = 0;
  e.type = AsyncRequests::Event::BBOX_READ;
  e.bbox.index = index;
  e.bbox.data = &result;
  AsyncRequests::GetInstance()->PushEvent(e, true);

  return result;
}
// Common start-up step: runs VideoCommon_Init(), clears the shutting-down flag, zeroes the
// saved begin-field arguments and resets the invalid-state marker.
void VideoBackendBase::InitializeShared()
{
  VideoCommon_Init();

  s_FifoShuttingDown.Clear();
  // The cast drops `volatile` so the object can be passed to memset.
  memset((void*)&s_beginFieldArgs, 0, sizeof(s_beginFieldArgs));
  m_invalid = false;
}
// Run from the CPU thread
// Serializes the shared video state through `p`: a `software` flag (this code always
// supplies false), the VideoCommon state and the saved begin-field arguments, each followed
// by a marker. After a read the backend is flagged invalid (see CheckInvalidState) and all
// vertex loaders are marked dirty.
void VideoBackendBase::DoState(PointerWrap& p)
{
  bool software = false;
  p.Do(software);

  if (p.GetMode() == PointerWrap::MODE_READ && software)
  {
    // change mode to abort load of incompatible save state.
    p.SetMode(PointerWrap::MODE_VERIFY);
  }

  VideoCommon_DoState(p);
  p.DoMarker("VideoCommon");

  p.Do(s_beginFieldArgs);
  p.DoMarker("VideoBackendBase");

  // Refresh state.
  if (p.GetMode() == PointerWrap::MODE_READ)
  {
    m_invalid = true;

    // Clear all caches that touch RAM
    // (? these don't appear to touch any emulation state that gets saved. moved to on load only.)
    VertexLoaderManager::MarkAllDirty();
  }
}
// If the backend was flagged invalid (DoState sets m_invalid after a read), clears the flag,
// reloads the BP state and invalidates the texture cache.
void VideoBackendBase::CheckInvalidState()
{
  if (m_invalid)
  {
    m_invalid = false;

    BPReload();
    TextureCacheBase::Invalidate();
  }
}

View file

@ -5,7 +5,7 @@
#pragma once
#include <cstring>
#include <functional> // for hash
#include <functional> // for hash
#include "Common/CommonTypes.h"
#include "Common/Hash.h"
@ -14,88 +14,86 @@
// m_components
// Bit flags naming the attributes a vertex carries. Each VB_HAS_UVn bit sits exactly
// VB_HAS_UVTEXMTXSHIFT (13) positions above the matching VB_HAS_TEXMTXIDXn bit.
enum
{
  VB_HAS_POSMTXIDX = (1 << 1),
  VB_HAS_TEXMTXIDX0 = (1 << 2),
  VB_HAS_TEXMTXIDX1 = (1 << 3),
  VB_HAS_TEXMTXIDX2 = (1 << 4),
  VB_HAS_TEXMTXIDX3 = (1 << 5),
  VB_HAS_TEXMTXIDX4 = (1 << 6),
  VB_HAS_TEXMTXIDX5 = (1 << 7),
  VB_HAS_TEXMTXIDX6 = (1 << 8),
  VB_HAS_TEXMTXIDX7 = (1 << 9),
  VB_HAS_TEXMTXIDXALL = (0xff << 2),

  // VB_HAS_POS=0, // Implied, it always has pos! don't bother testing
  VB_HAS_NRM0 = (1 << 10),
  VB_HAS_NRM1 = (1 << 11),
  VB_HAS_NRM2 = (1 << 12),
  VB_HAS_NRMALL = (7 << 10),

  VB_HAS_COL0 = (1 << 13),
  VB_HAS_COL1 = (1 << 14),

  VB_HAS_UV0 = (1 << 15),
  VB_HAS_UV1 = (1 << 16),
  VB_HAS_UV2 = (1 << 17),
  VB_HAS_UV3 = (1 << 18),
  VB_HAS_UV4 = (1 << 19),
  VB_HAS_UV5 = (1 << 20),
  VB_HAS_UV6 = (1 << 21),
  VB_HAS_UV7 = (1 << 22),
  VB_HAS_UVALL = (0xff << 15),
  VB_HAS_UVTEXMTXSHIFT = 13,
};
// Component data types of a vertex attribute; the enumerator values match the GX_* constants
// noted beside them.
enum VarType
{
  VAR_UNSIGNED_BYTE,   // GX_U8  = 0
  VAR_BYTE,            // GX_S8  = 1
  VAR_UNSIGNED_SHORT,  // GX_U16 = 2
  VAR_SHORT,           // GX_S16 = 3
  VAR_FLOAT,           // GX_F32 = 4
};
// Layout description of one vertex attribute.
// NOTE(review): field meanings below are read off the names only -- confirm against the
// vertex loader that fills them in.
struct AttributeFormat
{
  VarType type;    // component data type
  int components;  // presumably the number of components
  int offset;      // presumably the byte offset inside the vertex
  bool enable;     // presumably whether the attribute is present
  bool integer;    // presumably whether it is consumed as an integer attribute
};
// Backend-independent description of a vertex layout: the stride plus one AttributeFormat
// per attribute slot.
//
// Ordering and equality compare the raw object bytes with memcmp.
// NOTE(review): that includes any padding bytes, so instances presumably have to be
// zero-filled before their fields are set -- confirm at the construction sites.
struct PortableVertexDeclaration
{
  int stride;

  AttributeFormat position;
  AttributeFormat normals[3];
  AttributeFormat colors[2];
  AttributeFormat texcoords[8];
  AttributeFormat posmtx;

  inline bool operator<(const PortableVertexDeclaration& b) const
  {
    return memcmp(this, &b, sizeof(PortableVertexDeclaration)) < 0;
  }
  inline bool operator==(const PortableVertexDeclaration& b) const
  {
    return memcmp(this, &b, sizeof(PortableVertexDeclaration)) == 0;
  }
};
namespace std
{
// Hash support for PortableVertexDeclaration: HashFletcher over the raw object bytes, the
// same bytes that operator== compares.
template <>
struct hash<PortableVertexDeclaration>
{
  size_t operator()(const PortableVertexDeclaration& decl) const
  {
    return HashFletcher((u8*)&decl, sizeof(decl));
  }
};
}
// The implementation of this class is specific for GL/DX, so NativeVertexFormat.cpp
@ -106,16 +104,13 @@ struct hash<PortableVertexDeclaration>
// Abstract, non-copyable base for backend vertex formats. Subclasses implement
// SetupVertexPointers(); the stored PortableVertexDeclaration is exposed read-only.
class NativeVertexFormat : NonCopyable
{
public:
  virtual ~NativeVertexFormat() {}
  virtual void SetupVertexPointers() = 0;
  // Stride taken from the stored declaration.
  u32 GetVertexStride() const { return vtx_decl.stride; }
  const PortableVertexDeclaration& GetVertexDeclaration() const { return vtx_decl; }
protected:
  // Let subclasses construct.
  NativeVertexFormat() {}
  // Not initialized by this constructor.
  PortableVertexDeclaration vtx_decl;
};

View file

@ -15,20 +15,15 @@
#include "VideoCommon/OnScreenDisplay.h"
#include "VideoCommon/RenderBase.h"
namespace OSD
{
// One queued on-screen message.
struct Message
{
  Message() = default;
  Message(const std::string& s, u32 ts, u32 rgba) : m_str(s), m_timestamp(ts), m_rgba(rgba) {}
  std::string m_str;    // text to draw
  u32 m_timestamp = 0;  // compared against Common::Timer::GetTimeMs() to decide expiry
  u32 m_rgba = 0;       // colour; DrawMessages scales the top byte as the alpha value
};
static std::multimap<CallbackType, Callback> s_callbacks;
@ -36,55 +31,55 @@ static std::list<Message> s_msgList;
// Queues `str` for display in colour `rgba`; the stored timestamp is the current time plus
// `ms`, i.e. the moment the message expires.
void AddMessage(const std::string& str, u32 ms, u32 rgba)
{
  s_msgList.emplace_back(str, Common::Timer::GetTimeMs() + ms, rgba);
}
// Draws every queued message, one per 15-pixel row starting at (25, 15), and removes the
// ones whose time has run out. Does nothing when on-screen messages are disabled in SConfig.
// A message's alpha (top byte of m_rgba) is faded out over its last 1024 ms.
void DrawMessages()
{
  if (!SConfig::GetInstance().bOnScreenDisplayMessages)
    return;

  const int left = 25;
  int top = 15;
  auto it = s_msgList.begin();
  while (it != s_msgList.end())
  {
    int time_left = (int)(it->m_timestamp - Common::Timer::GetTimeMs());
    // Fade factor clamped to [0, 1]: 1.0 while at least 1024 ms remain, then linear to 0.0.
    // The bounds were previously swapped (max(1, min(0, x))), which always produced 1.0.
    float alpha = std::min(1.0f, std::max(0.0f, time_left / 1024.0f));
    u32 color = (it->m_rgba & 0xFFFFFF) | ((u32)((it->m_rgba >> 24) * alpha) << 24);

    g_renderer->RenderText(it->m_str, left, top, color);

    top += 15;

    // An expired message is still drawn this one last time before being dropped.
    if (time_left <= 0)
      it = s_msgList.erase(it);
    else
      ++it;
  }
}
// Drops every queued message.
void ClearMessages()
{
  s_msgList.clear();
}
// On-Screen Display Callbacks
// Registers `cb` under `type`; several callbacks may share one type.
void AddCallback(CallbackType type, Callback cb)
{
  s_callbacks.emplace(type, cb);
}
// Invokes every callback registered for `type`. After the Shutdown callbacks have run, all
// registrations (of every type) are removed.
void DoCallbacks(CallbackType type)
{
  auto it_bounds = s_callbacks.equal_range(type);
  for (auto it = it_bounds.first; it != it_bounds.second; ++it)
  {
    it->second();
  }

  // Wipe all callbacks on shutdown
  if (type == CallbackType::Shutdown)
    s_callbacks.clear();
}
} // namespace

View file

@ -13,15 +13,15 @@ namespace OSD
{
// On-screen message display (colored yellow by default)
// `ms` is how long the message stays up, in milliseconds.
void AddMessage(const std::string& str, u32 ms = 2000, u32 rgba = 0xFFFFFF30);
void DrawMessages();  // draw the current messages on the screen. Only call once per frame.
void ClearMessages();
// On-screen callbacks
// Kinds of events a callback can be registered for.
enum class CallbackType
{
  Initialization,
  OnFrame,
  Shutdown
};
using Callback = std::function<void()>;

View file

@ -2,27 +2,29 @@
// Licensed under GPLv2+
// Refer to the license.txt file included.
// DL facts:
// Ikaruga uses (nearly) NO display lists!
// Zelda WW uses TONS of display lists
// Zelda TP uses almost 100% display lists except menus (we like this!)
// Super Mario Galaxy has nearly all geometry and more than half of the state in DLs (great!)
// Note that it IS NOT GENERALLY POSSIBLE to precompile display lists! You can compile them as they
// are while interpreting them, and hope that the vertex format doesn't change, though, if you do it
// right when they are called. The reason is that the vertex format affects the sizes of the
// vertices.
#include "VideoCommon/OpcodeDecoding.h"
#include "Common/CommonTypes.h"
#include "Common/MsgHandler.h"
#include "Common/Logging/Log.h"
#include "Common/MsgHandler.h"
#include "Core/FifoPlayer/FifoRecorder.h"
#include "Core/HW/Memmap.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/OpcodeDecoding.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VideoCommon.h"
@ -32,100 +34,91 @@ bool g_bRecordFifoData = false;
namespace OpcodeDecoder
{
static bool s_bFifoErrorSeen = false;
// Runs the display list of `size` bytes and returns the cycle count reported by Run().
// The data comes from the FIFO aux buffer when the deterministic GPU thread is in use,
// otherwise from emulated memory at `address`. Returns 0 if no data pointer was obtained.
static u32 InterpretDisplayList(u32 address, u32 size)
{
  u8* startAddress;

  if (Fifo::UseDeterministicGPUThread())
    startAddress = (u8*)Fifo::PopFifoAuxBuffer(size);
  else
    startAddress = Memory::GetPointer(address);

  u32 cycles = 0;

  // Avoid the crash if Memory::GetPointer failed ..
  if (startAddress != nullptr)
  {
    // temporarily swap dl and non-dl (small "hack" for the stats)
    Statistics::SwapDL();

    Run(DataReader(startAddress, startAddress + size), &cycles, true);
    INCSTAT(stats.thisFrame.numDListsCalled);

    // un-swap
    Statistics::SwapDL();
  }

  return cycles;
}
// Preprocess-pass counterpart of InterpretDisplayList: pushes the display list bytes into
// the FIFO aux buffer and, if the pointer is valid, runs the list through Run<true>.
static void InterpretDisplayListPreprocess(u32 address, u32 size)
{
  u8* startAddress = Memory::GetPointer(address);

  // NOTE(review): the push happens before the null check below, so a failed GetPointer hands
  // nullptr to PushFifoAuxBuffer -- confirm that it tolerates this. It is left unconditional
  // because InterpretDisplayList pops `size` bytes unconditionally.
  Fifo::PushFifoAuxBuffer(startAddress, size);

  if (startAddress != nullptr)
  {
    Run<true>(DataReader(startAddress, startAddress + size), nullptr, true);
  }
}
// Reports an opcode the decoder does not recognise: first a translated panic
// alert describing the likely causes, then a second alert dumping the command
// processor FIFO registers.
//   cmd_byte:   the unrecognised command byte
//   buffer:     location of the command byte in the stream (printed with %p)
//   preprocess: whether the decoder was running in preprocess mode
static void UnknownOpcode(u8 cmd_byte, void* buffer, bool preprocess)
{
  // TODO(Omega): Maybe dump FIFO to file on this error
  PanicAlertT("GFX FIFO: Unknown Opcode (0x%02x @ %p, %s).\n"
              "This means one of the following:\n"
              "* The emulated GPU got desynced, disabling dual core can help\n"
              "* Command stream corrupted by some spurious memory bug\n"
              "* This really is an unknown opcode (unlikely)\n"
              "* Some other sort of bug\n\n"
              "Further errors will be sent to the Video Backend log and\n"
              "Dolphin will now likely crash or hang. Enjoy.",
              cmd_byte, buffer, preprocess ? "preprocess=true" : "preprocess=false");

  {
    SCPFifoStruct& fifo = CommandProcessor::fifo;

    PanicAlert("Illegal command %02x\n"
               "CPBase: 0x%08x\n"
               "CPEnd: 0x%08x\n"
               "CPHiWatermark: 0x%08x\n"
               "CPLoWatermark: 0x%08x\n"
               "CPReadWriteDistance: 0x%08x\n"
               "CPWritePointer: 0x%08x\n"
               "CPReadPointer: 0x%08x\n"
               "CPBreakpoint: 0x%08x\n"
               "bFF_GPReadEnable: %s\n"
               "bFF_BPEnable: %s\n"
               "bFF_BPInt: %s\n"
               "bFF_Breakpoint: %s\n"
               "bFF_GPLinkEnable: %s\n"
               "bFF_HiWatermarkInt: %s\n"
               "bFF_LoWatermarkInt: %s\n",
               cmd_byte, fifo.CPBase, fifo.CPEnd, fifo.CPHiWatermark, fifo.CPLoWatermark,
               fifo.CPReadWriteDistance, fifo.CPWritePointer, fifo.CPReadPointer, fifo.CPBreakpoint,
               fifo.bFF_GPReadEnable ? "true" : "false", fifo.bFF_BPEnable ? "true" : "false",
               fifo.bFF_BPInt ? "true" : "false", fifo.bFF_Breakpoint ? "true" : "false",
               fifo.bFF_GPLinkEnable ? "true" : "false", fifo.bFF_HiWatermarkInt ? "true" : "false",
               fifo.bFF_LoWatermarkInt ? "true" : "false");
  }
}
// Resets the decoder's "unknown opcode already reported" flag so that the next
// unknown opcode raises the panic alert again.
void Init()
{
  s_bFifoErrorSeen = false;
}
void Shutdown()
@ -135,188 +128,187 @@ void Shutdown()
// Decodes GX FIFO commands from `src` until the reader is empty or the next
// command is not completely available.
//   is_preprocess:   selects the preprocess variants of the register/vertex
//                    handlers and suppresses statistics and FIFO recording
//   src:             reader over the command bytes
//   cycles:          if non-null, receives the accumulated cycle estimate
//   in_display_list: true when decoding the body of a display list; a nested
//                    GX_CMD_CALL_DL is then only logged, not followed
// Returns a pointer to the start of the first opcode that was not processed.
template <bool is_preprocess>
u8* Run(DataReader src, u32* cycles, bool in_display_list)
{
  u32 totalCycles = 0;
  u8* opcodeStart;
  while (true)
  {
    // Remember where this opcode begins so that an incomplete command can be
    // reported back to the caller untouched.
    opcodeStart = src.GetPointer();

    if (!src.size())
      goto end;

    u8 cmd_byte = src.Read<u8>();
    int refarray;
    switch (cmd_byte)
    {
    case GX_NOP:
      totalCycles += 6;  // Hm, this means that we scan over nop streams pretty slowly...
      break;

    case GX_UNKNOWN_RESET:
      totalCycles += 6;  // Datel software uses this command
      DEBUG_LOG(VIDEO, "GX Reset?: %08x", cmd_byte);
      break;

    case GX_LOAD_CP_REG:
    {
      // One sub-command byte plus a 32-bit value.
      if (src.size() < 1 + 4)
        goto end;
      totalCycles += 12;
      u8 sub_cmd = src.Read<u8>();
      u32 value = src.Read<u32>();
      LoadCPReg(sub_cmd, value, is_preprocess);
      if (!is_preprocess)
        INCSTAT(stats.thisFrame.numCPLoads);
    }
    break;

    case GX_LOAD_XF_REG:
    {
      if (src.size() < 4)
        goto end;
      u32 Cmd2 = src.Read<u32>();
      // Bits 16-19 hold the number of 32-bit words minus one.
      int transfer_size = ((Cmd2 >> 16) & 15) + 1;
      if (src.size() < transfer_size * sizeof(u32))
        goto end;
      totalCycles += 18 + 6 * transfer_size;
      if (!is_preprocess)
      {
        u32 xf_address = Cmd2 & 0xFFFF;
        LoadXFReg(transfer_size, xf_address, src);

        INCSTAT(stats.thisFrame.numXFLoads);
      }
      // `src` was passed by value above, so the payload still has to be skipped here.
      src.Skip<u32>(transfer_size);
    }
    break;

    case GX_LOAD_INDX_A:  // used for position matrices
      refarray = 0xC;
      goto load_indx;
    case GX_LOAD_INDX_B:  // used for normal matrices
      refarray = 0xD;
      goto load_indx;
    case GX_LOAD_INDX_C:  // used for postmatrices
      refarray = 0xE;
      goto load_indx;
    case GX_LOAD_INDX_D:  // used for lights
      refarray = 0xF;
      goto load_indx;
    load_indx:
      if (src.size() < 4)
        goto end;
      totalCycles += 6;
      if (is_preprocess)
        PreprocessIndexedXF(src.Read<u32>(), refarray);
      else
        LoadIndexedXF(src.Read<u32>(), refarray);
      break;

    case GX_CMD_CALL_DL:
    {
      if (src.size() < 8)
        goto end;
      u32 address = src.Read<u32>();
      u32 count = src.Read<u32>();

      if (in_display_list)
      {
        totalCycles += 6;
        WARN_LOG(VIDEO, "recursive display list detected");
      }
      else
      {
        if (is_preprocess)
          InterpretDisplayListPreprocess(address, count);
        else
          totalCycles += 6 + InterpretDisplayList(address, count);
      }
    }
    break;

    case GX_CMD_UNKNOWN_METRICS:  // zelda 4 swords calls it and checks the metrics registers after
                                  // that
      totalCycles += 6;
      DEBUG_LOG(VIDEO, "GX 0x44: %08x", cmd_byte);
      break;

    case GX_CMD_INVL_VC:  // Invalidate Vertex Cache
      totalCycles += 6;
      DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)");
      break;

    case GX_LOAD_BP_REG:
      // In skipped_frame case: We have to let BP writes through because they set
      // tokens and stuff.  TODO: Call a much simplified LoadBPReg instead.
      {
        if (src.size() < 4)
          goto end;
        totalCycles += 12;
        u32 bp_cmd = src.Read<u32>();
        if (is_preprocess)
        {
          LoadBPRegPreprocess(bp_cmd);
        }
        else
        {
          LoadBPReg(bp_cmd);
          INCSTAT(stats.thisFrame.numBPLoads);
        }
      }
      break;

    // draw primitives
    default:
      if ((cmd_byte & 0xC0) == 0x80)
      {
        // load vertices
        if (src.size() < 2)
          goto end;
        u16 num_vertices = src.Read<u16>();
        int bytes = VertexLoaderManager::RunVertices(
            cmd_byte & GX_VAT_MASK,  // Vertex loader index (0 - 7)
            (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, num_vertices, src,
            Fifo::WillSkipCurrentFrame(), is_preprocess);

        // A negative result stops decoding at this opcode.
        if (bytes < 0)
          goto end;

        src.Skip(bytes);

        // 4 GPU ticks per vertex, 3 CPU ticks per GPU tick
        totalCycles += num_vertices * 4 * 3 + 6;
      }
      else
      {
        // Only the first unknown opcode raises the panic alert; every one is logged.
        if (!s_bFifoErrorSeen)
          UnknownOpcode(cmd_byte, opcodeStart, is_preprocess);
        ERROR_LOG(VIDEO, "FIFO: Unknown Opcode(0x%02x @ %p, preprocessing = %s)", cmd_byte,
                  opcodeStart, is_preprocess ? "yes" : "no");
        s_bFifoErrorSeen = true;
        totalCycles += 1;
      }
      break;
    }

    // Display lists get added directly into the FIFO stream
    if (!is_preprocess && g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL)
    {
      u8* opcodeEnd;
      opcodeEnd = src.GetPointer();
      FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(opcodeEnd - opcodeStart));
    }
  }

end:
  if (cycles)
  {
    *cycles = totalCycles;
  }
  return opcodeStart;
}
template u8* Run<true>(DataReader src, u32* cycles, bool in_display_list);
template u8* Run<false>(DataReader src, u32* cycles, bool in_display_list);
} // namespace OpcodeDecoder
} // namespace OpcodeDecoder

View file

@ -8,44 +8,43 @@
class DataReader;
// GX FIFO command bytes.
#define GX_NOP                 0x00
#define GX_UNKNOWN_RESET       0x01

// Register load commands.
#define GX_LOAD_BP_REG         0x61
#define GX_LOAD_CP_REG         0x08
#define GX_LOAD_XF_REG         0x10
#define GX_LOAD_INDX_A         0x20
#define GX_LOAD_INDX_B         0x28
#define GX_LOAD_INDX_C         0x30
#define GX_LOAD_INDX_D         0x38

#define GX_CMD_CALL_DL         0x40
#define GX_CMD_UNKNOWN_METRICS 0x44
#define GX_CMD_INVL_VC         0x48

// Fields of a draw command byte: primitive type and vertex format index.
#define GX_PRIMITIVE_MASK      0x78
#define GX_PRIMITIVE_SHIFT     3
#define GX_VAT_MASK            0x07

// These values are the values extracted using GX_PRIMITIVE_MASK
// and GX_PRIMITIVE_SHIFT.
// GX_DRAW_QUADS_2 behaves the same way as GX_DRAW_QUADS.
#define GX_DRAW_QUADS          0x0  // 0x80
#define GX_DRAW_QUADS_2        0x1  // 0x88
#define GX_DRAW_TRIANGLES      0x2  // 0x90
#define GX_DRAW_TRIANGLE_STRIP 0x3  // 0x98
#define GX_DRAW_TRIANGLE_FAN   0x4  // 0xA0
#define GX_DRAW_LINES          0x5  // 0xA8
#define GX_DRAW_LINE_STRIP     0x6  // 0xB0
#define GX_DRAW_POINTS         0x7  // 0xB8
namespace OpcodeDecoder
{
// Prepare / tear down the decoder state.
void Init();
void Shutdown();

// Decodes the commands readable from `src`. If `cycles` is non-null it receives
// the accumulated cycle estimate. Returns a pointer to the first opcode that was
// not processed. `is_preprocess` defaults to the normal (non-preprocess) decoder.
template <bool is_preprocess = false>
u8* Run(DataReader src, u32* cycles, bool in_display_list);

}  // namespace OpcodeDecoder

View file

@ -2,13 +2,13 @@
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <memory>
#include "VideoCommon/PerfQueryBase.h"
#include <memory>
#include "VideoCommon/VideoConfig.h"
std::unique_ptr<PerfQueryBase> g_perf_query;
// Returns the bPerfQueriesEnable setting of the active video configuration.
bool PerfQueryBase::ShouldEmulate()
{
  return g_ActiveConfig.bPerfQueriesEnable;
}

View file

@ -9,61 +9,51 @@
// Identifies an individual performance counter; PQ_NUM_MEMBERS is the count.
enum PerfQueryType
{
  PQ_ZCOMP_INPUT_ZCOMPLOC = 0,
  PQ_ZCOMP_OUTPUT_ZCOMPLOC,
  PQ_ZCOMP_INPUT,
  PQ_ZCOMP_OUTPUT,
  PQ_BLEND_INPUT,
  PQ_EFB_COPY_CLOCKS,
  PQ_NUM_MEMBERS
};
// Groups of counters that are enabled and disabled together; PQG_NUM_MEMBERS is
// the count.
enum PerfQueryGroup
{
  PQG_ZCOMP_ZCOMPLOC,
  PQG_ZCOMP,
  PQG_EFB_COPY_CLOCKS,
  PQG_NUM_MEMBERS,
};
// Base class for performance query implementations. Every virtual here has a
// do-nothing default, so a subclass only overrides what it supports.
class PerfQueryBase
{
public:
  PerfQueryBase() : m_query_count(0) {}
  virtual ~PerfQueryBase() {}
  // Checks if performance queries are enabled in the gameini configuration.
  // NOTE: Called from CPU+GPU thread
  static bool ShouldEmulate();

  // Begin querying the specified value for the following host GPU commands
  virtual void EnableQuery(PerfQueryGroup type) {}
  // Stop querying the specified value for the following host GPU commands
  virtual void DisableQuery(PerfQueryGroup type) {}
  // Reset query counters to zero and drop any pending queries
  virtual void ResetQuery() {}
  // Return the measured value for the specified query type
  // NOTE: Called from CPU thread
  virtual u32 GetQueryResult(PerfQueryType type) { return 0; }
  // Request the value of any pending queries - causes a pipeline flush and thus should be used
  // carefully!
  virtual void FlushResults() {}
  // True if there are no further pending query results
  // NOTE: Called from CPU thread
  virtual bool IsFlushed() const { return true; }

protected:
  // TODO: sloppy
  volatile u32 m_query_count;
  volatile u32 m_results[PQG_NUM_MEMBERS];
};
extern std::unique_ptr<PerfQueryBase> g_perf_query;

View file

@ -2,7 +2,6 @@
// Licensed under GPLv2+
// Refer to the license.txt file included.
// http://www.nvidia.com/object/General_FAQ.html#t6 !!!!!
#include <atomic>
@ -23,81 +22,75 @@
namespace PixelEngine
{
// Z configuration register: the raw 16-bit value (Hex) overlaid with its
// bitfield view.
union UPEZConfReg {
  u16 Hex;
  struct
  {
    u16 ZCompEnable : 1;  // Z Comparator Enable
    u16 Function : 3;
    u16 ZUpdEnable : 1;
    u16 : 11;
  };
};
// Alpha configuration register: the raw 16-bit value (Hex) overlaid with its
// bitfield view.
union UPEAlphaConfReg {
  u16 Hex;
  struct
  {
    u16 BMMath : 1;   // GX_BM_BLEND || GX_BM_SUBSTRACT
    u16 BMLogic : 1;  // GX_BM_LOGIC
    u16 Dither : 1;
    u16 ColorUpdEnable : 1;
    u16 AlphaUpdEnable : 1;
    u16 DstFactor : 3;
    u16 SrcFactor : 3;
    u16 Substract : 1;  // Additive mode by default
    u16 BlendOperator : 4;
  };
};
// Destination alpha configuration register: the raw 16-bit value (Hex) overlaid
// with its bitfield view.
union UPEDstAlphaConfReg {
  u16 Hex;
  struct
  {
    u16 DstAlpha : 8;
    u16 Enable : 1;
    u16 : 7;
  };
};
// Alpha mode configuration register: the raw 16-bit value (Hex) overlaid with
// its bitfield view.
union UPEAlphaModeConfReg {
  u16 Hex;
  struct
  {
    u16 Threshold : 8;
    u16 CompareMode : 8;
  };
};
// fifo Control Register
// The raw 16-bit value (Hex) overlaid with its bitfield view. Default
// construction zeroes the register; the converting constructor wraps a raw value.
union UPECtrlReg {
  struct
  {
    u16 PETokenEnable : 1;
    u16 PEFinishEnable : 1;
    u16 PEToken : 1;   // write only
    u16 PEFinish : 1;  // write only
    u16 : 12;
  };
  u16 Hex;
  UPECtrlReg() { Hex = 0; }
  UPECtrlReg(u16 _hex) { Hex = _hex; }
};
// STATE_TO_SAVE
// Register contents; serialized by DoState.
static UPEZConfReg m_ZConf;
static UPEAlphaConfReg m_AlphaConf;
static UPEDstAlphaConfReg m_DstAlphaConf;
static UPEAlphaModeConfReg m_AlphaModeConf;
static UPEAlphaReadReg m_AlphaRead;
static UPECtrlReg m_Control;
// static u16 m_Token; // token value most recently encountered

// Pending-interrupt flags (0 or 1); combined with the enable bits of m_Control
// in UpdateInterrupts.
static std::atomic<u32> s_signal_token_interrupt;
static std::atomic<u32> s_signal_finish_interrupt;
@ -107,21 +100,21 @@ static int et_SetFinishOnMainThread;
// Interrupt cause values passed to ProcessorInterface::SetInterrupt.
enum
{
  INT_CAUSE_PE_TOKEN = 0x200,   // GP Token
  INT_CAUSE_PE_FINISH = 0x400,  // GP Finished
};
// Serializes the pixel engine registers and the pending-interrupt flags through
// `p`. The field order defines the layout and must not change.
void DoState(PointerWrap& p)
{
  p.Do(m_ZConf);
  p.Do(m_AlphaConf);
  p.Do(m_DstAlphaConf);
  p.Do(m_AlphaModeConf);
  p.Do(m_AlphaRead);
  p.DoPOD(m_Control);

  p.Do(s_signal_token_interrupt);
  p.Do(s_signal_finish_interrupt);
}
static void UpdateInterrupts();
@ -132,128 +125,118 @@ static void SetFinish_OnMainThread(u64 userdata, s64 cyclesLate);
// Clears every pixel engine register and pending-interrupt flag, then registers
// the two CoreTiming events used to deliver token/finish notifications.
void Init()
{
  m_Control.Hex = 0;
  m_ZConf.Hex = 0;
  m_AlphaConf.Hex = 0;
  m_DstAlphaConf.Hex = 0;
  m_AlphaModeConf.Hex = 0;
  m_AlphaRead.Hex = 0;

  s_signal_token_interrupt.store(0);
  s_signal_finish_interrupt.store(0);

  et_SetTokenOnMainThread = CoreTiming::RegisterEvent("SetToken", SetToken_OnMainThread);
  et_SetFinishOnMainThread = CoreTiming::RegisterEvent("SetFinish", SetFinish_OnMainThread);
}
// Registers the 16-bit MMIO handlers of the pixel engine on `mmio`, with every
// register address OR-ed into `base`.
void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
{
  // Directly mapped registers.
  struct
  {
    u32 addr;
    u16* ptr;
  } directly_mapped_vars[] = {
      {PE_ZCONF, &m_ZConf.Hex},
      {PE_ALPHACONF, &m_AlphaConf.Hex},
      {PE_DSTALPHACONF, &m_DstAlphaConf.Hex},
      {PE_ALPHAMODE, &m_AlphaModeConf.Hex},
      {PE_ALPHAREAD, &m_AlphaRead.Hex},
  };
  for (auto& mapped_var : directly_mapped_vars)
  {
    mmio->Register(base | mapped_var.addr, MMIO::DirectRead<u16>(mapped_var.ptr),
                   MMIO::DirectWrite<u16>(mapped_var.ptr));
  }

  // Performance queries registers: read only, need to call the video backend
  // to get the results.
  struct
  {
    u32 addr;
    PerfQueryType pqtype;
  } pq_regs[] = {
      {PE_PERF_ZCOMP_INPUT_ZCOMPLOC_L, PQ_ZCOMP_INPUT_ZCOMPLOC},
      {PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_L, PQ_ZCOMP_OUTPUT_ZCOMPLOC},
      {PE_PERF_ZCOMP_INPUT_L, PQ_ZCOMP_INPUT},
      {PE_PERF_ZCOMP_OUTPUT_L, PQ_ZCOMP_OUTPUT},
      {PE_PERF_BLEND_INPUT_L, PQ_BLEND_INPUT},
      {PE_PERF_EFB_COPY_CLOCKS_L, PQ_EFB_COPY_CLOCKS},
  };
  for (auto& pq_reg : pq_regs)
  {
    // Each 32-bit result is exposed as two 16-bit registers: the low half at
    // `addr`, the high half at `addr + 2`.
    mmio->Register(base | pq_reg.addr,
                   MMIO::ComplexRead<u16>([pq_reg](u32) {
                     return g_video_backend->Video_GetQueryResult(pq_reg.pqtype) & 0xFFFF;
                   }),
                   MMIO::InvalidWrite<u16>());
    mmio->Register(base | (pq_reg.addr + 2),
                   MMIO::ComplexRead<u16>([pq_reg](u32) {
                     return g_video_backend->Video_GetQueryResult(pq_reg.pqtype) >> 16;
                   }),
                   MMIO::InvalidWrite<u16>());
  }

  // Control register
  mmio->Register(base | PE_CTRL_REGISTER, MMIO::DirectRead<u16>(&m_Control.Hex),
                 MMIO::ComplexWrite<u16>([](u32, u16 val) {
                   UPECtrlReg tmpCtrl(val);

                   // Writing the PEToken / PEFinish bit clears the matching
                   // pending-interrupt flag.
                   if (tmpCtrl.PEToken)
                     s_signal_token_interrupt.store(0);

                   if (tmpCtrl.PEFinish)
                     s_signal_finish_interrupt.store(0);

                   m_Control.PETokenEnable = tmpCtrl.PETokenEnable;
                   m_Control.PEFinishEnable = tmpCtrl.PEFinishEnable;
                   m_Control.PEToken = 0;   // this flag is write only
                   m_Control.PEFinish = 0;  // this flag is write only

                   DEBUG_LOG(PIXELENGINE, "(w16) CTRL_REGISTER: 0x%04x", val);
                   UpdateInterrupts();
                 }));

  // Token register, readonly.
  mmio->Register(base | PE_TOKEN_REG, MMIO::DirectRead<u16>(&CommandProcessor::fifo.PEToken),
                 MMIO::InvalidWrite<u16>());

  // BBOX registers, readonly and need to update a flag.
  for (int i = 0; i < 4; ++i)
  {
    mmio->Register(base | (PE_BBOX_LEFT + 2 * i),
                   MMIO::ComplexRead<u16>([i](u32) {
                     BoundingBox::active = false;
                     return g_video_backend->Video_GetBoundingBox(i);
                   }),
                   MMIO::InvalidWrite<u16>());
  }
}
// Recomputes both pixel engine interrupt lines: a line is active when its
// pending flag and the matching enable bit in m_Control are both set.
static void UpdateInterrupts()
{
  // check if there is a token-interrupt
  UpdateTokenInterrupt((s_signal_token_interrupt.load() & m_Control.PETokenEnable) != 0);

  // check if there is a finish-interrupt
  UpdateFinishInterrupt((s_signal_finish_interrupt.load() & m_Control.PEFinishEnable) != 0);
}
// Forwards the token interrupt state to the processor interface.
static void UpdateTokenInterrupt(bool active)
{
  ProcessorInterface::SetInterrupt(INT_CAUSE_PE_TOKEN, active);
}
// Forwards the finish interrupt state to the processor interface.
static void UpdateFinishInterrupt(bool active)
{
  ProcessorInterface::SetInterrupt(INT_CAUSE_PE_FINISH, active);
}
// TODO(mb2): Refactor SetTokenINT_OnMainThread(u64 userdata, int cyclesLate).
@ -263,62 +246,64 @@ static void UpdateFinishInterrupt(bool active)
// Called only if BPMEM_PE_TOKEN_INT_ID is ack by GP
// CoreTiming callback scheduled by SetToken.
//   userdata:   bits 0-15 carry the token value; any bit above 15 set means
//               the token interrupt should be raised
//   cyclesLate: unused
static void SetToken_OnMainThread(u64 userdata, s64 cyclesLate)
{
  // XXX: No 16-bit atomic store available, so cheat and use 32-bit.
  // That's what we've always done. We're counting on fifo.PEToken to be
  // 4-byte padded.
  Common::AtomicStore(*(volatile u32*)&CommandProcessor::fifo.PEToken, userdata & 0xffff);
  INFO_LOG(PIXELENGINE, "VIDEO Backend raises INT_CAUSE_PE_TOKEN (btw, token: %04x)",
           CommandProcessor::fifo.PEToken);
  if (userdata >> 16)
  {
    s_signal_token_interrupt.store(1);
    UpdateInterrupts();
  }
  CommandProcessor::SetInterruptTokenWaiting(false);
}
// CoreTiming callback scheduled by SetFinish: raises the finish interrupt,
// clears the "finish waiting" state and notifies Core of the frame update.
// Both parameters are unused.
static void SetFinish_OnMainThread(u64 userdata, s64 cyclesLate)
{
  s_signal_finish_interrupt.store(1);
  UpdateInterrupts();
  CommandProcessor::SetInterruptFinishWaiting(false);

  Core::FrameUpdateOnCPUThread();
}
// SetToken
// THIS IS EXECUTED FROM VIDEO THREAD
// Marks a token as pending and schedules SetToken_OnMainThread exactly once.
// The event's userdata packs the token in the low 16 bits and the acknowledge
// flag above them.
void SetToken(const u16 _token, const int _bSetTokenAcknowledge)
{
  if (_bSetTokenAcknowledge)  // set token INT
  {
    s_signal_token_interrupt.store(1);
  }

  CommandProcessor::SetInterruptTokenWaiting(true);

  // The plain scheduler is used in single-core or deterministic-GPU-thread
  // mode; otherwise the threadsafe variant is used.
  if (!SConfig::GetInstance().bCPUThread || Fifo::UseDeterministicGPUThread())
    CoreTiming::ScheduleEvent(0, et_SetTokenOnMainThread, _token | (_bSetTokenAcknowledge << 16));
  else
    CoreTiming::ScheduleEvent_Threadsafe(0, et_SetTokenOnMainThread,
                                         _token | (_bSetTokenAcknowledge << 16));
}
// SetFinish
// THIS IS EXECUTED FROM VIDEO THREAD (BPStructs.cpp) when a new frame has been drawn
// Marks a finish as pending and schedules SetFinish_OnMainThread exactly once.
void SetFinish()
{
  CommandProcessor::SetInterruptFinishWaiting(true);

  // The plain scheduler is used in single-core or deterministic-GPU-thread
  // mode; otherwise the threadsafe variant is used.
  if (!SConfig::GetInstance().bCPUThread || Fifo::UseDeterministicGPUThread())
    CoreTiming::ScheduleEvent(0, et_SetFinishOnMainThread, 0);
  else
    CoreTiming::ScheduleEvent_Threadsafe(0, et_SetFinishOnMainThread, 0);

  INFO_LOG(PIXELENGINE, "VIDEO Set Finish");
}
// Returns a copy of the current alpha read register.
UPEAlphaReadReg GetAlphaReadMode()
{
  return m_AlphaRead;
}
} // end of namespace PixelEngine
} // end of namespace PixelEngine

View file

@ -6,55 +6,57 @@
#include "Common/CommonTypes.h"
class PointerWrap;
// Forward declaration; only pointers to MMIO::Mapping are used in this header.
namespace MMIO
{
class Mapping;
}
// internal hardware addresses
// Register offsets; RegisterMMIO ORs each of them into its base address.
enum
{
  PE_ZCONF = 0x00,          // Z Config
  PE_ALPHACONF = 0x02,      // Alpha Config
  PE_DSTALPHACONF = 0x04,   // Destination Alpha Config
  PE_ALPHAMODE = 0x06,      // Alpha Mode Config
  PE_ALPHAREAD = 0x08,      // Alpha Read
  PE_CTRL_REGISTER = 0x0a,  // Control
  PE_TOKEN_REG = 0x0e,      // Token
  PE_BBOX_LEFT = 0x10,      // Bounding Box Left Pixel
  PE_BBOX_RIGHT = 0x12,     // Bounding Box Right Pixel
  PE_BBOX_TOP = 0x14,       // Bounding Box Top Pixel
  PE_BBOX_BOTTOM = 0x16,    // Bounding Box Bottom Pixel

  // NOTE: Order not verified
  // These indicate the number of quads that are being used as input/output for each particular
  // stage
  PE_PERF_ZCOMP_INPUT_ZCOMPLOC_L = 0x18,
  PE_PERF_ZCOMP_INPUT_ZCOMPLOC_H = 0x1a,
  PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_L = 0x1c,
  PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_H = 0x1e,
  PE_PERF_ZCOMP_INPUT_L = 0x20,
  PE_PERF_ZCOMP_INPUT_H = 0x22,
  PE_PERF_ZCOMP_OUTPUT_L = 0x24,
  PE_PERF_ZCOMP_OUTPUT_H = 0x26,
  PE_PERF_BLEND_INPUT_L = 0x28,
  PE_PERF_BLEND_INPUT_H = 0x2a,
  PE_PERF_EFB_COPY_CLOCKS_L = 0x2c,
  PE_PERF_EFB_COPY_CLOCKS_H = 0x2e,
};
namespace PixelEngine
{
// ReadMode specifies the returned alpha channel for EFB peeks
// The raw 16-bit register value (Hex) is overlaid with its bitfield view.
union UPEAlphaReadReg {
  u16 Hex;
  struct
  {
    u16 ReadMode : 2;
    u16 : 14;
  };
};
void Init();
void DoState(PointerWrap &p);
void DoState(PointerWrap& p);
void RegisterMMIO(MMIO::Mapping* mmio, u32 base);
@ -63,4 +65,4 @@ void SetToken(const u16 _token, const int _bSetTokenAcknowledge);
void SetFinish();
UPEAlphaReadReg GetAlphaReadMode();
} // end of namespace PixelEngine
} // end of namespace PixelEngine

File diff suppressed because it is too large Load diff

View file

@ -12,104 +12,133 @@
// Different ways to achieve rendering with destination alpha
enum DSTALPHA_MODE
{
  DSTALPHA_NONE,              // Render normally, without destination alpha
  DSTALPHA_ALPHA_PASS,        // Render normally first, then render again for alpha
  DSTALPHA_DUAL_SOURCE_BLEND  // Use dual-source blending
};
#pragma pack(1)
struct pixel_shader_uid_data
{
// TODO: Optimize field order for easy access!
// TODO: Optimize field order for easy access!
u32 num_values; // TODO: Shouldn't be a u32
u32 NumValues() const { return num_values; }
u32 num_values; // TODO: Shouldn't be a u32
u32 NumValues() const { return num_values; }
u32 components : 23;
u32 dstAlphaMode : 2;
u32 Pretest : 2;
u32 nIndirectStagesUsed : 4;
u32 stereo : 1;
u32 components : 23;
u32 dstAlphaMode : 2;
u32 Pretest : 2;
u32 nIndirectStagesUsed : 4;
u32 stereo : 1;
u32 genMode_numtexgens : 4;
u32 genMode_numtevstages : 4;
u32 genMode_numindstages : 3;
u32 alpha_test_comp0 : 3;
u32 alpha_test_comp1 : 3;
u32 alpha_test_logic : 2;
u32 alpha_test_use_zcomploc_hack : 1;
u32 fog_proj : 1;
u32 fog_fsel : 3;
u32 fog_RangeBaseEnabled : 1;
u32 ztex_op : 2;
u32 fast_depth_calc : 1;
u32 per_pixel_depth : 1;
u32 forced_early_z : 1;
u32 early_ztest : 1;
u32 bounding_box : 1;
u32 genMode_numtexgens : 4;
u32 genMode_numtevstages : 4;
u32 genMode_numindstages : 3;
u32 alpha_test_comp0 : 3;
u32 alpha_test_comp1 : 3;
u32 alpha_test_logic : 2;
u32 alpha_test_use_zcomploc_hack : 1;
u32 fog_proj : 1;
u32 fog_fsel : 3;
u32 fog_RangeBaseEnabled : 1;
u32 ztex_op : 2;
u32 fast_depth_calc : 1;
u32 per_pixel_depth : 1;
u32 forced_early_z : 1;
u32 early_ztest : 1;
u32 bounding_box : 1;
// TODO: 29 bits of padding is a waste. Can we free up some bits elseware?
u32 zfreeze : 1;
u32 msaa : 1;
u32 ssaa : 1;
u32 pad : 29;
// TODO: 29 bits of padding is a waste. Can we free up some bits elseware?
u32 zfreeze : 1;
u32 msaa : 1;
u32 ssaa : 1;
u32 pad : 29;
u32 texMtxInfo_n_projection : 8; // 8x1 bit
u32 tevindref_bi0 : 3;
u32 tevindref_bc0 : 3;
u32 tevindref_bi1 : 3;
u32 tevindref_bc1 : 3;
u32 tevindref_bi2 : 3;
u32 tevindref_bc3 : 3;
u32 tevindref_bi4 : 3;
u32 tevindref_bc4 : 3;
u32 texMtxInfo_n_projection : 8; // 8x1 bit
u32 tevindref_bi0 : 3;
u32 tevindref_bc0 : 3;
u32 tevindref_bi1 : 3;
u32 tevindref_bc1 : 3;
u32 tevindref_bi2 : 3;
u32 tevindref_bc3 : 3;
u32 tevindref_bi4 : 3;
u32 tevindref_bc4 : 3;
inline void SetTevindrefValues(int index, u32 texcoord, u32 texmap)
{
if (index == 0)
{
tevindref_bc0 = texcoord;
tevindref_bi0 = texmap;
}
else if (index == 1)
{
tevindref_bc1 = texcoord;
tevindref_bi1 = texmap;
}
else if (index == 2)
{
tevindref_bc3 = texcoord;
tevindref_bi2 = texmap;
}
else if (index == 3)
{
tevindref_bc4 = texcoord;
tevindref_bi4 = texmap;
}
}
inline void SetTevindrefTexmap(int index, u32 texmap)
{
if (index == 0)
{
tevindref_bi0 = texmap;
}
else if (index == 1)
{
tevindref_bi1 = texmap;
}
else if (index == 2)
{
tevindref_bi2 = texmap;
}
else if (index == 3)
{
tevindref_bi4 = texmap;
}
}
inline void SetTevindrefValues(int index, u32 texcoord, u32 texmap)
{
if (index == 0) { tevindref_bc0 = texcoord; tevindref_bi0 = texmap; }
else if (index == 1) { tevindref_bc1 = texcoord; tevindref_bi1 = texmap; }
else if (index == 2) { tevindref_bc3 = texcoord; tevindref_bi2 = texmap; }
else if (index == 3) { tevindref_bc4 = texcoord; tevindref_bi4 = texmap; }
}
inline void SetTevindrefTexmap(int index, u32 texmap)
{
if (index == 0) { tevindref_bi0 = texmap; }
else if (index == 1) { tevindref_bi1 = texmap; }
else if (index == 2) { tevindref_bi2 = texmap; }
else if (index == 3) { tevindref_bi4 = texmap; }
}
struct
{
// TODO: Can save a lot space by removing the padding bits
u32 cc : 24;
u32 ac : 24;
struct {
// TODO: Can save a lot space by removing the padding bits
u32 cc : 24;
u32 ac : 24;
u32 tevorders_texmap : 3;
u32 tevorders_texcoord : 3;
u32 tevorders_enable : 1;
u32 tevorders_colorchan : 3;
u32 pad1 : 6;
u32 tevorders_texmap : 3;
u32 tevorders_texcoord : 3;
u32 tevorders_enable : 1;
u32 tevorders_colorchan : 3;
u32 pad1 : 6;
// TODO: Clean up the swapXY mess
u32 hasindstage : 1;
u32 tevind : 21;
u32 tevksel_swap1a : 2;
u32 tevksel_swap2a : 2;
u32 tevksel_swap1b : 2;
u32 tevksel_swap2b : 2;
u32 pad2 : 2;
// TODO: Clean up the swapXY mess
u32 hasindstage : 1;
u32 tevind : 21;
u32 tevksel_swap1a : 2;
u32 tevksel_swap2a : 2;
u32 tevksel_swap1b : 2;
u32 tevksel_swap2b : 2;
u32 pad2 : 2;
u32 tevksel_swap1c : 2;
u32 tevksel_swap2c : 2;
u32 tevksel_swap1d : 2;
u32 tevksel_swap2d : 2;
u32 tevksel_kc : 5;
u32 tevksel_ka : 5;
u32 pad3 : 14;
} stagehash[16];
u32 tevksel_swap1c : 2;
u32 tevksel_swap2c : 2;
u32 tevksel_swap1d : 2;
u32 tevksel_swap2d : 2;
u32 tevksel_kc : 5;
u32 tevksel_ka : 5;
u32 pad3 : 14;
} stagehash[16];
// TODO: I think we're fine without an enablePixelLighting field, should probably double check, though..
LightingUidData lighting;
// TODO: I think we're fine without an enablePixelLighting field, should probably double check,
// though..
LightingUidData lighting;
};
#pragma pack()

View file

@ -21,280 +21,281 @@ bool PixelShaderManager::dirty;
// Zeroes the constant block, then re-derives every constant whose value is not zero when bpmem
// is zero, and marks the constants dirty.
void PixelShaderManager::Init()
{
  memset(&constants, 0, sizeof(constants));

  // Init any initial constants which aren't zero when bpmem is zero.
  s_bFogRangeAdjustChanged = true;
  s_bViewPortChanged = false;

  SetEfbScaleChanged();
  SetIndMatrixChanged(0);
  SetIndMatrixChanged(1);
  SetIndMatrixChanged(2);
  SetZTextureTypeChanged();
  SetTexCoordChanged(0);
  SetTexCoordChanged(1);
  SetTexCoordChanged(2);
  SetTexCoordChanged(3);
  SetTexCoordChanged(4);
  SetTexCoordChanged(5);
  SetTexCoordChanged(6);
  SetTexCoordChanged(7);

  dirty = true;
}
// Forces recalculation of the settings-dependent constants and marks the constant block dirty.
void PixelShaderManager::Dirty()
{
  // This function is called after a savestate is loaded.
  // Any constants that can change based on settings should be re-calculated
  s_bFogRangeAdjustChanged = true;

  SetEfbScaleChanged();
  SetFogParamChanged();

  dirty = true;
}
// No-op: the body is empty, nothing is released or reset here.
void PixelShaderManager::Shutdown()
{
}
// Applies the deferred constant updates: recomputes the fog range adjust values and the viewport
// depth values if their "changed" flags are set, then clears those flags. Sets `dirty` whenever a
// constant was rewritten.
void PixelShaderManager::SetConstants()
{
  if (s_bFogRangeAdjustChanged)
  {
    // set by two components, so keep changed flag here
    // TODO: try to split both registers and move this logic to the shader
    if (!g_ActiveConfig.bDisableFog && bpmem.fogRange.Base.Enabled == 1)
    {
      // bpmem.fogRange.Base.Center : center of the viewport in x axis. observation:
      // bpmem.fogRange.Base.Center = realcenter + 342;
      int center = ((u32)bpmem.fogRange.Base.Center) - 342;
      // normalize center to make calculations easy
      float ScreenSpaceCenter = center / (2.0f * xfmem.viewport.wd);
      ScreenSpaceCenter = (ScreenSpaceCenter * 2.0f) - 1.0f;
      // bpmem.fogRange.K seems to be a table of precalculated coefficients for the adjust factor
      // observations: bpmem.fogRange.K[0].LO appears to be the lowest value and
      // bpmem.fogRange.K[4].HI the largest
      // they always seems to be larger than 256 so my theory is :
      // they are the coefficients from the center to the border of the screen
      // so to simplify I use the hi coefficient as K in the shader taking 256 as the scale
      // TODO: Shouldn't this be EFBToScaledXf?
      constants.fogf[0][0] = ScreenSpaceCenter;
      constants.fogf[0][1] = (float)Renderer::EFBToScaledX((int)(2.0f * xfmem.viewport.wd));
      constants.fogf[0][2] = bpmem.fogRange.K[4].HI / 256.0f;
    }
    else
    {
      // Fog range adjust disabled: neutral values.
      constants.fogf[0][0] = 0;
      constants.fogf[0][1] = 1;
      constants.fogf[0][2] = 1;
    }
    dirty = true;

    s_bFogRangeAdjustChanged = false;
  }

  if (s_bViewPortChanged)
  {
    // Truncates the float viewport depth values to unsigned integers.
    constants.zbias[1][0] = (u32)xfmem.viewport.farZ;
    constants.zbias[1][1] = (u32)xfmem.viewport.zRange;
    dirty = true;
    s_bViewPortChanged = false;
  }
}
// Writes one component of TEV color register `index` and marks the constants dirty.
// Neither `index` nor `component` is range-checked here.
void PixelShaderManager::SetTevColor(int index, int component, s32 value)
{
  auto& c = constants.colors[index];
  c[component] = value;
  dirty = true;

  PRIM_LOG("tev color%d: %d %d %d %d\n", index, c[0], c[1], c[2], c[3]);
}
// Writes one component of TEV konst color register `index` and marks the constants dirty.
// Neither `index` nor `component` is range-checked here.
void PixelShaderManager::SetTevKonstColor(int index, int component, s32 value)
{
  auto& c = constants.kcolors[index];
  c[component] = value;
  dirty = true;

  PRIM_LOG("tev konst color%d: %d %d %d %d\n", index, c[0], c[1], c[2], c[3]);
}
// Copies the two alpha test reference values from bpmem into alpha[0] and alpha[1].
void PixelShaderManager::SetAlpha()
{
  constants.alpha[0] = bpmem.alpha_test.ref0;
  constants.alpha[1] = bpmem.alpha_test.ref1;
  dirty = true;
}
// Copies the destination alpha value from bpmem into alpha[3].
void PixelShaderManager::SetDestAlpha()
{
  constants.alpha[3] = bpmem.dstalpha.alpha;
  dirty = true;
}
// Stores 1 / (dimension * 128) for texture map `texmapid` in texdims[texmapid][0..1].
// `dirty` is only raised when one of the stored values actually changes.
// Note: width/height are not checked for zero, so a zero dimension stores an infinite reciprocal.
void PixelShaderManager::SetTexDims(int texmapid, u32 width, u32 height)
{
  float rwidth = 1.0f / (width * 128.0f);
  float rheight = 1.0f / (height * 128.0f);

  // TODO: move this check out to the caller. There we could just call this function on texture
  // changes or better, use textureSize() in glsl
  if (constants.texdims[texmapid][0] != rwidth || constants.texdims[texmapid][1] != rheight)
    dirty = true;

  constants.texdims[texmapid][0] = rwidth;
  constants.texdims[texmapid][1] = rheight;
}
// Copies the Z texture bias from bpmem into zbias[1][3].
void PixelShaderManager::SetZTextureBias()
{
  constants.zbias[1][3] = bpmem.ztex1.bias;
  dirty = true;
}
// Flags the viewport-derived constants for recalculation on the next SetConstants() call.
void PixelShaderManager::SetViewportChanged()
{
  s_bViewPortChanged = true;

  // TODO: Shouldn't be necessary with an accurate fog range adjust implementation
  s_bFogRangeAdjustChanged = true;
}
// Stores the reciprocal of the scaled size of one EFB unit in X and Y.
void PixelShaderManager::SetEfbScaleChanged()
{
  constants.efbscale[0] = 1.0f / Renderer::EFBToScaledXf(1);
  constants.efbscale[1] = 1.0f / Renderer::EFBToScaledYf(1);
  dirty = true;
}
// Stores the three Z slope parameters in zslope[0..2] and marks the constants dirty.
void PixelShaderManager::SetZSlope(float dfdx, float dfdy, float f0)
{
  constants.zslope[0] = dfdx;
  constants.zslope[1] = dfdy;
  constants.zslope[2] = f0;
  dirty = true;
}
// Copies the four indirect texture scale fields of bpmem.texscale[high] into indtexscale[high].
// `high` is used directly as the array index (false -> 0, true -> 1).
void PixelShaderManager::SetIndTexScaleChanged(bool high)
{
  constants.indtexscale[high][0] = bpmem.texscale[high].ss0;
  constants.indtexscale[high][1] = bpmem.texscale[high].ts0;
  constants.indtexscale[high][2] = bpmem.texscale[high].ss1;
  constants.indtexscale[high][3] = bpmem.texscale[high].ts1;
  dirty = true;
}
// Rebuilds the two constant rows (2 * matrixidx and 2 * matrixidx + 1) for indirect matrix
// `matrixidx` from bpmem.indmtx. The scale is assembled from the three 2-bit-spaced s0/s1/s2
// fields, and both rows store 17 - scale in their w component.
void PixelShaderManager::SetIndMatrixChanged(int matrixidx)
{
  int scale = ((u32)bpmem.indmtx[matrixidx].col0.s0 << 0) |
              ((u32)bpmem.indmtx[matrixidx].col1.s1 << 2) |
              ((u32)bpmem.indmtx[matrixidx].col2.s2 << 4);

  // xyz - static matrix
  // w - dynamic matrix scale / 128
  constants.indtexmtx[2 * matrixidx][0] = bpmem.indmtx[matrixidx].col0.ma;
  constants.indtexmtx[2 * matrixidx][1] = bpmem.indmtx[matrixidx].col1.mc;
  constants.indtexmtx[2 * matrixidx][2] = bpmem.indmtx[matrixidx].col2.me;
  constants.indtexmtx[2 * matrixidx][3] = 17 - scale;
  constants.indtexmtx[2 * matrixidx + 1][0] = bpmem.indmtx[matrixidx].col0.mb;
  constants.indtexmtx[2 * matrixidx + 1][1] = bpmem.indmtx[matrixidx].col1.md;
  constants.indtexmtx[2 * matrixidx + 1][2] = bpmem.indmtx[matrixidx].col2.mf;
  constants.indtexmtx[2 * matrixidx + 1][3] = 17 - scale;
  dirty = true;

  PRIM_LOG("indmtx%d: scale=%d, mat=(%d %d %d; %d %d %d)\n", matrixidx, scale,
           bpmem.indmtx[matrixidx].col0.ma, bpmem.indmtx[matrixidx].col1.mc,
           bpmem.indmtx[matrixidx].col2.me, bpmem.indmtx[matrixidx].col0.mb,
           bpmem.indmtx[matrixidx].col1.md, bpmem.indmtx[matrixidx].col2.mf);
}
// Selects the four zbias[0] values according to the Z texture format in bpmem.ztex2.type.
// An unrecognised type leaves zbias[0] untouched, but `dirty` is still set.
void PixelShaderManager::SetZTextureTypeChanged()
{
  switch (bpmem.ztex2.type)
  {
  case TEV_ZTEX_TYPE_U8:
    constants.zbias[0][0] = 0;
    constants.zbias[0][1] = 0;
    constants.zbias[0][2] = 0;
    constants.zbias[0][3] = 1;
    break;
  case TEV_ZTEX_TYPE_U16:
    constants.zbias[0][0] = 1;
    constants.zbias[0][1] = 0;
    constants.zbias[0][2] = 0;
    constants.zbias[0][3] = 256;
    break;
  case TEV_ZTEX_TYPE_U24:
    constants.zbias[0][0] = 65536;
    constants.zbias[0][1] = 256;
    constants.zbias[0][2] = 1;
    constants.zbias[0][3] = 0;
    break;
  default:
    break;
  }
  dirty = true;
}
// Stores (scale_minus_1 + 1) * 128 for the s and t coordinates of bpmem.texcoords[texmapid]
// in texdims[texmapid][2..3].
void PixelShaderManager::SetTexCoordChanged(u8 texmapid)
{
  TCoordInfo& tc = bpmem.texcoords[texmapid];
  constants.texdims[texmapid][2] = (float)(tc.s.scale_minus_1 + 1) * 128.0f;
  constants.texdims[texmapid][3] = (float)(tc.t.scale_minus_1 + 1) * 128.0f;
  dirty = true;
}
// Copies the fog colour (r, g, b) from bpmem into fogcolor[0..2].
// Does nothing when fog is disabled in the active configuration.
void PixelShaderManager::SetFogColorChanged()
{
  if (g_ActiveConfig.bDisableFog)
    return;

  constants.fogcolor[0] = bpmem.fog.color.r;
  constants.fogcolor[1] = bpmem.fog.color.g;
  constants.fogcolor[2] = bpmem.fog.color.b;
  dirty = true;
}
// Updates the fog parameters: taken from bpmem.fog when fog is enabled, otherwise replaced by
// fixed values (0, 1, 0, 1). Always marks the constants dirty.
void PixelShaderManager::SetFogParamChanged()
{
  if (!g_ActiveConfig.bDisableFog)
  {
    constants.fogf[1][0] = bpmem.fog.a.GetA();
    constants.fogi[1] = bpmem.fog.b_magnitude;
    constants.fogf[1][2] = bpmem.fog.c_proj_fsel.GetC();
    constants.fogi[3] = bpmem.fog.b_shift;
  }
  else
  {
    constants.fogf[1][0] = 0.f;
    constants.fogi[1] = 1;
    constants.fogf[1][2] = 0.f;
    constants.fogi[3] = 1;
  }
  dirty = true;
}
// Flags the fog range adjust constants for recalculation, unless fog is disabled in the active
// configuration.
void PixelShaderManager::SetFogRangeAdjustChanged()
{
  if (g_ActiveConfig.bDisableFog)
    return;

  s_bFogRangeAdjustChanged = true;
}
// Serialises the two "changed" flags and the whole constant block through `p`.
// When `p` is in read mode, Dirty() is called afterwards.
void PixelShaderManager::DoState(PointerWrap& p)
{
  p.Do(s_bFogRangeAdjustChanged);
  p.Do(s_bViewPortChanged);

  p.Do(constants);

  if (p.GetMode() == PointerWrap::MODE_READ)
  {
    // Fixup the current state from global GPU state
    // NOTE: This requires that all GPU memory has been loaded already.
    Dirty();
  }
}

View file

@ -13,36 +13,36 @@ class PointerWrap;
class PixelShaderManager
{
public:
static void Init();
static void Dirty();
static void Shutdown();
static void DoState(PointerWrap &p);
static void Init();
static void Dirty();
static void Shutdown();
static void DoState(PointerWrap& p);
static void SetConstants(); // sets pixel shader constants
static void SetConstants(); // sets pixel shader constants
// constant management
// Some of these functions grab the constant values from global state,
// so make sure to call them after memory is committed
static void SetTevColor(int index, int component, s32 value);
static void SetTevKonstColor(int index, int component, s32 value);
static void SetAlpha();
static void SetDestAlpha();
static void SetTexDims(int texmapid, u32 width, u32 height);
static void SetZTextureBias();
static void SetViewportChanged();
static void SetEfbScaleChanged();
static void SetZSlope(float dfdx, float dfdy, float f0);
static void SetIndMatrixChanged(int matrixidx);
static void SetZTextureTypeChanged();
static void SetIndTexScaleChanged(bool high);
static void SetTexCoordChanged(u8 texmapid);
static void SetFogColorChanged();
static void SetFogParamChanged();
static void SetFogRangeAdjustChanged();
// constant management
// Some of these functions grab the constant values from global state,
// so make sure to call them after memory is committed
static void SetTevColor(int index, int component, s32 value);
static void SetTevKonstColor(int index, int component, s32 value);
static void SetAlpha();
static void SetDestAlpha();
static void SetTexDims(int texmapid, u32 width, u32 height);
static void SetZTextureBias();
static void SetViewportChanged();
static void SetEfbScaleChanged();
static void SetZSlope(float dfdx, float dfdy, float f0);
static void SetIndMatrixChanged(int matrixidx);
static void SetZTextureTypeChanged();
static void SetIndTexScaleChanged(bool high);
static void SetTexCoordChanged(u8 texmapid);
static void SetFogColorChanged();
static void SetFogParamChanged();
static void SetFogRangeAdjustChanged();
static PixelShaderConstants constants;
static bool dirty;
static PixelShaderConstants constants;
static bool dirty;
static bool s_bFogRangeAdjustChanged;
static bool s_bViewPortChanged;
static bool s_bFogRangeAdjustChanged;
static bool s_bViewPortChanged;
};

View file

@ -9,316 +9,316 @@
#include "Common/CommonTypes.h"
#include "Common/FileUtil.h"
#include "Common/IniFile.h"
#include "Common/StringUtil.h"
#include "Common/Logging/Log.h"
#include "Common/StringUtil.h"
#include "VideoCommon/PostProcessing.h"
#include "VideoCommon/VideoConfig.h"
static const char s_default_shader[] = "void main() { SetOutput(Sample()); }\n";
// Starts the member timer once on construction.
PostProcessingShaderImplementation::PostProcessingShaderImplementation()
{
  m_timer.Start();
}
// Stops the member timer once on destruction.
PostProcessingShaderImplementation::~PostProcessingShaderImplementation()
{
  m_timer.Stop();
}
// Loads the source of the post-processing shader named `shader` and parses its options.
//
// An empty `shader` means "use the shader named in the active configuration". If that name is
// empty too, or the file cannot be read, the built-in default shader source is used instead.
// The user shader directory is tried first, then the system directory; anaglyph stereo mode
// selects the anaglyph sub-directory in both.
//
// Side effects: updates m_current_shader and reloads the option set (LoadOptions followed by
// LoadOptionsConfiguration).
// Returns the shader source code.
std::string PostProcessingShaderConfiguration::LoadShader(std::string shader)
{
  // Load the shader from the configuration if there isn't one sent to us.
  if (shader.empty())
    shader = g_ActiveConfig.sPostProcessingShader;
  m_current_shader = shader;

  const std::string sub_dir = (g_Config.iStereoMode == STEREO_ANAGLYPH) ? ANAGLYPH_DIR DIR_SEP : "";

  // loading shader code
  std::string code;
  std::string path = File::GetUserPath(D_SHADERS_IDX) + sub_dir + shader + ".glsl";

  if (shader.empty())
  {
    code = s_default_shader;
  }
  else
  {
    if (!File::Exists(path))
    {
      // Fallback to shared user dir
      path = File::GetSysDirectory() + SHADERS_DIR DIR_SEP + sub_dir + shader + ".glsl";
    }

    if (!File::ReadFileToString(path, code))
    {
      ERROR_LOG(VIDEO, "Post-processing shader not found: %s", path.c_str());
      code = s_default_shader;
    }
  }

  LoadOptions(code);
  LoadOptionsConfiguration();

  return code;
}
// Rebuilds m_options from the "[configuration] ... [/configuration]" block embedded in `code`.
//
// Inside that block each "[Type]" line opens a new option (OptionBool, OptionRangeFloat or
// OptionRangeInteger) and the following "key = value" lines describe it. Recognised keys are
// GUIName, OptionName, DependentOption, MinValue, MaxValue, DefaultValue and StepAmount; any
// other key is ignored. Integer and float value lists are truncated to at most 4 entries.
//
// m_options is always cleared and m_any_options_dirty always set, even if no block is found.
void PostProcessingShaderConfiguration::LoadOptions(const std::string& code)
{
  const std::string config_start_delimiter = "[configuration]";
  const std::string config_end_delimiter = "[/configuration]";

  m_options.clear();
  m_any_options_dirty = true;

  const size_t configuration_start = code.find(config_start_delimiter);
  if (configuration_start == std::string::npos)
  {
    // Issue loading configuration or there isn't one.
    return;
  }

  // Look for the closing delimiter only after the opening one, so that a stray closing
  // delimiter earlier in the file cannot make the length computation below wrap around.
  const size_t body_start = configuration_start + config_start_delimiter.size();
  const size_t configuration_end = code.find(config_end_delimiter, body_start);
  if (configuration_end == std::string::npos)
  {
    // Issue loading configuration or there isn't one.
    return;
  }

  std::string configuration_string = code.substr(body_start, configuration_end - body_start);

  std::istringstream in(configuration_string);

  struct GLSLStringOption
  {
    std::string m_type;
    std::vector<std::pair<std::string, std::string>> m_options;
  };

  // First pass: split the block into sections of raw key/value strings.
  std::vector<GLSLStringOption> option_strings;
  GLSLStringOption* current_strings = nullptr;
  std::string line;
  while (std::getline(in, line))
  {
#ifndef _WIN32
    // Check for CRLF eol and convert it to LF
    if (!line.empty() && line.back() == '\r')
    {
      line.pop_back();
    }
#endif

    if (line.empty())
      continue;

    if (line[0] == '[')
    {
      const size_t endpos = line.find("]");

      if (endpos != std::string::npos)
      {
        // New section!
        std::string sub = line.substr(1, endpos - 1);
        option_strings.push_back({sub});
        // Re-taken after every push_back, so vector reallocation cannot leave it dangling.
        current_strings = &option_strings.back();
      }
    }
    else if (current_strings)
    {
      // Lines before the first section header are dropped.
      std::string key, value;
      IniFile::ParseLine(line, &key, &value);

      if (!(key == "" && value == ""))
        current_strings->m_options.emplace_back(key, value);
    }
  }

  // Second pass: turn each section into a ConfigurationOption.
  for (const auto& it : option_strings)
  {
    ConfigurationOption option;
    option.m_dirty = true;

    if (it.m_type == "OptionBool")
      option.m_type = ConfigurationOption::OptionType::OPTION_BOOL;
    else if (it.m_type == "OptionRangeFloat")
      option.m_type = ConfigurationOption::OptionType::OPTION_FLOAT;
    else if (it.m_type == "OptionRangeInteger")
      option.m_type = ConfigurationOption::OptionType::OPTION_INTEGER;

    for (const auto& string_option : it.m_options)
    {
      const std::string& key = string_option.first;
      const std::string& value = string_option.second;

      if (key == "GUIName")
      {
        option.m_gui_name = value;
        continue;
      }
      if (key == "OptionName")
      {
        option.m_option_name = value;
        continue;
      }
      if (key == "DependentOption")
      {
        option.m_dependent_option = value;
        continue;
      }

      // The remaining recognised keys all carry values; pick the destination vectors.
      std::vector<s32>* output_integer = nullptr;
      std::vector<float>* output_float = nullptr;

      if (key == "MinValue")
      {
        output_integer = &option.m_integer_min_values;
        output_float = &option.m_float_min_values;
      }
      else if (key == "MaxValue")
      {
        output_integer = &option.m_integer_max_values;
        output_float = &option.m_float_max_values;
      }
      else if (key == "DefaultValue")
      {
        output_integer = &option.m_integer_values;
        output_float = &option.m_float_values;
      }
      else if (key == "StepAmount")
      {
        output_integer = &option.m_integer_step_values;
        output_float = &option.m_float_step_values;
      }
      else
      {
        // Unrecognised key: ignore it.
        continue;
      }

      if (option.m_type == ConfigurationOption::OptionType::OPTION_BOOL)
      {
        // For bool options every value key is parsed into the single bool value.
        TryParse(value, &option.m_bool_value);
      }
      else if (option.m_type == ConfigurationOption::OptionType::OPTION_INTEGER)
      {
        TryParseVector(value, output_integer);
        if (output_integer->size() > 4)
          output_integer->erase(output_integer->begin() + 4, output_integer->end());
      }
      else if (option.m_type == ConfigurationOption::OptionType::OPTION_FLOAT)
      {
        TryParseVector(value, output_float);
        if (output_float->size() > 4)
          output_float->erase(output_float->begin() + 4, output_float->end());
      }
    }

    // Options sharing an OptionName overwrite each other; the last one wins.
    m_options[option.m_option_name] = option;
  }
}
// Overrides the current option values with those stored in the Dolphin config file, section
// "<current shader>-options". Bool options keep their current value as the default; integer and
// float options are only replaced when the stored string is non-empty.
void PostProcessingShaderConfiguration::LoadOptionsConfiguration()
{
  IniFile ini;
  ini.Load(File::GetUserPath(F_DOLPHINCONFIG_IDX));
  std::string section = m_current_shader + "-options";

  for (auto& it : m_options)
  {
    switch (it.second.m_type)
    {
    case ConfigurationOption::OptionType::OPTION_BOOL:
      ini.GetOrCreateSection(section)->Get(it.second.m_option_name, &it.second.m_bool_value,
                                           it.second.m_bool_value);
      break;
    case ConfigurationOption::OptionType::OPTION_INTEGER:
    {
      std::string value;
      ini.GetOrCreateSection(section)->Get(it.second.m_option_name, &value);
      if (!value.empty())
        TryParseVector(value, &it.second.m_integer_values);
    }
    break;
    case ConfigurationOption::OptionType::OPTION_FLOAT:
    {
      std::string value;
      ini.GetOrCreateSection(section)->Get(it.second.m_option_name, &value);
      if (!value.empty())
        TryParseVector(value, &it.second.m_float_values);
    }
    break;
    }
  }
}
void PostProcessingShaderConfiguration::SaveOptionsConfiguration()
{
IniFile ini;
ini.Load(File::GetUserPath(F_DOLPHINCONFIG_IDX));
std::string section = m_current_shader + "-options";
IniFile ini;
ini.Load(File::GetUserPath(F_DOLPHINCONFIG_IDX));
std::string section = m_current_shader + "-options";
for (auto& it : m_options)
{
switch (it.second.m_type)
{
case ConfigurationOption::OptionType::OPTION_BOOL:
{
ini.GetOrCreateSection(section)->Set(it.second.m_option_name, it.second.m_bool_value);
}
break;
case ConfigurationOption::OptionType::OPTION_INTEGER:
{
std::string value = "";
for (size_t i = 0; i < it.second.m_integer_values.size(); ++i)
value += StringFromFormat("%d%s", it.second.m_integer_values[i], i == (it.second.m_integer_values.size() - 1) ? "": ", ");
ini.GetOrCreateSection(section)->Set(it.second.m_option_name, value);
}
break;
case ConfigurationOption::OptionType::OPTION_FLOAT:
{
std::ostringstream value;
value.imbue(std::locale("C"));
for (auto& it : m_options)
{
switch (it.second.m_type)
{
case ConfigurationOption::OptionType::OPTION_BOOL:
{
ini.GetOrCreateSection(section)->Set(it.second.m_option_name, it.second.m_bool_value);
}
break;
case ConfigurationOption::OptionType::OPTION_INTEGER:
{
std::string value = "";
for (size_t i = 0; i < it.second.m_integer_values.size(); ++i)
value += StringFromFormat("%d%s", it.second.m_integer_values[i],
i == (it.second.m_integer_values.size() - 1) ? "" : ", ");
ini.GetOrCreateSection(section)->Set(it.second.m_option_name, value);
}
break;
case ConfigurationOption::OptionType::OPTION_FLOAT:
{
std::ostringstream value;
value.imbue(std::locale("C"));
for (size_t i = 0; i < it.second.m_float_values.size(); ++i)
{
value << it.second.m_float_values[i];
if (i != (it.second.m_float_values.size() - 1))
value << ", ";
}
ini.GetOrCreateSection(section)->Set(it.second.m_option_name, value.str());
}
break;
}
}
ini.Save(File::GetUserPath(F_DOLPHINCONFIG_IDX));
for (size_t i = 0; i < it.second.m_float_values.size(); ++i)
{
value << it.second.m_float_values[i];
if (i != (it.second.m_float_values.size() - 1))
value << ", ";
}
ini.GetOrCreateSection(section)->Set(it.second.m_option_name, value.str());
}
break;
}
}
ini.Save(File::GetUserPath(F_DOLPHINCONFIG_IDX));
}
// Forgets the name of the currently loaded shader by resetting it to the
// empty string.
void PostProcessingShaderConfiguration::ReloadShader()
{
  m_current_shader.clear();
}
void PostProcessingShaderConfiguration::SetOptionf(const std::string& option, int index, float value)
void PostProcessingShaderConfiguration::SetOptionf(const std::string& option, int index,
float value)
{
auto it = m_options.find(option);
auto it = m_options.find(option);
it->second.m_float_values[index] = value;
it->second.m_dirty = true;
m_any_options_dirty = true;
it->second.m_float_values[index] = value;
it->second.m_dirty = true;
m_any_options_dirty = true;
}
void PostProcessingShaderConfiguration::SetOptioni(const std::string& option, int index, s32 value)
{
auto it = m_options.find(option);
auto it = m_options.find(option);
it->second.m_integer_values[index] = value;
it->second.m_dirty = true;
m_any_options_dirty = true;
it->second.m_integer_values[index] = value;
it->second.m_dirty = true;
m_any_options_dirty = true;
}
// Sets the boolean option named `option` and marks both the option and the
// configuration as dirty. A request for an unknown option is ignored instead
// of dereferencing end().
void PostProcessingShaderConfiguration::SetOptionb(const std::string& option, bool value)
{
  auto it = m_options.find(option);
  if (it == m_options.end())
    return;

  it->second.m_bool_value = value;
  it->second.m_dirty = true;
  m_any_options_dirty = true;
}

View file

@ -15,87 +15,82 @@
class PostProcessingShaderConfiguration
{
public:
struct ConfigurationOption
{
enum OptionType
{
OPTION_BOOL = 0,
OPTION_FLOAT,
OPTION_INTEGER,
};
struct ConfigurationOption
{
enum OptionType
{
OPTION_BOOL = 0,
OPTION_FLOAT,
OPTION_INTEGER,
};
bool m_bool_value;
bool m_bool_value;
std::vector<float> m_float_values;
std::vector<s32> m_integer_values;
std::vector<float> m_float_values;
std::vector<s32> m_integer_values;
std::vector<float> m_float_min_values;
std::vector<s32> m_integer_min_values;
std::vector<float> m_float_min_values;
std::vector<s32> m_integer_min_values;
std::vector<float> m_float_max_values;
std::vector<s32> m_integer_max_values;
std::vector<float> m_float_max_values;
std::vector<s32> m_integer_max_values;
std::vector<float> m_float_step_values;
std::vector<s32> m_integer_step_values;
std::vector<float> m_float_step_values;
std::vector<s32> m_integer_step_values;
OptionType m_type;
OptionType m_type;
std::string m_gui_name;
std::string m_option_name;
std::string m_dependent_option;
bool m_dirty;
};
std::string m_gui_name;
std::string m_option_name;
std::string m_dependent_option;
bool m_dirty;
};
typedef std::map<std::string, ConfigurationOption> ConfigMap;
typedef std::map<std::string, ConfigurationOption> ConfigMap;
PostProcessingShaderConfiguration() : m_current_shader("") {}
virtual ~PostProcessingShaderConfiguration() {}
// Loads the configuration with a shader
// If the argument is "" the class will load the shader from the g_activeConfig option.
// Returns the loaded shader source from file
std::string LoadShader(std::string shader = "");
void SaveOptionsConfiguration();
void ReloadShader();
std::string GetShader() { return m_current_shader; }
bool IsDirty() { return m_any_options_dirty; }
void SetDirty(bool dirty) { m_any_options_dirty = dirty; }
bool HasOptions() { return m_options.size() > 0; }
ConfigMap& GetOptions() { return m_options; }
const ConfigurationOption& GetOption(const std::string& option) { return m_options[option]; }
// For updating option's values
void SetOptionf(const std::string& option, int index, float value);
void SetOptioni(const std::string& option, int index, s32 value);
void SetOptionb(const std::string& option, bool value);
PostProcessingShaderConfiguration() : m_current_shader("") {}
virtual ~PostProcessingShaderConfiguration() {}
// Loads the configuration with a shader
// If the argument is "" the class will load the shader from the g_activeConfig option.
// Returns the loaded shader source from file
std::string LoadShader(std::string shader = "");
void SaveOptionsConfiguration();
void ReloadShader();
std::string GetShader() { return m_current_shader; }
bool IsDirty() { return m_any_options_dirty; }
void SetDirty(bool dirty) { m_any_options_dirty = dirty; }
bool HasOptions() { return m_options.size() > 0; }
ConfigMap& GetOptions() { return m_options; }
const ConfigurationOption& GetOption(const std::string& option) { return m_options[option]; }
// For updating option's values
void SetOptionf(const std::string& option, int index, float value);
void SetOptioni(const std::string& option, int index, s32 value);
void SetOptionb(const std::string& option, bool value);
private:
bool m_any_options_dirty;
std::string m_current_shader;
ConfigMap m_options;
bool m_any_options_dirty;
std::string m_current_shader;
ConfigMap m_options;
void LoadOptions(const std::string& code);
void LoadOptionsConfiguration();
void LoadOptions(const std::string& code);
void LoadOptionsConfiguration();
};
// Abstract base for the per-backend post-processing implementation. It owns
// the shader configuration and a timer; backends provide the drawing code.
class PostProcessingShaderImplementation
{
public:
  PostProcessingShaderImplementation();
  virtual ~PostProcessingShaderImplementation();

  PostProcessingShaderConfiguration* GetConfig() { return &m_config; }
  // Should be implemented by the backends for backend specific code
  virtual void BlitFromTexture(TargetRectangle src, TargetRectangle dst, int src_texture,
                               int src_width, int src_height, int layer = 0) = 0;
  virtual void ApplyShader() = 0;

protected:
  // Timer for determining our time value
  Common::Timer m_timer;

  PostProcessingShaderConfiguration m_config;
};

View file

@ -27,15 +27,15 @@
#include "Core/ConfigManager.h"
#include "Core/Core.h"
#include "Core/Host.h"
#include "Core/Movie.h"
#include "Core/FifoPlayer/FifoRecorder.h"
#include "Core/HW/VideoInterface.h"
#include "Core/Host.h"
#include "Core/Movie.h"
#include "VideoCommon/AVIDump.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/Debugger.h"
#include "VideoCommon/FPSCounter.h"
#include "VideoCommon/FramebufferManagerBase.h"
@ -61,7 +61,7 @@ Common::Event Renderer::s_screenshotCompleted;
volatile bool Renderer::s_bScreenshot;
// Final surface changing
Common::Flag Renderer::s_SurfaceNeedsChanged;
Common::Flag Renderer::s_SurfaceNeedsChanged;
Common::Event Renderer::s_ChangedSurface;
// The framebuffer size
@ -86,541 +86,554 @@ unsigned int Renderer::efb_scale_numeratorY = 1;
unsigned int Renderer::efb_scale_denominatorX = 1;
unsigned int Renderer::efb_scale_denominatorY = 1;
// Scales an aspect ratio by the factor between 16:9 and 4:3, i.e. multiplies
// it by (16/9) / (4/3).
static float AspectToWidescreen(float aspect)
{
  return aspect * ((16.0f / 9.0f) / (4.0f / 3.0f));
}
// Refreshes the active video configuration, notifies the texture cache of it,
// and resets the frame-dump and on-screen-display state.
Renderer::Renderer() : frame_data(), bLastFrameDumped(false)
{
  UpdateActiveConfig();
  TextureCacheBase::OnConfigChanged(g_ActiveConfig);

#if defined _WIN32 || defined HAVE_LIBAV
  bAVIDumping = false;
#endif

  OSDChoice = 0;
  OSDTime = 0;
}
// Resets the shared EFB format/scale state and stops an in-progress frame
// dump. The dump is stopped exactly once.
Renderer::~Renderer()
{
  // invalidate previous efb format
  prev_efb_format = PEControl::INVALID_FMT;

  efb_scale_numeratorX = efb_scale_numeratorY = efb_scale_denominatorX = efb_scale_denominatorY = 1;

#if defined _WIN32 || defined HAVE_LIBAV
  if (SConfig::GetInstance().m_DumpFrames && bLastFrameDumped && bAVIDumping)
    AVIDump::Stop();
#endif
}
// Handles a copy of `sourceRc` to the XFB at `xfbAddr`. With XFB emulation
// enabled the copy is forwarded to the framebuffer manager; otherwise the
// frame is presented immediately through Swap(). Does nothing beyond the FIFO
// recording check when the stride or height is zero.
void Renderer::RenderToXFB(u32 xfbAddr, const EFBRectangle& sourceRc, u32 fbStride, u32 fbHeight,
                           float Gamma)
{
  CheckFifoRecording();

  if (!fbStride || !fbHeight)
    return;

  XFBWrited = true;

  if (g_ActiveConfig.bUseXFB)
  {
    FramebufferManagerBase::CopyToXFB(xfbAddr, fbStride, fbHeight, sourceRc, Gamma);
  }
  else
  {
    // below div two to convert from bytes to pixels - it expects width, not stride
    Swap(xfbAddr, fbStride / 2, fbStride / 2, fbHeight, sourceRc, Gamma);
  }
}
// Converts an EFB X coordinate to the scaled render-target coordinate.
int Renderer::EFBToScaledX(int x)
{
  // fractional
  if (g_ActiveConfig.iEFBScale == SCALE_AUTO)
    return FramebufferManagerBase::ScaleToVirtualXfbWidth(x);

  // Every other mode applies the numerator/denominator pair in integer math.
  const int numerator = static_cast<int>(efb_scale_numeratorX);
  const int denominator = static_cast<int>(efb_scale_denominatorX);
  return x * numerator / denominator;
}
// Converts an EFB Y coordinate to the scaled render-target coordinate.
int Renderer::EFBToScaledY(int y)
{
  // fractional
  if (g_ActiveConfig.iEFBScale == SCALE_AUTO)
    return FramebufferManagerBase::ScaleToVirtualXfbHeight(y);

  // Every other mode applies the numerator/denominator pair in integer math.
  const int numerator = static_cast<int>(efb_scale_numeratorY);
  const int denominator = static_cast<int>(efb_scale_denominatorY);
  return y * numerator / denominator;
}
// Writes the EFB-scaled versions of x and y to *scaledX / *scaledY. In the
// two automatic scale modes the inputs are passed through unchanged.
void Renderer::CalculateTargetScale(int x, int y, int* scaledX, int* scaledY)
{
  const bool automatic_scale =
      g_ActiveConfig.iEFBScale == SCALE_AUTO || g_ActiveConfig.iEFBScale == SCALE_AUTO_INTEGRAL;

  if (automatic_scale)
  {
    *scaledX = x;
    *scaledY = y;
    return;
  }

  *scaledX = x * static_cast<int>(efb_scale_numeratorX) / static_cast<int>(efb_scale_denominatorX);
  *scaledY = y * static_cast<int>(efb_scale_numeratorY) / static_cast<int>(efb_scale_denominatorY);
}
// return true if target size changed
bool Renderer::CalculateTargetSize(unsigned int framebuffer_width, unsigned int framebuffer_height)
{
int newEFBWidth, newEFBHeight;
newEFBWidth = newEFBHeight = 0;
int newEFBWidth, newEFBHeight;
newEFBWidth = newEFBHeight = 0;
// TODO: Ugly. Clean up
switch (s_last_efb_scale)
{
case SCALE_AUTO:
case SCALE_AUTO_INTEGRAL:
newEFBWidth = FramebufferManagerBase::ScaleToVirtualXfbWidth(EFB_WIDTH);
newEFBHeight = FramebufferManagerBase::ScaleToVirtualXfbHeight(EFB_HEIGHT);
// TODO: Ugly. Clean up
switch (s_last_efb_scale)
{
case SCALE_AUTO:
case SCALE_AUTO_INTEGRAL:
newEFBWidth = FramebufferManagerBase::ScaleToVirtualXfbWidth(EFB_WIDTH);
newEFBHeight = FramebufferManagerBase::ScaleToVirtualXfbHeight(EFB_HEIGHT);
if (s_last_efb_scale == SCALE_AUTO_INTEGRAL)
{
efb_scale_numeratorX = efb_scale_numeratorY = std::max((newEFBWidth - 1) / EFB_WIDTH + 1, (newEFBHeight - 1) / EFB_HEIGHT + 1);
efb_scale_denominatorX = efb_scale_denominatorY = 1;
newEFBWidth = EFBToScaledX(EFB_WIDTH);
newEFBHeight = EFBToScaledY(EFB_HEIGHT);
}
else
{
efb_scale_numeratorX = newEFBWidth;
efb_scale_denominatorX = EFB_WIDTH;
efb_scale_numeratorY = newEFBHeight;
efb_scale_denominatorY = EFB_HEIGHT;
}
break;
if (s_last_efb_scale == SCALE_AUTO_INTEGRAL)
{
efb_scale_numeratorX = efb_scale_numeratorY =
std::max((newEFBWidth - 1) / EFB_WIDTH + 1, (newEFBHeight - 1) / EFB_HEIGHT + 1);
efb_scale_denominatorX = efb_scale_denominatorY = 1;
newEFBWidth = EFBToScaledX(EFB_WIDTH);
newEFBHeight = EFBToScaledY(EFB_HEIGHT);
}
else
{
efb_scale_numeratorX = newEFBWidth;
efb_scale_denominatorX = EFB_WIDTH;
efb_scale_numeratorY = newEFBHeight;
efb_scale_denominatorY = EFB_HEIGHT;
}
break;
case SCALE_1X:
efb_scale_numeratorX = efb_scale_numeratorY = 1;
efb_scale_denominatorX = efb_scale_denominatorY = 1;
break;
case SCALE_1X:
efb_scale_numeratorX = efb_scale_numeratorY = 1;
efb_scale_denominatorX = efb_scale_denominatorY = 1;
break;
case SCALE_1_5X:
efb_scale_numeratorX = efb_scale_numeratorY = 3;
efb_scale_denominatorX = efb_scale_denominatorY = 2;
break;
case SCALE_1_5X:
efb_scale_numeratorX = efb_scale_numeratorY = 3;
efb_scale_denominatorX = efb_scale_denominatorY = 2;
break;
case SCALE_2X:
efb_scale_numeratorX = efb_scale_numeratorY = 2;
efb_scale_denominatorX = efb_scale_denominatorY = 1;
break;
case SCALE_2X:
efb_scale_numeratorX = efb_scale_numeratorY = 2;
efb_scale_denominatorX = efb_scale_denominatorY = 1;
break;
case SCALE_2_5X:
efb_scale_numeratorX = efb_scale_numeratorY = 5;
efb_scale_denominatorX = efb_scale_denominatorY = 2;
break;
case SCALE_2_5X:
efb_scale_numeratorX = efb_scale_numeratorY = 5;
efb_scale_denominatorX = efb_scale_denominatorY = 2;
break;
default:
efb_scale_numeratorX = efb_scale_numeratorY = s_last_efb_scale - 3;
efb_scale_denominatorX = efb_scale_denominatorY = 1;
default:
efb_scale_numeratorX = efb_scale_numeratorY = s_last_efb_scale - 3;
efb_scale_denominatorX = efb_scale_denominatorY = 1;
int maxSize;
maxSize = GetMaxTextureSize();
if ((unsigned)maxSize < EFB_WIDTH * efb_scale_numeratorX / efb_scale_denominatorX)
{
efb_scale_numeratorX = efb_scale_numeratorY = (maxSize / EFB_WIDTH);
efb_scale_denominatorX = efb_scale_denominatorY = 1;
}
int maxSize;
maxSize = GetMaxTextureSize();
if ((unsigned)maxSize < EFB_WIDTH * efb_scale_numeratorX / efb_scale_denominatorX)
{
efb_scale_numeratorX = efb_scale_numeratorY = (maxSize / EFB_WIDTH);
efb_scale_denominatorX = efb_scale_denominatorY = 1;
}
break;
}
if (s_last_efb_scale > SCALE_AUTO_INTEGRAL)
CalculateTargetScale(EFB_WIDTH, EFB_HEIGHT, &newEFBWidth, &newEFBHeight);
break;
}
if (s_last_efb_scale > SCALE_AUTO_INTEGRAL)
CalculateTargetScale(EFB_WIDTH, EFB_HEIGHT, &newEFBWidth, &newEFBHeight);
if (newEFBWidth != s_target_width || newEFBHeight != s_target_height)
{
s_target_width = newEFBWidth;
s_target_height = newEFBHeight;
return true;
}
return false;
if (newEFBWidth != s_target_width || newEFBHeight != s_target_height)
{
s_target_width = newEFBWidth;
s_target_height = newEFBHeight;
return true;
}
return false;
}
void Renderer::ConvertStereoRectangle(const TargetRectangle& rc, TargetRectangle& leftRc, TargetRectangle& rightRc)
void Renderer::ConvertStereoRectangle(const TargetRectangle& rc, TargetRectangle& leftRc,
TargetRectangle& rightRc)
{
// Resize target to half its original size
TargetRectangle drawRc = rc;
if (g_ActiveConfig.iStereoMode == STEREO_TAB)
{
// The height may be negative due to flipped rectangles
int height = rc.bottom - rc.top;
drawRc.top += height / 4;
drawRc.bottom -= height / 4;
}
else
{
int width = rc.right - rc.left;
drawRc.left += width / 4;
drawRc.right -= width / 4;
}
// Resize target to half its original size
TargetRectangle drawRc = rc;
if (g_ActiveConfig.iStereoMode == STEREO_TAB)
{
// The height may be negative due to flipped rectangles
int height = rc.bottom - rc.top;
drawRc.top += height / 4;
drawRc.bottom -= height / 4;
}
else
{
int width = rc.right - rc.left;
drawRc.left += width / 4;
drawRc.right -= width / 4;
}
// Create two target rectangle offset to the sides of the backbuffer
leftRc = drawRc, rightRc = drawRc;
if (g_ActiveConfig.iStereoMode == STEREO_TAB)
{
leftRc.top -= s_backbuffer_height / 4;
leftRc.bottom -= s_backbuffer_height / 4;
rightRc.top += s_backbuffer_height / 4;
rightRc.bottom += s_backbuffer_height / 4;
}
else
{
leftRc.left -= s_backbuffer_width / 4;
leftRc.right -= s_backbuffer_width / 4;
rightRc.left += s_backbuffer_width / 4;
rightRc.right += s_backbuffer_width / 4;
}
// Create two target rectangle offset to the sides of the backbuffer
leftRc = drawRc, rightRc = drawRc;
if (g_ActiveConfig.iStereoMode == STEREO_TAB)
{
leftRc.top -= s_backbuffer_height / 4;
leftRc.bottom -= s_backbuffer_height / 4;
rightRc.top += s_backbuffer_height / 4;
rightRc.bottom += s_backbuffer_height / 4;
}
else
{
leftRc.left -= s_backbuffer_width / 4;
leftRc.right -= s_backbuffer_width / 4;
rightRc.left += s_backbuffer_width / 4;
rightRc.right += s_backbuffer_width / 4;
}
}
// Records the requested screenshot file name and raises the screenshot flag.
// Both writes happen while holding s_criticalScreenshot, which is locked
// exactly once.
void Renderer::SetScreenshot(const std::string& filename)
{
  std::lock_guard<std::mutex> lk(s_criticalScreenshot);
  s_sScreenshotName = filename;
  s_bScreenshot = true;
}
// Create On-Screen-Messages
void Renderer::DrawDebugText()
{
std::string final_yellow, final_cyan;
std::string final_yellow, final_cyan;
if (g_ActiveConfig.bShowFPS || SConfig::GetInstance().m_ShowFrameCount)
{
if (g_ActiveConfig.bShowFPS)
final_cyan += StringFromFormat("FPS: %u", g_renderer->m_fps_counter.GetFPS());
if (g_ActiveConfig.bShowFPS || SConfig::GetInstance().m_ShowFrameCount)
{
if (g_ActiveConfig.bShowFPS)
final_cyan += StringFromFormat("FPS: %u", g_renderer->m_fps_counter.GetFPS());
if (g_ActiveConfig.bShowFPS && SConfig::GetInstance().m_ShowFrameCount)
final_cyan += " - ";
if (SConfig::GetInstance().m_ShowFrameCount)
{
final_cyan += StringFromFormat("Frame: %llu", (unsigned long long) Movie::g_currentFrame);
if (Movie::IsPlayingInput())
final_cyan += StringFromFormat(" / %llu", (unsigned long long) Movie::g_totalFrames);
}
if (g_ActiveConfig.bShowFPS && SConfig::GetInstance().m_ShowFrameCount)
final_cyan += " - ";
if (SConfig::GetInstance().m_ShowFrameCount)
{
final_cyan += StringFromFormat("Frame: %llu", (unsigned long long)Movie::g_currentFrame);
if (Movie::IsPlayingInput())
final_cyan += StringFromFormat(" / %llu", (unsigned long long)Movie::g_totalFrames);
}
final_cyan += "\n";
final_yellow += "\n";
}
final_cyan += "\n";
final_yellow += "\n";
}
if (SConfig::GetInstance().m_ShowLag)
{
final_cyan += StringFromFormat("Lag: %" PRIu64 "\n", Movie::g_currentLagCount);
final_yellow += "\n";
}
if (SConfig::GetInstance().m_ShowLag)
{
final_cyan += StringFromFormat("Lag: %" PRIu64 "\n", Movie::g_currentLagCount);
final_yellow += "\n";
}
if (SConfig::GetInstance().m_ShowInputDisplay)
{
final_cyan += Movie::GetInputDisplay();
final_yellow += "\n";
}
if (SConfig::GetInstance().m_ShowInputDisplay)
{
final_cyan += Movie::GetInputDisplay();
final_yellow += "\n";
}
// OSD Menu messages
if (OSDChoice > 0)
{
OSDTime = Common::Timer::GetTimeMs() + 3000;
OSDChoice = -OSDChoice;
}
// OSD Menu messages
if (OSDChoice > 0)
{
OSDTime = Common::Timer::GetTimeMs() + 3000;
OSDChoice = -OSDChoice;
}
if ((u32)OSDTime > Common::Timer::GetTimeMs())
{
std::string res_text;
switch (g_ActiveConfig.iEFBScale)
{
case SCALE_AUTO:
res_text = "Auto (fractional)";
break;
case SCALE_AUTO_INTEGRAL:
res_text = "Auto (integral)";
break;
case SCALE_1X:
res_text = "Native";
break;
case SCALE_1_5X:
res_text = "1.5x";
break;
case SCALE_2X:
res_text = "2x";
break;
case SCALE_2_5X:
res_text = "2.5x";
break;
default:
res_text = StringFromFormat("%dx", g_ActiveConfig.iEFBScale - 3);
break;
}
const char* ar_text = "";
switch (g_ActiveConfig.iAspectRatio)
{
case ASPECT_AUTO:
ar_text = "Auto";
break;
case ASPECT_STRETCH:
ar_text = "Stretch";
break;
case ASPECT_ANALOG:
ar_text = "Force 4:3";
break;
case ASPECT_ANALOG_WIDE:
ar_text = "Force 16:9";
}
if ((u32)OSDTime > Common::Timer::GetTimeMs())
{
std::string res_text;
switch (g_ActiveConfig.iEFBScale)
{
case SCALE_AUTO:
res_text = "Auto (fractional)";
break;
case SCALE_AUTO_INTEGRAL:
res_text = "Auto (integral)";
break;
case SCALE_1X:
res_text = "Native";
break;
case SCALE_1_5X:
res_text = "1.5x";
break;
case SCALE_2X:
res_text = "2x";
break;
case SCALE_2_5X:
res_text = "2.5x";
break;
default:
res_text = StringFromFormat("%dx", g_ActiveConfig.iEFBScale - 3);
break;
}
const char* ar_text = "";
switch (g_ActiveConfig.iAspectRatio)
{
case ASPECT_AUTO:
ar_text = "Auto";
break;
case ASPECT_STRETCH:
ar_text = "Stretch";
break;
case ASPECT_ANALOG:
ar_text = "Force 4:3";
break;
case ASPECT_ANALOG_WIDE:
ar_text = "Force 16:9";
}
const char* const efbcopy_text = g_ActiveConfig.bSkipEFBCopyToRam ? "to Texture" : "to RAM";
const char* const efbcopy_text = g_ActiveConfig.bSkipEFBCopyToRam ? "to Texture" : "to RAM";
// The rows
const std::string lines[] =
{
std::string("Internal Resolution: ") + res_text,
std::string("Aspect Ratio: ") + ar_text + (g_ActiveConfig.bCrop ? " (crop)" : ""),
std::string("Copy EFB: ") + efbcopy_text,
std::string("Fog: ") + (g_ActiveConfig.bDisableFog ? "Disabled" : "Enabled"),
SConfig::GetInstance().m_EmulationSpeed <= 0 ? "Speed Limit: Unlimited" :
StringFromFormat("Speed Limit: %li%%", std::lround(SConfig::GetInstance().m_EmulationSpeed * 100.f)),
};
// The rows
const std::string lines[] = {
std::string("Internal Resolution: ") + res_text,
std::string("Aspect Ratio: ") + ar_text + (g_ActiveConfig.bCrop ? " (crop)" : ""),
std::string("Copy EFB: ") + efbcopy_text,
std::string("Fog: ") + (g_ActiveConfig.bDisableFog ? "Disabled" : "Enabled"),
SConfig::GetInstance().m_EmulationSpeed <= 0 ?
"Speed Limit: Unlimited" :
StringFromFormat("Speed Limit: %li%%",
std::lround(SConfig::GetInstance().m_EmulationSpeed * 100.f)),
};
enum { lines_count = sizeof(lines) / sizeof(*lines) };
enum
{
lines_count = sizeof(lines) / sizeof(*lines)
};
// The latest changed setting in yellow
for (int i = 0; i != lines_count; ++i)
{
if (OSDChoice == -i - 1)
final_yellow += lines[i];
final_yellow += '\n';
}
// The latest changed setting in yellow
for (int i = 0; i != lines_count; ++i)
{
if (OSDChoice == -i - 1)
final_yellow += lines[i];
final_yellow += '\n';
}
// The other settings in cyan
for (int i = 0; i != lines_count; ++i)
{
if (OSDChoice != -i - 1)
final_cyan += lines[i];
final_cyan += '\n';
}
}
// The other settings in cyan
for (int i = 0; i != lines_count; ++i)
{
if (OSDChoice != -i - 1)
final_cyan += lines[i];
final_cyan += '\n';
}
}
final_cyan += Common::Profiler::ToString();
final_cyan += Common::Profiler::ToString();
if (g_ActiveConfig.bOverlayStats)
final_cyan += Statistics::ToString();
if (g_ActiveConfig.bOverlayStats)
final_cyan += Statistics::ToString();
if (g_ActiveConfig.bOverlayProjStats)
final_cyan += Statistics::ToStringProj();
if (g_ActiveConfig.bOverlayProjStats)
final_cyan += Statistics::ToStringProj();
//and then the text
g_renderer->RenderText(final_cyan, 20, 20, 0xFF00FFFF);
g_renderer->RenderText(final_yellow, 20, 20, 0xFFFFFF00);
// and then the text
g_renderer->RenderText(final_cyan, 20, 20, 0xFF00FFFF);
g_renderer->RenderText(final_yellow, 20, 20, 0xFFFFFF00);
}
void Renderer::UpdateDrawRectangle(int backbuffer_width, int backbuffer_height)
{
float FloatGLWidth = (float)backbuffer_width;
float FloatGLHeight = (float)backbuffer_height;
float FloatXOffset = 0;
float FloatYOffset = 0;
float FloatGLWidth = (float)backbuffer_width;
float FloatGLHeight = (float)backbuffer_height;
float FloatXOffset = 0;
float FloatYOffset = 0;
// The rendering window size
const float WinWidth = FloatGLWidth;
const float WinHeight = FloatGLHeight;
// The rendering window size
const float WinWidth = FloatGLWidth;
const float WinHeight = FloatGLHeight;
// Update aspect ratio hack values
// Won't take effect until next frame
// Don't know if there is a better place for this code so there isn't a 1 frame delay
if (g_ActiveConfig.bWidescreenHack)
{
float source_aspect = VideoInterface::GetAspectRatio();
if (Core::g_aspect_wide)
source_aspect = AspectToWidescreen(source_aspect);
float target_aspect;
// Update aspect ratio hack values
// Won't take effect until next frame
// Don't know if there is a better place for this code so there isn't a 1 frame delay
if (g_ActiveConfig.bWidescreenHack)
{
float source_aspect = VideoInterface::GetAspectRatio();
if (Core::g_aspect_wide)
source_aspect = AspectToWidescreen(source_aspect);
float target_aspect;
switch (g_ActiveConfig.iAspectRatio)
{
case ASPECT_STRETCH:
target_aspect = WinWidth / WinHeight;
break;
case ASPECT_ANALOG:
target_aspect = VideoInterface::GetAspectRatio();
break;
case ASPECT_ANALOG_WIDE:
target_aspect = AspectToWidescreen(VideoInterface::GetAspectRatio());
break;
default:
// ASPECT_AUTO
target_aspect = source_aspect;
break;
}
switch (g_ActiveConfig.iAspectRatio)
{
case ASPECT_STRETCH:
target_aspect = WinWidth / WinHeight;
break;
case ASPECT_ANALOG:
target_aspect = VideoInterface::GetAspectRatio();
break;
case ASPECT_ANALOG_WIDE:
target_aspect = AspectToWidescreen(VideoInterface::GetAspectRatio());
break;
default:
// ASPECT_AUTO
target_aspect = source_aspect;
break;
}
float adjust = source_aspect / target_aspect;
if (adjust > 1)
{
// Vert+
g_Config.fAspectRatioHackW = 1;
g_Config.fAspectRatioHackH = 1 / adjust;
}
else
{
// Hor+
g_Config.fAspectRatioHackW = adjust;
g_Config.fAspectRatioHackH = 1;
}
}
else
{
// Hack is disabled
g_Config.fAspectRatioHackW = 1;
g_Config.fAspectRatioHackH = 1;
}
float adjust = source_aspect / target_aspect;
if (adjust > 1)
{
// Vert+
g_Config.fAspectRatioHackW = 1;
g_Config.fAspectRatioHackH = 1 / adjust;
}
else
{
// Hor+
g_Config.fAspectRatioHackW = adjust;
g_Config.fAspectRatioHackH = 1;
}
}
else
{
// Hack is disabled
g_Config.fAspectRatioHackW = 1;
g_Config.fAspectRatioHackH = 1;
}
// Check for force-settings and override.
// Check for force-settings and override.
// The rendering window aspect ratio as a proportion of the 4:3 or 16:9 ratio
float Ratio;
if (g_ActiveConfig.iAspectRatio == ASPECT_ANALOG_WIDE || (g_ActiveConfig.iAspectRatio != ASPECT_ANALOG && Core::g_aspect_wide))
{
Ratio = (WinWidth / WinHeight) / AspectToWidescreen(VideoInterface::GetAspectRatio());
}
else
{
Ratio = (WinWidth / WinHeight) / VideoInterface::GetAspectRatio();
}
// The rendering window aspect ratio as a proportion of the 4:3 or 16:9 ratio
float Ratio;
if (g_ActiveConfig.iAspectRatio == ASPECT_ANALOG_WIDE ||
(g_ActiveConfig.iAspectRatio != ASPECT_ANALOG && Core::g_aspect_wide))
{
Ratio = (WinWidth / WinHeight) / AspectToWidescreen(VideoInterface::GetAspectRatio());
}
else
{
Ratio = (WinWidth / WinHeight) / VideoInterface::GetAspectRatio();
}
if (g_ActiveConfig.iAspectRatio != ASPECT_STRETCH)
{
if (Ratio > 1.0f)
{
// Scale down and center in the X direction.
FloatGLWidth /= Ratio;
FloatXOffset = (WinWidth - FloatGLWidth) / 2.0f;
}
// The window is too high, we have to limit the height
else
{
// Scale down and center in the Y direction.
FloatGLHeight *= Ratio;
FloatYOffset = FloatYOffset + (WinHeight - FloatGLHeight) / 2.0f;
}
}
if (g_ActiveConfig.iAspectRatio != ASPECT_STRETCH)
{
if (Ratio > 1.0f)
{
// Scale down and center in the X direction.
FloatGLWidth /= Ratio;
FloatXOffset = (WinWidth - FloatGLWidth) / 2.0f;
}
// The window is too high, we have to limit the height
else
{
// Scale down and center in the Y direction.
FloatGLHeight *= Ratio;
FloatYOffset = FloatYOffset + (WinHeight - FloatGLHeight) / 2.0f;
}
}
// -----------------------------------------------------------------------
// Crop the picture from Analog to 4:3 or from Analog (Wide) to 16:9.
// Output: FloatGLWidth, FloatGLHeight, FloatXOffset, FloatYOffset
// ------------------
if (g_ActiveConfig.iAspectRatio != ASPECT_STRETCH && g_ActiveConfig.bCrop)
{
Ratio = (4.0f / 3.0f) / VideoInterface::GetAspectRatio();
if (Ratio <= 1.0f)
{
Ratio = 1.0f / Ratio;
}
// The width and height we will add (calculate this before FloatGLWidth and FloatGLHeight is adjusted)
float IncreasedWidth = (Ratio - 1.0f) * FloatGLWidth;
float IncreasedHeight = (Ratio - 1.0f) * FloatGLHeight;
// The new width and height
FloatGLWidth = FloatGLWidth * Ratio;
FloatGLHeight = FloatGLHeight * Ratio;
// Adjust the X and Y offset
FloatXOffset = FloatXOffset - (IncreasedWidth * 0.5f);
FloatYOffset = FloatYOffset - (IncreasedHeight * 0.5f);
}
// -----------------------------------------------------------------------
// Crop the picture from Analog to 4:3 or from Analog (Wide) to 16:9.
// Output: FloatGLWidth, FloatGLHeight, FloatXOffset, FloatYOffset
// ------------------
if (g_ActiveConfig.iAspectRatio != ASPECT_STRETCH && g_ActiveConfig.bCrop)
{
Ratio = (4.0f / 3.0f) / VideoInterface::GetAspectRatio();
if (Ratio <= 1.0f)
{
Ratio = 1.0f / Ratio;
}
// The width and height we will add (calculate this before FloatGLWidth and FloatGLHeight is
// adjusted)
float IncreasedWidth = (Ratio - 1.0f) * FloatGLWidth;
float IncreasedHeight = (Ratio - 1.0f) * FloatGLHeight;
// The new width and height
FloatGLWidth = FloatGLWidth * Ratio;
FloatGLHeight = FloatGLHeight * Ratio;
// Adjust the X and Y offset
FloatXOffset = FloatXOffset - (IncreasedWidth * 0.5f);
FloatYOffset = FloatYOffset - (IncreasedHeight * 0.5f);
}
int XOffset = (int)(FloatXOffset + 0.5f);
int YOffset = (int)(FloatYOffset + 0.5f);
int iWhidth = (int)ceil(FloatGLWidth);
int iHeight = (int)ceil(FloatGLHeight);
iWhidth -= iWhidth % 4; // ensure divisibility by 4 to make it compatible with all the video encoders
iHeight -= iHeight % 4;
int XOffset = (int)(FloatXOffset + 0.5f);
int YOffset = (int)(FloatYOffset + 0.5f);
int iWhidth = (int)ceil(FloatGLWidth);
int iHeight = (int)ceil(FloatGLHeight);
iWhidth -=
iWhidth % 4; // ensure divisibility by 4 to make it compatible with all the video encoders
iHeight -= iHeight % 4;
target_rc.left = XOffset;
target_rc.top = YOffset;
target_rc.right = XOffset + iWhidth;
target_rc.bottom = YOffset + iHeight;
target_rc.left = XOffset;
target_rc.top = YOffset;
target_rc.right = XOffset + iWhidth;
target_rc.bottom = YOffset + iHeight;
}
// Asks the host for a render window of the given size after scaling it with
// CalculateTargetScale.
//   width, height: requested size; values below 1 are clamped to 1 first.
void Renderer::SetWindowSize(int width, int height)
{
  // Clamp before scaling so a degenerate request never reaches the scaler.
  if (width < 1)
    width = 1;
  if (height < 1)
    height = 1;

  // Scale the window size by the EFB scale. This must happen exactly once:
  // the function scales in place, so a repeated call would compound the factor.
  CalculateTargetScale(width, height, &width, &height);

  Host_RequestRenderWindowSize(width, height);
}
// Refreshes g_bRecordFifoData from the FifoRecorder and, while recording is
// active, reports the end of the current frame to the recorder.
void Renderer::CheckFifoRecording()
{
  const bool was_recording = g_bRecordFifoData;
  g_bRecordFifoData = FifoRecorder::GetInstance().IsRecording();

  if (!g_bRecordFifoData)
    return;

  // Recording was off on the previous check and is on now: capture the video
  // memory state before the first recorded frame is closed.
  if (!was_recording)
    RecordVideoMemory();

  FifoRecorder::GetInstance().EndFrame(CommandProcessor::fifo.CPBase,
                                       CommandProcessor::fifo.CPEnd);
}
// Hands the current BP, CP and XF register state to the FifoRecorder.
void Renderer::RecordVideoMemory()
{
  u32* bpmem_ptr = (u32*)&bpmem;

  // Zero-initialised so any entry FillCPMemoryArray does not write stays 0.
  u32 cpmem[256] = {};

  // The FIFO recording format splits XF memory into xfmem and xfregs; follow
  // that split here. xfregs is everything in XFMemory past the first
  // XF_MEM_SIZE 32-bit words.
  u32* xfmem_ptr = (u32*)&xfmem;
  u32* xfregs_ptr = (u32*)&xfmem + FifoDataFile::XF_MEM_SIZE;
  u32 xfregs_size = sizeof(XFMemory) / sizeof(u32) - FifoDataFile::XF_MEM_SIZE;

  FillCPMemoryArray(cpmem);

  FifoRecorder::GetInstance().SetVideoMemory(bpmem_ptr, cpmem, xfmem_ptr, xfregs_ptr, xfregs_size);
}
// Finishes the current frame: forwards to the active renderer's SwapImpl, then
// updates the per-frame bookkeeping (FPS counter, frame count, statistics) and
// notifies Core.
// All parameters are passed through to SwapImpl unchanged.
void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const EFBRectangle& rc,
                    float Gamma)
{
  // TODO: merge more generic parts into VideoCommon
  g_renderer->SwapImpl(xfbAddr, fbWidth, fbStride, fbHeight, rc, Gamma);

  // Only frames for which XFBWrited was set count towards the FPS figure.
  if (XFBWrited)
    g_renderer->m_fps_counter.Update();

  frameCount++;
  GFX_DEBUGGER_PAUSE_AT(NEXT_FRAME, true);

  // Begin new frame
  stats.ResetFrame();

  // XFBWrited must be read by the callback before it is cleared for the next frame.
  Core::Callback_VideoCopiedToXFB(XFBWrited ||
                                  (g_ActiveConfig.bUseXFB && g_ActiveConfig.bUseRealXFB));
  XFBWrited = false;
}
// Flips an image vertically in place by exchanging row y with row (h - 1 - y).
//   data:        tightly packed pixel buffer of w * h * pixel_width bytes
//   w, h:        image size in pixels
//   pixel_width: bytes per pixel
// Does nothing for a null buffer or non-positive dimensions. For odd h the
// middle row is left untouched.
void Renderer::FlipImageData(u8* data, int w, int h, int pixel_width)
{
  if (data == nullptr || w <= 0 || h <= 0 || pixel_width <= 0)
    return;

  // Byte offsets are computed in size_t so large images cannot overflow int.
  const size_t row_bytes = static_cast<size_t>(w) * static_cast<size_t>(pixel_width);

  for (int y = 0; y < h / 2; ++y)
  {
    u8* top_row = data + static_cast<size_t>(y) * row_bytes;
    u8* bottom_row = data + static_cast<size_t>(h - 1 - y) * row_bytes;

    // Each pair of rows is exchanged exactly once; a second pass would undo the flip.
    for (size_t i = 0; i < row_bytes; ++i)
      std::swap(top_row[i], bottom_row[i]);
  }
}

View file

@ -32,8 +32,8 @@ class PostProcessingShaderImplementation;
// One entry of the point list handed to Renderer::PokeEFB.
struct EfbPokeData
{
  u16 x, y;  // point coordinates
  u32 data;  // presumably the value to poke at (x, y) - confirm against PokeEFB implementations
};
// TODO: Move these out of here.
@ -46,144 +46,142 @@ extern int OSDChoice;
class Renderer
{
public:
Renderer();
virtual ~Renderer();
Renderer();
virtual ~Renderer();
enum PixelPerfQuery {
PP_ZCOMP_INPUT_ZCOMPLOC,
PP_ZCOMP_OUTPUT_ZCOMPLOC,
PP_ZCOMP_INPUT,
PP_ZCOMP_OUTPUT,
PP_BLEND_INPUT,
PP_EFB_COPY_CLOCKS
};
enum PixelPerfQuery
{
PP_ZCOMP_INPUT_ZCOMPLOC,
PP_ZCOMP_OUTPUT_ZCOMPLOC,
PP_ZCOMP_INPUT,
PP_ZCOMP_OUTPUT,
PP_BLEND_INPUT,
PP_EFB_COPY_CLOCKS
};
virtual void SetColorMask() {}
virtual void SetBlendMode(bool forceUpdate) {}
virtual void SetScissorRect(const EFBRectangle& rc) {}
virtual void SetGenerationMode() {}
virtual void SetDepthMode() {}
virtual void SetLogicOpMode() {}
virtual void SetDitherMode() {}
virtual void SetSamplerState(int stage, int texindex, bool custom_tex) {}
virtual void SetInterlacingMode() {}
virtual void SetViewport() {}
virtual void SetColorMask() {}
virtual void SetBlendMode(bool forceUpdate) {}
virtual void SetScissorRect(const EFBRectangle& rc) {}
virtual void SetGenerationMode() {}
virtual void SetDepthMode() {}
virtual void SetLogicOpMode() {}
virtual void SetDitherMode() {}
virtual void SetSamplerState(int stage, int texindex, bool custom_tex) {}
virtual void SetInterlacingMode() {}
virtual void SetViewport() {}
virtual void ApplyState(bool bUseDstAlpha) {}
virtual void RestoreState() {}
virtual void ResetAPIState() {}
virtual void RestoreAPIState() {}
// Ideal internal resolution - determined by display resolution (automatic scaling) and/or a
// multiple of the native EFB resolution
static int GetTargetWidth() { return s_target_width; }
static int GetTargetHeight() { return s_target_height; }
// Display resolution
static int GetBackbufferWidth() { return s_backbuffer_width; }
static int GetBackbufferHeight() { return s_backbuffer_height; }
static void SetWindowSize(int width, int height);
virtual void ApplyState(bool bUseDstAlpha) {}
virtual void RestoreState() {}
// EFB coordinate conversion functions
virtual void ResetAPIState() {}
virtual void RestoreAPIState() {}
// Use this to convert a whole native EFB rect to backbuffer coordinates
virtual TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) = 0;
// Ideal internal resolution - determined by display resolution (automatic scaling) and/or a multiple of the native EFB resolution
static int GetTargetWidth() { return s_target_width; }
static int GetTargetHeight() { return s_target_height; }
static const TargetRectangle& GetTargetRectangle() { return target_rc; }
static void UpdateDrawRectangle(int backbuffer_width, int backbuffer_height);
// Display resolution
static int GetBackbufferWidth() { return s_backbuffer_width; }
static int GetBackbufferHeight() { return s_backbuffer_height; }
// Use this to convert a single target rectangle to two stereo rectangles
static void ConvertStereoRectangle(const TargetRectangle& rc, TargetRectangle& leftRc,
TargetRectangle& rightRc);
static void SetWindowSize(int width, int height);
// Use this to upscale native EFB coordinates to IDEAL internal resolution
static int EFBToScaledX(int x);
static int EFBToScaledY(int y);
// EFB coordinate conversion functions
// Floating point versions of the above - only use them if really necessary
static float EFBToScaledXf(float x) { return x * ((float)GetTargetWidth() / (float)EFB_WIDTH); }
static float EFBToScaledYf(float y) { return y * ((float)GetTargetHeight() / (float)EFB_HEIGHT); }
// Random utilities
static void SetScreenshot(const std::string& filename);
static void DrawDebugText();
// Use this to convert a whole native EFB rect to backbuffer coordinates
virtual TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) = 0;
virtual void RenderText(const std::string& text, int left, int top, u32 color) = 0;
static const TargetRectangle& GetTargetRectangle() { return target_rc; }
static void UpdateDrawRectangle(int backbuffer_width, int backbuffer_height);
virtual void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable,
u32 color, u32 z) = 0;
virtual void ReinterpretPixelData(unsigned int convtype) = 0;
static void RenderToXFB(u32 xfbAddr, const EFBRectangle& sourceRc, u32 fbStride, u32 fbHeight,
float Gamma = 1.0f);
// Use this to convert a single target rectangle to two stereo rectangles
static void ConvertStereoRectangle(const TargetRectangle& rc, TargetRectangle& leftRc, TargetRectangle& rightRc);
virtual u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) = 0;
virtual void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) = 0;
// Use this to upscale native EFB coordinates to IDEAL internal resolution
static int EFBToScaledX(int x);
static int EFBToScaledY(int y);
virtual u16 BBoxRead(int index) = 0;
virtual void BBoxWrite(int index, u16 value) = 0;
// Floating point versions of the above - only use them if really necessary
static float EFBToScaledXf(float x) { return x * ((float)GetTargetWidth() / (float)EFB_WIDTH); }
static float EFBToScaledYf(float y) { return y * ((float)GetTargetHeight() / (float)EFB_HEIGHT); }
static void FlipImageData(u8* data, int w, int h, int pixel_width = 3);
// Random utilities
static void SetScreenshot(const std::string& filename);
static void DrawDebugText();
// Finish up the current frame, print some stats
static void Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const EFBRectangle& rc,
float Gamma = 1.0f);
virtual void SwapImpl(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight,
const EFBRectangle& rc, float Gamma = 1.0f) = 0;
virtual void RenderText(const std::string& text, int left, int top, u32 color) = 0;
virtual bool SaveScreenshot(const std::string& filename, const TargetRectangle& rc) = 0;
virtual void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z) = 0;
virtual void ReinterpretPixelData(unsigned int convtype) = 0;
static void RenderToXFB(u32 xfbAddr, const EFBRectangle& sourceRc, u32 fbStride, u32 fbHeight, float Gamma = 1.0f);
static PEControl::PixelFormat GetPrevPixelFormat() { return prev_efb_format; }
static void StorePixelFormat(PEControl::PixelFormat new_format) { prev_efb_format = new_format; }
PostProcessingShaderImplementation* GetPostProcessor() { return m_post_processor.get(); }
// Max height/width
virtual int GetMaxTextureSize() = 0;
virtual u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) = 0;
virtual void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) = 0;
static Common::Event s_screenshotCompleted;
virtual u16 BBoxRead(int index) = 0;
virtual void BBoxWrite(int index, u16 value) = 0;
static void FlipImageData(u8* data, int w, int h, int pixel_width = 3);
// Finish up the current frame, print some stats
static void Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const EFBRectangle& rc,float Gamma = 1.0f);
virtual void SwapImpl(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const EFBRectangle& rc, float Gamma = 1.0f) = 0;
virtual bool SaveScreenshot(const std::string &filename, const TargetRectangle &rc) = 0;
static PEControl::PixelFormat GetPrevPixelFormat() { return prev_efb_format; }
static void StorePixelFormat(PEControl::PixelFormat new_format) { prev_efb_format = new_format; }
PostProcessingShaderImplementation* GetPostProcessor() { return m_post_processor.get(); }
// Max height/width
virtual int GetMaxTextureSize() = 0;
static Common::Event s_screenshotCompleted;
// Final surface changing
static Common::Flag s_SurfaceNeedsChanged;
static Common::Event s_ChangedSurface;
// Final surface changing
static Common::Flag s_SurfaceNeedsChanged;
static Common::Event s_ChangedSurface;
protected:
static void CalculateTargetScale(int x, int y, int* scaledX, int* scaledY);
bool CalculateTargetSize(unsigned int framebuffer_width, unsigned int framebuffer_height);
static void CalculateTargetScale(int x, int y, int* scaledX, int* scaledY);
bool CalculateTargetSize(unsigned int framebuffer_width, unsigned int framebuffer_height);
static void CheckFifoRecording();
static void RecordVideoMemory();
static void CheckFifoRecording();
static void RecordVideoMemory();
static volatile bool s_bScreenshot;
static std::mutex s_criticalScreenshot;
static std::string s_sScreenshotName;
static volatile bool s_bScreenshot;
static std::mutex s_criticalScreenshot;
static std::string s_sScreenshotName;
bool bAVIDumping;
bool bAVIDumping;
std::vector<u8> frame_data;
bool bLastFrameDumped;
std::vector<u8> frame_data;
bool bLastFrameDumped;
// The framebuffer size
static int s_target_width;
static int s_target_height;
// The framebuffer size
static int s_target_width;
static int s_target_height;
// TODO: Add functionality to reinit all the render targets when the window is resized.
static int s_backbuffer_width;
static int s_backbuffer_height;
// TODO: Add functionality to reinit all the render targets when the window is resized.
static int s_backbuffer_width;
static int s_backbuffer_height;
static TargetRectangle target_rc;
static TargetRectangle target_rc;
// TODO: Can probably eliminate this static var.
static int s_last_efb_scale;
// TODO: Can probably eliminate this static var.
static int s_last_efb_scale;
static bool XFBWrited;
static bool XFBWrited;
FPSCounter m_fps_counter;
FPSCounter m_fps_counter;
static std::unique_ptr<PostProcessingShaderImplementation> m_post_processor;
static std::unique_ptr<PostProcessingShaderImplementation> m_post_processor;
private:
static PEControl::PixelFormat prev_efb_format;
static unsigned int efb_scale_numeratorX;
static unsigned int efb_scale_numeratorY;
static unsigned int efb_scale_denominatorX;
static unsigned int efb_scale_denominatorY;
static PEControl::PixelFormat prev_efb_format;
static unsigned int efb_scale_numeratorX;
static unsigned int efb_scale_numeratorY;
static unsigned int efb_scale_denominatorX;
static unsigned int efb_scale_denominatorY;
};
extern std::unique_ptr<Renderer> g_renderer;

View file

@ -6,7 +6,6 @@
namespace SamplerCommon
{
// Helper for checking if a BPMemory TexMode0 register is set to Point
// Filtering modes. This is used to decide whether Anisotropic enhancements
// are (mostly) safe in the VideoBackends.
@ -14,17 +13,16 @@ namespace SamplerCommon
// then applying anisotropic filtering is equivalent to forced filtering. Point
// mode textures are usually some sort of 2D UI billboard which will end up
// misaligned from the correct pixels when filtered anisotropically.
// Returns true when tm0.min_filter is below 4 and tm0.mag_filter is zero.
// T is any type exposing `min_filter` and `mag_filter` members.
template <class T>
constexpr bool IsBpTexMode0PointFiltering(const T& tm0)
{
  return tm0.min_filter < 4 && !tm0.mag_filter;
}
// Check if the minification filter has mipmap based filtering modes enabled.
// Returns true when either of the two low bits of tm0.min_filter is set.
// T is any type exposing a `min_filter` member.
template <class T>
constexpr bool AreBpTexMode0MipmapsEnabled(const T& tm0)
{
  return (tm0.min_filter & 3) != 0;
}
}

View file

@ -14,260 +14,275 @@
#include "Common/CommonTypes.h"
#include "Common/FileUtil.h"
#include "Common/StringUtil.h"
#include "Common/Logging/Log.h"
#include "Common/StringUtil.h"
#include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoConfig.h"
#include "VideoCommon/XFMemory.h"
/**
 * Common interface for classes that need to go through the shader generation path
 * (GenerateVertexShader, GenerateGeometryShader, GeneratePixelShader)
 * In particular, this includes the shader code generator (ShaderCode).
 * A different class (ShaderUid) can be used to uniquely identify each ShaderCode object.
 * More interesting things can be done with this, e.g. ShaderConstantProfile checks what shader
 * constants are being used. This can be used to optimize buffer management.
 * If the class does not use one or more of these methods (e.g. Uid class does not need code), the
 * method will be defined as a no-op by the base class, and the call
 * should be optimized out. The reason for this implementation is so that shader
 * selection/generation can be done in two passes, with only a cache lookup being
 * required if the shader has already been generated.
 */
class ShaderGeneratorInterface
{
public:
  /*
   * Used when the shader generator would write a piece of ShaderCode.
   * Can be used like printf.
   * @note In the ShaderCode implementation, this does indeed write the parameter string to an
   * internal buffer. However, you're free to do whatever you like with the parameter.
   */
  void Write(const char*, ...)
#ifdef __GNUC__
      // Argument 1 is the implicit `this`, so the format string is argument 2.
      __attribute__((format(printf, 2, 3)))
#endif
  {
  }

  /*
   * Tells us that a specific constant range (including last_index) is being used by the shader
   */
  void SetConstantsUsed(unsigned int first_index, unsigned int last_index) {}

  /*
   * Returns a pointer to an internally stored object of the uid_data type.
   * @warning since most child classes use the default implementation you shouldn't access this
   * directly without adding precautions against nullptr access (e.g. via adding a dummy structure,
   * cf. the vertex/pixel shader generators)
   */
  template <class uid_data>
  uid_data* GetUidData()
  {
    return nullptr;
  }
};
/*
* Shader UID class used to uniquely identify the ShaderCode output written in the shader generator.
* uid_data can be any struct of parameters that uniquely identify each shader code output.
* Unless performance is not an issue, uid_data should be tightly packed to reduce memory footprint.
* Shader generators will write to specific uid_data fields; ShaderUid methods will only read raw u32 values from a union.
* NOTE: Because LinearDiskCache reads and writes the storage associated with a ShaderUid instance, ShaderUid must be trivially copyable.
* Shader generators will write to specific uid_data fields; ShaderUid methods will only read raw
* u32 values from a union.
* NOTE: Because LinearDiskCache reads and writes the storage associated with a ShaderUid instance,
* ShaderUid must be trivially copyable.
*/
template<class uid_data>
template <class uid_data>
class ShaderUid : public ShaderGeneratorInterface
{
public:
bool operator == (const ShaderUid& obj) const
{
return memcmp(this->values, obj.values, data.NumValues() * sizeof(*values)) == 0;
}
bool operator==(const ShaderUid& obj) const
{
return memcmp(this->values, obj.values, data.NumValues() * sizeof(*values)) == 0;
}
bool operator != (const ShaderUid& obj) const
{
return memcmp(this->values, obj.values, data.NumValues() * sizeof(*values)) != 0;
}
bool operator!=(const ShaderUid& obj) const
{
return memcmp(this->values, obj.values, data.NumValues() * sizeof(*values)) != 0;
}
// determines the storage order inside STL containers
bool operator < (const ShaderUid& obj) const
{
return memcmp(this->values, obj.values, data.NumValues() * sizeof(*values)) < 0;
}
template<class uid_data2>
uid_data2* GetUidData() { return &data; }
const uid_data* GetUidData() const { return &data; }
const u8* GetUidDataRaw() const { return &values[0]; }
size_t GetUidDataSize() const { return sizeof(values); }
// determines the storage order inside STL containers
bool operator<(const ShaderUid& obj) const
{
return memcmp(this->values, obj.values, data.NumValues() * sizeof(*values)) < 0;
}
template <class uid_data2>
uid_data2* GetUidData()
{
return &data;
}
const uid_data* GetUidData() const { return &data; }
const u8* GetUidDataRaw() const { return &values[0]; }
size_t GetUidDataSize() const { return sizeof(values); }
private:
union
{
uid_data data;
u8 values[sizeof(uid_data)];
};
union {
uid_data data;
u8 values[sizeof(uid_data)];
};
};
// Shader generator sink that accumulates the generated source text in a string.
class ShaderCode : public ShaderGeneratorInterface
{
public:
  // Reserve up front so typical shaders do not reallocate while being written.
  ShaderCode() { m_buffer.reserve(16384); }

  // Returns everything written so far.
  const std::string& GetBuffer() const { return m_buffer; }

  // printf-style append to the buffer.
  void Write(const char* fmt, ...)
#ifdef __GNUC__
      // Argument 1 is the implicit `this`, so the format string is argument 2.
      __attribute__((format(printf, 2, 3)))
#endif
  {
    va_list arglist;
    va_start(arglist, fmt);
    m_buffer += StringFromFormatV(fmt, arglist);
    va_end(arglist);
  }

protected:
  std::string m_buffer;
};
/**
 * Generates a shader constant profile which can be used to query which constants are used in a
 * shader
 */
class ShaderConstantProfile : public ShaderGeneratorInterface
{
public:
  // num_constants: number of constant slots tracked; all start out unused.
  ShaderConstantProfile(int num_constants) { constant_usage.resize(num_constants); }

  // Marks every constant in [first_index, last_index] (inclusive) as used.
  // Indices beyond the tracked range are ignored instead of writing out of bounds.
  void SetConstantsUsed(unsigned int first_index, unsigned int last_index)
  {
    // `i <= last_index` rather than `i < last_index + 1`: the latter wraps to 0
    // when last_index is the maximum unsigned value.
    for (size_t i = first_index; i <= last_index && i < constant_usage.size(); ++i)
      constant_usage[i] = true;
  }

  // Currently reports every constant as used, regardless of index.
  bool ConstantIsUsed(unsigned int index) const
  {
    // TODO: Not ready for usage yet
    return true;
    // return constant_usage[index];
  }

private:
  std::vector<bool> constant_usage;  // TODO: Is vector<bool> appropriate here?
};
/**
* Checks if there has been
*/
template<class UidT, class CodeT>
template <class UidT, class CodeT>
class UidChecker
{
public:
void Invalidate()
{
m_shaders.clear();
m_uids.clear();
}
void Invalidate()
{
m_shaders.clear();
m_uids.clear();
}
void AddToIndexAndCheck(CodeT& new_code, const UidT& new_uid, const char* shader_type, const char* dump_prefix)
{
bool uid_is_indexed = std::find(m_uids.begin(), m_uids.end(), new_uid) != m_uids.end();
if (!uid_is_indexed)
{
m_uids.push_back(new_uid);
m_shaders[new_uid] = new_code.GetBuffer();
}
else
{
// uid is already in the index => check if there's a shader with the same uid but different code
auto& old_code = m_shaders[new_uid];
if (old_code != new_code.GetBuffer())
{
static int num_failures = 0;
void AddToIndexAndCheck(CodeT& new_code, const UidT& new_uid, const char* shader_type,
const char* dump_prefix)
{
bool uid_is_indexed = std::find(m_uids.begin(), m_uids.end(), new_uid) != m_uids.end();
if (!uid_is_indexed)
{
m_uids.push_back(new_uid);
m_shaders[new_uid] = new_code.GetBuffer();
}
else
{
// uid is already in the index => check if there's a shader with the same uid but different
// code
auto& old_code = m_shaders[new_uid];
if (old_code != new_code.GetBuffer())
{
static int num_failures = 0;
std::string temp = StringFromFormat("%s%ssuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(),
dump_prefix, ++num_failures);
std::string temp =
StringFromFormat("%s%ssuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(),
dump_prefix, ++num_failures);
// TODO: Should also dump uids
std::ofstream file;
OpenFStream(file, temp, std::ios_base::out);
file << "Old shader code:\n" << old_code;
file << "\n\nNew shader code:\n" << new_code.GetBuffer();
file << "\n\nShader uid:\n";
for (unsigned int i = 0; i < new_uid.GetUidDataSize(); ++i)
{
u8 value = new_uid.GetUidDataRaw()[i];
if ((i % 4) == 0)
{
auto last_value = (i + 3 < new_uid.GetUidDataSize() - 1) ? i + 3 : new_uid.GetUidDataSize();
file << std::setfill(' ') << std::dec;
file << "Values " << std::setw(2) << i << " - " << last_value << ": ";
}
// TODO: Should also dump uids
std::ofstream file;
OpenFStream(file, temp, std::ios_base::out);
file << "Old shader code:\n" << old_code;
file << "\n\nNew shader code:\n" << new_code.GetBuffer();
file << "\n\nShader uid:\n";
for (unsigned int i = 0; i < new_uid.GetUidDataSize(); ++i)
{
u8 value = new_uid.GetUidDataRaw()[i];
if ((i % 4) == 0)
{
auto last_value =
(i + 3 < new_uid.GetUidDataSize() - 1) ? i + 3 : new_uid.GetUidDataSize();
file << std::setfill(' ') << std::dec;
file << "Values " << std::setw(2) << i << " - " << last_value << ": ";
}
file << std::setw(2) << std::setfill('0') << std::hex << value << std::setw(1);
if ((i % 4) < 3)
file << ' ';
else
file << std::endl;
}
file << std::setw(2) << std::setfill('0') << std::hex << value << std::setw(1);
if ((i % 4) < 3)
file << ' ';
else
file << std::endl;
}
ERROR_LOG(VIDEO, "%s shader uid mismatch! See %s for details", shader_type, temp.c_str());
}
}
}
ERROR_LOG(VIDEO, "%s shader uid mismatch! See %s for details", shader_type, temp.c_str());
}
}
}
private:
std::map<UidT, std::string> m_shaders;
std::vector<UidT> m_uids;
std::map<UidT, std::string> m_shaders;
std::vector<UidT> m_uids;
};
template<class T>
inline void DefineOutputMember(T& object, API_TYPE api_type, const char* qualifier, const char* type, const char* name, int var_index, const char* semantic = "", int semantic_index = -1)
template <class T>
inline void DefineOutputMember(T& object, API_TYPE api_type, const char* qualifier,
const char* type, const char* name, int var_index,
const char* semantic = "", int semantic_index = -1)
{
object.Write("\t%s %s %s", qualifier, type, name);
object.Write("\t%s %s %s", qualifier, type, name);
if (var_index != -1)
object.Write("%d", var_index);
if (var_index != -1)
object.Write("%d", var_index);
if (api_type == API_D3D && strlen(semantic) > 0)
{
if (semantic_index != -1)
object.Write(" : %s%d", semantic, semantic_index);
else
object.Write(" : %s", semantic);
}
if (api_type == API_D3D && strlen(semantic) > 0)
{
if (semantic_index != -1)
object.Write(" : %s%d", semantic, semantic_index);
else
object.Write(" : %s", semantic);
}
object.Write(";\n");
object.Write(";\n");
}
template<class T>
template <class T>
inline void GenerateVSOutputMembers(T& object, API_TYPE api_type, const char* qualifier)
{
DefineOutputMember(object, api_type, qualifier, "float4", "pos", -1, "POSITION");
DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 0, "COLOR", 0);
DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 1, "COLOR", 1);
DefineOutputMember(object, api_type, qualifier, "float4", "pos", -1, "POSITION");
DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 0, "COLOR", 0);
DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 1, "COLOR", 1);
for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
DefineOutputMember(object, api_type, qualifier, "float3", "tex", i, "TEXCOORD", i);
for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
DefineOutputMember(object, api_type, qualifier, "float3", "tex", i, "TEXCOORD", i);
DefineOutputMember(object, api_type, qualifier, "float4", "clipPos", -1, "TEXCOORD", xfmem.numTexGen.numTexGens);
DefineOutputMember(object, api_type, qualifier, "float4", "clipPos", -1, "TEXCOORD",
xfmem.numTexGen.numTexGens);
if (g_ActiveConfig.bEnablePixelLighting)
{
DefineOutputMember(object, api_type, qualifier, "float3", "Normal", -1, "TEXCOORD", xfmem.numTexGen.numTexGens + 1);
DefineOutputMember(object, api_type, qualifier, "float3", "WorldPos", -1, "TEXCOORD", xfmem.numTexGen.numTexGens + 2);
}
if (g_ActiveConfig.bEnablePixelLighting)
{
DefineOutputMember(object, api_type, qualifier, "float3", "Normal", -1, "TEXCOORD",
xfmem.numTexGen.numTexGens + 1);
DefineOutputMember(object, api_type, qualifier, "float3", "WorldPos", -1, "TEXCOORD",
xfmem.numTexGen.numTexGens + 2);
}
}
template<class T>
template <class T>
inline void AssignVSOutputMembers(T& object, const char* a, const char* b)
{
object.Write("\t%s.pos = %s.pos;\n", a, b);
object.Write("\t%s.colors_0 = %s.colors_0;\n", a, b);
object.Write("\t%s.colors_1 = %s.colors_1;\n", a, b);
object.Write("\t%s.pos = %s.pos;\n", a, b);
object.Write("\t%s.colors_0 = %s.colors_0;\n", a, b);
object.Write("\t%s.colors_1 = %s.colors_1;\n", a, b);
for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
object.Write("\t%s.tex%d = %s.tex%d;\n", a, i, b, i);
for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
object.Write("\t%s.tex%d = %s.tex%d;\n", a, i, b, i);
object.Write("\t%s.clipPos = %s.clipPos;\n", a, b);
object.Write("\t%s.clipPos = %s.clipPos;\n", a, b);
if (g_ActiveConfig.bEnablePixelLighting)
{
object.Write("\t%s.Normal = %s.Normal;\n", a, b);
object.Write("\t%s.WorldPos = %s.WorldPos;\n", a, b);
}
if (g_ActiveConfig.bEnablePixelLighting)
{
object.Write("\t%s.Normal = %s.Normal;\n", a, b);
object.Write("\t%s.WorldPos = %s.WorldPos;\n", a, b);
}
}
// We use the flag "centroid" to fix some MSAA rendering bugs. With MSAA, the
@ -280,62 +295,61 @@ inline void AssignVSOutputMembers(T& object, const char* a, const char* b)
// Without MSAA, this flag is defined to have no effect.
// Returns the interpolation qualifier string for shader varyings:
//   ""                              when iMultisamples <= 1
//   "centroid" / "sample"           normally (the latter when bSSAA is set)
//   "<qualifier> in" / "... out"    inside a GLSL interface block when binding
//                                   layouts are unsupported; `in` picks the direction
inline const char* GetInterpolationQualifier(bool in_glsl_interface_block = false, bool in = false)
{
  if (g_ActiveConfig.iMultisamples <= 1)
    return "";

  const bool per_sample = g_ActiveConfig.bSSAA;

  // Without GL_ARB_shading_language_420pack support, the interpolation qualifier must be
  // "centroid in" and not "centroid", even within an interface block.
  if (in_glsl_interface_block && !g_ActiveConfig.backend_info.bSupportsBindingLayout)
  {
    if (per_sample)
      return in ? "sample in" : "sample out";
    return in ? "centroid in" : "centroid out";
  }

  return per_sample ? "sample" : "centroid";
}
// Constant variable names
// Each I_* macro expands to a plain string literal holding the name of one constant.
// Because they are string literals, they can be spliced into larger literals through
// adjacent-literal concatenation (s_shader_uniforms does this with the names from
// I_POSNORMALMATRIX through I_PIXELCENTERCORRECTION).
#define I_COLORS "color"
#define I_KCOLORS "k"
#define I_ALPHA "alphaRef"
#define I_TEXDIMS "texdim"
#define I_ZBIAS "czbias"
#define I_INDTEXSCALE "cindscale"
#define I_INDTEXMTX "cindmtx"
#define I_FOGCOLOR "cfogcolor"
#define I_FOGI "cfogi"
#define I_FOGF "cfogf"
#define I_ZSLOPE "czslope"
#define I_EFBSCALE "cefbscale"
#define I_COLORS "color"
#define I_KCOLORS "k"
#define I_ALPHA "alphaRef"
#define I_TEXDIMS "texdim"
#define I_ZBIAS "czbias"
#define I_INDTEXSCALE "cindscale"
#define I_INDTEXMTX "cindmtx"
#define I_FOGCOLOR "cfogcolor"
#define I_FOGI "cfogi"
#define I_FOGF "cfogf"
#define I_ZSLOPE "czslope"
#define I_EFBSCALE "cefbscale"
// Names that s_shader_uniforms splices into its member declarations (up to and including
// I_PIXELCENTERCORRECTION).
#define I_POSNORMALMATRIX "cpnmtx"
#define I_PROJECTION "cproj"
#define I_MATERIALS "cmtrl"
#define I_LIGHTS "clights"
#define I_TEXMATRICES "ctexmtx"
#define I_TRANSFORMMATRICES "ctrmtx"
#define I_NORMALMATRICES "cnmtx"
#define I_POSNORMALMATRIX "cpnmtx"
#define I_PROJECTION "cproj"
#define I_MATERIALS "cmtrl"
#define I_LIGHTS "clights"
#define I_TEXMATRICES "ctexmtx"
#define I_TRANSFORMMATRICES "ctrmtx"
#define I_NORMALMATRICES "cnmtx"
#define I_POSTTRANSFORMMATRICES "cpostmtx"
#define I_PIXELCENTERCORRECTION "cpixelcenter"
// The remaining names are not referenced by s_shader_uniforms.
#define I_STEREOPARAMS "cstereo"
#define I_LINEPTPARAMS "clinept"
#define I_TEXOFFSET "ctexoffset"
#define I_STEREOPARAMS "cstereo"
#define I_LINEPTPARAMS "clinept"
#define I_TEXOFFSET "ctexoffset"
// Text of a list of member declarations, assembled at compile time by concatenating
// literal fragments with the I_* name macros. Each fragment yields one line of the form
// "<tab><type> <name>[<count>];\n" (the last member has no array suffix): six float4 for
// the position/normal matrix, four float4 for the projection, four int4 materials, eight
// Light entries, 24/64/32/64 float4 for the texture, transform, normal and post-transform
// matrices, and a single float4 pixel-center correction.
static const char s_shader_uniforms[] =
"\tfloat4 " I_POSNORMALMATRIX"[6];\n"
"\tfloat4 " I_PROJECTION"[4];\n"
"\tint4 " I_MATERIALS"[4];\n"
"\tLight " I_LIGHTS"[8];\n"
"\tfloat4 " I_TEXMATRICES"[24];\n"
"\tfloat4 " I_TRANSFORMMATRICES"[64];\n"
"\tfloat4 " I_NORMALMATRICES"[32];\n"
"\tfloat4 " I_POSTTRANSFORMMATRICES"[64];\n"
"\tfloat4 " I_PIXELCENTERCORRECTION";\n";
static const char s_shader_uniforms[] = "\tfloat4 " I_POSNORMALMATRIX "[6];\n"
"\tfloat4 " I_PROJECTION "[4];\n"
"\tint4 " I_MATERIALS "[4];\n"
"\tLight " I_LIGHTS "[8];\n"
"\tfloat4 " I_TEXMATRICES "[24];\n"
"\tfloat4 " I_TRANSFORMMATRICES "[64];\n"
"\tfloat4 " I_NORMALMATRICES "[32];\n"
"\tfloat4 " I_POSTTRANSFORMMATRICES "[64];\n"
"\tfloat4 " I_PIXELCENTERCORRECTION ";\n";

View file

@ -15,95 +15,103 @@ Statistics stats;
// Resets all per-frame counters by zero-filling the thisFrame member.
void Statistics::ResetFrame()
{
memset(&thisFrame, 0, sizeof(ThisFrame));
memset(&thisFrame, 0, sizeof(ThisFrame));
}
// Exchanges each display-list counter of the global stats object with its regular
// counterpart: numDLPrims <-> numPrims and the XF/CP/BP "...InDL" load counters <-> the
// plain load counters. Operates on the global `stats`, not on `this` (the method is static).
void Statistics::SwapDL()
{
std::swap(stats.thisFrame.numDLPrims, stats.thisFrame.numPrims);
std::swap(stats.thisFrame.numXFLoadsInDL, stats.thisFrame.numXFLoads);
std::swap(stats.thisFrame.numCPLoadsInDL, stats.thisFrame.numCPLoads);
std::swap(stats.thisFrame.numBPLoadsInDL, stats.thisFrame.numBPLoads);
std::swap(stats.thisFrame.numDLPrims, stats.thisFrame.numPrims);
std::swap(stats.thisFrame.numXFLoadsInDL, stats.thisFrame.numXFLoads);
std::swap(stats.thisFrame.numCPLoadsInDL, stats.thisFrame.numCPLoads);
std::swap(stats.thisFrame.numBPLoadsInDL, stats.thisFrame.numBPLoads);
}
// Formats the counters of the global stats object as text, one "label: value" line per
// counter, then appends the list produced by VertexLoaderManager::AppendListToString.
// Returns the assembled string.
std::string Statistics::ToString()
{
std::string str;
std::string str;
// The object/vertex/triangle and rasterized/TEV pixel counters are only appended when the
// active backend reports API_TYPE::API_NONE.
if (g_ActiveConfig.backend_info.APIType == API_TYPE::API_NONE)
{
str += StringFromFormat("Objects: %i\n", stats.thisFrame.numDrawnObjects);
str += StringFromFormat("Vertices Loaded: %i\n", stats.thisFrame.numVerticesLoaded);
str += StringFromFormat("Triangles Input: %i\n", stats.thisFrame.numTrianglesIn);
str += StringFromFormat("Triangles Rejected: %i\n", stats.thisFrame.numTrianglesRejected);
str += StringFromFormat("Triangles Culled: %i\n", stats.thisFrame.numTrianglesCulled);
str += StringFromFormat("Triangles Clipped: %i\n", stats.thisFrame.numTrianglesClipped);
str += StringFromFormat("Triangles Drawn: %i\n", stats.thisFrame.numTrianglesDrawn);
str += StringFromFormat("Rasterized Pix: %i\n", stats.thisFrame.rasterizedPixels);
str += StringFromFormat("TEV Pix In: %i\n", stats.thisFrame.tevPixelsIn);
str += StringFromFormat("TEV Pix Out: %i\n", stats.thisFrame.tevPixelsOut);
}
if (g_ActiveConfig.backend_info.APIType == API_TYPE::API_NONE)
{
str += StringFromFormat("Objects: %i\n", stats.thisFrame.numDrawnObjects);
str += StringFromFormat("Vertices Loaded: %i\n", stats.thisFrame.numVerticesLoaded);
str += StringFromFormat("Triangles Input: %i\n", stats.thisFrame.numTrianglesIn);
str += StringFromFormat("Triangles Rejected: %i\n", stats.thisFrame.numTrianglesRejected);
str += StringFromFormat("Triangles Culled: %i\n", stats.thisFrame.numTrianglesCulled);
str += StringFromFormat("Triangles Clipped: %i\n", stats.thisFrame.numTrianglesClipped);
str += StringFromFormat("Triangles Drawn: %i\n", stats.thisFrame.numTrianglesDrawn);
str += StringFromFormat("Rasterized Pix: %i\n", stats.thisFrame.rasterizedPixels);
str += StringFromFormat("TEV Pix In: %i\n", stats.thisFrame.tevPixelsIn);
str += StringFromFormat("TEV Pix Out: %i\n", stats.thisFrame.tevPixelsOut);
}
// Counters appended regardless of the backend type. The three "streamed" byte counters
// are shown in kB using integer division by 1024.
str += StringFromFormat("Textures created: %i\n", stats.numTexturesCreated);
str += StringFromFormat("Textures uploaded: %i\n", stats.numTexturesUploaded);
str += StringFromFormat("Textures alive: %i\n", stats.numTexturesAlive);
str += StringFromFormat("pshaders created: %i\n", stats.numPixelShadersCreated);
str += StringFromFormat("pshaders alive: %i\n", stats.numPixelShadersAlive);
str += StringFromFormat("vshaders created: %i\n", stats.numVertexShadersCreated);
str += StringFromFormat("vshaders alive: %i\n", stats.numVertexShadersAlive);
str += StringFromFormat("shaders changes: %i\n", stats.thisFrame.numShaderChanges);
str += StringFromFormat("dlists called: %i\n", stats.thisFrame.numDListsCalled);
str += StringFromFormat("Primitive joins: %i\n", stats.thisFrame.numPrimitiveJoins);
str += StringFromFormat("Draw calls: %i\n", stats.thisFrame.numDrawCalls);
str += StringFromFormat("Primitives: %i\n", stats.thisFrame.numPrims);
str += StringFromFormat("Primitives (DL): %i\n", stats.thisFrame.numDLPrims);
str += StringFromFormat("XF loads: %i\n", stats.thisFrame.numXFLoads);
str += StringFromFormat("XF loads (DL): %i\n", stats.thisFrame.numXFLoadsInDL);
str += StringFromFormat("CP loads: %i\n", stats.thisFrame.numCPLoads);
str += StringFromFormat("CP loads (DL): %i\n", stats.thisFrame.numCPLoadsInDL);
str += StringFromFormat("BP loads: %i\n", stats.thisFrame.numBPLoads);
str += StringFromFormat("BP loads (DL): %i\n", stats.thisFrame.numBPLoadsInDL);
str += StringFromFormat("Vertex streamed: %i kB\n", stats.thisFrame.bytesVertexStreamed / 1024);
str += StringFromFormat("Index streamed: %i kB\n", stats.thisFrame.bytesIndexStreamed / 1024);
str += StringFromFormat("Uniform streamed: %i kB\n", stats.thisFrame.bytesUniformStreamed / 1024);
str += StringFromFormat("Vertex Loaders: %i\n", stats.numVertexLoaders);
str += StringFromFormat("Textures created: %i\n", stats.numTexturesCreated);
str += StringFromFormat("Textures uploaded: %i\n", stats.numTexturesUploaded);
str += StringFromFormat("Textures alive: %i\n", stats.numTexturesAlive);
str += StringFromFormat("pshaders created: %i\n", stats.numPixelShadersCreated);
str += StringFromFormat("pshaders alive: %i\n", stats.numPixelShadersAlive);
str += StringFromFormat("vshaders created: %i\n", stats.numVertexShadersCreated);
str += StringFromFormat("vshaders alive: %i\n", stats.numVertexShadersAlive);
str += StringFromFormat("shaders changes: %i\n", stats.thisFrame.numShaderChanges);
str += StringFromFormat("dlists called: %i\n", stats.thisFrame.numDListsCalled);
str += StringFromFormat("Primitive joins: %i\n", stats.thisFrame.numPrimitiveJoins);
str += StringFromFormat("Draw calls: %i\n", stats.thisFrame.numDrawCalls);
str += StringFromFormat("Primitives: %i\n", stats.thisFrame.numPrims);
str += StringFromFormat("Primitives (DL): %i\n", stats.thisFrame.numDLPrims);
str += StringFromFormat("XF loads: %i\n", stats.thisFrame.numXFLoads);
str += StringFromFormat("XF loads (DL): %i\n", stats.thisFrame.numXFLoadsInDL);
str += StringFromFormat("CP loads: %i\n", stats.thisFrame.numCPLoads);
str += StringFromFormat("CP loads (DL): %i\n", stats.thisFrame.numCPLoadsInDL);
str += StringFromFormat("BP loads: %i\n", stats.thisFrame.numBPLoads);
str += StringFromFormat("BP loads (DL): %i\n", stats.thisFrame.numBPLoadsInDL);
str += StringFromFormat("Vertex streamed: %i kB\n", stats.thisFrame.bytesVertexStreamed / 1024);
str += StringFromFormat("Index streamed: %i kB\n", stats.thisFrame.bytesIndexStreamed / 1024);
str += StringFromFormat("Uniform streamed: %i kB\n", stats.thisFrame.bytesUniformStreamed / 1024);
str += StringFromFormat("Vertex Loaders: %i\n", stats.numVertexLoaders);
// The vertex loader list is collected into its own string so that it can be truncated
// before being appended.
std::string vertex_list;
VertexLoaderManager::AppendListToString(&vertex_list);
std::string vertex_list;
VertexLoaderManager::AppendListToString(&vertex_list);
// Cap the combined length at 8170 characters by cutting the tail of vertex_list.
// NOTE(review): `8170 - str.size()` is computed in an unsigned type; if str alone is ever
// longer than 8170 the subtraction wraps to a huge count and substr() keeps the whole
// list, so no truncation happens in that case.
// TODO : at some point text1 just becomes too huge and overflows, we can't even read the added stuff
// since it gets added at the far bottom of the screen anyway (actually outside the rendering window)
// we should really reset the list instead of using substr
if (vertex_list.size() + str.size() > 8170)
vertex_list = vertex_list.substr(0, 8170 - str.size());
// TODO : at some point text1 just becomes too huge and overflows, we can't even read the added
// stuff
// since it gets added at the far bottom of the screen anyway (actually outside the rendering
// window)
// we should really reset the list instead of using substr
if (vertex_list.size() + str.size() > 8170)
vertex_list = vertex_list.substr(0, 8170 - str.size());
str += vertex_list;
str += vertex_list;
return str;
return str;
}
// Is this really needed?
// Formats the projection values of the global stats object as text and returns it.
// As labelled by the header line, every row prints the gproj_N value first ("X for Raw
// 6=0") and the g2proj_N value in parentheses ("X for Raw 6!=0"). Rows 0, 2, 5, 6, 10 and
// 11 additionally print one of the six raw values proj_0..proj_5.
std::string Statistics::ToStringProj()
{
std::string projections;
std::string projections;
projections += "Projection #: X for Raw 6=0 (X for Raw 6!=0)\n\n";
projections += StringFromFormat("Projection 0: %f (%f) Raw 0: %f\n", stats.gproj_0, stats.g2proj_0, stats.proj_0);
projections += StringFromFormat("Projection 1: %f (%f)\n", stats.gproj_1, stats.g2proj_1);
projections += StringFromFormat("Projection 2: %f (%f) Raw 1: %f\n", stats.gproj_2, stats.g2proj_2, stats.proj_1);
projections += StringFromFormat("Projection 3: %f (%f)\n\n", stats.gproj_3, stats.g2proj_3);
projections += StringFromFormat("Projection 4: %f (%f)\n", stats.gproj_4, stats.g2proj_4);
projections += StringFromFormat("Projection 5: %f (%f) Raw 2: %f\n", stats.gproj_5, stats.g2proj_5, stats.proj_2);
projections += StringFromFormat("Projection 6: %f (%f) Raw 3: %f\n", stats.gproj_6, stats.g2proj_6, stats.proj_3);
projections += StringFromFormat("Projection 7: %f (%f)\n\n", stats.gproj_7, stats.g2proj_7);
projections += StringFromFormat("Projection 8: %f (%f)\n", stats.gproj_8, stats.g2proj_8);
projections += StringFromFormat("Projection 9: %f (%f)\n", stats.gproj_9, stats.g2proj_9);
projections += StringFromFormat("Projection 10: %f (%f) Raw 4: %f\n\n", stats.gproj_10, stats.g2proj_10, stats.proj_4);
projections += StringFromFormat("Projection 11: %f (%f) Raw 5: %f\n\n", stats.gproj_11, stats.g2proj_11, stats.proj_5);
projections += StringFromFormat("Projection 12: %f (%f)\n", stats.gproj_12, stats.g2proj_12);
projections += StringFromFormat("Projection 13: %f (%f)\n", stats.gproj_13, stats.g2proj_13);
projections += StringFromFormat("Projection 14: %f (%f)\n", stats.gproj_14, stats.g2proj_14);
projections += StringFromFormat("Projection 15: %f (%f)\n", stats.gproj_15, stats.g2proj_15);
projections += "Projection #: X for Raw 6=0 (X for Raw 6!=0)\n\n";
projections += StringFromFormat("Projection 0: %f (%f) Raw 0: %f\n", stats.gproj_0,
stats.g2proj_0, stats.proj_0);
projections += StringFromFormat("Projection 1: %f (%f)\n", stats.gproj_1, stats.g2proj_1);
projections += StringFromFormat("Projection 2: %f (%f) Raw 1: %f\n", stats.gproj_2,
stats.g2proj_2, stats.proj_1);
projections += StringFromFormat("Projection 3: %f (%f)\n\n", stats.gproj_3, stats.g2proj_3);
projections += StringFromFormat("Projection 4: %f (%f)\n", stats.gproj_4, stats.g2proj_4);
projections += StringFromFormat("Projection 5: %f (%f) Raw 2: %f\n", stats.gproj_5,
stats.g2proj_5, stats.proj_2);
projections += StringFromFormat("Projection 6: %f (%f) Raw 3: %f\n", stats.gproj_6,
stats.g2proj_6, stats.proj_3);
projections += StringFromFormat("Projection 7: %f (%f)\n\n", stats.gproj_7, stats.g2proj_7);
projections += StringFromFormat("Projection 8: %f (%f)\n", stats.gproj_8, stats.g2proj_8);
projections += StringFromFormat("Projection 9: %f (%f)\n", stats.gproj_9, stats.g2proj_9);
projections += StringFromFormat("Projection 10: %f (%f) Raw 4: %f\n\n", stats.gproj_10,
stats.g2proj_10, stats.proj_4);
projections += StringFromFormat("Projection 11: %f (%f) Raw 5: %f\n\n", stats.gproj_11,
stats.g2proj_11, stats.proj_5);
projections += StringFromFormat("Projection 12: %f (%f)\n", stats.gproj_12, stats.g2proj_12);
projections += StringFromFormat("Projection 13: %f (%f)\n", stats.gproj_13, stats.g2proj_13);
projections += StringFromFormat("Projection 14: %f (%f)\n", stats.gproj_14, stats.g2proj_14);
projections += StringFromFormat("Projection 15: %f (%f)\n", stats.gproj_15, stats.g2proj_15);
return projections;
return projections;
}

View file

@ -8,64 +8,66 @@
// Plain collection of integer and float counters describing renderer activity. A single
// global instance named `stats` is declared after this struct; the static member functions
// read and modify that global instance.
struct Statistics
{
// Shader and texture counters that live outside ThisFrame, so ResetFrame() does not
// clear them.
int numPixelShadersCreated;
int numPixelShadersAlive;
int numVertexShadersCreated;
int numVertexShadersAlive;
int numPixelShadersCreated;
int numPixelShadersAlive;
int numVertexShadersCreated;
int numVertexShadersAlive;
int numTexturesCreated;
int numTexturesUploaded;
int numTexturesAlive;
int numTexturesCreated;
int numTexturesUploaded;
int numTexturesAlive;
int numVertexLoaders;
int numVertexLoaders;
// Projection values printed by ToStringProj(): proj_N are shown as "Raw N", gproj_N as
// the first value of each "Projection N" row and g2proj_N as the parenthesized value.
float proj_0, proj_1, proj_2, proj_3, proj_4, proj_5;
float gproj_0, gproj_1, gproj_2, gproj_3, gproj_4, gproj_5;
float gproj_6, gproj_7, gproj_8, gproj_9, gproj_10, gproj_11, gproj_12, gproj_13, gproj_14, gproj_15;
float proj_0, proj_1, proj_2, proj_3, proj_4, proj_5;
float gproj_0, gproj_1, gproj_2, gproj_3, gproj_4, gproj_5;
float gproj_6, gproj_7, gproj_8, gproj_9, gproj_10, gproj_11, gproj_12, gproj_13, gproj_14,
gproj_15;
float g2proj_0, g2proj_1, g2proj_2, g2proj_3, g2proj_4, g2proj_5;
float g2proj_6, g2proj_7, g2proj_8, g2proj_9, g2proj_10, g2proj_11, g2proj_12, g2proj_13, g2proj_14, g2proj_15;
float g2proj_0, g2proj_1, g2proj_2, g2proj_3, g2proj_4, g2proj_5;
float g2proj_6, g2proj_7, g2proj_8, g2proj_9, g2proj_10, g2proj_11, g2proj_12, g2proj_13,
g2proj_14, g2proj_15;
// Counters that ResetFrame() zero-fills as a whole. All members are plain ints, which is
// what makes clearing the struct with memset valid.
struct ThisFrame
{
int numBPLoads;
int numCPLoads;
int numXFLoads;
struct ThisFrame
{
int numBPLoads;
int numCPLoads;
int numXFLoads;
// "...InDL" counters and numDLPrims are the ones SwapDL() exchanges with their plain
// counterparts.
int numBPLoadsInDL;
int numCPLoadsInDL;
int numXFLoadsInDL;
int numBPLoadsInDL;
int numCPLoadsInDL;
int numXFLoadsInDL;
int numPrims;
int numDLPrims;
int numShaderChanges;
int numPrims;
int numDLPrims;
int numShaderChanges;
int numPrimitiveJoins;
int numDrawCalls;
int numPrimitiveJoins;
int numDrawCalls;
int numDListsCalled;
int numDListsCalled;
// Byte counts; ToString() reports them divided by 1024 as kB.
int bytesVertexStreamed;
int bytesIndexStreamed;
int bytesUniformStreamed;
int bytesVertexStreamed;
int bytesIndexStreamed;
int bytesUniformStreamed;
// Counters that ToString() only prints when the backend APIType is API_NONE.
int numTrianglesClipped;
int numTrianglesIn;
int numTrianglesRejected;
int numTrianglesCulled;
int numDrawnObjects;
int rasterizedPixels;
int numTrianglesDrawn;
int numVerticesLoaded;
int tevPixelsIn;
int tevPixelsOut;
};
ThisFrame thisFrame;
// Zero-fills this object's thisFrame.
void ResetFrame();
// Swaps the display-list counters with the regular ones on the global `stats`.
static void SwapDL();
int numTrianglesClipped;
int numTrianglesIn;
int numTrianglesRejected;
int numTrianglesCulled;
int numDrawnObjects;
int rasterizedPixels;
int numTrianglesDrawn;
int numVerticesLoaded;
int tevPixelsIn;
int tevPixelsOut;
};
ThisFrame thisFrame;
void ResetFrame();
static void SwapDL();
// Text dumps of the global `stats` (counters and projection values respectively).
static std::string ToString();
static std::string ToStringProj();
static std::string ToString();
static std::string ToStringProj();
};
extern Statistics stats;
@ -75,12 +77,12 @@ extern Statistics stats;
// Counter update macros. With STATISTICS defined they modify the given lvalue; otherwise
// they expand to an empty statement.
// NOTE(review): every expansion already ends in ';', so `INCSTAT(x);` produces an extra
// empty statement and the macros cannot be used as the body of an unbraced if/else.
// NOTE(review): the #else branch only provides INCSTAT, ADDSTAT and SETSTAT. No no-op
// fallbacks for DECSTAT, SETSTAT_UINT or SETSTAT_FT are visible here, so code using those
// would not compile without STATISTICS - confirm whether that configuration is supported.
#ifdef STATISTICS
#define INCSTAT(a) (a)++;
#define DECSTAT(a) (a)--;
#define ADDSTAT(a,b) (a)+=(b);
#define SETSTAT(a,x) (a)=(int)(x);
#define SETSTAT_UINT(a,x) (a)=(u32)(x);
#define SETSTAT_FT(a,x) (a)=(float)(x);
#define ADDSTAT(a, b) (a) += (b);
#define SETSTAT(a, x) (a) = (int)(x);
#define SETSTAT_UINT(a, x) (a) = (u32)(x);
#define SETSTAT_FT(a, x) (a) = (float)(x);
#else
#define INCSTAT(a) ;
#define ADDSTAT(a,b) ;
#define SETSTAT(a,x) ;
#define ADDSTAT(a, b) ;
#define SETSTAT(a, x) ;
#endif

File diff suppressed because it is too large Load diff

View file

@ -20,194 +20,202 @@ struct VideoConfig;
class TextureCacheBase
{
public:
// Value type describing the shape of a texture: width, height, mip level count, layer
// count and whether it is a render target. It is equality-comparable and comes with a
// hasher, which lets it serve as the key of the TexPool unordered_multimap.
struct TCacheEntryConfig
{
constexpr TCacheEntryConfig() = default;
struct TCacheEntryConfig
{
constexpr TCacheEntryConfig() = default;
// Two configs are equal when all five fields match.
bool operator==(const TCacheEntryConfig& o) const
{
return std::tie(width, height, levels, layers, rendertarget) ==
std::tie(o.width, o.height, o.levels, o.layers, o.rendertarget);
}
bool operator==(const TCacheEntryConfig& o) const
{
return std::tie(width, height, levels, layers, rendertarget) ==
std::tie(o.width, o.height, o.levels, o.layers, o.rendertarget);
}
// Hash functor: packs the fields into one u64 (rendertarget at bit 63, layers from bit
// 48, levels from bit 32, height from bit 16, width from bit 0) and hashes that value
// with std::hash<u64>.
// NOTE(review): the fields are u32, so a value needing more than 16 bits overlaps the
// neighbouring field in the packed id. That can only cause extra hash collisions;
// equality is still decided by operator==.
struct Hasher : std::hash<u64>
{
size_t operator()(const TCacheEntryConfig& c) const
{
u64 id = (u64)c.rendertarget << 63 | (u64)c.layers << 48 | (u64)c.levels << 32 | (u64)c.height << 16 | (u64)c.width;
return std::hash<u64>::operator()(id);
}
};
struct Hasher : std::hash<u64>
{
size_t operator()(const TCacheEntryConfig& c) const
{
u64 id = (u64)c.rendertarget << 63 | (u64)c.layers << 48 | (u64)c.levels << 32 |
(u64)c.height << 16 | (u64)c.width;
return std::hash<u64>::operator()(id);
}
};
// Defaults: a 0x0 texture with one level, one layer, not a render target.
u32 width = 0;
u32 height = 0;
u32 levels = 1;
u32 layers = 1;
bool rendertarget = false;
};
u32 width = 0;
u32 height = 0;
u32 levels = 1;
u32 layers = 1;
bool rendertarget = false;
};
// Abstract base for one cached texture. It stores the immutable creation config plus
// bookkeeping (address, size, hashes, format, native dimensions, last-use frame and
// cross-references to related entries) and declares the pure virtual operations that a
// concrete entry type must implement.
// NOTE(review): the constructor only initializes `config`; the other scalar members have
// no initializers here and are presumably filled in through the Set* helpers - confirm
// against the callers.
struct TCacheEntryBase
{
const TCacheEntryConfig config;
struct TCacheEntryBase
{
const TCacheEntryConfig config;
// common members
u32 addr;
u32 size_in_bytes;
u64 base_hash;
u64 hash; // for paletted textures, hash = base_hash ^ palette_hash
u32 format; // bits 0-3 will contain the in-memory format.
bool is_efb_copy;
bool is_custom_tex;
u32 memory_stride;
// common members
u32 addr;
u32 size_in_bytes;
u64 base_hash;
u64 hash; // for paletted textures, hash = base_hash ^ palette_hash
u32 format; // bits 0-3 will contain the in-memory format.
bool is_efb_copy;
bool is_custom_tex;
u32 memory_stride;
unsigned int native_width, native_height; // Texture dimensions from the GameCube's point of view
unsigned int native_levels;
unsigned int native_width,
native_height; // Texture dimensions from the GameCube's point of view
unsigned int native_levels;
// used to delete textures which haven't been used for TEXTURE_KILL_THRESHOLD frames
int frameCount;
// used to delete textures which haven't been used for TEXTURE_KILL_THRESHOLD frames
int frameCount;
// Keep an iterator to the entry in textures_by_hash, so it does not need to be searched when removing the cache entry
std::multimap<u64, TCacheEntryBase*>::iterator textures_by_hash_iter;
// Keep an iterator to the entry in textures_by_hash, so it does not need to be searched when
// removing the cache entry
std::multimap<u64, TCacheEntryBase*>::iterator textures_by_hash_iter;
// This is used to keep track of both:
// * efb copies used by this partially updated texture
// * partially updated textures which refer to this efb copy
std::unordered_set<TCacheEntryBase*> references;
// This is used to keep track of both:
// * efb copies used by this partially updated texture
// * partially updated textures which refer to this efb copy
std::unordered_set<TCacheEntryBase*> references;
// Stores the address, byte size and format of the texture.
void SetGeneralParameters(u32 _addr, u32 _size, u32 _format)
{
addr = _addr;
size_in_bytes = _size;
format = _format;
}
void SetGeneralParameters(u32 _addr, u32 _size, u32 _format)
{
addr = _addr;
size_in_bytes = _size;
format = _format;
}
// Stores the native dimensions and level count. Also sets memory_stride to the native
// width.
void SetDimensions(unsigned int _native_width, unsigned int _native_height, unsigned int _native_levels)
{
native_width = _native_width;
native_height = _native_height;
native_levels = _native_levels;
memory_stride = _native_width;
}
void SetDimensions(unsigned int _native_width, unsigned int _native_height,
unsigned int _native_levels)
{
native_width = _native_width;
native_height = _native_height;
native_levels = _native_levels;
memory_stride = _native_width;
}
// Stores the base hash and the final hash (see the comment on `hash` above).
void SetHashes(u64 _base_hash, u64 _hash)
{
base_hash = _base_hash;
hash = _hash;
}
void SetHashes(u64 _base_hash, u64 _hash)
{
base_hash = _base_hash;
hash = _hash;
}
// This texture entry is used by the other entry as a sub-texture
void CreateReference(TCacheEntryBase* other_entry)
{
// References are two-way, so they can easily be destroyed later
this->references.emplace(other_entry);
other_entry->references.emplace(this);
}
// This texture entry is used by the other entry as a sub-texture
void CreateReference(TCacheEntryBase* other_entry)
{
// References are two-way, so they can easily be destroyed later
this->references.emplace(other_entry);
other_entry->references.emplace(this);
}
// Unlinks this entry from every entry it is cross-referenced with: first removes `this`
// from each peer's set, then empties this entry's own set.
void DestroyAllReferences()
{
for (auto& reference : references)
reference->references.erase(this);
void DestroyAllReferences()
{
for (auto& reference : references)
reference->references.erase(this);
references.clear();
}
references.clear();
}
void SetEfbCopy(u32 stride);
void SetEfbCopy(u32 stride);
// The config is copied at construction and is const afterwards.
TCacheEntryBase(const TCacheEntryConfig& c) : config(c) {}
virtual ~TCacheEntryBase();
TCacheEntryBase(const TCacheEntryConfig& c) : config(c) {}
virtual ~TCacheEntryBase();
// Pure virtual operations that the concrete entry type must provide.
virtual void Bind(unsigned int stage) = 0;
virtual bool Save(const std::string& filename, unsigned int level) = 0;
virtual void Bind(unsigned int stage) = 0;
virtual bool Save(const std::string& filename, unsigned int level) = 0;
virtual void CopyRectangleFromTexture(
const TCacheEntryBase* source,
const MathUtil::Rectangle<int> &srcrect,
const MathUtil::Rectangle<int> &dstrect) = 0;
virtual void CopyRectangleFromTexture(const TCacheEntryBase* source,
const MathUtil::Rectangle<int>& srcrect,
const MathUtil::Rectangle<int>& dstrect) = 0;
virtual void Load(unsigned int width, unsigned int height,
unsigned int expanded_width, unsigned int level) = 0;
virtual void FromRenderTarget(u8* dst, PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect,
bool scaleByHalf, unsigned int cbufid, const float *colmat) = 0;
virtual void Load(unsigned int width, unsigned int height, unsigned int expanded_width,
unsigned int level) = 0;
virtual void FromRenderTarget(u8* dst, PEControl::PixelFormat srcFormat,
const EFBRectangle& srcRect, bool scaleByHalf,
unsigned int cbufid, const float* colmat) = 0;
// Non-virtual helpers that are only declared here; their definitions are not part of
// this header excerpt.
bool OverlapsMemoryRange(u32 range_address, u32 range_size) const;
bool OverlapsMemoryRange(u32 range_address, u32 range_size) const;
TextureCacheBase::TCacheEntryBase* ApplyPalette(u8* palette, u32 tlutfmt);
TextureCacheBase::TCacheEntryBase* ApplyPalette(u8* palette, u32 tlutfmt);
// Returns the is_efb_copy flag.
bool IsEfbCopy() const { return is_efb_copy; }
bool IsEfbCopy() const { return is_efb_copy; }
u32 NumBlocksY() const;
u32 BytesPerRow() const;
u32 NumBlocksY() const;
u32 BytesPerRow() const;
u64 CalculateHash() const;
};
u64 CalculateHash() const;
};
virtual ~TextureCacheBase(); // needs virtual for DX11 dtor
virtual ~TextureCacheBase(); // needs virtual for DX11 dtor
static void OnConfigChanged(VideoConfig& config);
static void OnConfigChanged(VideoConfig& config);
// Removes textures which aren't used for more than TEXTURE_KILL_THRESHOLD frames,
// frameCount is the current frame number.
static void Cleanup(int _frameCount);
// Removes textures which aren't used for more than TEXTURE_KILL_THRESHOLD frames,
// frameCount is the current frame number.
static void Cleanup(int _frameCount);
static void Invalidate();
static void Invalidate();
virtual TCacheEntryBase* CreateTexture(const TCacheEntryConfig& config) = 0;
virtual TCacheEntryBase* CreateTexture(const TCacheEntryConfig& config) = 0;
virtual void CopyEFB(u8* dst, u32 format, u32 native_width, u32 bytes_per_row, u32 num_blocks_y,
u32 memory_stride, PEControl::PixelFormat srcFormat,
const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf) = 0;
virtual void CopyEFB(u8* dst, u32 format, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect,
bool isIntensity, bool scaleByHalf) = 0;
virtual void CompileShaders() = 0; // currently only implemented by OGL
virtual void DeleteShaders() = 0; // currently only implemented by OGL
virtual void CompileShaders() = 0; // currently only implemented by OGL
virtual void DeleteShaders() = 0; // currently only implemented by OGL
static TCacheEntryBase* Load(const u32 stage);
static void UnbindTextures();
virtual void BindTextures();
static void CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, u32 dstStride,
PEControl::PixelFormat srcFormat,
const EFBRectangle& srcRect, bool isIntensity,
bool scaleByHalf);
static TCacheEntryBase* Load(const u32 stage);
static void UnbindTextures();
virtual void BindTextures();
static void CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, u32 dstStride,
PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf);
virtual void ConvertTexture(TCacheEntryBase* entry, TCacheEntryBase* unconverted, void* palette, TlutFormat format) = 0;
virtual void ConvertTexture(TCacheEntryBase* entry, TCacheEntryBase* unconverted, void* palette,
TlutFormat format) = 0;
protected:
TextureCacheBase();
TextureCacheBase();
alignas(16) static u8* temp;
static size_t temp_size;
alignas(16) static u8* temp;
static size_t temp_size;
static TCacheEntryBase* bound_textures[8];
static TCacheEntryBase* bound_textures[8];
private:
typedef std::multimap<u64, TCacheEntryBase*> TexCache;
typedef std::unordered_multimap<TCacheEntryConfig, TCacheEntryBase*, TCacheEntryConfig::Hasher> TexPool;
static void ScaleTextureCacheEntryTo(TCacheEntryBase** entry, u32 new_width, u32 new_height);
static TCacheEntryBase* DoPartialTextureUpdates(TexCache::iterator iter, u8* palette, u32 tlutfmt);
static void DumpTexture(TCacheEntryBase* entry, std::string basename, unsigned int level);
static void CheckTempSize(size_t required_size);
typedef std::multimap<u64, TCacheEntryBase*> TexCache;
typedef std::unordered_multimap<TCacheEntryConfig, TCacheEntryBase*, TCacheEntryConfig::Hasher>
TexPool;
static void ScaleTextureCacheEntryTo(TCacheEntryBase** entry, u32 new_width, u32 new_height);
static TCacheEntryBase* DoPartialTextureUpdates(TexCache::iterator iter, u8* palette,
u32 tlutfmt);
static void DumpTexture(TCacheEntryBase* entry, std::string basename, unsigned int level);
static void CheckTempSize(size_t required_size);
static TCacheEntryBase* AllocateTexture(const TCacheEntryConfig& config);
static TexCache::iterator GetTexCacheIter(TCacheEntryBase* entry);
static TCacheEntryBase* AllocateTexture(const TCacheEntryConfig& config);
static TexCache::iterator GetTexCacheIter(TCacheEntryBase* entry);
// Removes and unlinks texture from texture cache and returns it to the pool
static TexCache::iterator InvalidateTexture(TexCache::iterator t_iter);
// Removes and unlinks texture from texture cache and returns it to the pool
static TexCache::iterator InvalidateTexture(TexCache::iterator t_iter);
static TCacheEntryBase* ReturnEntry(unsigned int stage, TCacheEntryBase* entry);
static TCacheEntryBase* ReturnEntry(unsigned int stage, TCacheEntryBase* entry);
static TexCache textures_by_address;
static TexCache textures_by_hash;
static TexPool texture_pool;
static TexCache textures_by_address;
static TexCache textures_by_hash;
static TexPool texture_pool;
// Backup configuration values
static struct BackupConfig
{
int s_colorsamples;
bool s_texfmt_overlay;
bool s_texfmt_overlay_center;
bool s_hires_textures;
bool s_cache_hires_textures;
bool s_copy_cache_enable;
bool s_stereo_3d;
bool s_efb_mono_depth;
} backup_config;
// Backup configuration values
static struct BackupConfig
{
int s_colorsamples;
bool s_texfmt_overlay;
bool s_texfmt_overlay_center;
bool s_hires_textures;
bool s_cache_hires_textures;
bool s_copy_cache_enable;
bool s_stereo_3d;
bool s_efb_mono_depth;
} backup_config;
};
extern std::unique_ptr<TextureCacheBase> g_texture_cache;

File diff suppressed because it is too large Load diff

View file

@ -11,6 +11,5 @@ namespace TextureConversionShader
{
u16 GetEncodedSampleCount(u32 format);
const char *GenerateEncodingShader(u32 format, API_TYPE ApiType = API_OPENGL);
const char* GenerateEncodingShader(u32 format, API_TYPE ApiType = API_OPENGL);
}

View file

@ -8,62 +8,63 @@
// Size constants. TMEM_SIZE (1024 * 1024) is the element count of the u8 array texMem
// declared right after this enum; TMEM_LINE_SIZE is 32.
enum
{
TMEM_SIZE = 1024 * 1024,
TMEM_LINE_SIZE = 32,
TMEM_SIZE = 1024 * 1024,
TMEM_LINE_SIZE = 32,
};
alignas(16) extern u8 texMem[TMEM_SIZE];
// Texture format identifiers. The low bits select a texel format (0x0-0xE); two flag bits
// are OR-ed on top: _GX_TF_ZTF (0x10) marks depth formats and _GX_TF_CTF (0x20) marks
// formats that exist only for copies, so several enumerators share the same low bits and
// differ only in their flags.
enum TextureFormat
{
// These are the texture formats that can be read by the texture mapper.
GX_TF_I4 = 0x0,
GX_TF_I8 = 0x1,
GX_TF_IA4 = 0x2,
GX_TF_IA8 = 0x3,
GX_TF_RGB565 = 0x4,
GX_TF_RGB5A3 = 0x5,
GX_TF_RGBA8 = 0x6,
GX_TF_C4 = 0x8,
GX_TF_C8 = 0x9,
GX_TF_C14X2 = 0xA,
GX_TF_CMPR = 0xE,
// These are the texture formats that can be read by the texture mapper.
GX_TF_I4 = 0x0,
GX_TF_I8 = 0x1,
GX_TF_IA4 = 0x2,
GX_TF_IA8 = 0x3,
GX_TF_RGB565 = 0x4,
GX_TF_RGB5A3 = 0x5,
GX_TF_RGBA8 = 0x6,
GX_TF_C4 = 0x8,
GX_TF_C8 = 0x9,
GX_TF_C14X2 = 0xA,
GX_TF_CMPR = 0xE,
_GX_TF_ZTF = 0x10, // flag for Z texture formats (used internally by dolphin)
_GX_TF_ZTF = 0x10, // flag for Z texture formats (used internally by dolphin)
// Depth texture formats (which directly map to the equivalent colour format above.)
GX_TF_Z8 = 0x1 | _GX_TF_ZTF,
GX_TF_Z16 = 0x3 | _GX_TF_ZTF,
GX_TF_Z24X8 = 0x6 | _GX_TF_ZTF,
// Depth texture formats (which directly map to the equivalent colour format above.)
GX_TF_Z8 = 0x1 | _GX_TF_ZTF,
GX_TF_Z16 = 0x3 | _GX_TF_ZTF,
GX_TF_Z24X8 = 0x6 | _GX_TF_ZTF,
_GX_TF_CTF = 0x20, // flag for copy-texture-format only (used internally by dolphin)
_GX_TF_CTF = 0x20, // flag for copy-texture-format only (used internally by dolphin)
// These are extra formats that can be used when copying from efb,
// they use one of the texel formats from above, but pack different data into them.
GX_CTF_R4 = 0x0 | _GX_TF_CTF,
GX_CTF_RA4 = 0x2 | _GX_TF_CTF,
GX_CTF_RA8 = 0x3 | _GX_TF_CTF,
GX_CTF_YUVA8 = 0x6 | _GX_TF_CTF, // YUV 4:4:4 - Dolphin doesn't implement this format as no commercial games use it
GX_CTF_A8 = 0x7 | _GX_TF_CTF,
GX_CTF_R8 = 0x8 | _GX_TF_CTF,
GX_CTF_G8 = 0x9 | _GX_TF_CTF,
GX_CTF_B8 = 0xA | _GX_TF_CTF,
GX_CTF_RG8 = 0xB | _GX_TF_CTF,
GX_CTF_GB8 = 0xC | _GX_TF_CTF,
// These are extra formats that can be used when copying from efb,
// they use one of the texel formats from above, but pack different data into them.
GX_CTF_R4 = 0x0 | _GX_TF_CTF,
GX_CTF_RA4 = 0x2 | _GX_TF_CTF,
GX_CTF_RA8 = 0x3 | _GX_TF_CTF,
GX_CTF_YUVA8 = 0x6 | _GX_TF_CTF, // YUV 4:4:4 - Dolphin doesn't implement this format as no
// commercial games use it
GX_CTF_A8 = 0x7 | _GX_TF_CTF,
GX_CTF_R8 = 0x8 | _GX_TF_CTF,
GX_CTF_G8 = 0x9 | _GX_TF_CTF,
GX_CTF_B8 = 0xA | _GX_TF_CTF,
GX_CTF_RG8 = 0xB | _GX_TF_CTF,
GX_CTF_GB8 = 0xC | _GX_TF_CTF,
// extra depth texture formats that can be used for efb copies.
GX_CTF_Z4 = 0x0 | _GX_TF_ZTF | _GX_TF_CTF,
GX_CTF_Z8H = 0x8 | _GX_TF_ZTF | _GX_TF_CTF, // This produces an identical result to GX_TF_Z8
GX_CTF_Z8M = 0x9 | _GX_TF_ZTF | _GX_TF_CTF,
GX_CTF_Z8L = 0xA | _GX_TF_ZTF | _GX_TF_CTF,
GX_CTF_Z16R = 0xB | _GX_TF_ZTF | _GX_TF_CTF, // Reversed version of GX_TF_Z16
GX_CTF_Z16L = 0xC | _GX_TF_ZTF | _GX_TF_CTF,
// extra depth texture formats that can be used for efb copies.
GX_CTF_Z4 = 0x0 | _GX_TF_ZTF | _GX_TF_CTF,
GX_CTF_Z8H = 0x8 | _GX_TF_ZTF | _GX_TF_CTF, // This produces an identical result to GX_TF_Z8
GX_CTF_Z8M = 0x9 | _GX_TF_ZTF | _GX_TF_CTF,
GX_CTF_Z8L = 0xA | _GX_TF_ZTF | _GX_TF_CTF,
GX_CTF_Z16R = 0xB | _GX_TF_ZTF | _GX_TF_CTF, // Reversed version of GX_TF_Z16
GX_CTF_Z16L = 0xC | _GX_TF_ZTF | _GX_TF_CTF,
};
// Formats of a 16-bit tlut entry. The decoder functions take this as their `tlutfmt`
// argument, and DecodePixel_Paletted switches on it to pick the matching DecodePixel_*
// routine.
enum TlutFormat
{
GX_TL_IA8 = 0x0,
GX_TL_RGB565 = 0x1,
GX_TL_RGB5A3 = 0x2,
GX_TL_IA8 = 0x0,
GX_TL_RGB565 = 0x1,
GX_TL_RGB5A3 = 0x2,
};
int TexDecoder_GetTexelSizeInNibbles(int format);
@ -73,12 +74,17 @@ int TexDecoder_GetBlockHeightInTexels(u32 format);
int TexDecoder_GetPaletteSize(int fmt);
int TexDecoder_GetEfbCopyBaseFormat(int format);
void TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, const u8* tlut, TlutFormat tlutfmt);
void TexDecoder_DecodeRGBA8FromTmem(u8* dst, const u8 *src_ar, const u8 *src_gb, int width, int height);
void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, const u8* tlut, TlutFormat tlutfmt);
void TexDecoder_DecodeTexelRGBA8FromTmem(u8 *dst, const u8 *src_ar, const u8* src_gb, int s, int t, int imageWidth);
void TexDecoder_Decode(u8* dst, const u8* src, int width, int height, int texformat, const u8* tlut,
TlutFormat tlutfmt);
void TexDecoder_DecodeRGBA8FromTmem(u8* dst, const u8* src_ar, const u8* src_gb, int width,
int height);
void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth, int texformat,
const u8* tlut, TlutFormat tlutfmt);
void TexDecoder_DecodeTexelRGBA8FromTmem(u8* dst, const u8* src_ar, const u8* src_gb, int s, int t,
int imageWidth);
void TexDecoder_SetTexFmtOverlayOptions(bool enable, bool center);
/* Internal method, implemented by TextureDecoder_Generic and TextureDecoder_x64. */
void _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, int texformat, const u8* tlut, TlutFormat tlutfmt);
void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int texformat,
const u8* tlut, TlutFormat tlutfmt);

File diff suppressed because it is too large Load diff

View file

@ -5,9 +5,9 @@
#include <algorithm>
#include <cmath>
#include "Common/CPUDetect.h"
#include "Common/CommonFuncs.h"
#include "Common/CommonTypes.h"
#include "Common/CPUDetect.h"
#include "VideoCommon/LookUpTables.h"
#include "VideoCommon/TextureDecoder.h"
//#include "VideoCommon/VideoCommon.h" // to get debug logs
@ -20,112 +20,112 @@
// Expands a 16-bit IA8 value into a 32-bit pixel.
// The high byte (intensity) is replicated into bits 0-7, 8-15 and 16-23; the low byte (alpha)
// ends up in bits 24-31.
static inline u32 DecodePixel_IA8(u16 val)
{
  int a = val & 0xFF;
  int i = val >> 8;
  return i | (i << 8) | (i << 16) | (a << 24);
}
// Unpacks a 16-bit RGB565 value (red in bits 11-15, green in bits 5-10, blue in bits 0-4) into a
// 32-bit pixel laid out as r | g << 8 | b << 16 | a << 24, with alpha forced to 0xFF.
// Channel widening is delegated to Convert5To8 / Convert6To8.
static inline u32 DecodePixel_RGB565(u16 val)
{
  const int r = Convert5To8((val >> 11) & 0x1f);
  const int g = Convert6To8((val >> 5) & 0x3f);
  const int b = Convert5To8(val & 0x1f);
  const int a = 0xFF;
  return r | (g << 8) | (b << 16) | (a << 24);
}
// Unpacks a 16-bit RGB5A3 value into a 32-bit pixel laid out as r | g << 8 | b << 16 | a << 24.
// Bit 15 selects the encoding:
//   set   -> 5 bits per colour channel (bits 10-14, 5-9, 0-4), alpha fixed at 0xFF;
//   clear -> 3 bits of alpha (bits 12-14) followed by 4 bits per colour channel.
static inline u32 DecodePixel_RGB5A3(u16 val)
{
  int r, g, b, a;
  if (val & 0x8000)
  {
    r = Convert5To8((val >> 10) & 0x1f);
    g = Convert5To8((val >> 5) & 0x1f);
    b = Convert5To8(val & 0x1f);
    a = 0xFF;
  }
  else
  {
    a = Convert3To8((val >> 12) & 0x7);
    r = Convert4To8((val >> 8) & 0xf);
    g = Convert4To8((val >> 4) & 0xf);
    b = Convert4To8(val & 0xf);
  }
  return r | (g << 8) | (b << 16) | (a << 24);
}
// Decodes one raw palette entry according to tlutfmt.
// IA8 entries are decoded as stored; RGB565 and RGB5A3 entries are byte-swapped first.
// Returns 0 for an unrecognised palette format.
static inline u32 DecodePixel_Paletted(u16 pixel, TlutFormat tlutfmt)
{
  switch (tlutfmt)
  {
  case GX_TL_IA8:
    return DecodePixel_IA8(pixel);
  case GX_TL_RGB565:
    return DecodePixel_RGB565(Common::swap16(pixel));
  case GX_TL_RGB5A3:
    return DecodePixel_RGB5A3(Common::swap16(pixel));
  default:
    return 0;
  }
}
// Decodes 4 source bytes of 4-bit palette indices into 8 output pixels.
// For each byte the high nibble is emitted first, then the low nibble. tlut_ is read as an array
// of 16-bit palette entries.
static inline void DecodeBytes_C4(u32* dst, const u8* src, const u8* tlut_, TlutFormat tlutfmt)
{
  const u16* tlut = reinterpret_cast<const u16*>(tlut_);
  for (int x = 0; x < 4; x++)
  {
    u8 val = src[x];
    *dst++ = DecodePixel_Paletted(tlut[val >> 4], tlutfmt);
    *dst++ = DecodePixel_Paletted(tlut[val & 0xF], tlutfmt);
  }
}
// Decodes 8 source bytes of 8-bit palette indices into 8 output pixels.
// tlut_ is read as an array of 16-bit palette entries.
static inline void DecodeBytes_C8(u32* dst, const u8* src, const u8* tlut_, TlutFormat tlutfmt)
{
  const u16* tlut = reinterpret_cast<const u16*>(tlut_);
  for (int x = 0; x < 8; x++)
  {
    u8 val = src[x];
    *dst++ = DecodePixel_Paletted(tlut[val], tlutfmt);
  }
}
// Decodes 4 16-bit source values into 4 output pixels.
// Each value is byte-swapped and its low 14 bits are used as the palette index; the top two bits
// are ignored. tlut_ is read as an array of 16-bit palette entries.
static inline void DecodeBytes_C14X2(u32* dst, const u16* src, const u8* tlut_, TlutFormat tlutfmt)
{
  const u16* tlut = reinterpret_cast<const u16*>(tlut_);
  for (int x = 0; x < 4; x++)
  {
    u16 val = Common::swap16(src[x]);
    *dst++ = DecodePixel_Paletted(tlut[(val & 0x3FFF)], tlutfmt);
  }
}
// Decodes 8 source bytes of IA4 data into 8 output pixels.
// The high nibble of each byte is the alpha and the low nibble the luminance; the luminance is
// replicated into bits 0-23 and the alpha is placed in bits 24-31.
static inline void DecodeBytes_IA4(u32* dst, const u8* src)
{
  for (int x = 0; x < 8; x++)
  {
    const u8 val = src[x];
    u8 a = Convert4To8(val >> 4);
    u8 l = Convert4To8(val & 0xF);
    dst[x] = (a << 24) | l << 16 | l << 8 | l;
  }
}
// Decodes 4 byte-swapped RGB5A3 values from src into 4 output pixels.
// The disabled branch is the equivalent loop form of the hand-unrolled code.
static inline void DecodeBytes_RGB5A3(u32* dst, const u16* src)
{
#if 0
  for (int x = 0; x < 4; x++)
    dst[x] = DecodePixel_RGB5A3(Common::swap16(src[x]));
#else
  dst[0] = DecodePixel_RGB5A3(Common::swap16(src[0]));
  dst[1] = DecodePixel_RGB5A3(Common::swap16(src[1]));
  dst[2] = DecodePixel_RGB5A3(Common::swap16(src[2]));
  dst[3] = DecodePixel_RGB5A3(Common::swap16(src[3]));
#endif
}
// Combines 4 values from src and 4 values from src2 into 4 output pixels.
// For each pixel: the low byte of src[x] goes to bits 24-31, the high byte of src[x] goes to
// bits 0-7, and the 16 bits of src2[x] are placed in bits 8-23.
// NOTE(review): presumably src holds the alpha/red pairs and src2 the green/blue pairs as read on
// a little-endian host - confirm against the callers.
// The disabled branch is the equivalent loop form of the hand-unrolled code.
static inline void DecodeBytes_RGBA8(u32* dst, const u16* src, const u16* src2)
{
#if 0
  for (int x = 0; x < 4; x++)
  {
    dst[x] = ((src[x] & 0xFF) << 24) | ((src[x] & 0xFF00)>>8) | (src2[x] << 8);
  }
#else
  dst[0] = ((src[0] & 0xFF) << 24) | ((src[0] & 0xFF00) >> 8) | (src2[0] << 8);
  dst[1] = ((src[1] & 0xFF) << 24) | ((src[1] & 0xFF00) >> 8) | (src2[1] << 8);
  dst[2] = ((src[2] & 0xFF) << 24) | ((src[2] & 0xFF00) >> 8) | (src2[2] << 8);
  dst[3] = ((src[3] & 0xFF) << 24) | ((src[3] & 0xFF00) >> 8) | (src2[3] << 8);
#endif
}
// One compressed 4x4 block as consumed by DecodeDXTBlock: two 16-bit endpoint colours followed
// by four bytes, one per row, each packing four 2-bit colour indices.
struct DXTBlock
{
  u16 color1;
  u16 color2;
  u8 lines[4];
};
// Packs four channel values into one 32-bit pixel: r in bits 0-7, g in 8-15, b in 16-23 and
// a in 24-31. Arguments are not masked, so callers must pass values in the range 0-255.
static inline u32 MakeRGBA(int r, int g, int b, int a)
{
  return (a << 24) | (b << 16) | (g << 8) | r;
}
// Decodes one 4x4 DXT block into dst.
//   dst   - top-left output pixel of the block
//   src   - compressed block; both endpoint colours are byte-swapped before use
//   pitch - distance between output rows, in u32 elements
// The palette has four entries: the two RGB565 endpoints, then either two interpolated colours
// (when c1 > c2) or the rounded average plus a fully transparent copy of colour 2. Each byte of
// src->lines holds one row of 2-bit indices, most significant pair first.
static void DecodeDXTBlock(u32* dst, const DXTBlock* src, int pitch)
{
  // S3TC Decoder (Note: GCN decodes differently from PC so we can't use native support)
  // Needs more speed.
  u16 c1 = Common::swap16(src->color1);
  u16 c2 = Common::swap16(src->color2);
  int blue1 = Convert5To8(c1 & 0x1F);
  int blue2 = Convert5To8(c2 & 0x1F);
  int green1 = Convert6To8((c1 >> 5) & 0x3F);
  int green2 = Convert6To8((c2 >> 5) & 0x3F);
  int red1 = Convert5To8((c1 >> 11) & 0x1F);
  int red2 = Convert5To8((c2 >> 11) & 0x1F);
  int colors[4];
  colors[0] = MakeRGBA(red1, green1, blue1, 255);
  colors[1] = MakeRGBA(red2, green2, blue2, 255);
  if (c1 > c2)
  {
    // (d >> 1) - (d >> 3) approximates 3/8 of the channel difference.
    int blue3 = ((blue2 - blue1) >> 1) - ((blue2 - blue1) >> 3);
    int green3 = ((green2 - green1) >> 1) - ((green2 - green1) >> 3);
    int red3 = ((red2 - red1) >> 1) - ((red2 - red1) >> 3);
    colors[2] = MakeRGBA(red1 + red3, green1 + green3, blue1 + blue3, 255);
    colors[3] = MakeRGBA(red2 - red3, green2 - green3, blue2 - blue3, 255);
  }
  else
  {
    colors[2] = MakeRGBA((red1 + red2 + 1) / 2,  // Average
                         (green1 + green2 + 1) / 2, (blue1 + blue2 + 1) / 2, 255);
    colors[3] = MakeRGBA(red2, green2, blue2, 0);  // Color2 but transparent
  }

  for (int y = 0; y < 4; y++)
  {
    int val = src->lines[y];
    for (int x = 0; x < 4; x++)
    {
      // Consume the row two bits at a time, starting from the top of the byte.
      dst[x] = colors[(val >> 6) & 3];
      val <<= 2;
    }
    dst += pitch;
  }
}
// JSD 01/06/11:
// TODO: we really should ensure BOTH the source and destination addresses are aligned to 16-byte
// boundaries to squeeze out a little more performance. _mm_loadu_si128/_mm_storeu_si128 is slower
// than _mm_load_si128/_mm_store_si128 because they work on unaligned addresses. The processor is
// free to make the assumption that addresses are multiples of 16 in the aligned case.
// TODO: complete SSE2 optimization of less often used texture formats.
// TODO: refactor algorithms using _mm_loadl_epi64 unaligned loads to prefer 128-bit aligned loads.
// Generic (non-SIMD) texture decoder.
//   dst       - output image, one u32 per pixel, rows `width` pixels apart
//   src       - encoded texture data, stored tile by tile
//   width     - output width in pixels
//   height    - output height in pixels
//   texformat - GX_TF_* format selecting the decode path
//   tlut      - palette data, used only by the C4 / C8 / C14X2 paths
//   tlutfmt   - encoding of the palette entries
// Every path walks the image in tiles (8x8, 8x4 or 4x4 pixels depending on the format) and
// writes each tile row into its place in dst. Formats without a case are left untouched.
// NOTE(review): whole tiles are always written, so callers presumably pass dimensions that are
// already padded to the tile size - confirm.
void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int texformat,
                            const u8* tlut, TlutFormat tlutfmt)
{
  const int Wsteps4 = (width + 3) / 4;
  const int Wsteps8 = (width + 7) / 8;

  switch (texformat)
  {
  case GX_TF_C4:
    for (int y = 0; y < height; y += 8)
      for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8, yStep++)
        for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++, xStep++)
          DecodeBytes_C4(dst + (y + iy) * width + x, src + 4 * xStep, tlut, tlutfmt);
    break;
  case GX_TF_I4:
  {
    // Reference C implementation:
    for (int y = 0; y < height; y += 8)
      for (int x = 0; x < width; x += 8)
        for (int iy = 0; iy < 8; iy++, src += 4)
          for (int ix = 0; ix < 4; ix++)
          {
            int val = src[ix];
            u8 i1 = Convert4To8(val >> 4);
            u8 i2 = Convert4To8(val & 0xF);
            // memset over 4 bytes replicates the intensity into every byte of the pixel.
            memset(dst + (y + iy) * width + x + ix * 2, i1, 4);
            memset(dst + (y + iy) * width + x + ix * 2 + 1, i2, 4);
          }
  }
  break;
  case GX_TF_I8:  // speed critical
  {
    // Reference C implementation
    for (int y = 0; y < height; y += 4)
      for (int x = 0; x < width; x += 8)
        for (int iy = 0; iy < 4; ++iy, src += 8)
        {
          u32* newdst = dst + (y + iy) * width + x;
          const u8* newsrc = src;
          u8 srcval;

          // Hand-unrolled: 8 pixels per tile row, intensity replicated into all four bytes.
          srcval = (newsrc++)[0];
          (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
          srcval = (newsrc++)[0];
          (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
          srcval = (newsrc++)[0];
          (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
          srcval = (newsrc++)[0];
          (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
          srcval = (newsrc++)[0];
          (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
          srcval = (newsrc++)[0];
          (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
          srcval = (newsrc++)[0];
          (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
          srcval = newsrc[0];
          newdst[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
        }
  }
  break;
  case GX_TF_C8:
    for (int y = 0; y < height; y += 4)
      for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
        for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
          DecodeBytes_C8(dst + (y + iy) * width + x, src + 8 * xStep, tlut, tlutfmt);
    break;
  case GX_TF_IA4:
  {
    for (int y = 0; y < height; y += 4)
      for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
        for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
          DecodeBytes_IA4(dst + (y + iy) * width + x, src + 8 * xStep);
  }
  break;
  case GX_TF_IA8:
  {
    // Reference C implementation:
    for (int y = 0; y < height; y += 4)
      for (int x = 0; x < width; x += 4)
        for (int iy = 0; iy < 4; iy++, src += 8)
        {
          u32* ptr = dst + (y + iy) * width + x;
          const u16* s = reinterpret_cast<const u16*>(src);
          // The values are passed to the pixel decoder without a byte swap.
          ptr[0] = DecodePixel_IA8(s[0]);
          ptr[1] = DecodePixel_IA8(s[1]);
          ptr[2] = DecodePixel_IA8(s[2]);
          ptr[3] = DecodePixel_IA8(s[3]);
        }
  }
  break;
  case GX_TF_C14X2:
    for (int y = 0; y < height; y += 4)
      for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
        for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
          DecodeBytes_C14X2(dst + (y + iy) * width + x,
                            reinterpret_cast<const u16*>(src + 8 * xStep), tlut, tlutfmt);
    break;
  case GX_TF_RGB565:
  {
    // Reference C implementation.
    for (int y = 0; y < height; y += 4)
      for (int x = 0; x < width; x += 4)
        for (int iy = 0; iy < 4; iy++, src += 8)
        {
          u32* ptr = dst + (y + iy) * width + x;
          const u16* s = reinterpret_cast<const u16*>(src);
          for (int j = 0; j < 4; j++)
            *ptr++ = DecodePixel_RGB565(Common::swap16(*s++));
        }
  }
  break;
  case GX_TF_RGB5A3:
  {
    // Reference C implementation:
    for (int y = 0; y < height; y += 4)
      for (int x = 0; x < width; x += 4)
        for (int iy = 0; iy < 4; iy++, src += 8)
          DecodeBytes_RGB5A3(dst + (y + iy) * width + x, reinterpret_cast<const u16*>(src));
  }
  break;
  case GX_TF_RGBA8:  // speed critical
  {
    // Reference C implementation.
    // Each 4x4 tile is 64 bytes: the second 16-value plane starts 16 u16s after the first.
    for (int y = 0; y < height; y += 4)
      for (int x = 0; x < width; x += 4)
      {
        const u16* tile = reinterpret_cast<const u16*>(src);
        for (int iy = 0; iy < 4; iy++)
          DecodeBytes_RGBA8(dst + (y + iy) * width + x, tile + 4 * iy, tile + 4 * iy + 16);
        src += 64;
      }
  }
  break;
  case GX_TF_CMPR:  // speed critical
    // The metroid games use this format almost exclusively.
    {
      // Each 8x8 tile is four consecutive DXT blocks: top-left, top-right, bottom-left,
      // bottom-right.
      for (int y = 0; y < height; y += 8)
      {
        for (int x = 0; x < width; x += 8)
        {
          DecodeDXTBlock(dst + y * width + x, reinterpret_cast<const DXTBlock*>(src), width);
          src += sizeof(DXTBlock);
          DecodeDXTBlock(dst + y * width + x + 4, reinterpret_cast<const DXTBlock*>(src), width);
          src += sizeof(DXTBlock);
          DecodeDXTBlock(dst + (y + 4) * width + x, reinterpret_cast<const DXTBlock*>(src), width);
          src += sizeof(DXTBlock);
          DecodeDXTBlock(dst + (y + 4) * width + x + 4, reinterpret_cast<const DXTBlock*>(src),
                         width);
          src += sizeof(DXTBlock);
        }
      }
      break;
    }
  }
}

File diff suppressed because it is too large Load diff

View file

@ -7,12 +7,12 @@
#include "VideoCommon/DataReader.h"
#include "VideoCommon/VertexLoader.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexLoaderUtils.h"
#include "VideoCommon/VertexLoader_Color.h"
#include "VideoCommon/VertexLoader_Normal.h"
#include "VideoCommon/VertexLoader_Position.h"
#include "VideoCommon/VertexLoader_TextCoord.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexLoaderUtils.h"
#include "VideoCommon/VideoCommon.h"
// This pointer is used as the source/dst for all fixed function loader calls
@ -21,310 +21,402 @@ u8* g_vertex_manager_write_ptr;
// Pipeline stage: reads one byte from the input stream, keeps its low 6 bits as the position
// matrix index and writes it to the output as a u32. For the first three vertices
// (loader->m_counter < 3) the index is also recorded in
// VertexLoaderManager::position_matrix_index.
static void PosMtx_ReadDirect_UByte(VertexLoader* loader)
{
  u32 posmtx = DataRead<u8>() & 0x3f;
  if (loader->m_counter < 3)
    VertexLoaderManager::position_matrix_index[loader->m_counter] = posmtx;
  DataWrite<u32>(posmtx);
  PRIM_LOG("posmtx: %d, ", posmtx);
}
// Pipeline stage: reads one byte from the input stream, stores its low 6 bits as the next
// texture matrix index in loader->m_curtexmtx and then advances loader->m_texmtxread.
// Nothing is written to the output here; the TexMtx_Write_* stages emit the stored value.
static void TexMtx_ReadDirect_UByte(VertexLoader* loader)
{
  loader->m_curtexmtx[loader->m_texmtxread] = DataRead<u8>() & 0x3f;

  PRIM_LOG("texmtx%d: %d, ", loader->m_texmtxread, loader->m_curtexmtx[loader->m_texmtxread]);
  loader->m_texmtxread++;
}
// Pipeline stage: writes the next stored texture matrix index as a single float and advances
// loader->m_texmtxwrite.
static void TexMtx_Write_Float(VertexLoader* loader)
{
  DataWrite(float(loader->m_curtexmtx[loader->m_texmtxwrite++]));
}
// Pipeline stage: writes two floats - a 0.f filler followed by the next stored texture matrix
// index - and advances loader->m_texmtxwrite.
static void TexMtx_Write_Float2(VertexLoader* loader)
{
  DataWrite(0.f);
  DataWrite(float(loader->m_curtexmtx[loader->m_texmtxwrite++]));
}
// Pipeline stage: writes three floats - two 0.f fillers followed by the next stored texture
// matrix index - and advances loader->m_texmtxwrite.
static void TexMtx_Write_Float3(VertexLoader* loader)
{
  DataWrite(0.f);
  DataWrite(0.f);
  DataWrite(float(loader->m_curtexmtx[loader->m_texmtxwrite++]));
}
// Pipeline stage: when loader->m_vertexSkip is set, rewinds the output write pointer by one
// native vertex stride and counts the vertex in loader->m_skippedVertices.
// Does nothing when the flag is clear.
static void SkipVertex(VertexLoader* loader)
{
  if (loader->m_vertexSkip)
  {
    // reset the output buffer
    g_vertex_manager_write_ptr -= loader->m_native_vtx_decl.stride;

    loader->m_skippedVertices++;
  }
}
// Builds a loader for the given vertex descriptor / attribute table pair.
// Initialises the normal loader, compiles the per-vertex pipeline, then precomputes the
// fixed-point scale factors (1 / 2^Frac) for the position and the eight texture coordinate sets.
VertexLoader::VertexLoader(const TVtxDesc& vtx_desc, const VAT& vtx_attr)
    : VertexLoaderBase(vtx_desc, vtx_attr)
{
  VertexLoader_Normal::Init();

  CompileVertexTranslator();

  // generate frac factors
  m_posScale = 1.0f / (1U << m_VtxAttr.PosFrac);
  for (int i = 0; i < 8; i++)
    m_tcScale[i] = 1.0f / (1U << m_VtxAttr.texCoord[i].Frac);
}
void VertexLoader::CompileVertexTranslator()
{
m_VertexSize = 0;
const TVtxAttr &vtx_attr = m_VtxAttr;
m_VertexSize = 0;
const TVtxAttr& vtx_attr = m_VtxAttr;
// Reset pipeline
m_numPipelineStages = 0;
// Reset pipeline
m_numPipelineStages = 0;
// Colors
const u64 col[2] = { m_VtxDesc.Color0, m_VtxDesc.Color1 };
// TextureCoord
const u64 tc[8] = {
m_VtxDesc.Tex0Coord, m_VtxDesc.Tex1Coord, m_VtxDesc.Tex2Coord, m_VtxDesc.Tex3Coord,
m_VtxDesc.Tex4Coord, m_VtxDesc.Tex5Coord, m_VtxDesc.Tex6Coord, m_VtxDesc.Tex7Coord
};
// Colors
const u64 col[2] = {m_VtxDesc.Color0, m_VtxDesc.Color1};
// TextureCoord
const u64 tc[8] = {m_VtxDesc.Tex0Coord, m_VtxDesc.Tex1Coord, m_VtxDesc.Tex2Coord,
m_VtxDesc.Tex3Coord, m_VtxDesc.Tex4Coord, m_VtxDesc.Tex5Coord,
m_VtxDesc.Tex6Coord, m_VtxDesc.Tex7Coord};
u32 components = 0;
u32 components = 0;
// Position in pc vertex format.
int nat_offset = 0;
// Position in pc vertex format.
int nat_offset = 0;
// Position Matrix Index
if (m_VtxDesc.PosMatIdx)
{
WriteCall(PosMtx_ReadDirect_UByte);
components |= VB_HAS_POSMTXIDX;
m_native_vtx_decl.posmtx.components = 4;
m_native_vtx_decl.posmtx.enable = true;
m_native_vtx_decl.posmtx.offset = nat_offset;
m_native_vtx_decl.posmtx.type = VAR_UNSIGNED_BYTE;
m_native_vtx_decl.posmtx.integer = true;
nat_offset += 4;
m_VertexSize += 1;
}
// Position Matrix Index
if (m_VtxDesc.PosMatIdx)
{
WriteCall(PosMtx_ReadDirect_UByte);
components |= VB_HAS_POSMTXIDX;
m_native_vtx_decl.posmtx.components = 4;
m_native_vtx_decl.posmtx.enable = true;
m_native_vtx_decl.posmtx.offset = nat_offset;
m_native_vtx_decl.posmtx.type = VAR_UNSIGNED_BYTE;
m_native_vtx_decl.posmtx.integer = true;
nat_offset += 4;
m_VertexSize += 1;
}
if (m_VtxDesc.Tex0MatIdx) { m_VertexSize += 1; components |= VB_HAS_TEXMTXIDX0; WriteCall(TexMtx_ReadDirect_UByte); }
if (m_VtxDesc.Tex1MatIdx) { m_VertexSize += 1; components |= VB_HAS_TEXMTXIDX1; WriteCall(TexMtx_ReadDirect_UByte); }
if (m_VtxDesc.Tex2MatIdx) { m_VertexSize += 1; components |= VB_HAS_TEXMTXIDX2; WriteCall(TexMtx_ReadDirect_UByte); }
if (m_VtxDesc.Tex3MatIdx) { m_VertexSize += 1; components |= VB_HAS_TEXMTXIDX3; WriteCall(TexMtx_ReadDirect_UByte); }
if (m_VtxDesc.Tex4MatIdx) { m_VertexSize += 1; components |= VB_HAS_TEXMTXIDX4; WriteCall(TexMtx_ReadDirect_UByte); }
if (m_VtxDesc.Tex5MatIdx) { m_VertexSize += 1; components |= VB_HAS_TEXMTXIDX5; WriteCall(TexMtx_ReadDirect_UByte); }
if (m_VtxDesc.Tex6MatIdx) { m_VertexSize += 1; components |= VB_HAS_TEXMTXIDX6; WriteCall(TexMtx_ReadDirect_UByte); }
if (m_VtxDesc.Tex7MatIdx) { m_VertexSize += 1; components |= VB_HAS_TEXMTXIDX7; WriteCall(TexMtx_ReadDirect_UByte); }
if (m_VtxDesc.Tex0MatIdx)
{
m_VertexSize += 1;
components |= VB_HAS_TEXMTXIDX0;
WriteCall(TexMtx_ReadDirect_UByte);
}
if (m_VtxDesc.Tex1MatIdx)
{
m_VertexSize += 1;
components |= VB_HAS_TEXMTXIDX1;
WriteCall(TexMtx_ReadDirect_UByte);
}
if (m_VtxDesc.Tex2MatIdx)
{
m_VertexSize += 1;
components |= VB_HAS_TEXMTXIDX2;
WriteCall(TexMtx_ReadDirect_UByte);
}
if (m_VtxDesc.Tex3MatIdx)
{
m_VertexSize += 1;
components |= VB_HAS_TEXMTXIDX3;
WriteCall(TexMtx_ReadDirect_UByte);
}
if (m_VtxDesc.Tex4MatIdx)
{
m_VertexSize += 1;
components |= VB_HAS_TEXMTXIDX4;
WriteCall(TexMtx_ReadDirect_UByte);
}
if (m_VtxDesc.Tex5MatIdx)
{
m_VertexSize += 1;
components |= VB_HAS_TEXMTXIDX5;
WriteCall(TexMtx_ReadDirect_UByte);
}
if (m_VtxDesc.Tex6MatIdx)
{
m_VertexSize += 1;
components |= VB_HAS_TEXMTXIDX6;
WriteCall(TexMtx_ReadDirect_UByte);
}
if (m_VtxDesc.Tex7MatIdx)
{
m_VertexSize += 1;
components |= VB_HAS_TEXMTXIDX7;
WriteCall(TexMtx_ReadDirect_UByte);
}
// Write vertex position loader
WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements));
// Write vertex position loader
WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat,
m_VtxAttr.PosElements));
m_VertexSize += VertexLoader_Position::GetSize(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements);
int pos_elements = m_VtxAttr.PosElements + 2;
m_native_vtx_decl.position.components = pos_elements;
m_native_vtx_decl.position.enable = true;
m_native_vtx_decl.position.offset = nat_offset;
m_native_vtx_decl.position.type = VAR_FLOAT;
m_native_vtx_decl.position.integer = false;
nat_offset += pos_elements * sizeof(float);
m_VertexSize += VertexLoader_Position::GetSize(m_VtxDesc.Position, m_VtxAttr.PosFormat,
m_VtxAttr.PosElements);
int pos_elements = m_VtxAttr.PosElements + 2;
m_native_vtx_decl.position.components = pos_elements;
m_native_vtx_decl.position.enable = true;
m_native_vtx_decl.position.offset = nat_offset;
m_native_vtx_decl.position.type = VAR_FLOAT;
m_native_vtx_decl.position.integer = false;
nat_offset += pos_elements * sizeof(float);
// Normals
if (m_VtxDesc.Normal != NOT_PRESENT)
{
m_VertexSize += VertexLoader_Normal::GetSize(m_VtxDesc.Normal,
m_VtxAttr.NormalFormat, m_VtxAttr.NormalElements, m_VtxAttr.NormalIndex3);
// Normals
if (m_VtxDesc.Normal != NOT_PRESENT)
{
m_VertexSize += VertexLoader_Normal::GetSize(m_VtxDesc.Normal, m_VtxAttr.NormalFormat,
m_VtxAttr.NormalElements, m_VtxAttr.NormalIndex3);
TPipelineFunction pFunc = VertexLoader_Normal::GetFunction(m_VtxDesc.Normal,
m_VtxAttr.NormalFormat, m_VtxAttr.NormalElements, m_VtxAttr.NormalIndex3);
TPipelineFunction pFunc = VertexLoader_Normal::GetFunction(
m_VtxDesc.Normal, m_VtxAttr.NormalFormat, m_VtxAttr.NormalElements, m_VtxAttr.NormalIndex3);
if (pFunc == nullptr)
{
PanicAlert("VertexLoader_Normal::GetFunction(%i %i %i %i) returned zero!",
(u32)m_VtxDesc.Normal, m_VtxAttr.NormalFormat,
m_VtxAttr.NormalElements, m_VtxAttr.NormalIndex3);
}
WriteCall(pFunc);
if (pFunc == nullptr)
{
PanicAlert("VertexLoader_Normal::GetFunction(%i %i %i %i) returned zero!",
(u32)m_VtxDesc.Normal, m_VtxAttr.NormalFormat, m_VtxAttr.NormalElements,
m_VtxAttr.NormalIndex3);
}
WriteCall(pFunc);
for (int i = 0; i < (vtx_attr.NormalElements ? 3 : 1); i++)
{
m_native_vtx_decl.normals[i].components = 3;
m_native_vtx_decl.normals[i].enable = true;
m_native_vtx_decl.normals[i].offset = nat_offset;
m_native_vtx_decl.normals[i].type = VAR_FLOAT;
m_native_vtx_decl.normals[i].integer = false;
nat_offset += 12;
}
for (int i = 0; i < (vtx_attr.NormalElements ? 3 : 1); i++)
{
m_native_vtx_decl.normals[i].components = 3;
m_native_vtx_decl.normals[i].enable = true;
m_native_vtx_decl.normals[i].offset = nat_offset;
m_native_vtx_decl.normals[i].type = VAR_FLOAT;
m_native_vtx_decl.normals[i].integer = false;
nat_offset += 12;
}
components |= VB_HAS_NRM0;
if (m_VtxAttr.NormalElements == 1)
components |= VB_HAS_NRM1 | VB_HAS_NRM2;
}
components |= VB_HAS_NRM0;
if (m_VtxAttr.NormalElements == 1)
components |= VB_HAS_NRM1 | VB_HAS_NRM2;
}
for (int i = 0; i < 2; i++)
{
m_native_vtx_decl.colors[i].components = 4;
m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE;
m_native_vtx_decl.colors[i].integer = false;
switch (col[i])
{
case NOT_PRESENT:
break;
case DIRECT:
switch (m_VtxAttr.color[i].Comp)
{
case FORMAT_16B_565: m_VertexSize += 2; WriteCall(Color_ReadDirect_16b_565); break;
case FORMAT_24B_888: m_VertexSize += 3; WriteCall(Color_ReadDirect_24b_888); break;
case FORMAT_32B_888x: m_VertexSize += 4; WriteCall(Color_ReadDirect_32b_888x); break;
case FORMAT_16B_4444: m_VertexSize += 2; WriteCall(Color_ReadDirect_16b_4444); break;
case FORMAT_24B_6666: m_VertexSize += 3; WriteCall(Color_ReadDirect_24b_6666); break;
case FORMAT_32B_8888: m_VertexSize += 4; WriteCall(Color_ReadDirect_32b_8888); break;
default: _assert_(0); break;
}
break;
case INDEX8:
m_VertexSize += 1;
switch (m_VtxAttr.color[i].Comp)
{
case FORMAT_16B_565: WriteCall(Color_ReadIndex8_16b_565); break;
case FORMAT_24B_888: WriteCall(Color_ReadIndex8_24b_888); break;
case FORMAT_32B_888x: WriteCall(Color_ReadIndex8_32b_888x); break;
case FORMAT_16B_4444: WriteCall(Color_ReadIndex8_16b_4444); break;
case FORMAT_24B_6666: WriteCall(Color_ReadIndex8_24b_6666); break;
case FORMAT_32B_8888: WriteCall(Color_ReadIndex8_32b_8888); break;
default: _assert_(0); break;
}
break;
case INDEX16:
m_VertexSize += 2;
switch (m_VtxAttr.color[i].Comp)
{
case FORMAT_16B_565: WriteCall(Color_ReadIndex16_16b_565); break;
case FORMAT_24B_888: WriteCall(Color_ReadIndex16_24b_888); break;
case FORMAT_32B_888x: WriteCall(Color_ReadIndex16_32b_888x); break;
case FORMAT_16B_4444: WriteCall(Color_ReadIndex16_16b_4444); break;
case FORMAT_24B_6666: WriteCall(Color_ReadIndex16_24b_6666); break;
case FORMAT_32B_8888: WriteCall(Color_ReadIndex16_32b_8888); break;
default: _assert_(0); break;
}
break;
}
// Common for the three bottom cases
if (col[i] != NOT_PRESENT)
{
components |= VB_HAS_COL0 << i;
m_native_vtx_decl.colors[i].offset = nat_offset;
m_native_vtx_decl.colors[i].enable = true;
nat_offset += 4;
}
}
for (int i = 0; i < 2; i++)
{
m_native_vtx_decl.colors[i].components = 4;
m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE;
m_native_vtx_decl.colors[i].integer = false;
switch (col[i])
{
case NOT_PRESENT:
break;
case DIRECT:
switch (m_VtxAttr.color[i].Comp)
{
case FORMAT_16B_565:
m_VertexSize += 2;
WriteCall(Color_ReadDirect_16b_565);
break;
case FORMAT_24B_888:
m_VertexSize += 3;
WriteCall(Color_ReadDirect_24b_888);
break;
case FORMAT_32B_888x:
m_VertexSize += 4;
WriteCall(Color_ReadDirect_32b_888x);
break;
case FORMAT_16B_4444:
m_VertexSize += 2;
WriteCall(Color_ReadDirect_16b_4444);
break;
case FORMAT_24B_6666:
m_VertexSize += 3;
WriteCall(Color_ReadDirect_24b_6666);
break;
case FORMAT_32B_8888:
m_VertexSize += 4;
WriteCall(Color_ReadDirect_32b_8888);
break;
default:
_assert_(0);
break;
}
break;
case INDEX8:
m_VertexSize += 1;
switch (m_VtxAttr.color[i].Comp)
{
case FORMAT_16B_565:
WriteCall(Color_ReadIndex8_16b_565);
break;
case FORMAT_24B_888:
WriteCall(Color_ReadIndex8_24b_888);
break;
case FORMAT_32B_888x:
WriteCall(Color_ReadIndex8_32b_888x);
break;
case FORMAT_16B_4444:
WriteCall(Color_ReadIndex8_16b_4444);
break;
case FORMAT_24B_6666:
WriteCall(Color_ReadIndex8_24b_6666);
break;
case FORMAT_32B_8888:
WriteCall(Color_ReadIndex8_32b_8888);
break;
default:
_assert_(0);
break;
}
break;
case INDEX16:
m_VertexSize += 2;
switch (m_VtxAttr.color[i].Comp)
{
case FORMAT_16B_565:
WriteCall(Color_ReadIndex16_16b_565);
break;
case FORMAT_24B_888:
WriteCall(Color_ReadIndex16_24b_888);
break;
case FORMAT_32B_888x:
WriteCall(Color_ReadIndex16_32b_888x);
break;
case FORMAT_16B_4444:
WriteCall(Color_ReadIndex16_16b_4444);
break;
case FORMAT_24B_6666:
WriteCall(Color_ReadIndex16_24b_6666);
break;
case FORMAT_32B_8888:
WriteCall(Color_ReadIndex16_32b_8888);
break;
default:
_assert_(0);
break;
}
break;
}
// Common for the three bottom cases
if (col[i] != NOT_PRESENT)
{
components |= VB_HAS_COL0 << i;
m_native_vtx_decl.colors[i].offset = nat_offset;
m_native_vtx_decl.colors[i].enable = true;
nat_offset += 4;
}
}
// Texture matrix indices (remove if corresponding texture coordinate isn't enabled)
for (int i = 0; i < 8; i++)
{
m_native_vtx_decl.texcoords[i].offset = nat_offset;
m_native_vtx_decl.texcoords[i].type = VAR_FLOAT;
m_native_vtx_decl.texcoords[i].integer = false;
// Texture matrix indices (remove if corresponding texture coordinate isn't enabled)
for (int i = 0; i < 8; i++)
{
m_native_vtx_decl.texcoords[i].offset = nat_offset;
m_native_vtx_decl.texcoords[i].type = VAR_FLOAT;
m_native_vtx_decl.texcoords[i].integer = false;
const int format = m_VtxAttr.texCoord[i].Format;
const int elements = m_VtxAttr.texCoord[i].Elements;
const int format = m_VtxAttr.texCoord[i].Format;
const int elements = m_VtxAttr.texCoord[i].Elements;
if (tc[i] != NOT_PRESENT)
{
_assert_msg_(VIDEO, DIRECT <= tc[i] && tc[i] <= INDEX16, "Invalid texture coordinates!\n(tc[i] = %d)", (u32)tc[i]);
_assert_msg_(VIDEO, FORMAT_UBYTE <= format && format <= FORMAT_FLOAT, "Invalid texture coordinates format!\n(format = %d)", format);
_assert_msg_(VIDEO, 0 <= elements && elements <= 1, "Invalid number of texture coordinates elements!\n(elements = %d)", elements);
if (tc[i] != NOT_PRESENT)
{
_assert_msg_(VIDEO, DIRECT <= tc[i] && tc[i] <= INDEX16,
"Invalid texture coordinates!\n(tc[i] = %d)", (u32)tc[i]);
_assert_msg_(VIDEO, FORMAT_UBYTE <= format && format <= FORMAT_FLOAT,
"Invalid texture coordinates format!\n(format = %d)", format);
_assert_msg_(VIDEO, 0 <= elements && elements <= 1,
"Invalid number of texture coordinates elements!\n(elements = %d)", elements);
components |= VB_HAS_UV0 << i;
WriteCall(VertexLoader_TextCoord::GetFunction(tc[i], format, elements));
m_VertexSize += VertexLoader_TextCoord::GetSize(tc[i], format, elements);
}
components |= VB_HAS_UV0 << i;
WriteCall(VertexLoader_TextCoord::GetFunction(tc[i], format, elements));
m_VertexSize += VertexLoader_TextCoord::GetSize(tc[i], format, elements);
}
if (components & (VB_HAS_TEXMTXIDX0 << i))
{
m_native_vtx_decl.texcoords[i].enable = true;
if (tc[i] != NOT_PRESENT)
{
// if texmtx is included, texcoord will always be 3 floats, z will be the texmtx index
m_native_vtx_decl.texcoords[i].components = 3;
nat_offset += 12;
WriteCall(m_VtxAttr.texCoord[i].Elements ? TexMtx_Write_Float : TexMtx_Write_Float2);
}
else
{
m_native_vtx_decl.texcoords[i].components = 3;
nat_offset += 12;
WriteCall(TexMtx_Write_Float3);
}
}
else
{
if (tc[i] != NOT_PRESENT)
{
m_native_vtx_decl.texcoords[i].enable = true;
m_native_vtx_decl.texcoords[i].components = vtx_attr.texCoord[i].Elements ? 2 : 1;
nat_offset += 4 * (vtx_attr.texCoord[i].Elements ? 2 : 1);
}
}
if (components & (VB_HAS_TEXMTXIDX0 << i))
{
m_native_vtx_decl.texcoords[i].enable = true;
if (tc[i] != NOT_PRESENT)
{
// if texmtx is included, texcoord will always be 3 floats, z will be the texmtx index
m_native_vtx_decl.texcoords[i].components = 3;
nat_offset += 12;
WriteCall(m_VtxAttr.texCoord[i].Elements ? TexMtx_Write_Float : TexMtx_Write_Float2);
}
else
{
m_native_vtx_decl.texcoords[i].components = 3;
nat_offset += 12;
WriteCall(TexMtx_Write_Float3);
}
}
else
{
if (tc[i] != NOT_PRESENT)
{
m_native_vtx_decl.texcoords[i].enable = true;
m_native_vtx_decl.texcoords[i].components = vtx_attr.texCoord[i].Elements ? 2 : 1;
nat_offset += 4 * (vtx_attr.texCoord[i].Elements ? 2 : 1);
}
}
if (tc[i] == NOT_PRESENT)
{
// if there's more tex coords later, have to write a dummy call
int j = i + 1;
for (; j < 8; ++j)
{
if (tc[j] != NOT_PRESENT)
{
WriteCall(VertexLoader_TextCoord::GetDummyFunction()); // important to get indices right!
break;
}
}
// tricky!
if (j == 8 && !((components & VB_HAS_TEXMTXIDXALL) & (VB_HAS_TEXMTXIDXALL << (i + 1))))
{
// no more tex coords and tex matrices, so exit loop
break;
}
}
}
if (tc[i] == NOT_PRESENT)
{
// if there's more tex coords later, have to write a dummy call
int j = i + 1;
for (; j < 8; ++j)
{
if (tc[j] != NOT_PRESENT)
{
WriteCall(VertexLoader_TextCoord::GetDummyFunction()); // important to get indices right!
break;
}
}
// tricky!
if (j == 8 && !((components & VB_HAS_TEXMTXIDXALL) & (VB_HAS_TEXMTXIDXALL << (i + 1))))
{
// no more tex coords and tex matrices, so exit loop
break;
}
}
}
// indexed position formats may skip the vertex
if (m_VtxDesc.Position & 2)
{
WriteCall(SkipVertex);
}
// indexed position formats may skip the vertex
if (m_VtxDesc.Position & 2)
{
WriteCall(SkipVertex);
}
m_native_components = components;
m_native_vtx_decl.stride = nat_offset;
m_native_components = components;
m_native_vtx_decl.stride = nat_offset;
}
// Stores `func` in the next free slot of m_PipelineStages and advances
// m_numPipelineStages.
// NOTE(review): this text is a diff rendering, so the statement is listed once per
// side of the reformat (old, new).
// NOTE(review): no bounds check against the stage array is visible here - confirm
// that callers cannot emit more stages than the array holds.
void VertexLoader::WriteCall(TPipelineFunction func)
{
m_PipelineStages[m_numPipelineStages++] = func;
m_PipelineStages[m_numPipelineStages++] = func;
}
// Runs every compiled pipeline stage once per vertex, for `count` vertices.
// The read/write cursors are published through the globals g_video_buffer_read_ptr
// (from `src`) and g_vertex_manager_write_ptr (from `dst`).
// Returns `count` minus m_skippedVertices, which is reset to 0 before the loop.
// NOTE(review): diff rendering - each statement below appears once per side of the
// reformat.
int VertexLoader::RunVertices(DataReader src, DataReader dst, int count)
{
g_vertex_manager_write_ptr = dst.GetPointer();
g_video_buffer_read_ptr = src.GetPointer();
g_vertex_manager_write_ptr = dst.GetPointer();
g_video_buffer_read_ptr = src.GetPointer();
// The statistic counts requested vertices, including any that end up skipped.
m_numLoadedVertices += count;
m_skippedVertices = 0;
m_numLoadedVertices += count;
m_skippedVertices = 0;
// m_counter counts down from count - 1 to 0; it is a member, not a local.
for (m_counter = count - 1; m_counter >= 0; m_counter--)
{
// Per-vertex state is reset before the stages run.
m_tcIndex = 0;
m_colIndex = 0;
m_texmtxwrite = m_texmtxread = 0;
for (int i = 0; i < m_numPipelineStages; i++)
m_PipelineStages[i](this);
PRIM_LOG("\n");
}
for (m_counter = count - 1; m_counter >= 0; m_counter--)
{
m_tcIndex = 0;
m_colIndex = 0;
m_texmtxwrite = m_texmtxread = 0;
for (int i = 0; i < m_numPipelineStages; i++)
m_PipelineStages[i](this);
PRIM_LOG("\n");
}
return count - m_skippedVertices;
return count - m_skippedVertices;
}

View file

@ -19,33 +19,33 @@ typedef void (*TPipelineFunction)(VertexLoader* loader);
// Vertex loader that runs a list of TPipelineFunction stages for every vertex.
// It reports itself as "OldLoader" and always claims to be initialized.
// NOTE(review): diff rendering - each member below is listed once per side of the
// reformat.
class VertexLoader : public VertexLoaderBase
{
public:
VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr);
VertexLoader(const TVtxDesc& vtx_desc, const VAT& vtx_attr);
int RunVertices(DataReader src, DataReader dst, int count) override;
std::string GetName() const override { return "OldLoader"; }
bool IsInitialized() override { return true; } // This vertex loader supports all formats
int RunVertices(DataReader src, DataReader dst, int count) override;
std::string GetName() const override { return "OldLoader"; }
bool IsInitialized() override { return true; } // This vertex loader supports all formats
// They are used for the communication with the loader functions
float m_posScale;
float m_tcScale[8];
int m_tcIndex;
int m_colIndex;
// They are used for the communication with the loader functions
float m_posScale;
float m_tcScale[8];
int m_tcIndex;
int m_colIndex;
// Matrix components are first in GC format but later in PC format - we need to store it temporarily
// when decoding each vertex.
u8 m_curtexmtx[8];
int m_texmtxwrite;
int m_texmtxread;
bool m_vertexSkip;
int m_skippedVertices;
int m_counter;
// Matrix components are first in GC format but later in PC format - we need to store it
// temporarily
// when decoding each vertex.
u8 m_curtexmtx[8];
int m_texmtxwrite;
int m_texmtxread;
bool m_vertexSkip;
int m_skippedVertices;
int m_counter;
private:
// Pipeline.
// Fixed-capacity list of stages; m_numPipelineStages is the number of used slots.
TPipelineFunction m_PipelineStages[64]; // TODO - figure out real max. it's lower.
int m_numPipelineStages;
// Pipeline.
TPipelineFunction m_PipelineStages[64]; // TODO - figure out real max. it's lower.
int m_numPipelineStages;
void CompileVertexTranslator();
void CompileVertexTranslator();
// Appends one stage to m_PipelineStages.
void WriteCall(TPipelineFunction);
void WriteCall(TPipelineFunction);
};

File diff suppressed because it is too large Load diff

View file

@ -13,21 +13,22 @@ class DataReader;
// Vertex loader that is both a VertexLoaderBase and an Arm64Gen::ARM64CodeBlock.
// It reports itself as "VertexLoaderARM64" and always claims to be initialized.
// NOTE(review): presumably emits native ARM64 code for the vertex format - the
// implementation is not visible here, confirm in the .cpp.
// NOTE(review): diff rendering - each member below is listed once per side of the
// reformat.
class VertexLoaderARM64 : public VertexLoaderBase, public Arm64Gen::ARM64CodeBlock
{
public:
VertexLoaderARM64(const TVtxDesc& vtx_desc, const VAT& vtx_att);
VertexLoaderARM64(const TVtxDesc& vtx_desc, const VAT& vtx_att);
protected:
std::string GetName() const override { return "VertexLoaderARM64"; }
bool IsInitialized() override { return true; }
int RunVertices(DataReader src, DataReader dst, int count) override;
std::string GetName() const override { return "VertexLoaderARM64"; }
bool IsInitialized() override { return true; }
int RunVertices(DataReader src, DataReader dst, int count) override;
private:
// Both offsets start at 0.
u32 m_src_ofs = 0;
u32 m_dst_ofs = 0;
Arm64Gen::FixupBranch m_skip_vertex;
Arm64Gen::ARM64FloatEmitter m_float_emit;
void GetVertexAddr(int array, u64 attribute, Arm64Gen::ARM64Reg reg);
s32 GetAddressImm(int array, u64 attribute, Arm64Gen::ARM64Reg reg, u32 align);
int ReadVertex(u64 attribute, int format, int count_in, int count_out, bool dequantize, u8 scaling_exponent, AttributeFormat* native_format, s32 offset = -1);
void ReadColor(u64 attribute, int format, s32 offset);
void GenerateVertexLoader();
u32 m_src_ofs = 0;
u32 m_dst_ofs = 0;
Arm64Gen::FixupBranch m_skip_vertex;
Arm64Gen::ARM64FloatEmitter m_float_emit;
void GetVertexAddr(int array, u64 attribute, Arm64Gen::ARM64Reg reg);
s32 GetAddressImm(int array, u64 attribute, Arm64Gen::ARM64Reg reg, u32 align);
int ReadVertex(u64 attribute, int format, int count_in, int count_out, bool dequantize,
u8 scaling_exponent, AttributeFormat* native_format, s32 offset = -1);
void ReadColor(u64 attribute, int format, s32 offset);
void GenerateVertexLoader();
};

View file

@ -9,9 +9,9 @@
#include <vector>
#include "Common/CommonTypes.h"
#include "Common/Logging/Log.h"
#include "Common/MsgHandler.h"
#include "Common/StringUtil.h"
#include "Common/Logging/Log.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/VertexLoader.h"
@ -23,210 +23,211 @@
#include "VideoCommon/VertexLoaderARM64.h"
#endif
// Puts the shared loader state into a known empty state, then records the format:
// counters and sizes are zeroed, the native declaration is memset to 0, the VAT is
// decoded into m_VtxAttr via SetVAT, and the raw descriptor and VAT are kept in
// m_VtxDesc and m_vat.
// NOTE(review): diff rendering - the signature and each statement appear once per
// side of the reformat.
VertexLoaderBase::VertexLoaderBase(const TVtxDesc &vtx_desc, const VAT &vtx_attr)
VertexLoaderBase::VertexLoaderBase(const TVtxDesc& vtx_desc, const VAT& vtx_attr)
{
m_numLoadedVertices = 0;
m_VertexSize = 0;
m_native_vertex_format = nullptr;
m_native_components = 0;
memset(&m_native_vtx_decl, 0, sizeof(m_native_vtx_decl));
m_numLoadedVertices = 0;
m_VertexSize = 0;
m_native_vertex_format = nullptr;
m_native_components = 0;
memset(&m_native_vtx_decl, 0, sizeof(m_native_vtx_decl));
SetVAT(vtx_attr);
m_VtxDesc = vtx_desc;
m_vat = vtx_attr;
SetVAT(vtx_attr);
m_VtxDesc = vtx_desc;
m_vat = vtx_attr;
}
// Copies the fields of the three VAT groups (vat.g0, vat.g1, vat.g2) into the flat
// m_VtxAttr structure: position, normal, two colours and eight texture coordinates.
// NOTE(review): diff rendering - each assignment appears once per side of the
// reformat.
void VertexLoaderBase::SetVAT(const VAT& vat)
{
// Group 0: position, normal, both colours, texcoord 0 and the two flags.
m_VtxAttr.PosElements = vat.g0.PosElements;
m_VtxAttr.PosFormat = vat.g0.PosFormat;
m_VtxAttr.PosFrac = vat.g0.PosFrac;
m_VtxAttr.NormalElements = vat.g0.NormalElements;
m_VtxAttr.NormalFormat = vat.g0.NormalFormat;
m_VtxAttr.color[0].Elements = vat.g0.Color0Elements;
m_VtxAttr.color[0].Comp = vat.g0.Color0Comp;
m_VtxAttr.color[1].Elements = vat.g0.Color1Elements;
m_VtxAttr.color[1].Comp = vat.g0.Color1Comp;
m_VtxAttr.texCoord[0].Elements = vat.g0.Tex0CoordElements;
m_VtxAttr.texCoord[0].Format = vat.g0.Tex0CoordFormat;
m_VtxAttr.texCoord[0].Frac = vat.g0.Tex0Frac;
m_VtxAttr.ByteDequant = vat.g0.ByteDequant;
m_VtxAttr.NormalIndex3 = vat.g0.NormalIndex3;
m_VtxAttr.PosElements = vat.g0.PosElements;
m_VtxAttr.PosFormat = vat.g0.PosFormat;
m_VtxAttr.PosFrac = vat.g0.PosFrac;
m_VtxAttr.NormalElements = vat.g0.NormalElements;
m_VtxAttr.NormalFormat = vat.g0.NormalFormat;
m_VtxAttr.color[0].Elements = vat.g0.Color0Elements;
m_VtxAttr.color[0].Comp = vat.g0.Color0Comp;
m_VtxAttr.color[1].Elements = vat.g0.Color1Elements;
m_VtxAttr.color[1].Comp = vat.g0.Color1Comp;
m_VtxAttr.texCoord[0].Elements = vat.g0.Tex0CoordElements;
m_VtxAttr.texCoord[0].Format = vat.g0.Tex0CoordFormat;
m_VtxAttr.texCoord[0].Frac = vat.g0.Tex0Frac;
m_VtxAttr.ByteDequant = vat.g0.ByteDequant;
m_VtxAttr.NormalIndex3 = vat.g0.NormalIndex3;
// Group 1: texcoords 1-3, plus the Elements/Format half of texcoord 4.
m_VtxAttr.texCoord[1].Elements = vat.g1.Tex1CoordElements;
m_VtxAttr.texCoord[1].Format = vat.g1.Tex1CoordFormat;
m_VtxAttr.texCoord[1].Frac = vat.g1.Tex1Frac;
m_VtxAttr.texCoord[2].Elements = vat.g1.Tex2CoordElements;
m_VtxAttr.texCoord[2].Format = vat.g1.Tex2CoordFormat;
m_VtxAttr.texCoord[2].Frac = vat.g1.Tex2Frac;
m_VtxAttr.texCoord[3].Elements = vat.g1.Tex3CoordElements;
m_VtxAttr.texCoord[3].Format = vat.g1.Tex3CoordFormat;
m_VtxAttr.texCoord[3].Frac = vat.g1.Tex3Frac;
m_VtxAttr.texCoord[4].Elements = vat.g1.Tex4CoordElements;
m_VtxAttr.texCoord[4].Format = vat.g1.Tex4CoordFormat;
m_VtxAttr.texCoord[1].Elements = vat.g1.Tex1CoordElements;
m_VtxAttr.texCoord[1].Format = vat.g1.Tex1CoordFormat;
m_VtxAttr.texCoord[1].Frac = vat.g1.Tex1Frac;
m_VtxAttr.texCoord[2].Elements = vat.g1.Tex2CoordElements;
m_VtxAttr.texCoord[2].Format = vat.g1.Tex2CoordFormat;
m_VtxAttr.texCoord[2].Frac = vat.g1.Tex2Frac;
m_VtxAttr.texCoord[3].Elements = vat.g1.Tex3CoordElements;
m_VtxAttr.texCoord[3].Format = vat.g1.Tex3CoordFormat;
m_VtxAttr.texCoord[3].Frac = vat.g1.Tex3Frac;
m_VtxAttr.texCoord[4].Elements = vat.g1.Tex4CoordElements;
m_VtxAttr.texCoord[4].Format = vat.g1.Tex4CoordFormat;
// Group 2: the Frac half of texcoord 4 (it is read from g2, not g1), then
// texcoords 5-7.
m_VtxAttr.texCoord[4].Frac = vat.g2.Tex4Frac;
m_VtxAttr.texCoord[5].Elements = vat.g2.Tex5CoordElements;
m_VtxAttr.texCoord[5].Format = vat.g2.Tex5CoordFormat;
m_VtxAttr.texCoord[5].Frac = vat.g2.Tex5Frac;
m_VtxAttr.texCoord[6].Elements = vat.g2.Tex6CoordElements;
m_VtxAttr.texCoord[6].Format = vat.g2.Tex6CoordFormat;
m_VtxAttr.texCoord[6].Frac = vat.g2.Tex6Frac;
m_VtxAttr.texCoord[7].Elements = vat.g2.Tex7CoordElements;
m_VtxAttr.texCoord[7].Format = vat.g2.Tex7CoordFormat;
m_VtxAttr.texCoord[7].Frac = vat.g2.Tex7Frac;
m_VtxAttr.texCoord[4].Frac = vat.g2.Tex4Frac;
m_VtxAttr.texCoord[5].Elements = vat.g2.Tex5CoordElements;
m_VtxAttr.texCoord[5].Format = vat.g2.Tex5CoordFormat;
m_VtxAttr.texCoord[5].Frac = vat.g2.Tex5Frac;
m_VtxAttr.texCoord[6].Elements = vat.g2.Tex6CoordElements;
m_VtxAttr.texCoord[6].Format = vat.g2.Tex6CoordFormat;
m_VtxAttr.texCoord[6].Frac = vat.g2.Tex6Frac;
m_VtxAttr.texCoord[7].Elements = vat.g2.Tex7CoordElements;
m_VtxAttr.texCoord[7].Format = vat.g2.Tex7CoordFormat;
m_VtxAttr.texCoord[7].Frac = vat.g2.Tex7Frac;
};
// Appends a one-line, human-readable summary of this loader to `*dest`: the loader
// name, raw vertex size, position/normal/colour/texcoord layout and the running
// count of loaded vertices.
// The three lookup tables translate descriptor and format field values into short
// labels; "Inv" fills the slots that have no name.
// NOTE(review): diff rendering - the signature and each statement appear once per
// side of the reformat.
void VertexLoaderBase::AppendToString(std::string *dest) const
void VertexLoaderBase::AppendToString(std::string* dest) const
{
dest->reserve(250);
dest->reserve(250);
dest->append(GetName());
dest->append(": ");
dest->append(GetName());
dest->append(": ");
static const char *posMode[4] = {
"Inv",
"Dir", "I8", "I16",
};
static const char *posFormats[8] = {
"u8", "s8", "u16", "s16", "flt",
"Inv", "Inv", "Inv",
};
static const char *colorFormat[8] = {
"565", "888", "888x", "4444", "6666", "8888",
"Inv", "Inv",
};
static const char* posMode[4] = {
"Inv", "Dir", "I8", "I16",
};
static const char* posFormats[8] = {
"u8", "s8", "u16", "s16", "flt", "Inv", "Inv", "Inv",
};
static const char* colorFormat[8] = {
"565", "888", "888x", "4444", "6666", "8888", "Inv", "Inv",
};
// PosElements selects between the printed values 3 and 2.
dest->append(StringFromFormat("%ib skin: %i P: %i %s-%s ",
m_VertexSize, (u32)m_VtxDesc.PosMatIdx,
m_VtxAttr.PosElements ? 3 : 2, posMode[m_VtxDesc.Position], posFormats[m_VtxAttr.PosFormat]));
dest->append(StringFromFormat("%ib skin: %i P: %i %s-%s ", m_VertexSize, (u32)m_VtxDesc.PosMatIdx,
m_VtxAttr.PosElements ? 3 : 2, posMode[m_VtxDesc.Position],
posFormats[m_VtxAttr.PosFormat]));
// Normals are only described when the descriptor field is non-zero.
if (m_VtxDesc.Normal)
{
dest->append(StringFromFormat("Nrm: %i %s-%s ",
m_VtxAttr.NormalElements, posMode[m_VtxDesc.Normal], posFormats[m_VtxAttr.NormalFormat]));
}
if (m_VtxDesc.Normal)
{
dest->append(StringFromFormat("Nrm: %i %s-%s ", m_VtxAttr.NormalElements,
posMode[m_VtxDesc.Normal], posFormats[m_VtxAttr.NormalFormat]));
}
// Descriptor fields are copied into plain arrays so they can be indexed in a loop.
u64 color_mode[2] = { m_VtxDesc.Color0, m_VtxDesc.Color1 };
for (int i = 0; i < 2; i++)
{
if (color_mode[i])
{
dest->append(StringFromFormat("C%i: %i %s-%s ", i, m_VtxAttr.color[i].Elements, posMode[color_mode[i]], colorFormat[m_VtxAttr.color[i].Comp]));
}
}
u64 tex_mode[8] = {
m_VtxDesc.Tex0Coord, m_VtxDesc.Tex1Coord, m_VtxDesc.Tex2Coord, m_VtxDesc.Tex3Coord,
m_VtxDesc.Tex4Coord, m_VtxDesc.Tex5Coord, m_VtxDesc.Tex6Coord, m_VtxDesc.Tex7Coord
};
for (int i = 0; i < 8; i++)
{
if (tex_mode[i])
{
dest->append(StringFromFormat("T%i: %i %s-%s ",
i, m_VtxAttr.texCoord[i].Elements, posMode[tex_mode[i]], posFormats[m_VtxAttr.texCoord[i].Format]));
}
}
dest->append(StringFromFormat(" - %i v", m_numLoadedVertices));
u64 color_mode[2] = {m_VtxDesc.Color0, m_VtxDesc.Color1};
for (int i = 0; i < 2; i++)
{
if (color_mode[i])
{
dest->append(StringFromFormat("C%i: %i %s-%s ", i, m_VtxAttr.color[i].Elements,
posMode[color_mode[i]], colorFormat[m_VtxAttr.color[i].Comp]));
}
}
u64 tex_mode[8] = {m_VtxDesc.Tex0Coord, m_VtxDesc.Tex1Coord, m_VtxDesc.Tex2Coord,
m_VtxDesc.Tex3Coord, m_VtxDesc.Tex4Coord, m_VtxDesc.Tex5Coord,
m_VtxDesc.Tex6Coord, m_VtxDesc.Tex7Coord};
for (int i = 0; i < 8; i++)
{
if (tex_mode[i])
{
dest->append(StringFromFormat("T%i: %i %s-%s ", i, m_VtxAttr.texCoord[i].Elements,
posMode[tex_mode[i]],
posFormats[m_VtxAttr.texCoord[i].Format]));
}
}
dest->append(StringFromFormat(" - %i v", m_numLoadedVertices));
}
// a hacky implementation to compare two vertex loaders
// Wraps two loaders `a` and `b`, runs both on the same input and logs an error when
// their vertex counts or output bytes differ. Loader `a` is the reference: its
// output is what gets copied to the caller's destination.
// NOTE(review): diff rendering - each statement appears once per side of the
// reformat, and the two sides are interleaved unevenly inside RunVertices.
class VertexLoaderTester : public VertexLoaderBase
{
public:
VertexLoaderTester(std::unique_ptr<VertexLoaderBase> a_, std::unique_ptr<VertexLoaderBase> b_, const TVtxDesc& vtx_desc, const VAT& vtx_attr)
: VertexLoaderBase(vtx_desc, vtx_attr), a(std::move(a_)), b(std::move(b_))
{
m_initialized = a && b && a->IsInitialized() && b->IsInitialized();
VertexLoaderTester(std::unique_ptr<VertexLoaderBase> a_, std::unique_ptr<VertexLoaderBase> b_,
const TVtxDesc& vtx_desc, const VAT& vtx_attr)
: VertexLoaderBase(vtx_desc, vtx_attr), a(std::move(a_)), b(std::move(b_))
{
m_initialized = a && b && a->IsInitialized() && b->IsInitialized();
// Both loaders must agree on raw size, component mask and native stride,
// otherwise their outputs cannot be compared byte for byte.
if (m_initialized)
{
m_initialized = a->m_VertexSize == b->m_VertexSize &&
a->m_native_components == b->m_native_components &&
a->m_native_vtx_decl.stride == b->m_native_vtx_decl.stride;
if (m_initialized)
{
m_initialized = a->m_VertexSize == b->m_VertexSize &&
a->m_native_components == b->m_native_components &&
a->m_native_vtx_decl.stride == b->m_native_vtx_decl.stride;
if (m_initialized)
{
m_VertexSize = a->m_VertexSize;
m_native_components = a->m_native_components;
memcpy(&m_native_vtx_decl, &a->m_native_vtx_decl, sizeof(PortableVertexDeclaration));
}
else
{
ERROR_LOG(VIDEO, "Can't compare vertex loaders that expect different vertex formats!");
ERROR_LOG(VIDEO, "a: m_VertexSize %d, m_native_components 0x%08x, stride %d",
a->m_VertexSize, a->m_native_components, a->m_native_vtx_decl.stride);
ERROR_LOG(VIDEO, "b: m_VertexSize %d, m_native_components 0x%08x, stride %d",
b->m_VertexSize, b->m_native_components, b->m_native_vtx_decl.stride);
}
}
}
~VertexLoaderTester() override
{
}
if (m_initialized)
{
m_VertexSize = a->m_VertexSize;
m_native_components = a->m_native_components;
memcpy(&m_native_vtx_decl, &a->m_native_vtx_decl, sizeof(PortableVertexDeclaration));
}
else
{
ERROR_LOG(VIDEO, "Can't compare vertex loaders that expect different vertex formats!");
ERROR_LOG(VIDEO, "a: m_VertexSize %d, m_native_components 0x%08x, stride %d",
a->m_VertexSize, a->m_native_components, a->m_native_vtx_decl.stride);
ERROR_LOG(VIDEO, "b: m_VertexSize %d, m_native_components 0x%08x, stride %d",
b->m_VertexSize, b->m_native_components, b->m_native_vtx_decl.stride);
}
}
}
~VertexLoaderTester() override {}
// Runs both loaders into private scratch buffers, compares the results, then
// copies loader a's output to `dst` and returns a's vertex count.
int RunVertices(DataReader src, DataReader dst, int count) override
{
// Scratch buffers are sized as count * stride plus 4 extra bytes.
// NOTE(review): the reason for the 4-byte slack is not visible here.
buffer_a.resize(count * a->m_native_vtx_decl.stride + 4);
buffer_b.resize(count * b->m_native_vtx_decl.stride + 4);
int RunVertices(DataReader src, DataReader dst, int count) override
{
buffer_a.resize(count * a->m_native_vtx_decl.stride + 4);
buffer_b.resize(count * b->m_native_vtx_decl.stride + 4);
int count_a =
a->RunVertices(src, DataReader(buffer_a.data(), buffer_a.data() + buffer_a.size()), count);
int count_b =
b->RunVertices(src, DataReader(buffer_b.data(), buffer_b.data() + buffer_b.size()), count);
int count_a = a->RunVertices(src, DataReader(buffer_a.data(), buffer_a.data()+buffer_a.size()), count);
int count_b = b->RunVertices(src, DataReader(buffer_b.data(), buffer_b.data()+buffer_b.size()), count);
if (count_a != count_b)
ERROR_LOG(VIDEO,
"The two vertex loaders have loaded a different amount of vertices (a: %d, b: %d).",
count_a, count_b);
if (count_a != count_b)
ERROR_LOG(VIDEO, "The two vertex loaders have loaded a different amount of vertices (a: %d, b: %d).", count_a, count_b);
// Only the vertices both loaders produced are compared.
if (memcmp(buffer_a.data(), buffer_b.data(), std::min(count_a, count_b) * m_native_vtx_decl.stride))
ERROR_LOG(VIDEO, "The two vertex loaders have loaded different data "
"(guru meditation 0x%016" PRIx64 ", 0x%08x, 0x%08x, 0x%08x).",
m_VtxDesc.Hex, m_vat.g0.Hex, m_vat.g1.Hex, m_vat.g2.Hex);
memcpy(dst.GetPointer(), buffer_a.data(), count_a * m_native_vtx_decl.stride);
m_numLoadedVertices += count;
return count_a;
}
std::string GetName() const override { return "CompareLoader"; }
bool IsInitialized() override { return m_initialized; }
if (memcmp(buffer_a.data(), buffer_b.data(),
std::min(count_a, count_b) * m_native_vtx_decl.stride))
ERROR_LOG(VIDEO, "The two vertex loaders have loaded different data "
"(guru meditation 0x%016" PRIx64 ", 0x%08x, 0x%08x, 0x%08x).",
m_VtxDesc.Hex, m_vat.g0.Hex, m_vat.g1.Hex, m_vat.g2.Hex);
memcpy(dst.GetPointer(), buffer_a.data(), count_a * m_native_vtx_decl.stride);
m_numLoadedVertices += count;
return count_a;
}
std::string GetName() const override { return "CompareLoader"; }
bool IsInitialized() override { return m_initialized; }
private:
// True only when both loaders exist, are initialized and agree on the format.
bool m_initialized;
bool m_initialized;
std::unique_ptr<VertexLoaderBase> a;
std::unique_ptr<VertexLoaderBase> b;
std::unique_ptr<VertexLoaderBase> a;
std::unique_ptr<VertexLoaderBase> b;
// Scratch output buffers, kept as members and resized on every RunVertices call.
std::vector<u8> buffer_a;
std::vector<u8> buffer_b;
std::vector<u8> buffer_a;
std::vector<u8> buffer_b;
};
// Factory: builds a loader for the given descriptor/VAT pair.
// At most one platform-specific candidate is compiled in: the comparison wrapper
// (only when COMPARE_VERTEXLOADERS is defined on x86-64), VertexLoaderX64 or
// VertexLoaderARM64. If that candidate reports IsInitialized() it is returned;
// otherwise the generic VertexLoader is tried.
// Returns nullptr, after a PanicAlert, if no loader reports itself initialized.
// NOTE(review): diff rendering - the signature and each statement appear once per
// side of the reformat.
std::unique_ptr<VertexLoaderBase> VertexLoaderBase::CreateVertexLoader(const TVtxDesc& vtx_desc, const VAT& vtx_attr)
std::unique_ptr<VertexLoaderBase> VertexLoaderBase::CreateVertexLoader(const TVtxDesc& vtx_desc,
const VAT& vtx_attr)
{
std::unique_ptr<VertexLoaderBase> loader;
std::unique_ptr<VertexLoaderBase> loader;
// Uncomment the define below to enable the comparison wrapper.
//#define COMPARE_VERTEXLOADERS
#if defined(COMPARE_VERTEXLOADERS) && defined(_M_X86_64)
// first try: Any new VertexLoader vs the old one
loader = std::make_unique<VertexLoaderTester>(
std::make_unique<VertexLoader>(vtx_desc, vtx_attr), // the software one
std::make_unique<VertexLoaderX64>(vtx_desc, vtx_attr), // the new one to compare
vtx_desc, vtx_attr);
if (loader->IsInitialized())
return loader;
// first try: Any new VertexLoader vs the old one
loader = std::make_unique<VertexLoaderTester>(
std::make_unique<VertexLoader>(vtx_desc, vtx_attr), // the software one
std::make_unique<VertexLoaderX64>(vtx_desc, vtx_attr), // the new one to compare
vtx_desc, vtx_attr);
if (loader->IsInitialized())
return loader;
#elif defined(_M_X86_64)
loader = std::make_unique<VertexLoaderX64>(vtx_desc, vtx_attr);
if (loader->IsInitialized())
return loader;
loader = std::make_unique<VertexLoaderX64>(vtx_desc, vtx_attr);
if (loader->IsInitialized())
return loader;
#elif defined(_M_ARM_64)
loader = std::make_unique<VertexLoaderARM64>(vtx_desc, vtx_attr);
if (loader->IsInitialized())
return loader;
loader = std::make_unique<VertexLoaderARM64>(vtx_desc, vtx_attr);
if (loader->IsInitialized())
return loader;
#endif
// last try: The old VertexLoader
loader = std::make_unique<VertexLoader>(vtx_desc, vtx_attr);
if (loader->IsInitialized())
return loader;
// last try: The old VertexLoader
loader = std::make_unique<VertexLoader>(vtx_desc, vtx_attr);
if (loader->IsInitialized())
return loader;
PanicAlert("No Vertex Loader found.");
return nullptr;
PanicAlert("No Vertex Loader found.");
return nullptr;
}

View file

@ -16,89 +16,76 @@ class DataReader;
// Key identifying a vertex format: five 32-bit words (the 64-bit descriptor split
// into low and high halves, then the three VAT group words) plus a hash that is
// computed once in the two-argument constructor.
// NOTE(review): diff rendering - each member appears once per side of the reformat.
class VertexLoaderUID
{
std::array<u32, 5> vid;
size_t hash;
std::array<u32, 5> vid;
size_t hash;
public:
// NOTE(review): the default constructor leaves vid and hash uninitialized; confirm
// that no caller compares or hashes a default-constructed UID.
VertexLoaderUID()
{
}
VertexLoaderUID(const TVtxDesc& vtx_desc, const VAT& vat)
{
vid[0] = vtx_desc.Hex & 0xFFFFFFFF;
vid[1] = vtx_desc.Hex >> 32;
vid[2] = vat.g0.Hex;
vid[3] = vat.g1.Hex;
vid[4] = vat.g2.Hex;
hash = CalculateHash();
}
// Equality compares the five words only; the stored hash is not consulted.
bool operator == (const VertexLoaderUID& rh) const
{
return vid == rh.vid;
}
size_t GetHash() const
{
return hash;
}
VertexLoaderUID() {}
VertexLoaderUID(const TVtxDesc& vtx_desc, const VAT& vat)
{
vid[0] = vtx_desc.Hex & 0xFFFFFFFF;
vid[1] = vtx_desc.Hex >> 32;
vid[2] = vat.g0.Hex;
vid[3] = vat.g1.Hex;
vid[4] = vat.g2.Hex;
hash = CalculateHash();
}
bool operator==(const VertexLoaderUID& rh) const { return vid == rh.vid; }
size_t GetHash() const { return hash; }
private:
// Multiplicative hash: starts from all-ones (size_t(-1)) and folds each word in
// as h = h * 137 + word, wrapping on unsigned overflow.
size_t CalculateHash() const
{
size_t h = -1;
size_t CalculateHash() const
{
size_t h = -1;
for (auto word : vid)
{
h = h * 137 + word;
}
for (auto word : vid)
{
h = h * 137 + word;
}
return h;
}
return h;
}
};
// std::hash specialization so VertexLoaderUID can be used as a key in unordered
// containers; it returns the hash value stored in the UID.
// NOTE(review): diff rendering - the template header and call operator appear once
// per side of the reformat.
namespace std
{
template <> struct hash<VertexLoaderUID>
template <>
struct hash<VertexLoaderUID>
{
size_t operator()(const VertexLoaderUID& uid) const
{
return uid.GetHash();
}
size_t operator()(const VertexLoaderUID& uid) const { return uid.GetHash(); }
};
}
// Abstract base for vertex loaders. Concrete loaders implement RunVertices,
// IsInitialized and GetName; instances are obtained through CreateVertexLoader.
// NOTE(review): diff rendering - each member appears once per side of the reformat.
class VertexLoaderBase
{
public:
static std::unique_ptr<VertexLoaderBase> CreateVertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr);
virtual ~VertexLoaderBase() {}
static std::unique_ptr<VertexLoaderBase> CreateVertexLoader(const TVtxDesc& vtx_desc,
const VAT& vtx_attr);
virtual ~VertexLoaderBase() {}
// Takes `count` vertices from `src` and writes to `dst`; returns an int vertex count.
// NOTE(review): the visible implementations return the count after skipped
// vertices are removed - confirm this is the contract for every loader.
virtual int RunVertices(DataReader src, DataReader dst, int count) = 0;
virtual int RunVertices(DataReader src, DataReader dst, int count) = 0;
virtual bool IsInitialized() = 0;
virtual bool IsInitialized() = 0;
// For debugging / profiling
void AppendToString(std::string* dest) const;
// For debugging / profiling
void AppendToString(std::string *dest) const;
virtual std::string GetName() const = 0;
virtual std::string GetName() const = 0;
// per loader public state
int m_VertexSize; // number of bytes of a raw GC vertex
PortableVertexDeclaration m_native_vtx_decl;
u32 m_native_components;
// per loader public state
int m_VertexSize; // number of bytes of a raw GC vertex
PortableVertexDeclaration m_native_vtx_decl;
u32 m_native_components;
// used by VertexLoaderManager
NativeVertexFormat* m_native_vertex_format;
int m_numLoadedVertices;
// used by VertexLoaderManager
NativeVertexFormat* m_native_vertex_format;
int m_numLoadedVertices;
protected:
VertexLoaderBase(const TVtxDesc &vtx_desc, const VAT &vtx_attr);
void SetVAT(const VAT& vat);
VertexLoaderBase(const TVtxDesc& vtx_desc, const VAT& vtx_attr);
void SetVAT(const VAT& vat);
// GC vertex format
TVtxAttr m_VtxAttr; // VAT decoded into easy format
TVtxDesc m_VtxDesc; // Not really used currently - or well it is, but could be easily avoided.
VAT m_vat;
// GC vertex format
TVtxAttr m_VtxAttr; // VAT decoded into easy format
TVtxDesc m_VtxDesc; // Not really used currently - or well it is, but could be easily avoided.
VAT m_vat;
};

View file

@ -27,7 +27,6 @@
namespace VertexLoaderManager
{
float position_cache[3][4];
u32 position_matrix_index[3];
@ -40,268 +39,271 @@ static std::mutex s_vertex_loader_map_lock;
static VertexLoaderMap s_vertex_loader_map;
// TODO - change into array of pointers. Keep a map of all seen so far.
u8 *cached_arraybases[12];
u8* cached_arraybases[12];
// Used in D3D12 backend, to populate input layouts used by cached-to-disk PSOs.
// Returns a pointer to the file-level s_native_vertex_map; no lock is taken here.
// NOTE(review): diff rendering - the return statement appears once per side of the
// reformat.
NativeVertexFormatMap* GetNativeVertexFormatMap()
{
return &s_native_vertex_map;
return &s_native_vertex_map;
}
// Resets manager state: marks every attribute group dirty, clears the cached loader
// pointers in both the main and preprocess CP states, and zeroes the
// numVertexLoaders statistic.
// NOTE(review): diff rendering - each statement appears once per side of the
// reformat.
void Init()
{
MarkAllDirty();
for (auto& map_entry : g_main_cp_state.vertex_loaders)
map_entry = nullptr;
for (auto& map_entry : g_preprocess_cp_state.vertex_loaders)
map_entry = nullptr;
SETSTAT(stats.numVertexLoaders, 0);
MarkAllDirty();
for (auto& map_entry : g_main_cp_state.vertex_loaders)
map_entry = nullptr;
for (auto& map_entry : g_preprocess_cp_state.vertex_loaders)
map_entry = nullptr;
SETSTAT(stats.numVertexLoaders, 0);
}
// Clears both caches (vertex loaders and native vertex formats) while holding
// s_vertex_loader_map_lock.
// NOTE(review): diff rendering - each statement appears once per side of the
// reformat.
void Shutdown()
{
std::lock_guard<std::mutex> lk(s_vertex_loader_map_lock);
s_vertex_loader_map.clear();
s_native_vertex_map.clear();
std::lock_guard<std::mutex> lk(s_vertex_loader_map_lock);
s_vertex_loader_map.clear();
s_native_vertex_map.clear();
}
// Refreshes cached_arraybases[0..11] from g_main_cp_state.array_bases, but only for
// arrays whose descriptor status has MASK_INDEXED set. Does nothing unless
// bases_dirty is set, and clears that flag when done.
// NOTE(review): diff rendering - each statement appears once per side of the
// reformat.
void UpdateVertexArrayPointers()
{
// Anything to update?
if (!g_main_cp_state.bases_dirty)
return;
// Anything to update?
if (!g_main_cp_state.bases_dirty)
return;
// Some games such as Burnout 2 can put invalid addresses into
// the array base registers. (see issue 8591)
// But the vertex arrays with invalid addresses aren't actually enabled.
// Note: Only array bases 0 through 11 are used by the Vertex loaders.
// 12 through 15 are used for loading data into xfmem.
for (int i = 0; i < 12; i++)
{
// Only update the array base if the vertex description states we are going to use it.
if (g_main_cp_state.vtx_desc.GetVertexArrayStatus(i) & MASK_INDEXED)
cached_arraybases[i] = Memory::GetPointer(g_main_cp_state.array_bases[i]);
}
// Some games such as Burnout 2 can put invalid addresses into
// the array base registers. (see issue 8591)
// But the vertex arrays with invalid addresses aren't actually enabled.
// Note: Only array bases 0 through 11 are used by the Vertex loaders.
// 12 through 15 are used for loading data into xfmem.
for (int i = 0; i < 12; i++)
{
// Only update the array base if the vertex description states we are going to use it.
if (g_main_cp_state.vtx_desc.GetVertexArrayStatus(i) & MASK_INDEXED)
cached_arraybases[i] = Memory::GetPointer(g_main_cp_state.array_bases[i]);
}
g_main_cp_state.bases_dirty = false;
g_main_cp_state.bases_dirty = false;
}
// File-local helper record: one loader's description text and its vertex count.
// NOTE(review): diff rendering - each member appears once per side of the reformat.
namespace
{
struct entry
{
std::string text;
u64 num_verts;
// Deliberately inverted: an entry with MORE vertices compares as "less", so an
// ascending sort lists the busiest loaders first.
bool operator < (const entry &other) const
{
return num_verts > other.num_verts;
}
std::string text;
u64 num_verts;
bool operator<(const entry& other) const { return num_verts > other.num_verts; }
};
}
// Appends one line per cached vertex loader to `*dest`, ordered by descending
// number of loaded vertices (see entry::operator<). s_vertex_loader_map_lock is
// held for the whole call.
// NOTE(review): diff rendering - the signature and each statement appear once per
// side of the reformat.
void AppendListToString(std::string *dest)
void AppendListToString(std::string* dest)
{
std::lock_guard<std::mutex> lk(s_vertex_loader_map_lock);
std::vector<entry> entries;
std::lock_guard<std::mutex> lk(s_vertex_loader_map_lock);
std::vector<entry> entries;
// total_size accumulates text length plus one newline per entry so `dest` can be
// reserved once before appending.
size_t total_size = 0;
for (const auto& map_entry : s_vertex_loader_map)
{
entry e;
map_entry.second->AppendToString(&e.text);
e.num_verts = map_entry.second->m_numLoadedVertices;
entries.push_back(e);
total_size += e.text.size() + 1;
}
sort(entries.begin(), entries.end());
dest->reserve(dest->size() + total_size);
for (const entry& entry : entries)
{
*dest += entry.text;
*dest += '\n';
}
size_t total_size = 0;
for (const auto& map_entry : s_vertex_loader_map)
{
entry e;
map_entry.second->AppendToString(&e.text);
e.num_verts = map_entry.second->m_numLoadedVertices;
entries.push_back(e);
total_size += e.text.size() + 1;
}
sort(entries.begin(), entries.end());
dest->reserve(dest->size() + total_size);
for (const entry& entry : entries)
{
*dest += entry.text;
*dest += '\n';
}
}
// Sets attr_dirty to BitSet32::AllTrue(8) in both the main and preprocess CP
// states, so that RefreshLoader takes its dirty (lookup/create) path for a group
// instead of reusing the cached loader pointer.
// NOTE(review): diff rendering - each statement appears once per side of the
// reformat.
void MarkAllDirty()
{
g_main_cp_state.attr_dirty = BitSet32::AllTrue(8);
g_preprocess_cp_state.attr_dirty = BitSet32::AllTrue(8);
g_main_cp_state.attr_dirty = BitSet32::AllTrue(8);
g_preprocess_cp_state.attr_dirty = BitSet32::AllTrue(8);
}
// Returns the loader for attribute group `vtx_attr_group` in the main or preprocess
// CP state.
// If the group is dirty, the loader is looked up in (or created and inserted into)
// s_vertex_loader_map under s_vertex_loader_map_lock, cached in
// state->vertex_loaders and the dirty bit is cleared. Otherwise the cached pointer
// is reused.
// When not preprocessing, the native vertex format is attached if missing and the
// vertex array pointers are refreshed before returning.
// NOTE(review): CreateVertexLoader can return nullptr; in that case `loader` would be
// null when dereferenced below - confirm whether that path is reachable.
// NOTE(review): diff rendering - each statement appears once per side of the
// reformat.
static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = false)
{
CPState* state = preprocess ? &g_preprocess_cp_state : &g_main_cp_state;
state->last_id = vtx_attr_group;
CPState* state = preprocess ? &g_preprocess_cp_state : &g_main_cp_state;
state->last_id = vtx_attr_group;
VertexLoaderBase* loader;
if (state->attr_dirty[vtx_attr_group])
{
// We are not allowed to create a native vertex format on preprocessing as this is on the wrong thread
bool check_for_native_format = !preprocess;
VertexLoaderBase* loader;
if (state->attr_dirty[vtx_attr_group])
{
// We are not allowed to create a native vertex format on preprocessing as this is on the wrong
// thread
bool check_for_native_format = !preprocess;
VertexLoaderUID uid(state->vtx_desc, state->vtx_attr[vtx_attr_group]);
// The lock is held until the end of this dirty branch.
std::lock_guard<std::mutex> lk(s_vertex_loader_map_lock);
VertexLoaderMap::iterator iter = s_vertex_loader_map.find(uid);
if (iter != s_vertex_loader_map.end())
{
loader = iter->second.get();
// Skip the native-format step if this cached loader already has one.
check_for_native_format &= !loader->m_native_vertex_format;
}
else
{
s_vertex_loader_map[uid] = VertexLoaderBase::CreateVertexLoader(state->vtx_desc, state->vtx_attr[vtx_attr_group]);
loader = s_vertex_loader_map[uid].get();
INCSTAT(stats.numVertexLoaders);
}
if (check_for_native_format)
{
// search for a cached native vertex format
const PortableVertexDeclaration& format = loader->m_native_vtx_decl;
// operator[] default-inserts an empty unique_ptr when the format is new.
std::unique_ptr<NativeVertexFormat>& native = s_native_vertex_map[format];
if (!native)
{
native.reset(g_vertex_manager->CreateNativeVertexFormat(format));
}
loader->m_native_vertex_format = native.get();
}
state->vertex_loaders[vtx_attr_group] = loader;
state->attr_dirty[vtx_attr_group] = false;
} else {
loader = state->vertex_loaders[vtx_attr_group];
}
VertexLoaderUID uid(state->vtx_desc, state->vtx_attr[vtx_attr_group]);
std::lock_guard<std::mutex> lk(s_vertex_loader_map_lock);
VertexLoaderMap::iterator iter = s_vertex_loader_map.find(uid);
if (iter != s_vertex_loader_map.end())
{
loader = iter->second.get();
check_for_native_format &= !loader->m_native_vertex_format;
}
else
{
s_vertex_loader_map[uid] =
VertexLoaderBase::CreateVertexLoader(state->vtx_desc, state->vtx_attr[vtx_attr_group]);
loader = s_vertex_loader_map[uid].get();
INCSTAT(stats.numVertexLoaders);
}
if (check_for_native_format)
{
// search for a cached native vertex format
const PortableVertexDeclaration& format = loader->m_native_vtx_decl;
std::unique_ptr<NativeVertexFormat>& native = s_native_vertex_map[format];
if (!native)
{
native.reset(g_vertex_manager->CreateNativeVertexFormat(format));
}
loader->m_native_vertex_format = native.get();
}
state->vertex_loaders[vtx_attr_group] = loader;
state->attr_dirty[vtx_attr_group] = false;
}
else
{
loader = state->vertex_loaders[vtx_attr_group];
}
// Lookup pointers for any vertex arrays.
if (!preprocess)
UpdateVertexArrayPointers();
// Lookup pointers for any vertex arrays.
if (!preprocess)
UpdateVertexArrayPointers();
return loader;
return loader;
}
int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bool skip_drawing, bool is_preprocess)
int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bool skip_drawing,
bool is_preprocess)
{
if (!count)
return 0;
if (!count)
return 0;
VertexLoaderBase* loader = RefreshLoader(vtx_attr_group, is_preprocess);
VertexLoaderBase* loader = RefreshLoader(vtx_attr_group, is_preprocess);
int size = count * loader->m_VertexSize;
if ((int)src.size() < size)
return -1;
int size = count * loader->m_VertexSize;
if ((int)src.size() < size)
return -1;
if (skip_drawing || is_preprocess)
return size;
if (skip_drawing || is_preprocess)
return size;
// If the native vertex format changed, force a flush.
if (loader->m_native_vertex_format != s_current_vtx_fmt ||
loader->m_native_components != g_current_components)
{
VertexManagerBase::Flush();
}
s_current_vtx_fmt = loader->m_native_vertex_format;
g_current_components = loader->m_native_components;
// If the native vertex format changed, force a flush.
if (loader->m_native_vertex_format != s_current_vtx_fmt ||
loader->m_native_components != g_current_components)
{
VertexManagerBase::Flush();
}
s_current_vtx_fmt = loader->m_native_vertex_format;
g_current_components = loader->m_native_components;
// if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads.
// They still need to go through vertex loading, because we need to calculate a zfreeze refrence slope.
bool cullall = (bpmem.genMode.cullmode == GenMode::CULL_ALL && primitive < 5);
// if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads.
// They still need to go through vertex loading, because we need to calculate a zfreeze refrence
// slope.
bool cullall = (bpmem.genMode.cullmode == GenMode::CULL_ALL && primitive < 5);
DataReader dst = VertexManagerBase::PrepareForAdditionalData(primitive, count,
loader->m_native_vtx_decl.stride, cullall);
DataReader dst = VertexManagerBase::PrepareForAdditionalData(
primitive, count, loader->m_native_vtx_decl.stride, cullall);
count = loader->RunVertices(src, dst, count);
count = loader->RunVertices(src, dst, count);
IndexGenerator::AddIndices(primitive, count);
IndexGenerator::AddIndices(primitive, count);
VertexManagerBase::FlushData(count, loader->m_native_vtx_decl.stride);
VertexManagerBase::FlushData(count, loader->m_native_vtx_decl.stride);
ADDSTAT(stats.thisFrame.numPrims, count);
INCSTAT(stats.thisFrame.numPrimitiveJoins);
return size;
ADDSTAT(stats.thisFrame.numPrims, count);
INCSTAT(stats.thisFrame.numPrimitiveJoins);
return size;
}
// Returns the native vertex format most recently recorded in s_current_vtx_fmt.
NativeVertexFormat* GetCurrentVertexFormat()
{
  return s_current_vtx_fmt;
}
} // namespace
// Applies a CP register write to the CP state.
//
// `sub_cmd & 0xF0` selects the register family and the low bits select the index within it.
// `is_preprocess` picks g_preprocess_cp_state instead of g_main_cp_state; the
// VertexShaderManager notifications (0x30 / 0x40) are only issued for the main state.
// Register families not listed in the switch are ignored.
void LoadCPReg(u32 sub_cmd, u32 value, bool is_preprocess)
{
  bool update_global_state = !is_preprocess;
  CPState* state = is_preprocess ? &g_preprocess_cp_state : &g_main_cp_state;
  switch (sub_cmd & 0xF0)
  {
  case 0x30:
    if (update_global_state)
      VertexShaderManager::SetTexMatrixChangedA(value);
    break;

  case 0x40:
    if (update_global_state)
      VertexShaderManager::SetTexMatrixChangedB(value);
    break;

  case 0x50:
    state->vtx_desc.Hex &= ~0x1FFFF;  // keep the Upper bits
    state->vtx_desc.Hex |= value;
    state->attr_dirty = BitSet32::AllTrue(8);
    state->bases_dirty = true;
    break;

  case 0x60:
    state->vtx_desc.Hex &= 0x1FFFF;  // keep the lower 17Bits
    state->vtx_desc.Hex |= (u64)value << 17;
    state->attr_dirty = BitSet32::AllTrue(8);
    state->bases_dirty = true;
    break;

  case 0x70:
    _assert_((sub_cmd & 0x0F) < 8);
    state->vtx_attr[sub_cmd & 7].g0.Hex = value;
    state->attr_dirty[sub_cmd & 7] = true;
    break;

  case 0x80:
    _assert_((sub_cmd & 0x0F) < 8);
    state->vtx_attr[sub_cmd & 7].g1.Hex = value;
    state->attr_dirty[sub_cmd & 7] = true;
    break;

  case 0x90:
    _assert_((sub_cmd & 0x0F) < 8);
    state->vtx_attr[sub_cmd & 7].g2.Hex = value;
    state->attr_dirty[sub_cmd & 7] = true;
    break;

  // Pointers to vertex arrays in GC RAM
  case 0xA0:
    state->array_bases[sub_cmd & 0xF] = value;
    state->bases_dirty = true;
    break;

  case 0xB0:
    // Only the low byte of the written value is kept as the stride.
    state->array_strides[sub_cmd & 0xF] = value & 0xFF;
    break;
  }
}
// Writes the main CP state into `memory`, using the register numbers as indices
// (0x30, 0x40, 0x50, 0x60, 0x70-0x77, 0x80-0x87, 0x90-0x97, 0xA0-0xAF, 0xB0-0xBF).
// `memory` must therefore be indexable up to at least 0xBF.
void FillCPMemoryArray(u32* memory)
{
  memory[0x30] = g_main_cp_state.matrix_index_a.Hex;
  memory[0x40] = g_main_cp_state.matrix_index_b.Hex;
  // vtx_desc is split across two registers: the low bits go to 0x50, the bits from 17 up to 0x60.
  memory[0x50] = (u32)g_main_cp_state.vtx_desc.Hex;
  memory[0x60] = (u32)(g_main_cp_state.vtx_desc.Hex >> 17);

  for (int i = 0; i < 8; ++i)
  {
    memory[0x70 + i] = g_main_cp_state.vtx_attr[i].g0.Hex;
    memory[0x80 + i] = g_main_cp_state.vtx_attr[i].g1.Hex;
    memory[0x90 + i] = g_main_cp_state.vtx_attr[i].g2.Hex;
  }

  for (int i = 0; i < 16; ++i)
  {
    memory[0xA0 + i] = g_main_cp_state.array_bases[i];
    memory[0xB0 + i] = g_main_cp_state.array_strides[i];
  }
}

View file

@ -16,33 +16,34 @@ struct PortableVertexDeclaration;
namespace VertexLoaderManager
{
// Map from a portable vertex declaration to the native vertex format that owns it.
using NativeVertexFormatMap =
    std::unordered_map<PortableVertexDeclaration, std::unique_ptr<NativeVertexFormat>>;

void Init();
void Shutdown();

void MarkAllDirty();

NativeVertexFormatMap* GetNativeVertexFormatMap();

// Returns 0 if count is 0, -1 if src holds fewer bytes than the vertices require, else the amount
// of bytes consumed
int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bool skip_drawing,
                bool is_preprocess);

// For debugging
void AppendListToString(std::string* dest);

NativeVertexFormat* GetCurrentVertexFormat();

// Resolved pointers to array bases. Used by vertex loaders.
extern u8* cached_arraybases[12];
void UpdateVertexArrayPointers();

// Position cache for zfreeze (3 vertices, 4 floats each to allow SIMD overwrite).
// These arrays are in reverse order.
extern float position_cache[3][4];
extern u32 position_matrix_index[3];

// VB_HAS_X. Bitmask telling what vertex components are present.
extern u32 g_current_components;
}

View file

@ -14,48 +14,48 @@ extern u8* g_vertex_manager_write_ptr;
// Advances the video buffer read pointer by `skip` bytes without reading anything.
__forceinline void DataSkip(u32 skip)
{
  g_video_buffer_read_ptr += skip;
}
// Compile-time variant of DataSkip: advances the video buffer read pointer by `count` bytes.
// probably unnecessary
template <int count>
__forceinline void DataSkip()
{
  g_video_buffer_read_ptr += count;
}
// Reads a T located `_uOffset` bytes past `bufp` and passes it through Common::FromBigEndian.
// Neither `bufp` nor the global read pointer is advanced. The value is copied out with memcpy,
// so the source address does not need to be aligned for T.
template <typename T>
__forceinline T DataPeek(int _uOffset, u8* bufp = g_video_buffer_read_ptr)
{
  T result;
  std::memcpy(&result, &bufp[_uOffset], sizeof(T));
  return Common::FromBigEndian(result);
}
// Reads a T from `*bufp` via DataPeek and then advances `*bufp` by sizeof(T).
// By default this operates on the global video buffer read pointer.
template <typename T>
__forceinline T DataRead(u8** bufp = &g_video_buffer_read_ptr)
{
  auto const result = DataPeek<T>(0, *bufp);
  *bufp += sizeof(T);
  return result;
}
// Reads four bytes from the video buffer read pointer as a u32 without any byte swapping and
// advances the read pointer past them.
__forceinline u32 DataReadU32Unswapped()
{
  u32 result;
  std::memcpy(&result, g_video_buffer_read_ptr, sizeof(u32));
  g_video_buffer_read_ptr += sizeof(u32);
  return result;
}
// Returns the current video buffer read pointer.
__forceinline u8* DataGetPosition()
{
  return g_video_buffer_read_ptr;
}
// Copies the bytes of `data` to the vertex manager write pointer (no byte swapping) and advances
// that pointer by sizeof(T).
template <typename T>
__forceinline void DataWrite(T data)
{
  std::memcpy(g_vertex_manager_write_ptr, &data, sizeof(T));
  g_vertex_manager_write_ptr += sizeof(T);
}

View file

@ -6,9 +6,9 @@
#include <string>
#include "Common/BitSet.h"
#include "Common/CPUDetect.h"
#include "Common/Common.h"
#include "Common/CommonTypes.h"
#include "Common/CPUDetect.h"
#include "Common/Intrinsics.h"
#include "Common/JitRegister.h"
#include "Common/x64ABI.h"
@ -32,523 +32,533 @@ static const u8* memory_base_ptr = (u8*)&g_main_cp_state.array_strides;
// Builds a memory operand for `ptr` expressed relative to base_reg, plus `scale_reg * scale`.
// The displacement is the distance from memory_base_ptr to `ptr`, truncated to 32 bits.
static OpArg MPIC(const void* ptr, X64Reg scale_reg, int scale = SCALE_1)
{
  return MComplex(base_reg, scale_reg, scale, (s32)((u8*)ptr - memory_base_ptr));
}
// Builds a memory operand for `ptr` expressed as base_reg plus a 32-bit displacement, where the
// displacement is the distance from memory_base_ptr to `ptr`.
static OpArg MPIC(const void* ptr)
{
  return MDisp(base_reg, (s32)((u8*)ptr - memory_base_ptr));
}
// Constructs the loader and, if the base class reports itself initialized, generates the
// machine code for this vertex description / attribute table and registers it with JitRegister.
VertexLoaderX64::VertexLoaderX64(const TVtxDesc& vtx_desc, const VAT& vtx_att)
    : VertexLoaderBase(vtx_desc, vtx_att)
{
  if (!IsInitialized())
    return;

  AllocCodeSpace(4096, false);
  ClearCodeSpace();
  GenerateVertexLoader();
  WriteProtect();

  // Register the generated code under the loader's textual description.
  std::string name;
  AppendToString(&name);
  JitRegister::Register(region, GetCodePtr(), name.c_str());
}
// Returns the operand from which the data for `array` should be read.
//
// For non-indexed attributes this is simply src_reg + m_src_ofs. For indexed attributes, code is
// emitted that loads the 8- or 16-bit index from the source stream (advancing m_src_ofs),
// multiplies it by the array's stride and adds the cached array base; the returned operand is
// scratch1 + scratch2. For the position array, an index with all bits set records a branch in
// m_skip_vertex.
OpArg VertexLoaderX64::GetVertexAddr(int array, u64 attribute)
{
  OpArg data = MDisp(src_reg, m_src_ofs);
  if (attribute & MASK_INDEXED)
  {
    int bits = attribute == INDEX8 ? 8 : 16;
    LoadAndSwap(bits, scratch1, data);
    m_src_ofs += bits / 8;
    if (array == ARRAY_POSITION)
    {
      CMP(bits, R(scratch1), Imm8(-1));
      m_skip_vertex = J_CC(CC_E, true);
    }
    IMUL(32, scratch1, MPIC(&g_main_cp_state.array_strides[array]));
    MOV(64, R(scratch2), MPIC(&VertexLoaderManager::cached_arraybases[array]));
    return MRegSum(scratch1, scratch2);
  }
  else
  {
    return data;
  }
}
// Emits code that reads `count_in` components of type `format` from `data`, converts them to
// floats and stores `count_out` components at dst_reg + m_dst_ofs.
//
// - `attribute`: when equal to DIRECT, m_src_ofs is advanced by the number of bytes read.
// - `dequantize` / `scaling_exponent`: when both are non-zero, non-float input is multiplied by
//   1 / 2^scaling_exponent after conversion.
// - `native_format`: filled in to describe the emitted float attribute. When it points at
//   m_native_vtx_decl.position, the emitted code also copies the coordinates into
//   VertexLoaderManager::position_cache for zfreeze while count_reg is at most 3.
//
// m_dst_ofs is advanced by sizeof(float) * count_out. Returns elem_size * count_in, the number
// of source bytes the attribute occupies.
int VertexLoaderX64::ReadVertex(OpArg data, u64 attribute, int format, int count_in, int count_out,
                                bool dequantize, u8 scaling_exponent,
                                AttributeFormat* native_format)
{
  // PSHUFB masks, indexed by [format][count_in - 1]. A mask byte of 0xFF zeroes the
  // destination byte; other values select a source byte.
  static const __m128i shuffle_lut[5][3] = {
      {_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFF00L),   // 1x u8
       _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFF01L, 0xFFFFFF00L),   // 2x u8
       _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFF02L, 0xFFFFFF01L, 0xFFFFFF00L)},  // 3x u8
      {_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x00FFFFFFL),   // 1x s8
       _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x01FFFFFFL, 0x00FFFFFFL),   // 2x s8
       _mm_set_epi32(0xFFFFFFFFL, 0x02FFFFFFL, 0x01FFFFFFL, 0x00FFFFFFL)},  // 3x s8
      {_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFF0001L),   // 1x u16
       _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFF0203L, 0xFFFF0001L),   // 2x u16
       _mm_set_epi32(0xFFFFFFFFL, 0xFFFF0405L, 0xFFFF0203L, 0xFFFF0001L)},  // 3x u16
      {_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x0001FFFFL),   // 1x s16
       _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x0203FFFFL, 0x0001FFFFL),   // 2x s16
       _mm_set_epi32(0xFFFFFFFFL, 0x0405FFFFL, 0x0203FFFFL, 0x0001FFFFL)},  // 3x s16
      {_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x00010203L),   // 1x float
       _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L),   // 2x float
       _mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x04050607L, 0x00010203L)},  // 3x float
  };
  // scale_factors[n] holds 1 / 2^n broadcast to all four lanes.
  static const __m128 scale_factors[32] = {
      _mm_set_ps1(1. / (1u << 0)),  _mm_set_ps1(1. / (1u << 1)),  _mm_set_ps1(1. / (1u << 2)),
      _mm_set_ps1(1. / (1u << 3)),  _mm_set_ps1(1. / (1u << 4)),  _mm_set_ps1(1. / (1u << 5)),
      _mm_set_ps1(1. / (1u << 6)),  _mm_set_ps1(1. / (1u << 7)),  _mm_set_ps1(1. / (1u << 8)),
      _mm_set_ps1(1. / (1u << 9)),  _mm_set_ps1(1. / (1u << 10)), _mm_set_ps1(1. / (1u << 11)),
      _mm_set_ps1(1. / (1u << 12)), _mm_set_ps1(1. / (1u << 13)), _mm_set_ps1(1. / (1u << 14)),
      _mm_set_ps1(1. / (1u << 15)), _mm_set_ps1(1. / (1u << 16)), _mm_set_ps1(1. / (1u << 17)),
      _mm_set_ps1(1. / (1u << 18)), _mm_set_ps1(1. / (1u << 19)), _mm_set_ps1(1. / (1u << 20)),
      _mm_set_ps1(1. / (1u << 21)), _mm_set_ps1(1. / (1u << 22)), _mm_set_ps1(1. / (1u << 23)),
      _mm_set_ps1(1. / (1u << 24)), _mm_set_ps1(1. / (1u << 25)), _mm_set_ps1(1. / (1u << 26)),
      _mm_set_ps1(1. / (1u << 27)), _mm_set_ps1(1. / (1u << 28)), _mm_set_ps1(1. / (1u << 29)),
      _mm_set_ps1(1. / (1u << 30)), _mm_set_ps1(1. / (1u << 31)),
  };

  X64Reg coords = XMM0;

  // Formats come in pairs of equal width (u8/s8, u16/s16, float), hence the division by two.
  int elem_size = 1 << (format / 2);
  int load_bytes = elem_size * count_in;
  OpArg dest = MDisp(dst_reg, m_dst_ofs);

  native_format->components = count_out;
  native_format->enable = true;
  native_format->offset = m_dst_ofs;
  native_format->type = VAR_FLOAT;
  native_format->integer = false;

  m_dst_ofs += sizeof(float) * count_out;

  if (attribute == DIRECT)
    m_src_ofs += load_bytes;

  if (cpu_info.bSSSE3)
  {
    if (load_bytes > 8)
      MOVDQU(coords, data);
    else if (load_bytes > 4)
      MOVQ_xmm(coords, data);
    else
      MOVD_xmm(coords, data);

    PSHUFB(coords, MPIC(&shuffle_lut[format][count_in - 1]));

    // Sign-extend.
    if (format == FORMAT_BYTE)
      PSRAD(coords, 24);
    if (format == FORMAT_SHORT)
      PSRAD(coords, 16);
  }
  else
  {
    // SSE2
    X64Reg temp = XMM1;
    switch (format)
    {
    case FORMAT_UBYTE:
      MOVD_xmm(coords, data);
      PXOR(temp, R(temp));
      PUNPCKLBW(coords, R(temp));
      PUNPCKLWD(coords, R(temp));
      break;
    case FORMAT_BYTE:
      MOVD_xmm(coords, data);
      PUNPCKLBW(coords, R(coords));
      PUNPCKLWD(coords, R(coords));
      PSRAD(coords, 24);
      break;
    case FORMAT_USHORT:
    case FORMAT_SHORT:
      switch (count_in)
      {
      case 1:
        LoadAndSwap(32, scratch3, data);
        MOVD_xmm(coords, R(scratch3));  // ......X.
        break;
      case 2:
        LoadAndSwap(32, scratch3, data);
        MOVD_xmm(coords, R(scratch3));     // ......XY
        PSHUFLW(coords, R(coords), 0x24);  // ....Y.X.
        break;
      case 3:
        LoadAndSwap(64, scratch3, data);
        MOVQ_xmm(coords, R(scratch3));     // ....XYZ.
        PUNPCKLQDQ(coords, R(coords));     // ..Z.XYZ.
        PSHUFLW(coords, R(coords), 0xAC);  // ..Z.Y.X.
        break;
      }
      if (format == FORMAT_SHORT)
        PSRAD(coords, 16);
      else
        PSRLD(coords, 16);
      break;
    case FORMAT_FLOAT:
      // Floats don't need to be scaled or converted,
      // so we can just load/swap/store them directly
      // and return early.
      // (In SSSE3 we still need to store them.)
      for (int i = 0; i < count_in; i++)
      {
        LoadAndSwap(32, scratch3, data);
        MOV(32, dest, R(scratch3));
        data.AddMemOffset(sizeof(float));
        dest.AddMemOffset(sizeof(float));

        // zfreeze
        if (native_format == &m_native_vtx_decl.position)
        {
          if (cpu_info.bSSE4_1)
          {
            PINSRD(coords, R(scratch3), i);
          }
          else
          {
            // Without SSE4.1, insert the 32-bit value as two 16-bit halves.
            PINSRW(coords, R(scratch3), 2 * i + 0);
            SHR(32, R(scratch3), Imm8(16));
            PINSRW(coords, R(scratch3), 2 * i + 1);
          }
        }
      }

      // zfreeze
      if (native_format == &m_native_vtx_decl.position)
      {
        CMP(32, R(count_reg), Imm8(3));
        FixupBranch dont_store = J_CC(CC_A);
        LEA(32, scratch3, MScaled(count_reg, SCALE_4, -4));
        MOVUPS(MPIC(VertexLoaderManager::position_cache, scratch3, SCALE_4), coords);
        SetJumpTarget(dont_store);
      }
      return load_bytes;
    }
  }

  if (format != FORMAT_FLOAT)
  {
    CVTDQ2PS(coords, R(coords));

    if (dequantize && scaling_exponent)
      MULPS(coords, MPIC(&scale_factors[scaling_exponent]));
  }

  switch (count_out)
  {
  case 1:
    MOVSS(dest, coords);
    break;
  case 2:
    MOVLPS(dest, coords);
    break;
  case 3:
    // Stores all four lanes (16 bytes), i.e. one float more than count_out.
    MOVUPS(dest, coords);
    break;
  }

  // zfreeze
  if (native_format == &m_native_vtx_decl.position)
  {
    CMP(32, R(count_reg), Imm8(3));
    FixupBranch dont_store = J_CC(CC_A);
    LEA(32, scratch3, MScaled(count_reg, SCALE_4, -4));
    MOVUPS(MPIC(VertexLoaderManager::position_cache, scratch3, SCALE_4), coords);
    SetJumpTarget(dont_store);
  }

  return load_bytes;
}
// Emits code that reads one color in `format` from `data`, expands it to 32 bits and stores it
// at dst_reg + m_dst_ofs. The bit-layout comments in each case show the source layout on the
// first line and the expanded layout on the second.
//
// When `attribute` equals DIRECT, m_src_ofs is advanced by the number of source bytes the
// format occupies (2, 3 or 4). A `format` not handled by the switch emits nothing and advances
// m_src_ofs by 0.
void VertexLoaderX64::ReadColor(OpArg data, u64 attribute, int format)
{
  int load_bytes = 0;
  switch (format)
  {
  case FORMAT_24B_888:
  case FORMAT_32B_888x:
  case FORMAT_32B_8888:
    MOV(32, R(scratch1), data);
    // Only FORMAT_32B_8888 carries its own alpha; force the top byte to 0xFF for the others.
    if (format != FORMAT_32B_8888)
      OR(32, R(scratch1), Imm32(0xFF000000));
    MOV(32, MDisp(dst_reg, m_dst_ofs), R(scratch1));
    load_bytes = 3 + (format != FORMAT_24B_888);
    break;

  case FORMAT_16B_565:
    //                   RRRRRGGG GGGBBBBB
    // AAAAAAAA BBBBBBBB GGGGGGGG RRRRRRRR
    LoadAndSwap(16, scratch1, data);
    if (cpu_info.bBMI1 && cpu_info.bBMI2)
    {
      MOV(32, R(scratch2), Imm32(0x07C3F7C0));
      PDEP(32, scratch3, scratch1, R(scratch2));

      MOV(32, R(scratch2), Imm32(0xF8FCF800));
      PDEP(32, scratch1, scratch1, R(scratch2));
      ANDN(32, scratch2, scratch2, R(scratch3));

      OR(32, R(scratch1), R(scratch2));
    }
    else
    {
      SHL(32, R(scratch1), Imm8(11));
      LEA(32, scratch2, MScaled(scratch1, SCALE_4, 0));
      LEA(32, scratch3, MScaled(scratch2, SCALE_8, 0));
      AND(32, R(scratch1), Imm32(0x0000F800));
      AND(32, R(scratch2), Imm32(0x00FC0000));
      AND(32, R(scratch3), Imm32(0xF8000000));
      OR(32, R(scratch1), R(scratch2));
      OR(32, R(scratch1), R(scratch3));

      MOV(32, R(scratch2), R(scratch1));
      SHR(32, R(scratch1), Imm8(5));
      AND(32, R(scratch1), Imm32(0x07000700));
      OR(32, R(scratch1), R(scratch2));

      SHR(32, R(scratch2), Imm8(6));
      AND(32, R(scratch2), Imm32(0x00030000));
      OR(32, R(scratch1), R(scratch2));
    }
    // The 565 format has no alpha bits; set the low byte to 0xFF before the swapping store.
    OR(32, R(scratch1), Imm32(0x000000FF));
    SwapAndStore(32, MDisp(dst_reg, m_dst_ofs), scratch1);
    load_bytes = 2;
    break;

  case FORMAT_16B_4444:
    //                   RRRRGGGG BBBBAAAA
    // AAAAAAAA BBBBBBBB GGGGGGGG RRRRRRRR
    LoadAndSwap(16, scratch1, data);
    if (cpu_info.bBMI2)
    {
      MOV(32, R(scratch2), Imm32(0x0F0F0F0F));
      PDEP(32, scratch1, scratch1, R(scratch2));
    }
    else
    {
      MOV(32, R(scratch2), R(scratch1));
      SHL(32, R(scratch1), Imm8(8));
      OR(32, R(scratch1), R(scratch2));
      AND(32, R(scratch1), Imm32(0x00FF00FF));

      MOV(32, R(scratch2), R(scratch1));
      SHL(32, R(scratch1), Imm8(4));
      OR(32, R(scratch1), R(scratch2));
      AND(32, R(scratch1), Imm32(0x0F0F0F0F));
    }
    // Each byte now holds one nibble in its low half; copy it into the high half as well.
    MOV(32, R(scratch2), R(scratch1));
    SHL(32, R(scratch1), Imm8(4));
    OR(32, R(scratch1), R(scratch2));
    SwapAndStore(32, MDisp(dst_reg, m_dst_ofs), scratch1);
    load_bytes = 2;
    break;

  case FORMAT_24B_6666:
    //          RRRRRRGG GGGGBBBB BBAAAAAA
    // AAAAAAAA BBBBBBBB GGGGGGGG RRRRRRRR
    data.AddMemOffset(-1);  // subtract one from address so we can use a 32bit load and bswap
    LoadAndSwap(32, scratch1, data);
    if (cpu_info.bBMI2)
    {
      MOV(32, R(scratch2), Imm32(0xFCFCFCFC));
      PDEP(32, scratch1, scratch1, R(scratch2));
      MOV(32, R(scratch2), R(scratch1));
    }
    else
    {
      LEA(32, scratch2, MScaled(scratch1, SCALE_4, 0));  // ______RR RRRRGGGG GGBBBBBB AAAAAA__
      AND(32, R(scratch2), Imm32(0x00003FFC));           // ________ ________ __BBBBBB AAAAAA__
      SHL(32, R(scratch1), Imm8(6));                     // __RRRRRR GGGGGGBB BBBBAAAA AA______
      AND(32, R(scratch1), Imm32(0x3FFC0000));           // __RRRRRR GGGGGG__ ________ ________
      OR(32, R(scratch1), R(scratch2));                  // __RRRRRR GGGGGG__ __BBBBBB AAAAAA__

      LEA(32, scratch2, MScaled(scratch1, SCALE_4, 0));  // RRRRRRGG GGGG____ BBBBBBAA AAAA____
      AND(32, R(scratch2), Imm32(0xFC00FC00));           // RRRRRR__ ________ BBBBBB__ ________
      AND(32, R(scratch1), Imm32(0x00FC00FC));           // ________ GGGGGG__ ________ AAAAAA__
      OR(32, R(scratch1), R(scratch2));                  // RRRRRR__ GGGGGG__ BBBBBB__ AAAAAA__
      MOV(32, R(scratch2), R(scratch1));
    }
    // Fill the two empty low bits of every byte with that channel's two top bits.
    SHR(32, R(scratch1), Imm8(6));
    AND(32, R(scratch1), Imm32(0x03030303));
    OR(32, R(scratch1), R(scratch2));
    SwapAndStore(32, MDisp(dst_reg, m_dst_ofs), scratch1);
    load_bytes = 3;
    break;
  }
  if (attribute == DIRECT)
    m_src_ofs += load_bytes;
}
// Emits the x64 code for the per-vertex conversion loop and fills in m_native_vtx_decl /
// m_native_components as it goes.
//
// Emitted code, in order: push the callee-saved registers it uses, back up the count
// (ABI_PARAM3) on the stack, copy it to count_reg and copy ABI_PARAM4 to base_reg, then loop once
// per vertex over: position matrix index, position, normals, colors, texture coordinates and
// texture matrix indices. After the loop the original count is popped into ABI_RETURN and, when
// positions are indexed, skipped_reg is subtracted from it.
//
// On exit m_VertexSize is the number of source bytes per vertex (m_src_ofs) and
// m_native_vtx_decl.stride the number of output bytes per vertex (m_dst_ofs).
//
// NOTE(review): this listing appears to interleave the pre- and post-reformat copy of every
// statement (each one shows up twice, differing only in whitespace) — confirm against the real
// file before treating the duplication as intentional.
void VertexLoaderX64::GenerateVertexLoader()
{
BitSet32 regs = {src_reg, dst_reg, scratch1, scratch2, scratch3, count_reg, skipped_reg, base_reg};
regs &= ABI_ALL_CALLEE_SAVED;
ABI_PushRegistersAndAdjustStack(regs, 0);
BitSet32 regs = {src_reg, dst_reg, scratch1, scratch2,
scratch3, count_reg, skipped_reg, base_reg};
regs &= ABI_ALL_CALLEE_SAVED;
ABI_PushRegistersAndAdjustStack(regs, 0);
// Backup count since we're going to count it down.
PUSH(32, R(ABI_PARAM3));
// Backup count since we're going to count it down.
PUSH(32, R(ABI_PARAM3));
// ABI_PARAM3 is one of the lower registers, so free it for scratch2.
MOV(32, R(count_reg), R(ABI_PARAM3));
// ABI_PARAM3 is one of the lower registers, so free it for scratch2.
MOV(32, R(count_reg), R(ABI_PARAM3));
MOV(64, R(base_reg), R(ABI_PARAM4));
MOV(64, R(base_reg), R(ABI_PARAM4));
// skipped_reg is only zeroed (and only read back at the end) when positions are indexed.
if (m_VtxDesc.Position & MASK_INDEXED)
XOR(32, R(skipped_reg), R(skipped_reg));
if (m_VtxDesc.Position & MASK_INDEXED)
XOR(32, R(skipped_reg), R(skipped_reg));
// TODO: load constants into registers outside the main loop
// TODO: load constants into registers outside the main loop
const u8* loop_start = GetCodePtr();
const u8* loop_start = GetCodePtr();
// Position matrix index: one source byte, masked to 6 bits, stored as a 32-bit value.
if (m_VtxDesc.PosMatIdx)
{
MOVZX(32, 8, scratch1, MDisp(src_reg, m_src_ofs));
AND(32, R(scratch1), Imm8(0x3F));
MOV(32, MDisp(dst_reg, m_dst_ofs), R(scratch1));
if (m_VtxDesc.PosMatIdx)
{
MOVZX(32, 8, scratch1, MDisp(src_reg, m_src_ofs));
AND(32, R(scratch1), Imm8(0x3F));
MOV(32, MDisp(dst_reg, m_dst_ofs), R(scratch1));
// zfreeze
// When count_reg <= 3 (CC_A skips the store otherwise) the index is also written to
// VertexLoaderManager::position_matrix_index[count_reg - 1].
CMP(32, R(count_reg), Imm8(3));
FixupBranch dont_store = J_CC(CC_A);
MOV(32, MPIC(VertexLoaderManager::position_matrix_index - 1, count_reg, SCALE_4), R(scratch1));
SetJumpTarget(dont_store);
// zfreeze
CMP(32, R(count_reg), Imm8(3));
FixupBranch dont_store = J_CC(CC_A);
MOV(32, MPIC(VertexLoaderManager::position_matrix_index - 1, count_reg, SCALE_4), R(scratch1));
SetJumpTarget(dont_store);
m_native_components |= VB_HAS_POSMTXIDX;
m_native_vtx_decl.posmtx.components = 4;
m_native_vtx_decl.posmtx.enable = true;
m_native_vtx_decl.posmtx.offset = m_dst_ofs;
m_native_vtx_decl.posmtx.type = VAR_UNSIGNED_BYTE;
m_native_vtx_decl.posmtx.integer = true;
m_src_ofs += sizeof(u8);
m_dst_ofs += sizeof(u32);
}
m_native_components |= VB_HAS_POSMTXIDX;
m_native_vtx_decl.posmtx.components = 4;
m_native_vtx_decl.posmtx.enable = true;
m_native_vtx_decl.posmtx.offset = m_dst_ofs;
m_native_vtx_decl.posmtx.type = VAR_UNSIGNED_BYTE;
m_native_vtx_decl.posmtx.integer = true;
m_src_ofs += sizeof(u8);
m_dst_ofs += sizeof(u32);
}
// Each enabled texture matrix index takes one source byte; remember its offset now so it can be
// loaded in the texture-coordinate pass further down.
u32 texmatidx_ofs[8];
const u64 tm[8] = {
m_VtxDesc.Tex0MatIdx, m_VtxDesc.Tex1MatIdx, m_VtxDesc.Tex2MatIdx, m_VtxDesc.Tex3MatIdx,
m_VtxDesc.Tex4MatIdx, m_VtxDesc.Tex5MatIdx, m_VtxDesc.Tex6MatIdx, m_VtxDesc.Tex7MatIdx,
};
for (int i = 0; i < 8; i++)
{
if (tm[i])
texmatidx_ofs[i] = m_src_ofs++;
}
u32 texmatidx_ofs[8];
const u64 tm[8] = {
m_VtxDesc.Tex0MatIdx, m_VtxDesc.Tex1MatIdx, m_VtxDesc.Tex2MatIdx, m_VtxDesc.Tex3MatIdx,
m_VtxDesc.Tex4MatIdx, m_VtxDesc.Tex5MatIdx, m_VtxDesc.Tex6MatIdx, m_VtxDesc.Tex7MatIdx,
};
for (int i = 0; i < 8; i++)
{
if (tm[i])
texmatidx_ofs[i] = m_src_ofs++;
}
// Position: 2 or 3 elements depending on m_VtxAttr.PosElements.
OpArg data = GetVertexAddr(ARRAY_POSITION, m_VtxDesc.Position);
int pos_elements = 2 + m_VtxAttr.PosElements;
ReadVertex(data, m_VtxDesc.Position, m_VtxAttr.PosFormat, pos_elements, pos_elements,
m_VtxAttr.ByteDequant, m_VtxAttr.PosFrac, &m_native_vtx_decl.position);
OpArg data = GetVertexAddr(ARRAY_POSITION, m_VtxDesc.Position);
int pos_elements = 2 + m_VtxAttr.PosElements;
ReadVertex(data, m_VtxDesc.Position, m_VtxAttr.PosFormat, pos_elements, pos_elements,
m_VtxAttr.ByteDequant, m_VtxAttr.PosFrac, &m_native_vtx_decl.position);
// Normals: one vector, or three when m_VtxAttr.NormalElements is set. The scaling exponent is
// looked up from the normal format; entries past the fourth are zero-initialized.
if (m_VtxDesc.Normal)
{
static const u8 map[8] = { 7, 6, 15, 14 };
u8 scaling_exponent = map[m_VtxAttr.NormalFormat];
if (m_VtxDesc.Normal)
{
static const u8 map[8] = {7, 6, 15, 14};
u8 scaling_exponent = map[m_VtxAttr.NormalFormat];
for (int i = 0; i < (m_VtxAttr.NormalElements ? 3 : 1); i++)
{
// A fresh address is fetched for the first vector, and for every vector when NormalIndex3 is
// set; otherwise the address simply advances by what ReadVertex returned.
if (!i || m_VtxAttr.NormalIndex3)
{
data = GetVertexAddr(ARRAY_NORMAL, m_VtxDesc.Normal);
int elem_size = 1 << (m_VtxAttr.NormalFormat / 2);
data.AddMemOffset(i * elem_size * 3);
}
data.AddMemOffset(ReadVertex(data, m_VtxDesc.Normal, m_VtxAttr.NormalFormat, 3, 3,
true, scaling_exponent, &m_native_vtx_decl.normals[i]));
}
for (int i = 0; i < (m_VtxAttr.NormalElements ? 3 : 1); i++)
{
if (!i || m_VtxAttr.NormalIndex3)
{
data = GetVertexAddr(ARRAY_NORMAL, m_VtxDesc.Normal);
int elem_size = 1 << (m_VtxAttr.NormalFormat / 2);
data.AddMemOffset(i * elem_size * 3);
}
data.AddMemOffset(ReadVertex(data, m_VtxDesc.Normal, m_VtxAttr.NormalFormat, 3, 3, true,
scaling_exponent, &m_native_vtx_decl.normals[i]));
}
m_native_components |= VB_HAS_NRM0;
if (m_VtxAttr.NormalElements)
m_native_components |= VB_HAS_NRM1 | VB_HAS_NRM2;
}
m_native_components |= VB_HAS_NRM0;
if (m_VtxAttr.NormalElements)
m_native_components |= VB_HAS_NRM1 | VB_HAS_NRM2;
}
// Colors: each enabled color is declared as four unsigned bytes (4 output bytes).
const u64 col[2] = { m_VtxDesc.Color0, m_VtxDesc.Color1 };
for (int i = 0; i < 2; i++)
{
if (col[i])
{
data = GetVertexAddr(ARRAY_COLOR + i, col[i]);
ReadColor(data, col[i], m_VtxAttr.color[i].Comp);
m_native_components |= VB_HAS_COL0 << i;
m_native_vtx_decl.colors[i].components = 4;
m_native_vtx_decl.colors[i].enable = true;
m_native_vtx_decl.colors[i].offset = m_dst_ofs;
m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE;
m_native_vtx_decl.colors[i].integer = false;
m_dst_ofs += 4;
}
}
const u64 col[2] = {m_VtxDesc.Color0, m_VtxDesc.Color1};
for (int i = 0; i < 2; i++)
{
if (col[i])
{
data = GetVertexAddr(ARRAY_COLOR + i, col[i]);
ReadColor(data, col[i], m_VtxAttr.color[i].Comp);
m_native_components |= VB_HAS_COL0 << i;
m_native_vtx_decl.colors[i].components = 4;
m_native_vtx_decl.colors[i].enable = true;
m_native_vtx_decl.colors[i].offset = m_dst_ofs;
m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE;
m_native_vtx_decl.colors[i].integer = false;
m_dst_ofs += 4;
}
}
// Texture coordinates. When a texture matrix index is present the attribute is declared as three
// floats: the coordinates are read with two output elements and the index, converted to float,
// is appended as the third. With an index but no coordinates, three floats are written with the
// index in the third slot and zeros in the first two.
const u64 tc[8] = {
m_VtxDesc.Tex0Coord, m_VtxDesc.Tex1Coord, m_VtxDesc.Tex2Coord, m_VtxDesc.Tex3Coord,
m_VtxDesc.Tex4Coord, m_VtxDesc.Tex5Coord, m_VtxDesc.Tex6Coord, m_VtxDesc.Tex7Coord,
};
for (int i = 0; i < 8; i++)
{
int elements = m_VtxAttr.texCoord[i].Elements + 1;
if (tc[i])
{
data = GetVertexAddr(ARRAY_TEXCOORD0 + i, tc[i]);
u8 scaling_exponent = m_VtxAttr.texCoord[i].Frac;
ReadVertex(data, tc[i], m_VtxAttr.texCoord[i].Format, elements, tm[i] ? 2 : elements,
m_VtxAttr.ByteDequant, scaling_exponent, &m_native_vtx_decl.texcoords[i]);
m_native_components |= VB_HAS_UV0 << i;
}
if (tm[i])
{
m_native_components |= VB_HAS_TEXMTXIDX0 << i;
m_native_vtx_decl.texcoords[i].components = 3;
m_native_vtx_decl.texcoords[i].enable = true;
m_native_vtx_decl.texcoords[i].type = VAR_FLOAT;
m_native_vtx_decl.texcoords[i].integer = false;
MOVZX(64, 8, scratch1, MDisp(src_reg, texmatidx_ofs[i]));
if (tc[i])
{
CVTSI2SS(XMM0, R(scratch1));
MOVSS(MDisp(dst_reg, m_dst_ofs), XMM0);
m_dst_ofs += sizeof(float);
}
else
{
m_native_vtx_decl.texcoords[i].offset = m_dst_ofs;
PXOR(XMM0, R(XMM0));
CVTSI2SS(XMM0, R(scratch1));
SHUFPS(XMM0, R(XMM0), 0x45); // 000X -> 0X00
MOVUPS(MDisp(dst_reg, m_dst_ofs), XMM0);
m_dst_ofs += sizeof(float) * 3;
}
}
}
const u64 tc[8] = {
m_VtxDesc.Tex0Coord, m_VtxDesc.Tex1Coord, m_VtxDesc.Tex2Coord, m_VtxDesc.Tex3Coord,
m_VtxDesc.Tex4Coord, m_VtxDesc.Tex5Coord, m_VtxDesc.Tex6Coord, m_VtxDesc.Tex7Coord,
};
for (int i = 0; i < 8; i++)
{
int elements = m_VtxAttr.texCoord[i].Elements + 1;
if (tc[i])
{
data = GetVertexAddr(ARRAY_TEXCOORD0 + i, tc[i]);
u8 scaling_exponent = m_VtxAttr.texCoord[i].Frac;
ReadVertex(data, tc[i], m_VtxAttr.texCoord[i].Format, elements, tm[i] ? 2 : elements,
m_VtxAttr.ByteDequant, scaling_exponent, &m_native_vtx_decl.texcoords[i]);
m_native_components |= VB_HAS_UV0 << i;
}
if (tm[i])
{
m_native_components |= VB_HAS_TEXMTXIDX0 << i;
m_native_vtx_decl.texcoords[i].components = 3;
m_native_vtx_decl.texcoords[i].enable = true;
m_native_vtx_decl.texcoords[i].type = VAR_FLOAT;
m_native_vtx_decl.texcoords[i].integer = false;
MOVZX(64, 8, scratch1, MDisp(src_reg, texmatidx_ofs[i]));
if (tc[i])
{
CVTSI2SS(XMM0, R(scratch1));
MOVSS(MDisp(dst_reg, m_dst_ofs), XMM0);
m_dst_ofs += sizeof(float);
}
else
{
m_native_vtx_decl.texcoords[i].offset = m_dst_ofs;
PXOR(XMM0, R(XMM0));
CVTSI2SS(XMM0, R(scratch1));
SHUFPS(XMM0, R(XMM0), 0x45); // 000X -> 0X00
MOVUPS(MDisp(dst_reg, m_dst_ofs), XMM0);
m_dst_ofs += sizeof(float) * 3;
}
}
}
// Prepare for the next vertex.
// `cont` sits after the dst_reg advance: the skip path below jumps here, so a skipped vertex
// advances src_reg and the count but not dst_reg.
ADD(64, R(dst_reg), Imm32(m_dst_ofs));
const u8* cont = GetCodePtr();
ADD(64, R(src_reg), Imm32(m_src_ofs));
// Prepare for the next vertex.
ADD(64, R(dst_reg), Imm32(m_dst_ofs));
const u8* cont = GetCodePtr();
ADD(64, R(src_reg), Imm32(m_src_ofs));
SUB(32, R(count_reg), Imm8(1));
J_CC(CC_NZ, loop_start);
SUB(32, R(count_reg), Imm8(1));
J_CC(CC_NZ, loop_start);
// Get the original count.
POP(32, R(ABI_RETURN));
// Get the original count.
POP(32, R(ABI_RETURN));
ABI_PopRegistersAndAdjustStack(regs, 0);
ABI_PopRegistersAndAdjustStack(regs, 0);
// Indexed positions: return count minus skipped vertices, and emit the out-of-line skip handler
// that m_skip_vertex branches to (it bumps skipped_reg and rejoins the loop at `cont`).
if (m_VtxDesc.Position & MASK_INDEXED)
{
SUB(32, R(ABI_RETURN), R(skipped_reg));
RET();
if (m_VtxDesc.Position & MASK_INDEXED)
{
SUB(32, R(ABI_RETURN), R(skipped_reg));
RET();
SetJumpTarget(m_skip_vertex);
ADD(32, R(skipped_reg), Imm8(1));
JMP(cont);
}
else
{
RET();
}
SetJumpTarget(m_skip_vertex);
ADD(32, R(skipped_reg), Imm8(1));
JMP(cont);
}
else
{
RET();
}
m_VertexSize = m_src_ofs;
m_native_vtx_decl.stride = m_dst_ofs;
m_VertexSize = m_src_ofs;
m_native_vtx_decl.stride = m_dst_ofs;
}
// Adds `count` to m_numLoadedVertices, then calls the generated code at `region` as
// int(u8* src, u8* dst, int count, const void* base) with the two reader pointers, the count and
// memory_base_ptr, and returns whatever that code returns.
// NOTE(review): the statements below appear twice (old and new formatting of the same lines in
// this listing); the second copy is unreachable after the first return.
int VertexLoaderX64::RunVertices(DataReader src, DataReader dst, int count)
{
m_numLoadedVertices += count;
return ((int (*)(u8*, u8*, int, const void*))region)(
src.GetPointer(),
dst.GetPointer(),
count,
memory_base_ptr);
m_numLoadedVertices += count;
return ((int (*)(u8*, u8*, int, const void*))region)(src.GetPointer(), dst.GetPointer(), count,
memory_base_ptr);
}

View file

@ -9,19 +9,20 @@
// Vertex loader that is both a VertexLoaderBase and an x64 code block (Gen::X64CodeBlock).
// NOTE(review): every member below is listed twice in this listing (old and new formatting of the
// same declaration) — confirm against the real header.
class VertexLoaderX64 : public VertexLoaderBase, public Gen::X64CodeBlock
{
public:
VertexLoaderX64(const TVtxDesc& vtx_desc, const VAT& vtx_att);
VertexLoaderX64(const TVtxDesc& vtx_desc, const VAT& vtx_att);
protected:
// Fixed identifier string for this loader.
std::string GetName() const override { return "VertexLoaderX64"; }
// Always reports true.
bool IsInitialized() override { return true; }
int RunVertices(DataReader src, DataReader dst, int count) override;
std::string GetName() const override { return "VertexLoaderX64"; }
bool IsInitialized() override { return true; }
int RunVertices(DataReader src, DataReader dst, int count) override;
private:
// Running byte offsets into the source vertex and the output vertex, both starting at 0.
u32 m_src_ofs = 0;
u32 m_dst_ofs = 0;
Gen::FixupBranch m_skip_vertex;
Gen::OpArg GetVertexAddr(int array, u64 attribute);
int ReadVertex(Gen::OpArg data, u64 attribute, int format, int count_in, int count_out, bool dequantize, u8 scaling_exponent, AttributeFormat* native_format);
void ReadColor(Gen::OpArg data, u64 attribute, int format);
void GenerateVertexLoader();
u32 m_src_ofs = 0;
u32 m_dst_ofs = 0;
Gen::FixupBranch m_skip_vertex;
Gen::OpArg GetVertexAddr(int array, u64 attribute);
int ReadVertex(Gen::OpArg data, u64 attribute, int format, int count_in, int count_out,
bool dequantize, u8 scaling_exponent, AttributeFormat* native_format);
void ReadColor(Gen::OpArg data, u64 attribute, int format);
void GenerateVertexLoader();
};

View file

@ -7,168 +7,212 @@
#include "Common/CommonFuncs.h"
#include "Common/CommonTypes.h"
#include "VideoCommon/VertexLoader.h"
#include "VideoCommon/VertexLoader_Color.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexLoaderUtils.h"
#include "VideoCommon/VertexLoader_Color.h"
// Top byte of a 32-bit color; OR-ed in by SetCol565 and Read24 to force that byte to 0xFF.
// Presumably the alpha channel, going by the AABBGGRR layout noted on SetCol4444.
#define AMASK 0xFF000000
// Writes one 32-bit color with DataWrite and advances the loader's color index.
// NOTE(review): the two statements appear twice in this listing (old and new formatting of the
// same lines) — confirm the real function writes only once.
static void SetCol(VertexLoader* loader, u32 val)
{
DataWrite(val);
loader->m_colIndex++;
DataWrite(val);
loader->m_colIndex++;
}
// Color comes in format BARG in 16 bits
// BARG -> AABBGGRR
// Each 4-bit channel is moved to the high nibble of its output byte, then `col >> 4` copies it
// into the low nibble so every channel becomes a full 8-bit value.
// NOTE(review): the body appears twice in this listing (old and new formatting).
static void SetCol4444(VertexLoader* loader, u16 val_)
{
u32 col, val = val_;
col = val & 0x00F0; // col = 000000R0;
col |= (val & 0x000F) << 12; // col |= 0000G000;
col |= (val & 0xF000) << 8; // col |= 00B00000;
col |= (val & 0x0F00) << 20; // col |= A0000000;
col |= col >> 4; // col = A0B0G0R0 | 0A0B0G0R;
SetCol(loader, col);
u32 col, val = val_;
col = val & 0x00F0; // col = 000000R0;
col |= (val & 0x000F) << 12; // col |= 0000G000;
col |= (val & 0xF000) << 8; // col |= 00B00000;
col |= (val & 0x0F00) << 20; // col |= A0000000;
col |= col >> 4; // col = A0B0G0R0 | 0A0B0G0R;
SetCol(loader, col);
}
// Color comes in format RGBA
// RRRRRRGG GGGGBBBB BBAAAAAA
// Only the low 24 bits of `val` are used. Each 6-bit channel is placed in the top six bits of
// its own output byte (R in byte 0, G in byte 1, B in byte 2, A in byte 3); the final step copies
// each byte's top two bits into its low two bits to fill the channel out to 8 bits.
// NOTE(review): the body appears twice in this listing (old and new formatting).
static void SetCol6666(VertexLoader* loader, u32 val)
{
u32 col = (val >> 16) & 0x000000FC;
col |= (val >> 2) & 0x0000FC00;
col |= (val << 12) & 0x00FC0000;
col |= (val << 26) & 0xFC000000;
col |= (col >> 6) & 0x03030303;
SetCol(loader, col);
u32 col = (val >> 16) & 0x000000FC;
col |= (val >> 2) & 0x0000FC00;
col |= (val << 12) & 0x00FC0000;
col |= (val << 26) & 0xFC000000;
col |= (col >> 6) & 0x03030303;
SetCol(loader, col);
}
// Color comes in RGB
// RRRRRGGG GGGBBBBB
// R and B (5 bits each) land in the top five bits of bytes 0 and 2, G (6 bits) in the top six
// bits of byte 1. The two follow-up steps copy the high bits of each channel into its empty low
// bits (three bits for R/B, two for G). The top byte is then forced to 0xFF via AMASK.
// NOTE(review): the body appears twice in this listing (old and new formatting).
static void SetCol565(VertexLoader* loader, u16 val_)
{
u32 col, val = val_;
col = (val >> 8) & 0x0000F8;
col |= (val << 5) & 0x00FC00;
col |= (val << 19) & 0xF80000;
col |= (col >> 5) & 0x070007;
col |= (col >> 6) & 0x000300;
SetCol(loader, col | AMASK);
u32 col, val = val_;
col = (val >> 8) & 0x0000F8;
col |= (val << 5) & 0x00FC00;
col |= (val << 19) & 0xF80000;
col |= (col >> 5) & 0x070007;
col |= (col >> 6) & 0x000300;
SetCol(loader, col | AMASK);
}
// Copies four bytes from `addr` into a u32 with memcpy (no alignment requirement on `addr`, no
// byte swapping) and returns it.
// NOTE(review): the body appears twice in this listing (old and new formatting).
static u32 Read32(const u8* addr)
{
u32 value;
std::memcpy(&value, addr, sizeof(u32));
return value;
u32 value;
std::memcpy(&value, addr, sizeof(u32));
return value;
}
// Returns the 32-bit value at `addr` with the top byte forced to 0xFF (AMASK).
// NOTE(review): this still reads four bytes through Read32, i.e. one byte past a 3-byte color —
// callers must make sure that extra byte is readable.
static u32 Read24(const u8* addr)
{
return Read32(addr) | AMASK;
return Read32(addr) | AMASK;
}
// Direct 24-bit color: emits the value at the current data position with the top byte set to
// 0xFF (via Read24), then skips 3 source bytes.
// NOTE(review): the body appears twice in this listing (old and new formatting).
void Color_ReadDirect_24b_888(VertexLoader* loader)
{
SetCol(loader, Read24(DataGetPosition()));
DataSkip(3);
SetCol(loader, Read24(DataGetPosition()));
DataSkip(3);
}
// Direct 32-bit color whose fourth byte is ignored: the top byte of the emitted value is replaced
// with 0xFF (via Read24), then 4 source bytes are skipped.
// NOTE(review): the body appears twice in this listing (old and new formatting).
void Color_ReadDirect_32b_888x(VertexLoader* loader)
{
SetCol(loader, Read24(DataGetPosition()));
DataSkip(4);
SetCol(loader, Read24(DataGetPosition()));
DataSkip(4);
}
// Direct 16-bit RGB565 color: reads a u16 with DataRead and expands it with SetCol565.
// NOTE(review): presumably DataRead<u16> handles the byte swap that the indexed variant does
// explicitly with Common::swap16 — confirm. The statement appears twice in this listing.
void Color_ReadDirect_16b_565(VertexLoader* loader)
{
SetCol565(loader, DataRead<u16>());
SetCol565(loader, DataRead<u16>());
}
// Direct 16-bit 4444 color: copies two bytes from the current data position without swapping
// them (SetCol4444 expects that unswapped order), expands them, then skips 2 source bytes.
// NOTE(review): the body appears twice in this listing (old and new formatting).
void Color_ReadDirect_16b_4444(VertexLoader* loader)
{
u16 value;
std::memcpy(&value, DataGetPosition(), sizeof(u16));
u16 value;
std::memcpy(&value, DataGetPosition(), sizeof(u16));
SetCol4444(loader, value);
DataSkip(2);
SetCol4444(loader, value);
DataSkip(2);
}
// Direct 24-bit 6666 color: the value is fetched starting one byte BEFORE the current position
// and passed through Common::swap32, then expanded by SetCol6666 (which only uses the low 24
// bits); finally 3 source bytes are skipped.
// NOTE(review): presumably the -1 makes the three color bytes land in the low 24 bits after the
// swap; it also means the byte preceding the color must be readable — confirm.
// The body appears twice in this listing (old and new formatting).
void Color_ReadDirect_24b_6666(VertexLoader* loader)
{
SetCol6666(loader, Common::swap32(DataGetPosition() - 1));
DataSkip(3);
SetCol6666(loader, Common::swap32(DataGetPosition() - 1));
DataSkip(3);
}
// Direct 32-bit color: emitted exactly as returned by DataReadU32Unswapped.
// NOTE(review): the statement appears twice in this listing (old and new formatting).
void Color_ReadDirect_32b_8888(VertexLoader* loader)
{
SetCol(loader, DataReadU32Unswapped());
SetCol(loader, DataReadU32Unswapped());
}
// Indexed RGB565 color. I is the index type read from the vertex stream. The color is fetched
// from cached_arraybases[ARRAY_COLOR + m_colIndex] + index * stride, byte-swapped with
// Common::swap16 and expanded by SetCol565.
// NOTE(review): the body appears twice in this listing (old and new formatting).
template <typename I>
void Color_ReadIndex_16b_565(VertexLoader* loader)
{
auto const Index = DataRead<I>();
const u8* const address = VertexLoaderManager::cached_arraybases[ARRAY_COLOR + loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR + loader->m_colIndex]);
auto const Index = DataRead<I>();
const u8* const address =
VertexLoaderManager::cached_arraybases[ARRAY_COLOR + loader->m_colIndex] +
(Index * g_main_cp_state.array_strides[ARRAY_COLOR + loader->m_colIndex]);
u16 value;
std::memcpy(&value, address, sizeof(u16));
u16 value;
std::memcpy(&value, address, sizeof(u16));
SetCol565(loader, Common::swap16(value));
SetCol565(loader, Common::swap16(value));
}
// Indexed 24-bit color. I is the index type read from the vertex stream. The color is fetched
// from cached_arraybases[ARRAY_COLOR + m_colIndex] + index * stride and emitted with the top byte
// forced to 0xFF (Read24).
// NOTE(review): the body appears twice in this listing (old and new formatting).
template <typename I>
void Color_ReadIndex_24b_888(VertexLoader* loader)
{
auto const Index = DataRead<I>();
const u8 *iAddress = VertexLoaderManager::cached_arraybases[ARRAY_COLOR + loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR + loader->m_colIndex]);
SetCol(loader, Read24(iAddress));
auto const Index = DataRead<I>();
const u8* iAddress = VertexLoaderManager::cached_arraybases[ARRAY_COLOR + loader->m_colIndex] +
(Index * g_main_cp_state.array_strides[ARRAY_COLOR + loader->m_colIndex]);
SetCol(loader, Read24(iAddress));
}
// Indexed 32-bit color whose fourth byte is ignored. Same lookup as the 24-bit variant: the value
// at cached_arraybases[ARRAY_COLOR + m_colIndex] + index * stride is emitted with the top byte
// forced to 0xFF (Read24).
// NOTE(review): the body appears twice in this listing (old and new formatting).
template <typename I>
void Color_ReadIndex_32b_888x(VertexLoader* loader)
{
auto const Index = DataRead<I>();
const u8 *iAddress = VertexLoaderManager::cached_arraybases[ARRAY_COLOR + loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR + loader->m_colIndex]);
SetCol(loader, Read24(iAddress));
auto const Index = DataRead<I>();
const u8* iAddress = VertexLoaderManager::cached_arraybases[ARRAY_COLOR + loader->m_colIndex] +
(Index * g_main_cp_state.array_strides[ARRAY_COLOR + loader->m_colIndex]);
SetCol(loader, Read24(iAddress));
}
// Indexed 16-bit 4444 color. I is the index type read from the vertex stream. Two bytes are
// copied from cached_arraybases[ARRAY_COLOR + m_colIndex] + index * stride without swapping and
// expanded by SetCol4444.
// NOTE(review): the body appears twice in this listing (old and new formatting).
template <typename I>
void Color_ReadIndex_16b_4444(VertexLoader* loader)
{
auto const Index = DataRead<I>();
const u8* const address = VertexLoaderManager::cached_arraybases[ARRAY_COLOR + loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR + loader->m_colIndex]);
auto const Index = DataRead<I>();
const u8* const address =
VertexLoaderManager::cached_arraybases[ARRAY_COLOR + loader->m_colIndex] +
(Index * g_main_cp_state.array_strides[ARRAY_COLOR + loader->m_colIndex]);
u16 value;
std::memcpy(&value, address, sizeof(u16));
u16 value;
std::memcpy(&value, address, sizeof(u16));
SetCol4444(loader, value);
SetCol4444(loader, value);
}
// Indexed 24-bit 6666 color. The source pointer is the array entry address minus one byte; the
// value obtained through Common::swap32 is expanded by SetCol6666, which only uses its low 24
// bits.
// NOTE(review): as in the direct variant, the -1 means the byte preceding the entry is read —
// for index 0 that is one byte before the array base; confirm this is always readable.
// The body appears twice in this listing (old and new formatting).
template <typename I>
void Color_ReadIndex_24b_6666(VertexLoader* loader)
{
auto const Index = DataRead<I>();
const u8* pData = VertexLoaderManager::cached_arraybases[ARRAY_COLOR + loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR + loader->m_colIndex]) - 1;
u32 val = Common::swap32(pData);
SetCol6666(loader, val);
auto const Index = DataRead<I>();
const u8* pData = VertexLoaderManager::cached_arraybases[ARRAY_COLOR + loader->m_colIndex] +
(Index * g_main_cp_state.array_strides[ARRAY_COLOR + loader->m_colIndex]) - 1;
u32 val = Common::swap32(pData);
SetCol6666(loader, val);
}
// Indexed 32-bit color. The four bytes at cached_arraybases[ARRAY_COLOR + m_colIndex] +
// index * stride are emitted as-is (Read32, no swap, no alpha override).
// NOTE(review): the body appears twice in this listing (old and new formatting).
template <typename I>
void Color_ReadIndex_32b_8888(VertexLoader* loader)
{
auto const Index = DataRead<I>();
const u8 *iAddress = VertexLoaderManager::cached_arraybases[ARRAY_COLOR + loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR + loader->m_colIndex]);
SetCol(loader, Read32(iAddress));
auto const Index = DataRead<I>();
const u8* iAddress = VertexLoaderManager::cached_arraybases[ARRAY_COLOR + loader->m_colIndex] +
(Index * g_main_cp_state.array_strides[ARRAY_COLOR + loader->m_colIndex]);
SetCol(loader, Read32(iAddress));
}
// Non-template entry points for 8-bit indices: each forwards to the matching
// Color_ReadIndex_* template instantiated with u8.
// NOTE(review): the six wrappers are listed twice here (one-line form, then expanded form) —
// this looks like the old and new formatting of the same definitions; confirm only one set
// exists in the real file.
void Color_ReadIndex8_16b_565(VertexLoader* loader) { Color_ReadIndex_16b_565<u8>(loader); }
void Color_ReadIndex8_24b_888(VertexLoader* loader) { Color_ReadIndex_24b_888<u8>(loader); }
void Color_ReadIndex8_32b_888x(VertexLoader* loader) { Color_ReadIndex_32b_888x<u8>(loader); }
void Color_ReadIndex8_16b_4444(VertexLoader* loader) { Color_ReadIndex_16b_4444<u8>(loader); }
void Color_ReadIndex8_24b_6666(VertexLoader* loader) { Color_ReadIndex_24b_6666<u8>(loader); }
void Color_ReadIndex8_32b_8888(VertexLoader* loader) { Color_ReadIndex_32b_8888<u8>(loader); }
void Color_ReadIndex8_16b_565(VertexLoader* loader)
{
Color_ReadIndex_16b_565<u8>(loader);
}
void Color_ReadIndex8_24b_888(VertexLoader* loader)
{
Color_ReadIndex_24b_888<u8>(loader);
}
void Color_ReadIndex8_32b_888x(VertexLoader* loader)
{
Color_ReadIndex_32b_888x<u8>(loader);
}
void Color_ReadIndex8_16b_4444(VertexLoader* loader)
{
Color_ReadIndex_16b_4444<u8>(loader);
}
void Color_ReadIndex8_24b_6666(VertexLoader* loader)
{
Color_ReadIndex_24b_6666<u8>(loader);
}
void Color_ReadIndex8_32b_8888(VertexLoader* loader)
{
Color_ReadIndex_32b_8888<u8>(loader);
}
// Non-template entry points for 16-bit indices: each forwards to the matching
// Color_ReadIndex_* template instantiated with u16.
// NOTE(review): the six wrappers are listed twice here (one-line form, then expanded form) —
// this looks like the old and new formatting of the same definitions; confirm only one set
// exists in the real file.
void Color_ReadIndex16_16b_565(VertexLoader* loader) { Color_ReadIndex_16b_565<u16>(loader); }
void Color_ReadIndex16_24b_888(VertexLoader* loader) { Color_ReadIndex_24b_888<u16>(loader); }
void Color_ReadIndex16_32b_888x(VertexLoader* loader) { Color_ReadIndex_32b_888x<u16>(loader); }
void Color_ReadIndex16_16b_4444(VertexLoader* loader) { Color_ReadIndex_16b_4444<u16>(loader); }
void Color_ReadIndex16_24b_6666(VertexLoader* loader) { Color_ReadIndex_24b_6666<u16>(loader); }
void Color_ReadIndex16_32b_8888(VertexLoader* loader) { Color_ReadIndex_32b_8888<u16>(loader); }
void Color_ReadIndex16_16b_565(VertexLoader* loader)
{
Color_ReadIndex_16b_565<u16>(loader);
}
void Color_ReadIndex16_24b_888(VertexLoader* loader)
{
Color_ReadIndex_24b_888<u16>(loader);
}
void Color_ReadIndex16_32b_888x(VertexLoader* loader)
{
Color_ReadIndex_32b_888x<u16>(loader);
}
void Color_ReadIndex16_16b_4444(VertexLoader* loader)
{
Color_ReadIndex_16b_4444<u16>(loader);
}
void Color_ReadIndex16_24b_6666(VertexLoader* loader)
{
Color_ReadIndex_24b_6666<u16>(loader);
}
void Color_ReadIndex16_32b_8888(VertexLoader* loader)
{
Color_ReadIndex_32b_8888<u16>(loader);
}

View file

@ -10,182 +10,180 @@
#include "Common/CommonTypes.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/VertexLoader.h"
#include "VideoCommon/VertexLoader_Normal.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexLoaderUtils.h"
#include "VideoCommon/VertexLoader_Normal.h"
// warning: mapping buffer should be disabled to use this
// LOG_NORM() currently expands to nothing: the PRIM_LOG call that would print the last three
// floats written before g_vertex_manager_write_ptr is commented out.
// NOTE(review): the macro is defined twice below (single-line form, then the wrapped form) —
// old and new formatting of the same line in this listing.
#define LOG_NORM() // PRIM_LOG("norm: %f %f %f, ", ((float*)g_vertex_manager_write_ptr)[-3], ((float*)g_vertex_manager_write_ptr)[-2], ((float*)g_vertex_manager_write_ptr)[-1]);
#define LOG_NORM() // PRIM_LOG("norm: %f %f %f, ", ((float*)g_vertex_manager_write_ptr)[-3],
// ((float*)g_vertex_manager_write_ptr)[-2],
// ((float*)g_vertex_manager_write_ptr)[-1]);
// Storage for the static lookup table indexed as [type][indices][elements][format]; its entries
// are assigned in VertexLoader_Normal::Init().
// NOTE(review): the definition appears twice below (single-line form, then wrapped form) — old
// and new formatting of the same line in this listing.
VertexLoader_Normal::Set VertexLoader_Normal::m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT];
VertexLoader_Normal::Set VertexLoader_Normal::m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES]
[NUM_NRM_ELEMENTS][NUM_NRM_FORMAT];
namespace
{
// Converts an integer normal component to float by dividing by a power of two chosen from the
// type: 2^(bits - 1) for unsigned T and 2^(bits - 2) for signed T, i.e. 128 for u8, 64 for s8,
// 32768 for u16 and 16384 for s16 (matching the commented-out constants below).
// NOTE(review): the body appears twice in this listing (old and new formatting).
template <typename T>
__forceinline float FracAdjust(T val)
{
//auto const S8FRAC = 1.f / (1u << 6);
//auto const U8FRAC = 1.f / (1u << 7);
//auto const S16FRAC = 1.f / (1u << 14);
//auto const U16FRAC = 1.f / (1u << 15);
// auto const S8FRAC = 1.f / (1u << 6);
// auto const U8FRAC = 1.f / (1u << 7);
// auto const S16FRAC = 1.f / (1u << 14);
// auto const U16FRAC = 1.f / (1u << 15);
// TODO: is this right?
return val / float(1u << (sizeof(T) * 8 - std::is_signed<T>::value - 1));
// TODO: is this right?
return val / float(1u << (sizeof(T) * 8 - std::is_signed<T>::value - 1));
}
// Float specialization: float components need no scaling and are returned unchanged.
// NOTE(review): the return statement appears twice in this listing (old and new formatting).
template <>
__forceinline float FracAdjust(float val)
{
return val;
return val;
}
// Reads N components of type T from `data`, converts each from big-endian, scales it with
// FracAdjust and writes the resulting floats at g_vertex_manager_write_ptr, which is advanced
// past the written data. N must be 3 (one vector) or 9 (three vectors).
// NOTE(review): the body appears twice in this listing (old and new formatting).
template <typename T, int N>
__forceinline void ReadIndirect(const T* data)
{
static_assert(3 == N || 9 == N, "N is only sane as 3 or 9!");
DataReader dst(g_vertex_manager_write_ptr, nullptr);
static_assert(3 == N || 9 == N, "N is only sane as 3 or 9!");
DataReader dst(g_vertex_manager_write_ptr, nullptr);
for (int i = 0; i != N; ++i)
{
dst.Write(FracAdjust(Common::FromBigEndian(data[i])));
}
for (int i = 0; i != N; ++i)
{
dst.Write(FracAdjust(Common::FromBigEndian(data[i])));
}
g_vertex_manager_write_ptr = dst.GetPointer();
LOG_NORM();
g_vertex_manager_write_ptr = dst.GetPointer();
LOG_NORM();
}
// Reader for normals stored inline in the vertex stream: N vectors of three T components.
// `function` converts them via ReadIndirect and skips the consumed bytes; `size` is the number
// of source bytes consumed (sizeof(T) * N * 3).
// NOTE(review): both members appear twice in this listing (old and new formatting).
template <typename T, int N>
struct Normal_Direct
{
static void function(VertexLoader* loader)
{
auto const source = reinterpret_cast<const T*>(DataGetPosition());
ReadIndirect<T, N * 3>(source);
DataSkip<N * 3 * sizeof(T)>();
}
static void function(VertexLoader* loader)
{
auto const source = reinterpret_cast<const T*>(DataGetPosition());
ReadIndirect<T, N * 3>(source);
DataSkip<N * 3 * sizeof(T)>();
}
static const int size = sizeof(T) * N * 3;
static const int size = sizeof(T) * N * 3;
};
// Reads one index of unsigned type I from the vertex stream and converts N vectors of T from the
// normal array, starting at
//   cached_arraybases[ARRAY_NORMAL] + index * array_strides[ARRAY_NORMAL] + sizeof(T) * 3 * Offset
// so Offset selects which three-component vector inside the indexed entry is read first.
// NOTE(review): the body appears twice in this listing (old and new formatting).
template <typename I, typename T, int N, int Offset>
__forceinline void Normal_Index_Offset()
{
static_assert(std::is_unsigned<I>::value, "Only unsigned I is sane!");
static_assert(std::is_unsigned<I>::value, "Only unsigned I is sane!");
auto const index = DataRead<I>();
auto const data = reinterpret_cast<const T*>(VertexLoaderManager::cached_arraybases[ARRAY_NORMAL]
+ (index * g_main_cp_state.array_strides[ARRAY_NORMAL]) + sizeof(T) * 3 * Offset);
ReadIndirect<T, N * 3>(data);
auto const index = DataRead<I>();
auto const data = reinterpret_cast<const T*>(
VertexLoaderManager::cached_arraybases[ARRAY_NORMAL] +
(index * g_main_cp_state.array_strides[ARRAY_NORMAL]) + sizeof(T) * 3 * Offset);
ReadIndirect<T, N * 3>(data);
}
// Reader for indexed normals that use a single index: `function` reads one index of type I and
// converts N vectors of T starting at offset 0 of the indexed entry; `size` is the number of
// source bytes consumed (one index).
// NOTE(review): both members appear twice in this listing (old and new formatting).
template <typename I, typename T, int N>
struct Normal_Index
{
static void function(VertexLoader* loader)
{
Normal_Index_Offset<I, T, N, 0>();
}
static const int size = sizeof(I);
static void function(VertexLoader* loader) { Normal_Index_Offset<I, T, N, 0>(); }
static const int size = sizeof(I);
};
// Reader for indexed normals with three separate indices: `function` reads three indices of type
// I in turn and, for the k-th one (k = 0, 1, 2), converts the single vector at offset k of the
// entry it selects; `size` is the number of source bytes consumed (three indices).
// NOTE(review): both members appear twice in this listing (old and new formatting).
template <typename I, typename T>
struct Normal_Index_Indices3
{
static void function(VertexLoader* loader)
{
Normal_Index_Offset<I, T, 1, 0>();
Normal_Index_Offset<I, T, 1, 1>();
Normal_Index_Offset<I, T, 1, 2>();
}
static void function(VertexLoader* loader)
{
Normal_Index_Offset<I, T, 1, 0>();
Normal_Index_Offset<I, T, 1, 1>();
Normal_Index_Offset<I, T, 1, 2>();
}
static const int size = sizeof(I) * 3;
static const int size = sizeof(I) * 3;
};
}
void VertexLoader_Normal::Init()
{
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Normal_Direct<u8, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Normal_Direct<s8, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Normal_Direct<u16, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Normal_Direct<s16, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Normal_Direct<float, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct<u8, 3>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Direct<s8, 3>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Direct<u16, 3>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Direct<s16, 3>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct<float, 3>();
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Direct<u8, 1>();
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Direct<s8, 1>();
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Direct<u16, 1>();
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Direct<s16, 1>();
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Direct<float, 1>();
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct<u8, 3>();
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Direct<s8, 3>();
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Direct<u16, 3>();
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Direct<s16, 3>();
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct<float, 3>();
// Same as above
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Normal_Direct<u8, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Normal_Direct<s8, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Normal_Direct<u16, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Normal_Direct<s16, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Normal_Direct<float, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct<u8, 3>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Direct<s8, 3>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Direct<u16, 3>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Direct<s16, 3>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct<float, 3>();
// Same as above
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Direct<u8, 1>();
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Direct<s8, 1>();
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Direct<u16, 1>();
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Direct<s16, 1>();
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Direct<float, 1>();
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct<u8, 3>();
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Direct<s8, 3>();
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Direct<u16, 3>();
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Direct<s16, 3>();
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct<float, 3>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Normal_Index<u8, u8, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Normal_Index<u8, s8, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Normal_Index<u8, u16, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Normal_Index<u8, s16, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Normal_Index<u8, float, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index<u8, u8, 3>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index<u8, s8, 3>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index<u8, u16, 3>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index<u8, s16, 3>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index<u8, float, 3>();
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Index<u8, u8, 1>();
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Index<u8, s8, 1>();
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Index<u8, u16, 1>();
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Index<u8, s16, 1>();
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Index<u8, float, 1>();
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index<u8, u8, 3>();
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index<u8, s8, 3>();
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index<u8, u16, 3>();
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index<u8, s16, 3>();
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index<u8, float, 3>();
// Same as above for NRM_NBT
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Normal_Index<u8, u8, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Normal_Index<u8, s8, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Normal_Index<u8, u16, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Normal_Index<u8, s16, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Normal_Index<u8, float, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3<u8, u8>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3<u8, s8>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3<u8, u16>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3<u8, s16>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3<u8, float>();
// Same as above for NRM_NBT
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Index<u8, u8, 1>();
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Index<u8, s8, 1>();
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Index<u8, u16, 1>();
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Index<u8, s16, 1>();
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Index<u8, float, 1>();
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3<u8, u8>();
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3<u8, s8>();
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3<u8, u16>();
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3<u8, s16>();
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3<u8, float>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Normal_Index<u16, u8, 1>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Normal_Index<u16, s8, 1>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Normal_Index<u16, u16, 1>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Normal_Index<u16, s16, 1>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Normal_Index<u16, float, 1>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index<u16, u8, 3>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index<u16, s8, 3>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index<u16, u16, 3>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index<u16, s16, 3>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index<u16, float, 3>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Index<u16, u8, 1>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Index<u16, s8, 1>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Index<u16, u16, 1>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Index<u16, s16, 1>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Index<u16, float, 1>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index<u16, u8, 3>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index<u16, s8, 3>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index<u16, u16, 3>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index<u16, s16, 3>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index<u16, float, 3>();
// Same as above for NRM_NBT
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Normal_Index<u16, u8, 1>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Normal_Index<u16, s8, 1>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Normal_Index<u16, u16, 1>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Normal_Index<u16, s16, 1>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Normal_Index<u16, float, 1>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3<u16, u8>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3<u16, s8>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3<u16, u16>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3<u16, s16>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3<u16, float>();
// Same as above for NRM_NBT
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Index<u16, u8, 1>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Index<u16, s8, 1>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Index<u16, u16, 1>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Index<u16, s16, 1>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Index<u16, float, 1>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3<u16, u8>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3<u16, s8>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3<u16, u16>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3<u16, s16>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3<u16, float>();
}
// Returns the gc_size stored in m_Table for the requested combination.
// Note that the table is indexed as [_type][_index3][_elements][_format], which is not the
// order of the parameters. No range checking is performed here.
unsigned int VertexLoader_Normal::GetSize(u64 _type,
unsigned int _format, unsigned int _elements, unsigned int _index3)
unsigned int VertexLoader_Normal::GetSize(u64 _type, unsigned int _format, unsigned int _elements,
unsigned int _index3)
{
return m_Table[_type][_index3][_elements][_format].gc_size;
return m_Table[_type][_index3][_elements][_format].gc_size;
}
// Returns the loader function stored in m_Table for the requested combination.
// The table is indexed as [_type][_index3][_elements][_format], which is not the order of the
// parameters. No range checking is performed here.
TPipelineFunction VertexLoader_Normal::GetFunction(u64 _type,
unsigned int _format, unsigned int _elements, unsigned int _index3)
TPipelineFunction VertexLoader_Normal::GetFunction(u64 _type, unsigned int _format,
unsigned int _elements, unsigned int _index3)
{
TPipelineFunction pFunc = m_Table[_type][_index3][_elements][_format].function;
return pFunc;
TPipelineFunction pFunc = m_Table[_type][_index3][_elements][_format].function;
return pFunc;
}

View file

@ -9,64 +9,63 @@
// Static lookup of normal-loading functions and their sizes.
// All state lives in the static m_Table, indexed by
// [ENormalType][ENormalIndices][ENormalElements][ENormalFormat]; GetSize() and GetFunction()
// read from it. NOTE(review): the table is presumably filled by Init() - confirm in the .cpp.
class VertexLoader_Normal
{
public:
// Init
static void Init();
// Init
static void Init();
// GetSize
static unsigned int GetSize(u64 _type, unsigned int _format, unsigned int _elements,
unsigned int _index3);
// GetSize
static unsigned int GetSize(u64 _type, unsigned int _format,
unsigned int _elements, unsigned int _index3);
// GetFunction
static TPipelineFunction GetFunction(u64 _type,
unsigned int _format, unsigned int _elements, unsigned int _index3);
// GetFunction
static TPipelineFunction GetFunction(u64 _type, unsigned int _format, unsigned int _elements,
unsigned int _index3);
private:
// First index of m_Table.
enum ENormalType
{
NRM_NOT_PRESENT = 0,
NRM_DIRECT = 1,
NRM_INDEX8 = 2,
NRM_INDEX16 = 3,
NUM_NRM_TYPE
};
enum ENormalType
{
NRM_NOT_PRESENT = 0,
NRM_DIRECT = 1,
NRM_INDEX8 = 2,
NRM_INDEX16 = 3,
NUM_NRM_TYPE
};
// Fourth (last) index of m_Table.
enum ENormalFormat
{
FORMAT_UBYTE = 0,
FORMAT_BYTE = 1,
FORMAT_USHORT = 2,
FORMAT_SHORT = 3,
FORMAT_FLOAT = 4,
NUM_NRM_FORMAT
};
enum ENormalFormat
{
FORMAT_UBYTE = 0,
FORMAT_BYTE = 1,
FORMAT_USHORT = 2,
FORMAT_SHORT = 3,
FORMAT_FLOAT = 4,
NUM_NRM_FORMAT
};
// Third index of m_Table.
enum ENormalElements
{
NRM_NBT = 0,
NRM_NBT3 = 1,
NUM_NRM_ELEMENTS
};
enum ENormalElements
{
NRM_NBT = 0,
NRM_NBT3 = 1,
NUM_NRM_ELEMENTS
};
// Second index of m_Table.
enum ENormalIndices
{
NRM_INDICES1 = 0,
NRM_INDICES3 = 1,
NUM_NRM_INDICES
};
enum ENormalIndices
{
NRM_INDICES1 = 0,
NRM_INDICES3 = 1,
NUM_NRM_INDICES
};
// One table entry. Assigning any object of type T copies T::size and T::function into the
// entry; the assigned value itself is ignored, only its type matters.
struct Set
{
template <typename T>
void operator=(const T&)
{
gc_size = T::size;
function = T::function;
}
struct Set
{
template <typename T>
void operator=(const T&)
{
gc_size = T::size;
function = T::function;
}
int gc_size;
TPipelineFunction function;
};
int gc_size;
TPipelineFunction function;
};
static Set m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT];
static Set m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT];
};

View file

@ -9,120 +9,219 @@
#include "Common/CommonTypes.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/VertexLoader.h"
#include "VideoCommon/VertexLoader_Position.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexLoaderUtils.h"
#include "VideoCommon/VertexLoader_Position.h"
#include "VideoCommon/VideoCommon.h"
// Generic case: converts a raw position component to float by multiplying it with scale.
template <typename T>
float PosScale(T val, float scale)
{
return val * scale;
return val * scale;
}
// float specialization: the value is returned unchanged and scale is ignored.
template <>
float PosScale(float val, float scale)
{
return val;
return val;
}
// Reads N position components of type T from g_video_buffer_read_ptr, scales each one with
// loader->m_posScale via PosScale(), and writes the resulting floats to
// g_vertex_manager_write_ptr. Both global pointers are advanced past the consumed/produced data.
// While loader->m_counter < 3 the values are also stored in
// VertexLoaderManager::position_cache[loader->m_counter].
template <typename T, int N>
void Pos_ReadDirect(VertexLoader* loader)
{
static_assert(N <= 3, "N > 3 is not sane!");
auto const scale = loader->m_posScale;
// Both readers are constructed with a null end pointer.
DataReader dst(g_vertex_manager_write_ptr, nullptr);
DataReader src(g_video_buffer_read_ptr, nullptr);
static_assert(N <= 3, "N > 3 is not sane!");
auto const scale = loader->m_posScale;
DataReader dst(g_vertex_manager_write_ptr, nullptr);
DataReader src(g_video_buffer_read_ptr, nullptr);
for (int i = 0; i < N; ++i)
{
float value = PosScale(src.Read<T>(), scale);
if (loader->m_counter < 3)
VertexLoaderManager::position_cache[loader->m_counter][i] = value;
dst.Write(value);
}
for (int i = 0; i < N; ++i)
{
float value = PosScale(src.Read<T>(), scale);
if (loader->m_counter < 3)
VertexLoaderManager::position_cache[loader->m_counter][i] = value;
dst.Write(value);
}
// Publish the advanced positions back to the global pointers.
g_vertex_manager_write_ptr = dst.GetPointer();
g_video_buffer_read_ptr = src.GetPointer();
LOG_VTX();
g_vertex_manager_write_ptr = dst.GetPointer();
g_video_buffer_read_ptr = src.GetPointer();
LOG_VTX();
}
// Reads an index of unsigned type I with DataRead<I>(), then loads N components of type T from
// the position array at cached_arraybases[ARRAY_POSITION] + index * array_strides[ARRAY_POSITION].
// Each component is converted with Common::FromBigEndian(), scaled with loader->m_posScale and
// written as float to g_vertex_manager_write_ptr, which is advanced afterwards.
// loader->m_vertexSkip is set when the index equals the maximum value of I.
// While loader->m_counter < 3 the values are also stored in
// VertexLoaderManager::position_cache[loader->m_counter].
// NOTE(review): the read pointer is not touched here; presumably DataRead<I>() advances it - confirm.
template <typename I, typename T, int N>
void Pos_ReadIndex(VertexLoader* loader)
{
static_assert(std::is_unsigned<I>::value, "Only unsigned I is sane!");
static_assert(N <= 3, "N > 3 is not sane!");
static_assert(std::is_unsigned<I>::value, "Only unsigned I is sane!");
static_assert(N <= 3, "N > 3 is not sane!");
auto const index = DataRead<I>();
loader->m_vertexSkip = index == std::numeric_limits<I>::max();
auto const data = reinterpret_cast<const T*>(VertexLoaderManager::cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION]));
auto const scale = loader->m_posScale;
DataReader dst(g_vertex_manager_write_ptr, nullptr);
auto const index = DataRead<I>();
loader->m_vertexSkip = index == std::numeric_limits<I>::max();
auto const data =
reinterpret_cast<const T*>(VertexLoaderManager::cached_arraybases[ARRAY_POSITION] +
(index * g_main_cp_state.array_strides[ARRAY_POSITION]));
auto const scale = loader->m_posScale;
DataReader dst(g_vertex_manager_write_ptr, nullptr);
for (int i = 0; i < N; ++i)
{
float value = PosScale(Common::FromBigEndian(data[i]), scale);
if (loader->m_counter < 3)
VertexLoaderManager::position_cache[loader->m_counter][i] = value;
dst.Write(value);
}
for (int i = 0; i < N; ++i)
{
float value = PosScale(Common::FromBigEndian(data[i]), scale);
if (loader->m_counter < 3)
VertexLoaderManager::position_cache[loader->m_counter][i] = value;
dst.Write(value);
}
g_vertex_manager_write_ptr = dst.GetPointer();
LOG_VTX();
g_vertex_manager_write_ptr = dst.GetPointer();
LOG_VTX();
}
// Position loader functions, looked up as tableReadPosition[_type][_format][_elements] by
// VertexLoader_Position::GetFunction().
// Rows by first index: 0 = no function (nullptr), 1 = Pos_ReadDirect, 2 = Pos_ReadIndex with a
// u8 index, 3 = Pos_ReadIndex with a u16 index. Within a row the entries are ordered
// u8, s8, u16, s16, float, and the innermost pair selects 2 or 3 components.
// The middle dimension is declared as 8 but only 5 entries are listed per row; the remaining
// entries are left to aggregate initialization (null function pointers).
static TPipelineFunction tableReadPosition[4][8][2] = {
{
{nullptr, nullptr,},
{nullptr, nullptr,},
{nullptr, nullptr,},
{nullptr, nullptr,},
{nullptr, nullptr,},
},
{
{Pos_ReadDirect<u8, 2>, Pos_ReadDirect<u8, 3>,},
{Pos_ReadDirect<s8, 2>, Pos_ReadDirect<s8, 3>,},
{Pos_ReadDirect<u16, 2>, Pos_ReadDirect<u16, 3>,},
{Pos_ReadDirect<s16, 2>, Pos_ReadDirect<s16, 3>,},
{Pos_ReadDirect<float, 2>, Pos_ReadDirect<float, 3>,},
},
{
{Pos_ReadIndex<u8, u8, 2>, Pos_ReadIndex<u8, u8, 3>,},
{Pos_ReadIndex<u8, s8, 2>, Pos_ReadIndex<u8, s8, 3>,},
{Pos_ReadIndex<u8, u16, 2>, Pos_ReadIndex<u8, u16, 3>,},
{Pos_ReadIndex<u8, s16, 2>, Pos_ReadIndex<u8, s16, 3>,},
{Pos_ReadIndex<u8, float, 2>, Pos_ReadIndex<u8, float, 3>,},
},
{
{Pos_ReadIndex<u16, u8, 2>, Pos_ReadIndex<u16, u8, 3>,},
{Pos_ReadIndex<u16, s8, 2>, Pos_ReadIndex<u16, s8, 3>,},
{Pos_ReadIndex<u16, u16, 2>, Pos_ReadIndex<u16, u16, 3>,},
{Pos_ReadIndex<u16, s16, 2>, Pos_ReadIndex<u16, s16, 3>,},
{Pos_ReadIndex<u16, float, 2>, Pos_ReadIndex<u16, float, 3>,},
},
{
{
nullptr, nullptr,
},
{
nullptr, nullptr,
},
{
nullptr, nullptr,
},
{
nullptr, nullptr,
},
{
nullptr, nullptr,
},
},
{
{
Pos_ReadDirect<u8, 2>, Pos_ReadDirect<u8, 3>,
},
{
Pos_ReadDirect<s8, 2>, Pos_ReadDirect<s8, 3>,
},
{
Pos_ReadDirect<u16, 2>, Pos_ReadDirect<u16, 3>,
},
{
Pos_ReadDirect<s16, 2>, Pos_ReadDirect<s16, 3>,
},
{
Pos_ReadDirect<float, 2>, Pos_ReadDirect<float, 3>,
},
},
{
{
Pos_ReadIndex<u8, u8, 2>, Pos_ReadIndex<u8, u8, 3>,
},
{
Pos_ReadIndex<u8, s8, 2>, Pos_ReadIndex<u8, s8, 3>,
},
{
Pos_ReadIndex<u8, u16, 2>, Pos_ReadIndex<u8, u16, 3>,
},
{
Pos_ReadIndex<u8, s16, 2>, Pos_ReadIndex<u8, s16, 3>,
},
{
Pos_ReadIndex<u8, float, 2>, Pos_ReadIndex<u8, float, 3>,
},
},
{
{
Pos_ReadIndex<u16, u8, 2>, Pos_ReadIndex<u16, u8, 3>,
},
{
Pos_ReadIndex<u16, s8, 2>, Pos_ReadIndex<u16, s8, 3>,
},
{
Pos_ReadIndex<u16, u16, 2>, Pos_ReadIndex<u16, u16, 3>,
},
{
Pos_ReadIndex<u16, s16, 2>, Pos_ReadIndex<u16, s16, 3>,
},
{
Pos_ReadIndex<u16, float, 2>, Pos_ReadIndex<u16, float, 3>,
},
},
};
// Sizes returned by VertexLoader_Position::GetSize(), looked up as
// tableReadPositionVertexSize[_type][_format][_elements]; laid out in parallel with
// tableReadPosition.
// Row 0 is all zero. Row 1 (paired with Pos_ReadDirect) holds component size times component
// count, e.g. {2, 3} for 8-bit and {8, 12} for float. Rows 2 and 3 (paired with Pos_ReadIndex)
// hold 1 and 2, matching the size of a u8 and a u16 index.
// NOTE(review): the values look like byte counts consumed from the input stream - confirm.
static int tableReadPositionVertexSize[4][8][2] = {
{
{0, 0,}, {0, 0,}, {0, 0,}, {0, 0,}, {0, 0,},
},
{
{2, 3,}, {2, 3,}, {4, 6,}, {4, 6,}, {8, 12,},
},
{
{1, 1,}, {1, 1,}, {1, 1,}, {1, 1,}, {1, 1,},
},
{
{2, 2,}, {2, 2,}, {2, 2,}, {2, 2,}, {2, 2,},
},
{
{
0, 0,
},
{
0, 0,
},
{
0, 0,
},
{
0, 0,
},
{
0, 0,
},
},
{
{
2, 3,
},
{
2, 3,
},
{
4, 6,
},
{
4, 6,
},
{
8, 12,
},
},
{
{
1, 1,
},
{
1, 1,
},
{
1, 1,
},
{
1, 1,
},
{
1, 1,
},
},
{
{
2, 2,
},
{
2, 2,
},
{
2, 2,
},
{
2, 2,
},
{
2, 2,
},
},
};
// Returns tableReadPositionVertexSize[_type][_format][_elements]. No range checking is done.
unsigned int VertexLoader_Position::GetSize(u64 _type, unsigned int _format, unsigned int _elements)
{
return tableReadPositionVertexSize[_type][_format][_elements];
return tableReadPositionVertexSize[_type][_format][_elements];
}
// Returns tableReadPosition[_type][_format][_elements]; the result is nullptr for entries the
// table leaves empty (e.g. the whole _type == 0 row). No range checking is done.
TPipelineFunction VertexLoader_Position::GetFunction(u64 _type, unsigned int _format, unsigned int _elements)
TPipelineFunction VertexLoader_Position::GetFunction(u64 _type, unsigned int _format,
unsigned int _elements)
{
return tableReadPosition[_type][_format][_elements];
return tableReadPosition[_type][_format][_elements];
}

View file

@ -9,10 +9,9 @@
// Static accessors for the position loader: a size and a loader function per
// (_type, _format, _elements) combination.
class VertexLoader_Position
{
public:
// GetSize
static unsigned int GetSize(u64 _type, unsigned int _format, unsigned int _elements);
// GetSize
static unsigned int GetSize(u64 _type, unsigned int _format, unsigned int _elements);
// GetFunction
static TPipelineFunction GetFunction(u64 _type, unsigned int _format, unsigned int _elements);
// GetFunction
static TPipelineFunction GetFunction(u64 _type, unsigned int _format, unsigned int _elements);
};

View file

@ -8,9 +8,9 @@
#include "Common/CommonTypes.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/VertexLoader.h"
#include "VideoCommon/VertexLoader_TextCoord.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexLoaderUtils.h"
#include "VideoCommon/VertexLoader_TextCoord.h"
// Logging hook called by the texture coordinate loaders after writing N components.
// Only declared here; the specializations for N = 1 and N = 2 follow.
template <int N>
void LOG_TEX();
@ -18,127 +18,227 @@ void LOG_TEX();
// Specialization for one component. Currently a no-op: the logging call is commented out.
template <>
void LOG_TEX<1>()
{
// warning: mapping buffer should be disabled to use this
// PRIM_LOG("tex: %f, ", ((float*)g_vertex_manager_write_ptr)[-1]);
// warning: mapping buffer should be disabled to use this
// PRIM_LOG("tex: %f, ", ((float*)g_vertex_manager_write_ptr)[-1]);
}
// Specialization for two components. Currently a no-op: the logging call is commented out.
template <>
void LOG_TEX<2>()
{
// warning: mapping buffer should be disabled to use this
// PRIM_LOG("tex: %f %f, ", ((float*)g_vertex_manager_write_ptr)[-2], ((float*)g_vertex_manager_write_ptr)[-1]);
// warning: mapping buffer should be disabled to use this
// PRIM_LOG("tex: %f %f, ", ((float*)g_vertex_manager_write_ptr)[-2],
// ((float*)g_vertex_manager_write_ptr)[-1]);
}
// Reads and writes nothing; only advances loader->m_tcIndex by one, exactly as the real
// TexCoord_Read* functions do after processing a coordinate.
static void TexCoord_Read_Dummy(VertexLoader* loader)
{
loader->m_tcIndex++;
loader->m_tcIndex++;
}
// Generic case: converts a raw texture coordinate component to float by multiplying it with
// scale.
template <typename T>
float TCScale(T val, float scale)
{
return val * scale;
return val * scale;
}
// float specialization: the value is returned unchanged and scale is ignored.
template <>
float TCScale(float val, float scale)
{
return val;
return val;
}
// Reads N texture coordinate components of type T from g_video_buffer_read_ptr, scales each
// one with loader->m_tcScale[loader->m_tcIndex] via TCScale(), and writes the resulting floats
// to g_vertex_manager_write_ptr. Both global pointers are advanced, and loader->m_tcIndex is
// incremented at the end.
template <typename T, int N>
void TexCoord_ReadDirect(VertexLoader* loader)
{
auto const scale = loader->m_tcScale[loader->m_tcIndex];
// Both readers are constructed with a null end pointer.
DataReader dst(g_vertex_manager_write_ptr, nullptr);
DataReader src(g_video_buffer_read_ptr, nullptr);
auto const scale = loader->m_tcScale[loader->m_tcIndex];
DataReader dst(g_vertex_manager_write_ptr, nullptr);
DataReader src(g_video_buffer_read_ptr, nullptr);
for (int i = 0; i != N; ++i)
dst.Write(TCScale(src.Read<T>(), scale));
for (int i = 0; i != N; ++i)
dst.Write(TCScale(src.Read<T>(), scale));
// Publish the advanced positions back to the global pointers.
g_vertex_manager_write_ptr = dst.GetPointer();
g_video_buffer_read_ptr = src.GetPointer();
LOG_TEX<N>();
g_vertex_manager_write_ptr = dst.GetPointer();
g_video_buffer_read_ptr = src.GetPointer();
LOG_TEX<N>();
++loader->m_tcIndex;
++loader->m_tcIndex;
}
// Reads an index of unsigned type I with DataRead<I>(), then loads N components of type T from
// the array selected by ARRAY_TEXCOORD0 + loader->m_tcIndex, at base + index * stride.
// Each component is converted with Common::FromBigEndian(), scaled with
// loader->m_tcScale[loader->m_tcIndex] and written as float to g_vertex_manager_write_ptr,
// which is advanced afterwards. loader->m_tcIndex is incremented at the end.
// NOTE(review): the read pointer is not touched here; presumably DataRead<I>() advances it - confirm.
template <typename I, typename T, int N>
void TexCoord_ReadIndex(VertexLoader* loader)
{
static_assert(std::is_unsigned<I>::value, "Only unsigned I is sane!");
static_assert(std::is_unsigned<I>::value, "Only unsigned I is sane!");
auto const index = DataRead<I>();
auto const data = reinterpret_cast<const T*>(VertexLoaderManager::cached_arraybases[ARRAY_TEXCOORD0 + loader->m_tcIndex]
+ (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + loader->m_tcIndex]));
auto const scale = loader->m_tcScale[loader->m_tcIndex];
DataReader dst(g_vertex_manager_write_ptr, nullptr);
auto const index = DataRead<I>();
auto const data = reinterpret_cast<const T*>(
VertexLoaderManager::cached_arraybases[ARRAY_TEXCOORD0 + loader->m_tcIndex] +
(index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + loader->m_tcIndex]));
auto const scale = loader->m_tcScale[loader->m_tcIndex];
DataReader dst(g_vertex_manager_write_ptr, nullptr);
for (int i = 0; i != N; ++i)
dst.Write(TCScale(Common::FromBigEndian(data[i]), scale));
for (int i = 0; i != N; ++i)
dst.Write(TCScale(Common::FromBigEndian(data[i]), scale));
g_vertex_manager_write_ptr = dst.GetPointer();
LOG_TEX<N>();
++loader->m_tcIndex;
g_vertex_manager_write_ptr = dst.GetPointer();
LOG_TEX<N>();
++loader->m_tcIndex;
}
// Texture coordinate loader functions, looked up as tableReadTexCoord[_type][_format][_elements]
// by VertexLoader_TextCoord::GetFunction().
// Rows by first index: 0 = no function (nullptr), 1 = TexCoord_ReadDirect, 2 = TexCoord_ReadIndex
// with a u8 index, 3 = TexCoord_ReadIndex with a u16 index. Within a row the entries are ordered
// u8, s8, u16, s16, float, and the innermost pair selects 1 or 2 components.
// The middle dimension is declared as 8 but only 5 entries are listed per row; the remaining
// entries are left to aggregate initialization (null function pointers).
static TPipelineFunction tableReadTexCoord[4][8][2] = {
{
{nullptr, nullptr,},
{nullptr, nullptr,},
{nullptr, nullptr,},
{nullptr, nullptr,},
{nullptr, nullptr,},
},
{
{TexCoord_ReadDirect<u8, 1>, TexCoord_ReadDirect<u8, 2>,},
{TexCoord_ReadDirect<s8, 1>, TexCoord_ReadDirect<s8, 2>,},
{TexCoord_ReadDirect<u16, 1>, TexCoord_ReadDirect<u16, 2>,},
{TexCoord_ReadDirect<s16, 1>, TexCoord_ReadDirect<s16, 2>,},
{TexCoord_ReadDirect<float, 1>, TexCoord_ReadDirect<float, 2>,},
},
{
{TexCoord_ReadIndex<u8, u8, 1>, TexCoord_ReadIndex<u8, u8, 2>,},
{TexCoord_ReadIndex<u8, s8, 1>, TexCoord_ReadIndex<u8, s8, 2>,},
{TexCoord_ReadIndex<u8, u16, 1>, TexCoord_ReadIndex<u8, u16, 2>,},
{TexCoord_ReadIndex<u8, s16, 1>, TexCoord_ReadIndex<u8, s16, 2>,},
{TexCoord_ReadIndex<u8, float, 1>, TexCoord_ReadIndex<u8, float, 2>,},
},
{
{TexCoord_ReadIndex<u16, u8, 1>, TexCoord_ReadIndex<u16, u8, 2>,},
{TexCoord_ReadIndex<u16, s8, 1>, TexCoord_ReadIndex<u16, s8, 2>,},
{TexCoord_ReadIndex<u16, u16, 1>, TexCoord_ReadIndex<u16, u16, 2>,},
{TexCoord_ReadIndex<u16, s16, 1>, TexCoord_ReadIndex<u16, s16, 2>,},
{TexCoord_ReadIndex<u16, float, 1>, TexCoord_ReadIndex<u16, float, 2>,},
},
{
{
nullptr, nullptr,
},
{
nullptr, nullptr,
},
{
nullptr, nullptr,
},
{
nullptr, nullptr,
},
{
nullptr, nullptr,
},
},
{
{
TexCoord_ReadDirect<u8, 1>, TexCoord_ReadDirect<u8, 2>,
},
{
TexCoord_ReadDirect<s8, 1>, TexCoord_ReadDirect<s8, 2>,
},
{
TexCoord_ReadDirect<u16, 1>, TexCoord_ReadDirect<u16, 2>,
},
{
TexCoord_ReadDirect<s16, 1>, TexCoord_ReadDirect<s16, 2>,
},
{
TexCoord_ReadDirect<float, 1>, TexCoord_ReadDirect<float, 2>,
},
},
{
{
TexCoord_ReadIndex<u8, u8, 1>, TexCoord_ReadIndex<u8, u8, 2>,
},
{
TexCoord_ReadIndex<u8, s8, 1>, TexCoord_ReadIndex<u8, s8, 2>,
},
{
TexCoord_ReadIndex<u8, u16, 1>, TexCoord_ReadIndex<u8, u16, 2>,
},
{
TexCoord_ReadIndex<u8, s16, 1>, TexCoord_ReadIndex<u8, s16, 2>,
},
{
TexCoord_ReadIndex<u8, float, 1>, TexCoord_ReadIndex<u8, float, 2>,
},
},
{
{
TexCoord_ReadIndex<u16, u8, 1>, TexCoord_ReadIndex<u16, u8, 2>,
},
{
TexCoord_ReadIndex<u16, s8, 1>, TexCoord_ReadIndex<u16, s8, 2>,
},
{
TexCoord_ReadIndex<u16, u16, 1>, TexCoord_ReadIndex<u16, u16, 2>,
},
{
TexCoord_ReadIndex<u16, s16, 1>, TexCoord_ReadIndex<u16, s16, 2>,
},
{
TexCoord_ReadIndex<u16, float, 1>, TexCoord_ReadIndex<u16, float, 2>,
},
},
};
// Sizes returned by VertexLoader_TextCoord::GetSize(), looked up as
// tableReadTexCoordVertexSize[_type][_format][_elements]; laid out in parallel with
// tableReadTexCoord.
// Row 0 is all zero. Row 1 (paired with TexCoord_ReadDirect) holds component size times
// component count, e.g. {1, 2} for 8-bit and {4, 8} for float. Rows 2 and 3 (paired with
// TexCoord_ReadIndex) hold 1 and 2, matching the size of a u8 and a u16 index.
// NOTE(review): the values look like byte counts consumed from the input stream - confirm.
static int tableReadTexCoordVertexSize[4][8][2] = {
{
{0, 0,}, {0, 0,}, {0, 0,}, {0, 0,}, {0, 0,},
},
{
{1, 2,}, {1, 2,}, {2, 4,}, {2, 4,}, {4, 8,},
},
{
{1, 1,}, {1, 1,}, {1, 1,}, {1, 1,}, {1, 1,},
},
{
{2, 2,}, {2, 2,}, {2, 2,}, {2, 2,}, {2, 2,},
},
{
{
0, 0,
},
{
0, 0,
},
{
0, 0,
},
{
0, 0,
},
{
0, 0,
},
},
{
{
1, 2,
},
{
1, 2,
},
{
2, 4,
},
{
2, 4,
},
{
4, 8,
},
},
{
{
1, 1,
},
{
1, 1,
},
{
1, 1,
},
{
1, 1,
},
{
1, 1,
},
},
{
{
2, 2,
},
{
2, 2,
},
{
2, 2,
},
{
2, 2,
},
{
2, 2,
},
},
};
// Returns tableReadTexCoordVertexSize[_type][_format][_elements]. No range checking is done.
unsigned int VertexLoader_TextCoord::GetSize(u64 _type, unsigned int _format, unsigned int _elements)
unsigned int VertexLoader_TextCoord::GetSize(u64 _type, unsigned int _format,
unsigned int _elements)
{
return tableReadTexCoordVertexSize[_type][_format][_elements];
return tableReadTexCoordVertexSize[_type][_format][_elements];
}
// Returns tableReadTexCoord[_type][_format][_elements]; the result is nullptr for entries the
// table leaves empty (e.g. the whole _type == 0 row). No range checking is done.
TPipelineFunction VertexLoader_TextCoord::GetFunction(u64 _type, unsigned int _format, unsigned int _elements)
TPipelineFunction VertexLoader_TextCoord::GetFunction(u64 _type, unsigned int _format,
unsigned int _elements)
{
return tableReadTexCoord[_type][_format][_elements];
return tableReadTexCoord[_type][_format][_elements];
}
// Returns TexCoord_Read_Dummy, which only increments the loader's m_tcIndex.
TPipelineFunction VertexLoader_TextCoord::GetDummyFunction()
{
return TexCoord_Read_Dummy;
return TexCoord_Read_Dummy;
}

View file

@ -9,13 +9,13 @@
// Static accessors for the texture coordinate loader: a size and a loader function per
// (_type, _format, _elements) combination, plus a dummy function.
class VertexLoader_TextCoord
{
public:
// GetSize
static unsigned int GetSize(u64 _type, unsigned int _format, unsigned int _elements);
// GetSize
static unsigned int GetSize(u64 _type, unsigned int _format, unsigned int _elements);
// GetFunction
static TPipelineFunction GetFunction(u64 _type, unsigned int _format, unsigned int _elements);
// GetFunction
static TPipelineFunction GetFunction(u64 _type, unsigned int _format, unsigned int _elements);
// GetDummyFunction
// It is important to synchronize tcIndex.
static TPipelineFunction GetDummyFunction();
// GetDummyFunction
// It is important to synchronize tcIndex.
static TPipelineFunction GetDummyFunction();
};

View file

@ -40,20 +40,20 @@ bool VertexManagerBase::s_is_flushed;
bool VertexManagerBase::s_cull_all;
// Maps a GX draw primitive (used as the array index) to the PrimitiveType it is drawn as.
// All quad and triangle variants map to PRIMITIVE_TRIANGLES and both line variants map to
// PRIMITIVE_LINES. PrepareForAdditionalData() uses this to detect a change of primitive type.
static const PrimitiveType primitive_from_gx[8] = {
PRIMITIVE_TRIANGLES, // GX_DRAW_QUADS
PRIMITIVE_TRIANGLES, // GX_DRAW_QUADS_2
PRIMITIVE_TRIANGLES, // GX_DRAW_TRIANGLES
PRIMITIVE_TRIANGLES, // GX_DRAW_TRIANGLE_STRIP
PRIMITIVE_TRIANGLES, // GX_DRAW_TRIANGLE_FAN
PRIMITIVE_LINES, // GX_DRAW_LINES
PRIMITIVE_LINES, // GX_DRAW_LINE_STRIP
PRIMITIVE_POINTS, // GX_DRAW_POINTS
PRIMITIVE_TRIANGLES, // GX_DRAW_QUADS
PRIMITIVE_TRIANGLES, // GX_DRAW_QUADS_2
PRIMITIVE_TRIANGLES, // GX_DRAW_TRIANGLES
PRIMITIVE_TRIANGLES, // GX_DRAW_TRIANGLE_STRIP
PRIMITIVE_TRIANGLES, // GX_DRAW_TRIANGLE_FAN
PRIMITIVE_LINES, // GX_DRAW_LINES
PRIMITIVE_LINES, // GX_DRAW_LINE_STRIP
PRIMITIVE_POINTS, // GX_DRAW_POINTS
};
// Starts in the flushed state (so the next PrepareForAdditionalData() resets the buffer) with
// cull-all disabled.
VertexManagerBase::VertexManagerBase()
{
s_is_flushed = true;
s_cull_all = false;
s_is_flushed = true;
s_cull_all = false;
}
VertexManagerBase::~VertexManagerBase()
@ -62,280 +62,294 @@ VertexManagerBase::~VertexManagerBase()
// Returns the distance between s_pCurBufferPointer and s_pEndBufferPointer, cast to u32.
u32 VertexManagerBase::GetRemainingSize()
{
return (u32)(s_pEndBufferPointer - s_pCurBufferPointer);
return (u32)(s_pEndBufferPointer - s_pCurBufferPointer);
}
// Prepares the vertex buffer for `count` more vertices of `stride` bytes each and returns a
// DataReader spanning s_pCurBufferPointer to s_pEndBufferPointer.
// Flush() is called when the mapped primitive type changes, or when the pending data plus the
// new data would not fit in the index or vertex buffers. If the data still does not fit after
// flushing, an error is logged but execution continues. `cullall` is stored in s_cull_all.
// `primitive` is used directly as an index into primitive_from_gx.
DataReader VertexManagerBase::PrepareForAdditionalData(int primitive, u32 count, u32 stride, bool cullall)
DataReader VertexManagerBase::PrepareForAdditionalData(int primitive, u32 count, u32 stride,
bool cullall)
{
// The SSE vertex loader can write up to 4 bytes past the end
u32 const needed_vertex_bytes = count * stride + 4;
// The SSE vertex loader can write up to 4 bytes past the end
u32 const needed_vertex_bytes = count * stride + 4;
// We can't merge different kinds of primitives, so we have to flush here
if (current_primitive_type != primitive_from_gx[primitive])
Flush();
current_primitive_type = primitive_from_gx[primitive];
// We can't merge different kinds of primitives, so we have to flush here
if (current_primitive_type != primitive_from_gx[primitive])
Flush();
current_primitive_type = primitive_from_gx[primitive];
// Check for size in buffer, if the buffer gets full, call Flush()
if (!s_is_flushed && ( count > IndexGenerator::GetRemainingIndices() ||
count > GetRemainingIndices(primitive) || needed_vertex_bytes > GetRemainingSize()))
{
Flush();
// Check for size in buffer, if the buffer gets full, call Flush()
if (!s_is_flushed &&
(count > IndexGenerator::GetRemainingIndices() || count > GetRemainingIndices(primitive) ||
needed_vertex_bytes > GetRemainingSize()))
{
Flush();
// Re-check after the flush: these report a request that cannot fit even in an empty buffer.
if (count > IndexGenerator::GetRemainingIndices())
ERROR_LOG(VIDEO, "Too little remaining index values. Use 32-bit or reset them on flush.");
if (count > GetRemainingIndices(primitive))
ERROR_LOG(VIDEO, "VertexManager: Buffer not large enough for all indices! "
"Increase MAXIBUFFERSIZE or we need primitive breaking after all.");
if (needed_vertex_bytes > GetRemainingSize())
ERROR_LOG(VIDEO, "VertexManager: Buffer not large enough for all vertices! "
"Increase MAXVBUFFERSIZE or we need primitive breaking after all.");
}
if (count > IndexGenerator::GetRemainingIndices())
ERROR_LOG(VIDEO, "Too little remaining index values. Use 32-bit or reset them on flush.");
if (count > GetRemainingIndices(primitive))
ERROR_LOG(VIDEO, "VertexManager: Buffer not large enough for all indices! "
"Increase MAXIBUFFERSIZE or we need primitive breaking after all.");
if (needed_vertex_bytes > GetRemainingSize())
ERROR_LOG(VIDEO, "VertexManager: Buffer not large enough for all vertices! "
"Increase MAXVBUFFERSIZE or we need primitive breaking after all.");
}
s_cull_all = cullall;
s_cull_all = cullall;
// need to alloc new buffer
if (s_is_flushed)
{
g_vertex_manager->ResetBuffer(stride);
s_is_flushed = false;
}
// need to alloc new buffer
if (s_is_flushed)
{
g_vertex_manager->ResetBuffer(stride);
s_is_flushed = false;
}
return DataReader(s_pCurBufferPointer, s_pEndBufferPointer);
return DataReader(s_pCurBufferPointer, s_pEndBufferPointer);
}
// Advances s_pCurBufferPointer by count * stride. Despite the name, Flush() is not called here.
void VertexManagerBase::FlushData(u32 count, u32 stride)
{
s_pCurBufferPointer += count * stride;
s_pCurBufferPointer += count * stride;
}
// Returns a per-primitive limit derived from the free index space
// (MAXIBUFFERSIZE - IndexGenerator::GetIndexLen()); PrepareForAdditionalData() compares its
// vertex count against this value. The formula depends on the primitive and on whether the
// backend reports bSupportsPrimitiveRestart. Unknown primitives return 0.
// NOTE(review): the per-primitive ratios presumably mirror how many indices IndexGenerator
// emits per vertex for each primitive - confirm against IndexGenerator.
u32 VertexManagerBase::GetRemainingIndices(int primitive)
{
u32 index_len = MAXIBUFFERSIZE - IndexGenerator::GetIndexLen();
u32 index_len = MAXIBUFFERSIZE - IndexGenerator::GetIndexLen();
if (g_Config.backend_info.bSupportsPrimitiveRestart)
{
switch (primitive)
{
case GX_DRAW_QUADS:
case GX_DRAW_QUADS_2:
return index_len / 5 * 4;
case GX_DRAW_TRIANGLES:
return index_len / 4 * 3;
case GX_DRAW_TRIANGLE_STRIP:
return index_len / 1 - 1;
case GX_DRAW_TRIANGLE_FAN:
return index_len / 6 * 4 + 1;
if (g_Config.backend_info.bSupportsPrimitiveRestart)
{
switch (primitive)
{
case GX_DRAW_QUADS:
case GX_DRAW_QUADS_2:
return index_len / 5 * 4;
case GX_DRAW_TRIANGLES:
return index_len / 4 * 3;
case GX_DRAW_TRIANGLE_STRIP:
return index_len / 1 - 1;
case GX_DRAW_TRIANGLE_FAN:
return index_len / 6 * 4 + 1;
case GX_DRAW_LINES:
return index_len;
case GX_DRAW_LINE_STRIP:
return index_len / 2 + 1;
case GX_DRAW_LINES:
return index_len;
case GX_DRAW_LINE_STRIP:
return index_len / 2 + 1;
case GX_DRAW_POINTS:
return index_len;
case GX_DRAW_POINTS:
return index_len;
default:
return 0;
}
}
else
{
switch (primitive)
{
case GX_DRAW_QUADS:
case GX_DRAW_QUADS_2:
return index_len / 6 * 4;
case GX_DRAW_TRIANGLES:
return index_len;
case GX_DRAW_TRIANGLE_STRIP:
return index_len / 3 + 2;
case GX_DRAW_TRIANGLE_FAN:
return index_len / 3 + 2;
default:
return 0;
}
}
else
{
switch (primitive)
{
case GX_DRAW_QUADS:
case GX_DRAW_QUADS_2:
return index_len / 6 * 4;
case GX_DRAW_TRIANGLES:
return index_len;
case GX_DRAW_TRIANGLE_STRIP:
return index_len / 3 + 2;
case GX_DRAW_TRIANGLE_FAN:
return index_len / 3 + 2;
case GX_DRAW_LINES:
return index_len;
case GX_DRAW_LINE_STRIP:
return index_len / 2 + 1;
case GX_DRAW_LINES:
return index_len;
case GX_DRAW_LINE_STRIP:
return index_len / 2 + 1;
case GX_DRAW_POINTS:
return index_len;
case GX_DRAW_POINTS:
return index_len;
default:
return 0;
}
}
default:
return 0;
}
}
}
void VertexManagerBase::Flush()
{
if (s_is_flushed)
return;
if (s_is_flushed)
return;
// loading a state will invalidate BP, so check for it
g_video_backend->CheckInvalidState();
// loading a state will invalidate BP, so check for it
g_video_backend->CheckInvalidState();
#if defined(_DEBUG) || defined(DEBUGFAST)
PRIM_LOG("frame%d:\n texgen=%d, numchan=%d, dualtex=%d, ztex=%d, cole=%d, alpe=%d, ze=%d", g_ActiveConfig.iSaveTargetId, xfmem.numTexGen.numTexGens,
xfmem.numChan.numColorChans, xfmem.dualTexTrans.enabled, bpmem.ztex2.op,
(int)bpmem.blendmode.colorupdate, (int)bpmem.blendmode.alphaupdate, (int)bpmem.zmode.updateenable);
PRIM_LOG("frame%d:\n texgen=%d, numchan=%d, dualtex=%d, ztex=%d, cole=%d, alpe=%d, ze=%d",
g_ActiveConfig.iSaveTargetId, xfmem.numTexGen.numTexGens, xfmem.numChan.numColorChans,
xfmem.dualTexTrans.enabled, bpmem.ztex2.op, (int)bpmem.blendmode.colorupdate,
(int)bpmem.blendmode.alphaupdate, (int)bpmem.zmode.updateenable);
for (unsigned int i = 0; i < xfmem.numChan.numColorChans; ++i)
{
LitChannel* ch = &xfmem.color[i];
PRIM_LOG("colchan%d: matsrc=%d, light=0x%x, ambsrc=%d, diffunc=%d, attfunc=%d", i, ch->matsource, ch->GetFullLightMask(), ch->ambsource, ch->diffusefunc, ch->attnfunc);
ch = &xfmem.alpha[i];
PRIM_LOG("alpchan%d: matsrc=%d, light=0x%x, ambsrc=%d, diffunc=%d, attfunc=%d", i, ch->matsource, ch->GetFullLightMask(), ch->ambsource, ch->diffusefunc, ch->attnfunc);
}
for (unsigned int i = 0; i < xfmem.numChan.numColorChans; ++i)
{
LitChannel* ch = &xfmem.color[i];
PRIM_LOG("colchan%d: matsrc=%d, light=0x%x, ambsrc=%d, diffunc=%d, attfunc=%d", i,
ch->matsource, ch->GetFullLightMask(), ch->ambsource, ch->diffusefunc, ch->attnfunc);
ch = &xfmem.alpha[i];
PRIM_LOG("alpchan%d: matsrc=%d, light=0x%x, ambsrc=%d, diffunc=%d, attfunc=%d", i,
ch->matsource, ch->GetFullLightMask(), ch->ambsource, ch->diffusefunc, ch->attnfunc);
}
for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
{
TexMtxInfo tinfo = xfmem.texMtxInfo[i];
if (tinfo.texgentype != XF_TEXGEN_EMBOSS_MAP) tinfo.hex &= 0x7ff;
if (tinfo.texgentype != XF_TEXGEN_REGULAR) tinfo.projection = 0;
for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
{
TexMtxInfo tinfo = xfmem.texMtxInfo[i];
if (tinfo.texgentype != XF_TEXGEN_EMBOSS_MAP)
tinfo.hex &= 0x7ff;
if (tinfo.texgentype != XF_TEXGEN_REGULAR)
tinfo.projection = 0;
PRIM_LOG("txgen%d: proj=%d, input=%d, gentype=%d, srcrow=%d, embsrc=%d, emblght=%d, postmtx=%d, postnorm=%d",
i, tinfo.projection, tinfo.inputform, tinfo.texgentype, tinfo.sourcerow, tinfo.embosssourceshift, tinfo.embosslightshift,
xfmem.postMtxInfo[i].index, xfmem.postMtxInfo[i].normalize);
}
PRIM_LOG("txgen%d: proj=%d, input=%d, gentype=%d, srcrow=%d, embsrc=%d, emblght=%d, "
"postmtx=%d, postnorm=%d",
i, tinfo.projection, tinfo.inputform, tinfo.texgentype, tinfo.sourcerow,
tinfo.embosssourceshift, tinfo.embosslightshift, xfmem.postMtxInfo[i].index,
xfmem.postMtxInfo[i].normalize);
}
PRIM_LOG("pixel: tev=%d, ind=%d, texgen=%d, dstalpha=%d, alphatest=0x%x", (int)bpmem.genMode.numtevstages+1, (int)bpmem.genMode.numindstages,
(int)bpmem.genMode.numtexgens, (u32)bpmem.dstalpha.enable, (bpmem.alpha_test.hex>>16)&0xff);
PRIM_LOG("pixel: tev=%d, ind=%d, texgen=%d, dstalpha=%d, alphatest=0x%x",
(int)bpmem.genMode.numtevstages + 1, (int)bpmem.genMode.numindstages,
(int)bpmem.genMode.numtexgens, (u32)bpmem.dstalpha.enable,
(bpmem.alpha_test.hex >> 16) & 0xff);
#endif
// If the primitive is marked CullAll, all we need to do is update the vertex constants and calculate the zfreeze reference slope
if (!s_cull_all)
{
BitSet32 usedtextures;
for (u32 i = 0; i < bpmem.genMode.numtevstages + 1u; ++i)
if (bpmem.tevorders[i / 2].getEnable(i & 1))
usedtextures[bpmem.tevorders[i/2].getTexMap(i & 1)] = true;
// If the primitive is marked CullAll, all we need to do is update the vertex constants and
// calculate the zfreeze reference slope
if (!s_cull_all)
{
BitSet32 usedtextures;
for (u32 i = 0; i < bpmem.genMode.numtevstages + 1u; ++i)
if (bpmem.tevorders[i / 2].getEnable(i & 1))
usedtextures[bpmem.tevorders[i / 2].getTexMap(i & 1)] = true;
if (bpmem.genMode.numindstages > 0)
for (unsigned int i = 0; i < bpmem.genMode.numtevstages + 1u; ++i)
if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages)
usedtextures[bpmem.tevindref.getTexMap(bpmem.tevind[i].bt)] = true;
if (bpmem.genMode.numindstages > 0)
for (unsigned int i = 0; i < bpmem.genMode.numtevstages + 1u; ++i)
if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages)
usedtextures[bpmem.tevindref.getTexMap(bpmem.tevind[i].bt)] = true;
TextureCacheBase::UnbindTextures();
for (unsigned int i : usedtextures)
{
const TextureCacheBase::TCacheEntryBase* tentry = TextureCacheBase::Load(i);
TextureCacheBase::UnbindTextures();
for (unsigned int i : usedtextures)
{
const TextureCacheBase::TCacheEntryBase* tentry = TextureCacheBase::Load(i);
if (tentry)
{
g_renderer->SetSamplerState(i & 3, i >> 2, tentry->is_custom_tex);
PixelShaderManager::SetTexDims(i, tentry->native_width, tentry->native_height);
}
else
{
ERROR_LOG(VIDEO, "error loading texture");
}
}
g_texture_cache->BindTextures();
}
if (tentry)
{
g_renderer->SetSamplerState(i & 3, i >> 2, tentry->is_custom_tex);
PixelShaderManager::SetTexDims(i, tentry->native_width, tentry->native_height);
}
else
{
ERROR_LOG(VIDEO, "error loading texture");
}
}
g_texture_cache->BindTextures();
}
// set global vertex constants
VertexShaderManager::SetConstants();
// set global vertex constants
VertexShaderManager::SetConstants();
// Calculate ZSlope for zfreeze
if (!bpmem.genMode.zfreeze)
{
// Must be done after VertexShaderManager::SetConstants()
CalculateZSlope(VertexLoaderManager::GetCurrentVertexFormat());
}
else if (s_zslope.dirty && !s_cull_all) // or apply any dirty ZSlopes
{
PixelShaderManager::SetZSlope(s_zslope.dfdx, s_zslope.dfdy, s_zslope.f0);
s_zslope.dirty = false;
}
// Calculate ZSlope for zfreeze
if (!bpmem.genMode.zfreeze)
{
// Must be done after VertexShaderManager::SetConstants()
CalculateZSlope(VertexLoaderManager::GetCurrentVertexFormat());
}
else if (s_zslope.dirty && !s_cull_all) // or apply any dirty ZSlopes
{
PixelShaderManager::SetZSlope(s_zslope.dfdx, s_zslope.dfdy, s_zslope.f0);
s_zslope.dirty = false;
}
if (!s_cull_all)
{
// set the rest of the global constants
GeometryShaderManager::SetConstants();
PixelShaderManager::SetConstants();
if (!s_cull_all)
{
// set the rest of the global constants
GeometryShaderManager::SetConstants();
PixelShaderManager::SetConstants();
bool useDstAlpha = bpmem.dstalpha.enable &&
bpmem.blendmode.alphaupdate &&
bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24;
bool useDstAlpha = bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate &&
bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24;
if (PerfQueryBase::ShouldEmulate())
g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
g_vertex_manager->vFlush(useDstAlpha);
if (PerfQueryBase::ShouldEmulate())
g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
}
if (PerfQueryBase::ShouldEmulate())
g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
g_vertex_manager->vFlush(useDstAlpha);
if (PerfQueryBase::ShouldEmulate())
g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
}
GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true);
GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true);
if (xfmem.numTexGen.numTexGens != bpmem.genMode.numtexgens)
ERROR_LOG(VIDEO, "xf.numtexgens (%d) does not match bp.numtexgens (%d). Error in command stream.", xfmem.numTexGen.numTexGens, bpmem.genMode.numtexgens.Value());
if (xfmem.numTexGen.numTexGens != bpmem.genMode.numtexgens)
ERROR_LOG(VIDEO,
"xf.numtexgens (%d) does not match bp.numtexgens (%d). Error in command stream.",
xfmem.numTexGen.numTexGens, bpmem.genMode.numtexgens.Value());
s_is_flushed = true;
s_cull_all = false;
s_is_flushed = true;
s_cull_all = false;
}
// Runs the shared z-freeze slope (s_zslope) through the given PointerWrap and
// then hands the same PointerWrap to the active vertex manager's vDoState()
// so the backend can process any state of its own. Each piece is visited
// exactly once; the order (slope first, backend second) must stay fixed.
void VertexManagerBase::DoState(PointerWrap& p)
{
  p.Do(s_zslope);
  g_vertex_manager->vDoState(p);
}
// Recomputes s_zslope (the screen-space depth plane f(x, y) = f0 + dfdx*x + dfdy*y)
// from the three most recently cached vertex positions.
//
// format: vertex format of the current batch; its declaration supplies the
//         stride, the position component count and whether per-vertex
//         position matrix indices are present.
//
// The function returns without touching s_zslope when
//  - the current primitive type is not PRIMITIVE_TRIANGLES,
//  - the vertex buffer holds less than three vertices' worth of bytes, or
//  - the projected triangle is degenerate (zero screen-space area).
// Otherwise s_zslope.dfdx/dfdy/f0 are updated and s_zslope.dirty is set.
//
// Side effect: for two-component positions the z entry of the cached
// positions is overwritten with 0 before transforming.
void VertexManagerBase::CalculateZSlope(NativeVertexFormat* format)
{
  float out[12];
  float viewOffset[2] = {xfmem.viewport.xOrig - bpmem.scissorOffset.x * 2,
                         xfmem.viewport.yOrig - bpmem.scissorOffset.y * 2};

  if (current_primitive_type != PRIMITIVE_TRIANGLES)
    return;

  // Global matrix ID.
  u32 mtxIdx = g_main_cp_state.matrix_index_a.PosNormalMtxIdx;
  const PortableVertexDeclaration vert_decl = format->GetVertexDeclaration();

  // Make sure the buffer contains at least 3 vertices.
  if ((s_pCurBufferPointer - s_pBaseBufferPointer) < (vert_decl.stride * 3))
    return;

  // Lookup vertices of the last rendered triangle and software-transform them
  // This allows us to determine the depth slope, which will be used if z-freeze
  // is enabled in the following flush.
  for (unsigned int i = 0; i < 3; ++i)
  {
    // If this vertex format has per-vertex position matrix IDs, look it up.
    if (vert_decl.posmtx.enable)
      mtxIdx = VertexLoaderManager::position_matrix_index[2 - i];

    // Two-component positions carry no z; force it to zero before transforming.
    if (vert_decl.position.components == 2)
      VertexLoaderManager::position_cache[2 - i][2] = 0;

    // out[i * 4 .. i * 4 + 3] receives the clip-space x, y, z, w of vertex i.
    VertexShaderManager::TransformToClipSpace(&VertexLoaderManager::position_cache[2 - i][0],
                                              &out[i * 4], mtxIdx);

    // Transform to Screenspace
    float inv_w = 1.0f / out[3 + i * 4];

    out[0 + i * 4] = out[0 + i * 4] * inv_w * xfmem.viewport.wd + viewOffset[0];
    out[1 + i * 4] = out[1 + i * 4] * inv_w * xfmem.viewport.ht + viewOffset[1];
    out[2 + i * 4] = out[2 + i * 4] * inv_w * xfmem.viewport.zRange + xfmem.viewport.farZ;
  }

  // Edge deltas between the three screen-space vertices (vertex 1 = out[0..2],
  // vertex 2 = out[4..6], vertex 3 = out[8..10]).
  float dx31 = out[8] - out[0];
  float dx12 = out[0] - out[4];
  float dy12 = out[1] - out[5];
  float dy31 = out[9] - out[1];

  // Depth deltas relative to vertex 1.
  float DF31 = out[10] - out[2];
  float DF21 = out[6] - out[2];
  float a = DF31 * -dy12 - DF21 * dy31;
  float b = dx31 * DF21 + dx12 * DF31;
  // c is the 2D cross product of the two edges leaving vertex 1.
  float c = -dx12 * dy31 - dx31 * -dy12;

  // Sometimes we process degenerate triangles. Stop any divide by zeros
  if (c == 0)
    return;

  s_zslope.dfdx = -a / c;
  s_zslope.dfdy = -b / c;
  // Anchor the plane so that it passes through vertex 1.
  s_zslope.f0 = out[2] - (out[0] * s_zslope.dfdx + out[1] * s_zslope.dfdy);
  s_zslope.dirty = true;
}

View file

@ -15,73 +15,76 @@ class NativeVertexFormat;
class PointerWrap;
struct PortableVertexDeclaration;
// Primitive categories tracked by the vertex manager (see
// VertexManagerBase::current_primitive_type). Enumerators keep their implicit
// values 0, 1, 2 in declaration order.
enum PrimitiveType
{
  PRIMITIVE_POINTS,
  PRIMITIVE_LINES,
  PRIMITIVE_TRIANGLES,
};
// Screen-space depth plane used for z-freeze: depth(x, y) = f0 + dfdx * x + dfdy * y.
// Kept as a plain aggregate of three floats and a flag.
struct Slope
{
  float dfdx;  // depth change per unit of screen-space x
  float dfdy;  // depth change per unit of screen-space y
  float f0;    // depth of the plane at screen-space (0, 0)
  bool dirty;  // set when the slope was recomputed and not yet applied
};
// Backend-independent part of the vertex manager. Static members hold the
// shared vertex-buffer cursor and z-freeze state; the pure virtual functions
// (CreateNativeVertexFormat, ResetBuffer, vFlush) are the hooks a video
// backend must implement.
class VertexManagerBase
{
private:
  static const u32 SMALLEST_POSSIBLE_VERTEX = sizeof(float) * 3;  // 3 pos
  static const u32 LARGEST_POSSIBLE_VERTEX =
      sizeof(float) * 45 + sizeof(u32) * 2;  // 3 pos, 3*3 normal, 2*u32 color, 8*4 tex, 1 posMat

  // Largest value representable in a u16.
  static const u32 MAX_PRIMITIVES_PER_COMMAND = (u16)-1;

public:
  // Vertex buffer size in bytes: worst-case vertex size times the per-command
  // maximum, rounded up to a power of two.
  static const u32 MAXVBUFFERSIZE =
      ROUND_UP_POW2(MAX_PRIMITIVES_PER_COMMAND * LARGEST_POSSIBLE_VERTEX);

  // We may convert triangle-fans to triangle-lists, almost 3x as many indices.
  static const u32 MAXIBUFFERSIZE = ROUND_UP_POW2(MAX_PRIMITIVES_PER_COMMAND * 3);

  VertexManagerBase();
  // needs to be virtual for DX11's dtor
  virtual ~VertexManagerBase();

  static DataReader PrepareForAdditionalData(int primitive, u32 count, u32 stride, bool cullall);
  static void FlushData(u32 count, u32 stride);

  static void Flush();

  // Backend hook: builds the native vertex format for a portable declaration.
  virtual NativeVertexFormat*
  CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) = 0;

  // Runs s_zslope through p, then forwards p to g_vertex_manager->vDoState().
  static void DoState(PointerWrap& p);

protected:
  // Optional backend hook called from DoState(); does nothing by default.
  virtual void vDoState(PointerWrap& p) {}
  static PrimitiveType current_primitive_type;

  virtual void ResetBuffer(u32 stride) = 0;

  // Cursor, start and end of the vertex buffer currently being filled.
  static u8* s_pCurBufferPointer;
  static u8* s_pBaseBufferPointer;
  static u8* s_pEndBufferPointer;

  static u32 GetRemainingSize();
  static u32 GetRemainingIndices(int primitive);

  // Depth slope for z-freeze, refreshed by CalculateZSlope().
  static Slope s_zslope;
  static void CalculateZSlope(NativeVertexFormat* format);

  static bool s_cull_all;

private:
  static bool s_is_flushed;

  // Backend hook invoked from Flush() to submit the batch.
  virtual void vFlush(bool useDstAlpha) = 0;

  virtual void CreateDeviceObjects() {}
  virtual void DestroyDeviceObjects() {}
};
extern std::unique_ptr<VertexManagerBase> g_vertex_manager;

View file

@ -14,383 +14,417 @@
#include "VideoCommon/VertexShaderGen.h"
#include "VideoCommon/VideoConfig.h"
template<class T>
template <class T>
static T GenerateVertexShader(API_TYPE api_type)
{
T out;
const u32 components = VertexLoaderManager::g_current_components;
// Non-uid template parameters will write to the dummy data (=> gets optimized out)
vertex_shader_uid_data dummy_data;
vertex_shader_uid_data* uid_data = out.template GetUidData<vertex_shader_uid_data>();
if (uid_data != nullptr)
memset(uid_data, 0, sizeof(*uid_data));
else
uid_data = &dummy_data;
T out;
const u32 components = VertexLoaderManager::g_current_components;
// Non-uid template parameters will write to the dummy data (=> gets optimized out)
vertex_shader_uid_data dummy_data;
vertex_shader_uid_data* uid_data = out.template GetUidData<vertex_shader_uid_data>();
if (uid_data != nullptr)
memset(uid_data, 0, sizeof(*uid_data));
else
uid_data = &dummy_data;
_assert_(bpmem.genMode.numtexgens == xfmem.numTexGen.numTexGens);
_assert_(bpmem.genMode.numcolchans == xfmem.numChan.numColorChans);
_assert_(bpmem.genMode.numtexgens == xfmem.numTexGen.numTexGens);
_assert_(bpmem.genMode.numcolchans == xfmem.numChan.numColorChans);
out.Write("%s", s_lighting_struct);
out.Write("%s", s_lighting_struct);
// uniforms
if (api_type == API_OPENGL)
out.Write("layout(std140%s) uniform VSBlock {\n", g_ActiveConfig.backend_info.bSupportsBindingLayout ? ", binding = 2" : "");
else
out.Write("cbuffer VSBlock {\n");
out.Write(s_shader_uniforms);
out.Write("};\n");
// uniforms
if (api_type == API_OPENGL)
out.Write("layout(std140%s) uniform VSBlock {\n",
g_ActiveConfig.backend_info.bSupportsBindingLayout ? ", binding = 2" : "");
else
out.Write("cbuffer VSBlock {\n");
out.Write(s_shader_uniforms);
out.Write("};\n");
out.Write("struct VS_OUTPUT {\n");
GenerateVSOutputMembers<T>(out, api_type, "");
out.Write("};\n");
out.Write("struct VS_OUTPUT {\n");
GenerateVSOutputMembers<T>(out, api_type, "");
out.Write("};\n");
uid_data->numTexGens = xfmem.numTexGen.numTexGens;
uid_data->components = components;
uid_data->pixel_lighting = g_ActiveConfig.bEnablePixelLighting;
uid_data->numTexGens = xfmem.numTexGen.numTexGens;
uid_data->components = components;
uid_data->pixel_lighting = g_ActiveConfig.bEnablePixelLighting;
if (api_type == API_OPENGL)
{
out.Write("in float4 rawpos; // ATTR%d,\n", SHADER_POSITION_ATTRIB);
if (components & VB_HAS_POSMTXIDX)
out.Write("in int posmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB);
if (components & VB_HAS_NRM0)
out.Write("in float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB);
if (components & VB_HAS_NRM1)
out.Write("in float3 rawnorm1; // ATTR%d,\n", SHADER_NORM1_ATTRIB);
if (components & VB_HAS_NRM2)
out.Write("in float3 rawnorm2; // ATTR%d,\n", SHADER_NORM2_ATTRIB);
if (api_type == API_OPENGL)
{
out.Write("in float4 rawpos; // ATTR%d,\n", SHADER_POSITION_ATTRIB);
if (components & VB_HAS_POSMTXIDX)
out.Write("in int posmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB);
if (components & VB_HAS_NRM0)
out.Write("in float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB);
if (components & VB_HAS_NRM1)
out.Write("in float3 rawnorm1; // ATTR%d,\n", SHADER_NORM1_ATTRIB);
if (components & VB_HAS_NRM2)
out.Write("in float3 rawnorm2; // ATTR%d,\n", SHADER_NORM2_ATTRIB);
if (components & VB_HAS_COL0)
out.Write("in float4 color0; // ATTR%d,\n", SHADER_COLOR0_ATTRIB);
if (components & VB_HAS_COL1)
out.Write("in float4 color1; // ATTR%d,\n", SHADER_COLOR1_ATTRIB);
if (components & VB_HAS_COL0)
out.Write("in float4 color0; // ATTR%d,\n", SHADER_COLOR0_ATTRIB);
if (components & VB_HAS_COL1)
out.Write("in float4 color1; // ATTR%d,\n", SHADER_COLOR1_ATTRIB);
for (int i = 0; i < 8; ++i)
{
u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0<<i));
if ((components & (VB_HAS_UV0<<i)) || hastexmtx)
out.Write("in float%d tex%d; // ATTR%d,\n", hastexmtx ? 3 : 2, i, SHADER_TEXTURE0_ATTRIB + i);
}
for (int i = 0; i < 8; ++i)
{
u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0 << i));
if ((components & (VB_HAS_UV0 << i)) || hastexmtx)
out.Write("in float%d tex%d; // ATTR%d,\n", hastexmtx ? 3 : 2, i,
SHADER_TEXTURE0_ATTRIB + i);
}
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
out.Write("out VertexData {\n");
GenerateVSOutputMembers<T>(out, api_type, GetInterpolationQualifier(true, false));
out.Write("} vs;\n");
}
else
{
// Let's set up attributes
for (u32 i = 0; i < 8; ++i)
{
if (i < xfmem.numTexGen.numTexGens)
{
out.Write("%s out float3 uv%u;\n", GetInterpolationQualifier(), i);
}
}
out.Write("%s out float4 clipPos;\n", GetInterpolationQualifier());
if (g_ActiveConfig.bEnablePixelLighting)
{
out.Write("%s out float3 Normal;\n", GetInterpolationQualifier());
out.Write("%s out float3 WorldPos;\n", GetInterpolationQualifier());
}
out.Write("%s out float4 colors_0;\n", GetInterpolationQualifier());
out.Write("%s out float4 colors_1;\n", GetInterpolationQualifier());
}
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
out.Write("out VertexData {\n");
GenerateVSOutputMembers<T>(out, api_type, GetInterpolationQualifier(true, false));
out.Write("} vs;\n");
}
else
{
// Let's set up attributes
for (u32 i = 0; i < 8; ++i)
{
if (i < xfmem.numTexGen.numTexGens)
{
out.Write("%s out float3 uv%u;\n", GetInterpolationQualifier(), i);
}
}
out.Write("%s out float4 clipPos;\n", GetInterpolationQualifier());
if (g_ActiveConfig.bEnablePixelLighting)
{
out.Write("%s out float3 Normal;\n", GetInterpolationQualifier());
out.Write("%s out float3 WorldPos;\n", GetInterpolationQualifier());
}
out.Write("%s out float4 colors_0;\n", GetInterpolationQualifier());
out.Write("%s out float4 colors_1;\n", GetInterpolationQualifier());
}
out.Write("void main()\n{\n");
}
else // D3D
{
out.Write("VS_OUTPUT main(\n");
out.Write("void main()\n{\n");
}
else // D3D
{
out.Write("VS_OUTPUT main(\n");
// inputs
if (components & VB_HAS_NRM0)
out.Write(" float3 rawnorm0 : NORMAL0,\n");
if (components & VB_HAS_NRM1)
out.Write(" float3 rawnorm1 : NORMAL1,\n");
if (components & VB_HAS_NRM2)
out.Write(" float3 rawnorm2 : NORMAL2,\n");
if (components & VB_HAS_COL0)
out.Write(" float4 color0 : COLOR0,\n");
if (components & VB_HAS_COL1)
out.Write(" float4 color1 : COLOR1,\n");
for (int i = 0; i < 8; ++i)
{
u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0<<i));
if ((components & (VB_HAS_UV0<<i)) || hastexmtx)
out.Write(" float%d tex%d : TEXCOORD%d,\n", hastexmtx ? 3 : 2, i, i);
}
if (components & VB_HAS_POSMTXIDX)
out.Write(" int posmtx : BLENDINDICES,\n");
out.Write(" float4 rawpos : POSITION) {\n");
}
// inputs
if (components & VB_HAS_NRM0)
out.Write(" float3 rawnorm0 : NORMAL0,\n");
if (components & VB_HAS_NRM1)
out.Write(" float3 rawnorm1 : NORMAL1,\n");
if (components & VB_HAS_NRM2)
out.Write(" float3 rawnorm2 : NORMAL2,\n");
if (components & VB_HAS_COL0)
out.Write(" float4 color0 : COLOR0,\n");
if (components & VB_HAS_COL1)
out.Write(" float4 color1 : COLOR1,\n");
for (int i = 0; i < 8; ++i)
{
u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0 << i));
if ((components & (VB_HAS_UV0 << i)) || hastexmtx)
out.Write(" float%d tex%d : TEXCOORD%d,\n", hastexmtx ? 3 : 2, i, i);
}
if (components & VB_HAS_POSMTXIDX)
out.Write(" int posmtx : BLENDINDICES,\n");
out.Write(" float4 rawpos : POSITION) {\n");
}
out.Write("VS_OUTPUT o;\n");
out.Write("VS_OUTPUT o;\n");
// transforms
if (components & VB_HAS_POSMTXIDX)
{
out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES"[posmtx], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+1], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+2], rawpos), 1);\n");
// transforms
if (components & VB_HAS_POSMTXIDX)
{
out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES
"[posmtx], rawpos), dot(" I_TRANSFORMMATRICES
"[posmtx+1], rawpos), dot(" I_TRANSFORMMATRICES "[posmtx+2], rawpos), 1);\n");
if (components & VB_HAS_NRMALL)
{
out.Write("int normidx = posmtx & 31;\n");
out.Write("float3 N0 = " I_NORMALMATRICES"[normidx].xyz, N1 = " I_NORMALMATRICES"[normidx+1].xyz, N2 = " I_NORMALMATRICES"[normidx+2].xyz;\n");
}
if (components & VB_HAS_NRMALL)
{
out.Write("int normidx = posmtx & 31;\n");
out.Write("float3 N0 = " I_NORMALMATRICES "[normidx].xyz, N1 = " I_NORMALMATRICES
"[normidx+1].xyz, N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n");
}
if (components & VB_HAS_NRM0)
out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n");
if (components & VB_HAS_NRM1)
out.Write("float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n");
if (components & VB_HAS_NRM2)
out.Write("float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n");
}
else
{
out.Write("float4 pos = float4(dot(" I_POSNORMALMATRIX"[0], rawpos), dot(" I_POSNORMALMATRIX"[1], rawpos), dot(" I_POSNORMALMATRIX"[2], rawpos), 1.0);\n");
if (components & VB_HAS_NRM0)
out.Write("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm0), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm0), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm0)));\n");
if (components & VB_HAS_NRM1)
out.Write("float3 _norm1 = float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm1), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm1), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm1));\n");
if (components & VB_HAS_NRM2)
out.Write("float3 _norm2 = float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm2), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm2), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm2));\n");
}
if (components & VB_HAS_NRM0)
out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, "
"rawnorm0)));\n");
if (components & VB_HAS_NRM1)
out.Write(
"float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n");
if (components & VB_HAS_NRM2)
out.Write(
"float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n");
}
else
{
out.Write("float4 pos = float4(dot(" I_POSNORMALMATRIX "[0], rawpos), dot(" I_POSNORMALMATRIX
"[1], rawpos), dot(" I_POSNORMALMATRIX "[2], rawpos), 1.0);\n");
if (components & VB_HAS_NRM0)
out.Write("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX
"[3].xyz, rawnorm0), dot(" I_POSNORMALMATRIX
"[4].xyz, rawnorm0), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm0)));\n");
if (components & VB_HAS_NRM1)
out.Write("float3 _norm1 = float3(dot(" I_POSNORMALMATRIX
"[3].xyz, rawnorm1), dot(" I_POSNORMALMATRIX
"[4].xyz, rawnorm1), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm1));\n");
if (components & VB_HAS_NRM2)
out.Write("float3 _norm2 = float3(dot(" I_POSNORMALMATRIX
"[3].xyz, rawnorm2), dot(" I_POSNORMALMATRIX
"[4].xyz, rawnorm2), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm2));\n");
}
if (!(components & VB_HAS_NRM0))
out.Write("float3 _norm0 = float3(0.0, 0.0, 0.0);\n");
if (!(components & VB_HAS_NRM0))
out.Write("float3 _norm0 = float3(0.0, 0.0, 0.0);\n");
out.Write("o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION
"[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n");
out.Write("o.pos = float4(dot(" I_PROJECTION"[0], pos), dot(" I_PROJECTION"[1], pos), dot(" I_PROJECTION"[2], pos), dot(" I_PROJECTION"[3], pos));\n");
out.Write("int4 lacc;\n"
"float3 ldir, h, cosAttn, distAttn;\n"
"float dist, dist2, attn;\n");
out.Write("int4 lacc;\n"
"float3 ldir, h, cosAttn, distAttn;\n"
"float dist, dist2, attn;\n");
uid_data->numColorChans = xfmem.numChan.numColorChans;
if (xfmem.numChan.numColorChans == 0)
{
if (components & VB_HAS_COL0)
out.Write("o.colors_0 = color0;\n");
else
out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n");
}
uid_data->numColorChans = xfmem.numChan.numColorChans;
if (xfmem.numChan.numColorChans == 0)
{
if (components & VB_HAS_COL0)
out.Write("o.colors_0 = color0;\n");
else
out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n");
}
GenerateLightingShader<T>(out, uid_data->lighting, components, "color", "o.colors_");
GenerateLightingShader<T>(out, uid_data->lighting, components, "color", "o.colors_");
if (xfmem.numChan.numColorChans < 2)
{
if (components & VB_HAS_COL1)
out.Write("o.colors_1 = color1;\n");
else
out.Write("o.colors_1 = o.colors_0;\n");
}
if (xfmem.numChan.numColorChans < 2)
{
if (components & VB_HAS_COL1)
out.Write("o.colors_1 = color1;\n");
else
out.Write("o.colors_1 = o.colors_0;\n");
}
// transform texcoords
out.Write("float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n");
for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
{
TexMtxInfo& texinfo = xfmem.texMtxInfo[i];
// transform texcoords
out.Write("float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n");
for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
{
TexMtxInfo& texinfo = xfmem.texMtxInfo[i];
out.Write("{\n");
out.Write("coord = float4(0.0, 0.0, 1.0, 1.0);\n");
uid_data->texMtxInfo[i].sourcerow = xfmem.texMtxInfo[i].sourcerow;
switch (texinfo.sourcerow)
{
case XF_SRCGEOM_INROW:
out.Write("coord.xyz = rawpos.xyz;\n");
break;
case XF_SRCNORMAL_INROW:
if (components & VB_HAS_NRM0)
{
out.Write("coord.xyz = rawnorm0.xyz;\n");
}
break;
case XF_SRCCOLORS_INROW:
_assert_(texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC0 ||
texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1);
break;
case XF_SRCBINORMAL_T_INROW:
if (components & VB_HAS_NRM1)
{
out.Write("coord.xyz = rawnorm1.xyz;\n");
}
break;
case XF_SRCBINORMAL_B_INROW:
if (components & VB_HAS_NRM2)
{
out.Write("coord.xyz = rawnorm2.xyz;\n");
}
break;
default:
_assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW);
if (components & (VB_HAS_UV0 << (texinfo.sourcerow - XF_SRCTEX0_INROW)))
out.Write("coord = float4(tex%d.x, tex%d.y, 1.0, 1.0);\n",
texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW);
break;
}
// Input form of AB11 sets z element to 1.0
uid_data->texMtxInfo[i].inputform = xfmem.texMtxInfo[i].inputform;
if (texinfo.inputform == XF_TEXINPUT_AB11)
out.Write("coord.z = 1.0;\n");
out.Write("{\n");
out.Write("coord = float4(0.0, 0.0, 1.0, 1.0);\n");
uid_data->texMtxInfo[i].sourcerow = xfmem.texMtxInfo[i].sourcerow;
switch (texinfo.sourcerow)
{
case XF_SRCGEOM_INROW:
out.Write("coord.xyz = rawpos.xyz;\n");
break;
case XF_SRCNORMAL_INROW:
if (components & VB_HAS_NRM0)
{
out.Write("coord.xyz = rawnorm0.xyz;\n");
}
break;
case XF_SRCCOLORS_INROW:
_assert_(texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC0 || texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1);
break;
case XF_SRCBINORMAL_T_INROW:
if (components & VB_HAS_NRM1)
{
out.Write("coord.xyz = rawnorm1.xyz;\n");
}
break;
case XF_SRCBINORMAL_B_INROW:
if (components & VB_HAS_NRM2)
{
out.Write("coord.xyz = rawnorm2.xyz;\n");
}
break;
default:
_assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW);
if (components & (VB_HAS_UV0 << (texinfo.sourcerow - XF_SRCTEX0_INROW)))
out.Write("coord = float4(tex%d.x, tex%d.y, 1.0, 1.0);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW);
break;
}
// Input form of AB11 sets z element to 1.0
uid_data->texMtxInfo[i].inputform = xfmem.texMtxInfo[i].inputform;
if (texinfo.inputform == XF_TEXINPUT_AB11)
out.Write("coord.z = 1.0;\n");
// first transformation
uid_data->texMtxInfo[i].texgentype = xfmem.texMtxInfo[i].texgentype;
switch (texinfo.texgentype)
{
case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map
// first transformation
uid_data->texMtxInfo[i].texgentype = xfmem.texMtxInfo[i].texgentype;
switch (texinfo.texgentype)
{
case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map
if (components & (VB_HAS_NRM1 | VB_HAS_NRM2))
{
// transform the light dir into tangent space
uid_data->texMtxInfo[i].embosslightshift = xfmem.texMtxInfo[i].embosslightshift;
uid_data->texMtxInfo[i].embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift;
out.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n",
LIGHT_POS_PARAMS(texinfo.embosslightshift));
out.Write(
"o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n", i,
texinfo.embosssourceshift);
}
else
{
// The following assert was triggered in House of the Dead Overkill and Star Wars Rogue
// Squadron 2
//_assert_(0); // should have normals
uid_data->texMtxInfo[i].embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift;
out.Write("o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift);
}
if (components & (VB_HAS_NRM1|VB_HAS_NRM2))
{
// transform the light dir into tangent space
uid_data->texMtxInfo[i].embosslightshift = xfmem.texMtxInfo[i].embosslightshift;
uid_data->texMtxInfo[i].embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift;
out.Write("ldir = normalize(" LIGHT_POS".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(texinfo.embosslightshift));
out.Write("o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n", i, texinfo.embosssourceshift);
}
else
{
// The following assert was triggered in House of the Dead Overkill and Star Wars Rogue Squadron 2
//_assert_(0); // should have normals
uid_data->texMtxInfo[i].embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift;
out.Write("o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift);
}
break;
case XF_TEXGEN_COLOR_STRGBC0:
out.Write("o.tex%d.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i);
break;
case XF_TEXGEN_COLOR_STRGBC1:
out.Write("o.tex%d.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i);
break;
case XF_TEXGEN_REGULAR:
default:
uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i;
if (components & (VB_HAS_TEXMTXIDX0 << i))
{
out.Write("int tmp = int(tex%d.z);\n", i);
if (texinfo.projection == XF_TEXPROJ_STQ)
out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES
"[tmp]), dot(coord, " I_TRANSFORMMATRICES
"[tmp+1]), dot(coord, " I_TRANSFORMMATRICES "[tmp+2]));\n",
i);
else
out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES
"[tmp]), dot(coord, " I_TRANSFORMMATRICES "[tmp+1]), 1);\n",
i);
}
else
{
if (texinfo.projection == XF_TEXPROJ_STQ)
out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES
"[%d]), dot(coord, " I_TEXMATRICES "[%d]), dot(coord, " I_TEXMATRICES
"[%d]));\n",
i, 3 * i, 3 * i + 1, 3 * i + 2);
else
out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES
"[%d]), dot(coord, " I_TEXMATRICES "[%d]), 1);\n",
i, 3 * i, 3 * i + 1);
}
break;
}
break;
case XF_TEXGEN_COLOR_STRGBC0:
out.Write("o.tex%d.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i);
break;
case XF_TEXGEN_COLOR_STRGBC1:
out.Write("o.tex%d.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i);
break;
case XF_TEXGEN_REGULAR:
default:
uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i;
if (components & (VB_HAS_TEXMTXIDX0<<i))
{
out.Write("int tmp = int(tex%d.z);\n", i);
if (texinfo.projection == XF_TEXPROJ_STQ)
out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES"[tmp]), dot(coord, " I_TRANSFORMMATRICES"[tmp+1]), dot(coord, " I_TRANSFORMMATRICES"[tmp+2]));\n", i);
else
out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES"[tmp]), dot(coord, " I_TRANSFORMMATRICES"[tmp+1]), 1);\n", i);
}
else
{
if (texinfo.projection == XF_TEXPROJ_STQ)
out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]));\n", i, 3*i, 3*i+1, 3*i+2);
else
out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]), 1);\n", i, 3*i, 3*i+1);
}
break;
}
uid_data->dualTexTrans_enabled = xfmem.dualTexTrans.enabled;
// CHECKME: does this only work for regular tex gen types?
if (xfmem.dualTexTrans.enabled && texinfo.texgentype == XF_TEXGEN_REGULAR)
{
const PostMtxInfo& postInfo = xfmem.postMtxInfo[i];
uid_data->dualTexTrans_enabled = xfmem.dualTexTrans.enabled;
// CHECKME: does this only work for regular tex gen types?
if (xfmem.dualTexTrans.enabled && texinfo.texgentype == XF_TEXGEN_REGULAR)
{
const PostMtxInfo& postInfo = xfmem.postMtxInfo[i];
uid_data->postMtxInfo[i].index = xfmem.postMtxInfo[i].index;
int postidx = postInfo.index;
out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES "[%d];\n"
"float4 P1 = " I_POSTTRANSFORMMATRICES "[%d];\n"
"float4 P2 = " I_POSTTRANSFORMMATRICES "[%d];\n",
postidx & 0x3f, (postidx + 1) & 0x3f, (postidx + 2) & 0x3f);
uid_data->postMtxInfo[i].index = xfmem.postMtxInfo[i].index;
int postidx = postInfo.index;
out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES"[%d];\n"
"float4 P1 = " I_POSTTRANSFORMMATRICES"[%d];\n"
"float4 P2 = " I_POSTTRANSFORMMATRICES"[%d];\n",
postidx & 0x3f, (postidx + 1) & 0x3f, (postidx + 2) & 0x3f);
uid_data->postMtxInfo[i].normalize = xfmem.postMtxInfo[i].normalize;
if (postInfo.normalize)
out.Write("o.tex%d.xyz = normalize(o.tex%d.xyz);\n", i, i);
uid_data->postMtxInfo[i].normalize = xfmem.postMtxInfo[i].normalize;
if (postInfo.normalize)
out.Write("o.tex%d.xyz = normalize(o.tex%d.xyz);\n", i, i);
// multiply by postmatrix
out.Write("o.tex%d.xyz = float3(dot(P0.xyz, o.tex%d.xyz) + P0.w, dot(P1.xyz, o.tex%d.xyz) + "
"P1.w, dot(P2.xyz, o.tex%d.xyz) + P2.w);\n",
i, i, i, i);
}
// multiply by postmatrix
out.Write("o.tex%d.xyz = float3(dot(P0.xyz, o.tex%d.xyz) + P0.w, dot(P1.xyz, o.tex%d.xyz) + P1.w, dot(P2.xyz, o.tex%d.xyz) + P2.w);\n", i, i, i, i);
}
out.Write("}\n");
}
out.Write("}\n");
}
// clipPos/w needs to be done in pixel shader, not here
out.Write("o.clipPos = o.pos;\n");
// clipPos/w needs to be done in pixel shader, not here
out.Write("o.clipPos = o.pos;\n");
if (g_ActiveConfig.bEnablePixelLighting)
{
out.Write("o.Normal = _norm0;\n");
out.Write("o.WorldPos = pos.xyz;\n");
if (g_ActiveConfig.bEnablePixelLighting)
{
out.Write("o.Normal = _norm0;\n");
out.Write("o.WorldPos = pos.xyz;\n");
if (components & VB_HAS_COL0)
out.Write("o.colors_0 = color0;\n");
if (components & VB_HAS_COL0)
out.Write("o.colors_0 = color0;\n");
if (components & VB_HAS_COL1)
out.Write("o.colors_1 = color1;\n");
}
if (components & VB_HAS_COL1)
out.Write("o.colors_1 = color1;\n");
}
// write the true depth value, if the game uses depth textures pixel shaders will override with
// the correct values
// if not early z culling will improve speed
if (g_ActiveConfig.backend_info.bSupportsClipControl)
{
out.Write("o.pos.z = -o.pos.z;\n");
}
else // OGL
{
// this results in a scale from -1..0 to -1..1 after perspective
// divide
out.Write("o.pos.z = o.pos.z * -2.0 - o.pos.w;\n");
//write the true depth value, if the game uses depth textures pixel shaders will override with the correct values
//if not early z culling will improve speed
if (g_ActiveConfig.backend_info.bSupportsClipControl)
{
out.Write("o.pos.z = -o.pos.z;\n");
}
else // OGL
{
// this results in a scale from -1..0 to -1..1 after perspective
// divide
out.Write("o.pos.z = o.pos.z * -2.0 - o.pos.w;\n");
// the next steps of the OGL pipeline are:
// (x_c,y_c,z_c,w_c) = o.pos //switch to OGL spec terminology
// clipping to -w_c <= (x_c,y_c,z_c) <= w_c
// (x_d,y_d,z_d) = (x_c,y_c,z_c)/w_c//perspective divide
// z_w = (f-n)/2*z_d + (n+f)/2
// z_w now contains the value to go to the 0..1 depth buffer
// the next steps of the OGL pipeline are:
// (x_c,y_c,z_c,w_c) = o.pos //switch to OGL spec terminology
// clipping to -w_c <= (x_c,y_c,z_c) <= w_c
// (x_d,y_d,z_d) = (x_c,y_c,z_c)/w_c//perspective divide
// z_w = (f-n)/2*z_d + (n+f)/2
// z_w now contains the value to go to the 0..1 depth buffer
// trying to get the correct semantic while not using glDepthRange
// seems to get rather complicated
}
//trying to get the correct semantic while not using glDepthRange
//seems to get rather complicated
}
// The console GPU places the pixel center at 7/12 in screen space unless
// antialiasing is enabled, while D3D and OpenGL place it at 0.5. This results
// in some primitives being placed one pixel too far to the bottom-right,
// which in turn can be critical if it happens for clear quads.
// Hence, we compensate for this pixel center difference so that primitives
// get rasterized correctly.
out.Write("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n");
// The console GPU places the pixel center at 7/12 in screen space unless
// antialiasing is enabled, while D3D and OpenGL place it at 0.5. This results
// in some primitives being placed one pixel too far to the bottom-right,
// which in turn can be critical if it happens for clear quads.
// Hence, we compensate for this pixel center difference so that primitives
// get rasterized correctly.
out.Write("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION".xy;\n");
if (api_type == API_OPENGL)
{
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
AssignVSOutputMembers(out, "vs", "o");
}
else
{
// TODO: Pass interface blocks between shader stages even if geometry shaders
// are not supported, however that will require at least OpenGL 3.2 support.
for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
out.Write("uv%d.xyz = o.tex%d;\n", i, i);
out.Write("clipPos = o.clipPos;\n");
if (g_ActiveConfig.bEnablePixelLighting)
{
out.Write("Normal = o.Normal;\n");
out.Write("WorldPos = o.WorldPos;\n");
}
out.Write("colors_0 = o.colors_0;\n");
out.Write("colors_1 = o.colors_1;\n");
}
if (api_type == API_OPENGL)
{
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
AssignVSOutputMembers(out, "vs", "o");
}
else
{
// TODO: Pass interface blocks between shader stages even if geometry shaders
// are not supported, however that will require at least OpenGL 3.2 support.
for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
out.Write("uv%d.xyz = o.tex%d;\n", i, i);
out.Write("clipPos = o.clipPos;\n");
if (g_ActiveConfig.bEnablePixelLighting)
{
out.Write("Normal = o.Normal;\n");
out.Write("WorldPos = o.WorldPos;\n");
}
out.Write("colors_0 = o.colors_0;\n");
out.Write("colors_1 = o.colors_1;\n");
}
out.Write("gl_Position = o.pos;\n");
}
else // D3D
{
out.Write("return o;\n");
}
out.Write("}\n");
out.Write("gl_Position = o.pos;\n");
}
else // D3D
{
out.Write("return o;\n");
}
out.Write("}\n");
return out;
return out;
}
VertexShaderUid GetVertexShaderUid(API_TYPE api_type)
{
return GenerateVertexShader<VertexShaderUid>(api_type);
return GenerateVertexShader<VertexShaderUid>(api_type);
}
ShaderCode GenerateVertexShaderCode(API_TYPE api_type)
{
return GenerateVertexShader<ShaderCode>(api_type);
return GenerateVertexShader<ShaderCode>(api_type);
}

View file

@ -10,52 +10,54 @@
#include "VideoCommon/VideoCommon.h"
// TODO should be reordered
#define SHADER_POSITION_ATTRIB 0
#define SHADER_POSMTX_ATTRIB 1
#define SHADER_NORM0_ATTRIB 2
#define SHADER_NORM1_ATTRIB 3
#define SHADER_NORM2_ATTRIB 4
#define SHADER_COLOR0_ATTRIB 5
#define SHADER_COLOR1_ATTRIB 6
#define SHADER_POSITION_ATTRIB 0
#define SHADER_POSMTX_ATTRIB 1
#define SHADER_NORM0_ATTRIB 2
#define SHADER_NORM1_ATTRIB 3
#define SHADER_NORM2_ATTRIB 4
#define SHADER_COLOR0_ATTRIB 5
#define SHADER_COLOR1_ATTRIB 6
#define SHADER_TEXTURE0_ATTRIB 8
#define SHADER_TEXTURE1_ATTRIB 9
#define SHADER_TEXTURE2_ATTRIB 10
#define SHADER_TEXTURE3_ATTRIB 11
#define SHADER_TEXTURE4_ATTRIB 12
#define SHADER_TEXTURE5_ATTRIB 13
#define SHADER_TEXTURE6_ATTRIB 14
#define SHADER_TEXTURE7_ATTRIB 15
#define SHADER_TEXTURE0_ATTRIB 8
#define SHADER_TEXTURE1_ATTRIB 9
#define SHADER_TEXTURE2_ATTRIB 10
#define SHADER_TEXTURE3_ATTRIB 11
#define SHADER_TEXTURE4_ATTRIB 12
#define SHADER_TEXTURE5_ATTRIB 13
#define SHADER_TEXTURE6_ATTRIB 14
#define SHADER_TEXTURE7_ATTRIB 15
#pragma pack(1)
struct vertex_shader_uid_data
{
u32 NumValues() const { return sizeof(vertex_shader_uid_data); }
u32 NumValues() const { return sizeof(vertex_shader_uid_data); }
u32 components : 23;
u32 numTexGens : 4;
u32 numColorChans : 2;
u32 dualTexTrans_enabled : 1;
u32 pixel_lighting : 1;
u32 pad : 1;
u32 components : 23;
u32 numTexGens : 4;
u32 numColorChans : 2;
u32 dualTexTrans_enabled : 1;
u32 pixel_lighting : 1;
u32 pad : 1;
u32 texMtxInfo_n_projection : 16; // Stored separately to guarantee that the texMtxInfo struct is
// 8 bits wide
struct
{
u32 inputform : 2;
u32 texgentype : 3;
u32 sourcerow : 5;
u32 embosssourceshift : 3;
u32 embosslightshift : 3;
} texMtxInfo[8];
u32 texMtxInfo_n_projection : 16; // Stored separately to guarantee that the texMtxInfo struct is 8 bits wide
struct {
u32 inputform : 2;
u32 texgentype : 3;
u32 sourcerow : 5;
u32 embosssourceshift : 3;
u32 embosslightshift : 3;
} texMtxInfo[8];
struct
{
u32 index : 6;
u32 normalize : 1;
u32 pad : 1;
} postMtxInfo[8];
struct {
u32 index : 6;
u32 normalize : 1;
u32 pad : 1;
} postMtxInfo[8];
LightingUidData lighting;
LightingUidData lighting;
};
#pragma pack()

File diff suppressed because it is too large Load diff

View file

@ -17,31 +17,31 @@ void UpdateProjectionHack(int iParams[], std::string sParams[]);
class VertexShaderManager
{
public:
static void Init();
static void Dirty();
static void Shutdown();
static void DoState(PointerWrap &p);
static void Init();
static void Dirty();
static void Shutdown();
static void DoState(PointerWrap& p);
// constant management
static void SetConstants();
// constant management
static void SetConstants();
static void InvalidateXFRange(int start, int end);
static void SetTexMatrixChangedA(u32 value);
static void SetTexMatrixChangedB(u32 value);
static void SetViewportChanged();
static void SetProjectionChanged();
static void SetMaterialColorChanged(int index);
static void InvalidateXFRange(int start, int end);
static void SetTexMatrixChangedA(u32 value);
static void SetTexMatrixChangedB(u32 value);
static void SetViewportChanged();
static void SetProjectionChanged();
static void SetMaterialColorChanged(int index);
static void TranslateView(float x, float y, float z = 0.0f);
static void RotateView(float x, float y);
static void ResetView();
static void TranslateView(float x, float y, float z = 0.0f);
static void RotateView(float x, float y);
static void ResetView();
// data: 3 floats representing the X, Y and Z vertex model coordinates and the posmatrix index.
// out: 4 floats which will be initialized with the corresponding clip space coordinates
// NOTE: g_fProjectionMatrix must be up to date when this is called
// (i.e. VertexShaderManager::SetConstants needs to be called before using this!)
static void TransformToClipSpace(const float* data, float* out, u32 mtxIdx);
// data: 3 floats representing the X, Y and Z vertex model coordinates and the posmatrix index.
// out: 4 floats which will be initialized with the corresponding clip space coordinates
// NOTE: g_fProjectionMatrix must be up to date when this is called
// (i.e. VertexShaderManager::SetConstants needs to be called before using this!)
static void TransformToClipSpace(const float* data, float* out, u32 mtxIdx);
static VertexShaderConstants constants;
static bool dirty;
static VertexShaderConstants constants;
static bool dirty;
};

View file

@ -34,49 +34,49 @@ __declspec(dllexport) DWORD NvOptimusEnablement = 1;
void VideoBackendBase::PopulateList()
{
// OGL > D3D11 > D3D12 > SW
g_available_video_backends.push_back(std::make_unique<OGL::VideoBackend>());
// OGL > D3D11 > D3D12 > SW
g_available_video_backends.push_back(std::make_unique<OGL::VideoBackend>());
#ifdef _WIN32
g_available_video_backends.push_back(std::make_unique<DX11::VideoBackend>());
g_available_video_backends.push_back(std::make_unique<DX11::VideoBackend>());
// More robust way to check for D3D12 support than (unreliable) OS version checks.
HMODULE d3d12_module = LoadLibraryA("d3d12.dll");
if (d3d12_module != nullptr)
{
FreeLibrary(d3d12_module);
g_available_video_backends.push_back(std::make_unique<DX12::VideoBackend>());
}
// More robust way to check for D3D12 support than (unreliable) OS version checks.
HMODULE d3d12_module = LoadLibraryA("d3d12.dll");
if (d3d12_module != nullptr)
{
FreeLibrary(d3d12_module);
g_available_video_backends.push_back(std::make_unique<DX12::VideoBackend>());
}
#endif
g_available_video_backends.push_back(std::make_unique<SW::VideoSoftware>());
g_available_video_backends.push_back(std::make_unique<SW::VideoSoftware>());
const auto iter = std::find_if(g_available_video_backends.begin(), g_available_video_backends.end(), [](const auto& backend) {
return backend != nullptr;
});
const auto iter =
std::find_if(g_available_video_backends.begin(), g_available_video_backends.end(),
[](const auto& backend) { return backend != nullptr; });
if (iter == g_available_video_backends.end())
return;
if (iter == g_available_video_backends.end())
return;
s_default_backend = iter->get();
g_video_backend = iter->get();
s_default_backend = iter->get();
g_video_backend = iter->get();
}
void VideoBackendBase::ClearList()
{
g_available_video_backends.clear();
g_available_video_backends.clear();
}
void VideoBackendBase::ActivateBackend(const std::string& name)
{
// If empty, set it to the default backend (expected behavior)
if (name.empty())
g_video_backend = s_default_backend;
// If empty, set it to the default backend (expected behavior)
if (name.empty())
g_video_backend = s_default_backend;
const auto iter = std::find_if(g_available_video_backends.begin(), g_available_video_backends.end(), [&name](const auto& backend) {
return name == backend->GetName();
});
const auto iter =
std::find_if(g_available_video_backends.begin(), g_available_video_backends.end(),
[&name](const auto& backend) { return name == backend->GetName(); });
if (iter == g_available_video_backends.end())
return;
if (iter == g_available_video_backends.end())
return;
g_video_backend = iter->get();
g_video_backend = iter->get();
}

View file

@ -11,93 +11,96 @@
#include "Common/CommonTypes.h"
#include "VideoCommon/PerfQueryBase.h"
namespace MMIO { class Mapping; }
namespace MMIO
{
class Mapping;
}
class PointerWrap;
enum FieldType
{
FIELD_ODD = 0,
FIELD_EVEN = 1,
FIELD_ODD = 0,
FIELD_EVEN = 1,
};
enum EFBAccessType
{
PEEK_Z = 0,
POKE_Z,
PEEK_COLOR,
POKE_COLOR
PEEK_Z = 0,
POKE_Z,
PEEK_COLOR,
POKE_COLOR
};
struct SCPFifoStruct
{
// fifo registers
volatile u32 CPBase;
volatile u32 CPEnd;
u32 CPHiWatermark;
u32 CPLoWatermark;
volatile u32 CPReadWriteDistance;
volatile u32 CPWritePointer;
volatile u32 CPReadPointer;
volatile u32 CPBreakpoint;
volatile u32 SafeCPReadPointer;
// Super Monkey Ball Adventure require this.
// Because the read&check-PEToken-loop stays in its JITed block I suppose.
// So no possiblity to ack the Token irq by the scheduler until some sort of PPC watchdog do its mess.
volatile u16 PEToken;
// fifo registers
volatile u32 CPBase;
volatile u32 CPEnd;
u32 CPHiWatermark;
u32 CPLoWatermark;
volatile u32 CPReadWriteDistance;
volatile u32 CPWritePointer;
volatile u32 CPReadPointer;
volatile u32 CPBreakpoint;
volatile u32 SafeCPReadPointer;
// Super Monkey Ball Adventure require this.
// Because the read&check-PEToken-loop stays in its JITed block I suppose.
// So no possiblity to ack the Token irq by the scheduler until some sort of PPC watchdog do its
// mess.
volatile u16 PEToken;
volatile u32 bFF_GPLinkEnable;
volatile u32 bFF_GPReadEnable;
volatile u32 bFF_BPEnable;
volatile u32 bFF_BPInt;
volatile u32 bFF_Breakpoint;
volatile u32 bFF_GPLinkEnable;
volatile u32 bFF_GPReadEnable;
volatile u32 bFF_BPEnable;
volatile u32 bFF_BPInt;
volatile u32 bFF_Breakpoint;
volatile u32 bFF_LoWatermarkInt;
volatile u32 bFF_HiWatermarkInt;
volatile u32 bFF_LoWatermarkInt;
volatile u32 bFF_HiWatermarkInt;
volatile u32 bFF_LoWatermark;
volatile u32 bFF_HiWatermark;
volatile u32 bFF_LoWatermark;
volatile u32 bFF_HiWatermark;
};
class VideoBackendBase
{
public:
virtual ~VideoBackendBase() {}
virtual ~VideoBackendBase() {}
virtual unsigned int PeekMessages() = 0;
virtual unsigned int PeekMessages() = 0;
virtual bool Initialize(void* window_handle) = 0;
virtual void Shutdown() = 0;
virtual bool Initialize(void* window_handle) = 0;
virtual void Shutdown() = 0;
virtual std::string GetName() const = 0;
virtual std::string GetDisplayName() const { return GetName(); }
virtual void ShowConfig(void*) = 0;
virtual std::string GetName() const = 0;
virtual std::string GetDisplayName() const { return GetName(); }
virtual void Video_Prepare() = 0;
void Video_ExitLoop();
virtual void Video_Cleanup() = 0; // called from gl/d3d thread
virtual void ShowConfig(void*) = 0;
void Video_BeginField(u32, u32, u32, u32);
void Video_EndField();
virtual void Video_Prepare() = 0;
void Video_ExitLoop();
virtual void Video_Cleanup() = 0; // called from gl/d3d thread
u32 Video_AccessEFB(EFBAccessType, u32, u32, u32);
u32 Video_GetQueryResult(PerfQueryType type);
u16 Video_GetBoundingBox(int index);
void Video_BeginField(u32, u32, u32, u32);
void Video_EndField();
static void PopulateList();
static void ClearList();
static void ActivateBackend(const std::string& name);
u32 Video_AccessEFB(EFBAccessType, u32, u32, u32);
u32 Video_GetQueryResult(PerfQueryType type);
u16 Video_GetBoundingBox(int index);
// the implementation needs not do synchronization logic, because calls to it are surrounded by
// PauseAndLock now
void DoState(PointerWrap& p);
static void PopulateList();
static void ClearList();
static void ActivateBackend(const std::string& name);
// the implementation needs not do synchronization logic, because calls to it are surrounded by PauseAndLock now
void DoState(PointerWrap &p);
void CheckInvalidState();
void CheckInvalidState();
protected:
void InitializeShared();
void InitializeShared();
bool m_initialized = false;
bool m_invalid = false;
bool m_initialized = false;
bool m_invalid = false;
};
extern std::vector<std::unique_ptr<VideoBackendBase>> g_available_video_backends;

View file

@ -17,8 +17,8 @@ extern bool g_bRecordFifoData;
// These are accurate (disregarding AA modes).
enum
{
EFB_WIDTH = 640,
EFB_HEIGHT = 528,
EFB_WIDTH = 640,
EFB_HEIGHT = 528,
};
// Max XFB width is 720. You can only copy out 640 wide areas of efb to XFB
@ -43,17 +43,17 @@ typedef MathUtil::Rectangle<int> EFBRectangle;
struct TargetRectangle : public MathUtil::Rectangle<int>
{
#ifdef _WIN32
// Only used by D3D backend.
const RECT *AsRECT() const
{
// The types are binary compatible so this works.
return (const RECT *)this;
}
RECT *AsRECT()
{
// The types are binary compatible so this works.
return (RECT *)this;
}
// Only used by D3D backend.
const RECT* AsRECT() const
{
// The types are binary compatible so this works.
return (const RECT*)this;
}
RECT* AsRECT()
{
// The types are binary compatible so this works.
return (RECT*)this;
}
#endif
};
@ -64,35 +64,36 @@ struct TargetRectangle : public MathUtil::Rectangle<int>
#endif
// warning: mapping buffer should be disabled to use this
// #define LOG_VTX() DEBUG_LOG(VIDEO, "vtx: %f %f %f, ", ((float*)g_vertex_manager_write_ptr)[-3], ((float*)g_vertex_manager_write_ptr)[-2], ((float*)g_vertex_manager_write_ptr)[-1]);
// #define LOG_VTX() DEBUG_LOG(VIDEO, "vtx: %f %f %f, ", ((float*)g_vertex_manager_write_ptr)[-3],
// ((float*)g_vertex_manager_write_ptr)[-2], ((float*)g_vertex_manager_write_ptr)[-1]);
#define LOG_VTX()
enum API_TYPE
{
API_OPENGL = 1,
API_D3D = 2,
API_NONE = 3
API_OPENGL = 1,
API_D3D = 2,
API_NONE = 3
};
inline u32 RGBA8ToRGBA6ToRGBA8(u32 src)
{
u32 color = src;
color &= 0xFCFCFCFC;
color |= (color >> 6) & 0x03030303;
return color;
u32 color = src;
color &= 0xFCFCFCFC;
color |= (color >> 6) & 0x03030303;
return color;
}
inline u32 RGBA8ToRGB565ToRGBA8(u32 src)
{
u32 color = (src & 0xF8FCF8);
color |= (color >> 5) & 0x070007;
color |= (color >> 6) & 0x000300;
color |= 0xFF000000;
return color;
u32 color = (src & 0xF8FCF8);
color |= (color >> 5) & 0x070007;
color |= (color >> 6) & 0x000300;
color |= 0xFF000000;
return color;
}
inline u32 Z24ToZ16ToZ24(u32 src)
{
return (src & 0xFFFF00) | (src >> 16);
return (src & 0xFFFF00) | (src >> 16);
}

View file

@ -21,309 +21,318 @@ VideoConfig g_ActiveConfig;
void UpdateActiveConfig()
{
if (Movie::IsPlayingInput() && Movie::IsConfigSaved())
Movie::SetGraphicsConfig();
g_ActiveConfig = g_Config;
if (Movie::IsPlayingInput() && Movie::IsConfigSaved())
Movie::SetGraphicsConfig();
g_ActiveConfig = g_Config;
}
VideoConfig::VideoConfig()
{
bRunning = false;
bRunning = false;
// Exclusive fullscreen flags
bFullscreen = false;
bExclusiveMode = false;
// Exclusive fullscreen flags
bFullscreen = false;
bExclusiveMode = false;
// Needed for the first frame, I think
fAspectRatioHackW = 1;
fAspectRatioHackH = 1;
// Needed for the first frame, I think
fAspectRatioHackW = 1;
fAspectRatioHackH = 1;
// disable all features by default
backend_info.APIType = API_NONE;
backend_info.bSupportsExclusiveFullscreen = false;
// disable all features by default
backend_info.APIType = API_NONE;
backend_info.bSupportsExclusiveFullscreen = false;
}
void VideoConfig::Load(const std::string& ini_file)
{
IniFile iniFile;
iniFile.Load(ini_file);
IniFile iniFile;
iniFile.Load(ini_file);
IniFile::Section* hardware = iniFile.GetOrCreateSection("Hardware");
hardware->Get("VSync", &bVSync, 0);
hardware->Get("Adapter", &iAdapter, 0);
IniFile::Section* hardware = iniFile.GetOrCreateSection("Hardware");
hardware->Get("VSync", &bVSync, 0);
hardware->Get("Adapter", &iAdapter, 0);
IniFile::Section* settings = iniFile.GetOrCreateSection("Settings");
settings->Get("wideScreenHack", &bWidescreenHack, false);
settings->Get("AspectRatio", &iAspectRatio, (int)ASPECT_AUTO);
settings->Get("Crop", &bCrop, false);
settings->Get("UseXFB", &bUseXFB, 0);
settings->Get("UseRealXFB", &bUseRealXFB, 0);
settings->Get("SafeTextureCacheColorSamples", &iSafeTextureCache_ColorSamples, 128);
settings->Get("ShowFPS", &bShowFPS, false);
settings->Get("LogRenderTimeToFile", &bLogRenderTimeToFile, false);
settings->Get("OverlayStats", &bOverlayStats, false);
settings->Get("OverlayProjStats", &bOverlayProjStats, false);
settings->Get("DumpTextures", &bDumpTextures, 0);
settings->Get("HiresTextures", &bHiresTextures, 0);
settings->Get("ConvertHiresTextures", &bConvertHiresTextures, 0);
settings->Get("CacheHiresTextures", &bCacheHiresTextures, 0);
settings->Get("DumpEFBTarget", &bDumpEFBTarget, 0);
settings->Get("FreeLook", &bFreeLook, 0);
settings->Get("UseFFV1", &bUseFFV1, 0);
settings->Get("EnablePixelLighting", &bEnablePixelLighting, 0);
settings->Get("FastDepthCalc", &bFastDepthCalc, true);
settings->Get("MSAA", &iMultisamples, 1);
settings->Get("SSAA", &bSSAA, false);
settings->Get("EFBScale", &iEFBScale, (int)SCALE_1X); // native
settings->Get("TexFmtOverlayEnable", &bTexFmtOverlayEnable, 0);
settings->Get("TexFmtOverlayCenter", &bTexFmtOverlayCenter, 0);
settings->Get("WireFrame", &bWireFrame, 0);
settings->Get("DisableFog", &bDisableFog, 0);
settings->Get("EnableShaderDebugging", &bEnableShaderDebugging, false);
settings->Get("BorderlessFullscreen", &bBorderlessFullscreen, false);
IniFile::Section* settings = iniFile.GetOrCreateSection("Settings");
settings->Get("wideScreenHack", &bWidescreenHack, false);
settings->Get("AspectRatio", &iAspectRatio, (int)ASPECT_AUTO);
settings->Get("Crop", &bCrop, false);
settings->Get("UseXFB", &bUseXFB, 0);
settings->Get("UseRealXFB", &bUseRealXFB, 0);
settings->Get("SafeTextureCacheColorSamples", &iSafeTextureCache_ColorSamples, 128);
settings->Get("ShowFPS", &bShowFPS, false);
settings->Get("LogRenderTimeToFile", &bLogRenderTimeToFile, false);
settings->Get("OverlayStats", &bOverlayStats, false);
settings->Get("OverlayProjStats", &bOverlayProjStats, false);
settings->Get("DumpTextures", &bDumpTextures, 0);
settings->Get("HiresTextures", &bHiresTextures, 0);
settings->Get("ConvertHiresTextures", &bConvertHiresTextures, 0);
settings->Get("CacheHiresTextures", &bCacheHiresTextures, 0);
settings->Get("DumpEFBTarget", &bDumpEFBTarget, 0);
settings->Get("FreeLook", &bFreeLook, 0);
settings->Get("UseFFV1", &bUseFFV1, 0);
settings->Get("EnablePixelLighting", &bEnablePixelLighting, 0);
settings->Get("FastDepthCalc", &bFastDepthCalc, true);
settings->Get("MSAA", &iMultisamples, 1);
settings->Get("SSAA", &bSSAA, false);
settings->Get("EFBScale", &iEFBScale, (int)SCALE_1X); // native
settings->Get("TexFmtOverlayEnable", &bTexFmtOverlayEnable, 0);
settings->Get("TexFmtOverlayCenter", &bTexFmtOverlayCenter, 0);
settings->Get("WireFrame", &bWireFrame, 0);
settings->Get("DisableFog", &bDisableFog, 0);
settings->Get("EnableShaderDebugging", &bEnableShaderDebugging, false);
settings->Get("BorderlessFullscreen", &bBorderlessFullscreen, false);
settings->Get("SWZComploc", &bZComploc, true);
settings->Get("SWZFreeze", &bZFreeze, true);
settings->Get("SWDumpObjects", &bDumpObjects, false);
settings->Get("SWDumpTevStages", &bDumpTevStages, false);
settings->Get("SWDumpTevTexFetches", &bDumpTevTextureFetches, false);
settings->Get("SWDrawStart", &drawStart, 0);
settings->Get("SWDrawEnd", &drawEnd, 100000);
settings->Get("SWZComploc", &bZComploc, true);
settings->Get("SWZFreeze", &bZFreeze, true);
settings->Get("SWDumpObjects", &bDumpObjects, false);
settings->Get("SWDumpTevStages", &bDumpTevStages, false);
settings->Get("SWDumpTevTexFetches", &bDumpTevTextureFetches, false);
settings->Get("SWDrawStart", &drawStart, 0);
settings->Get("SWDrawEnd", &drawEnd, 100000);
IniFile::Section* enhancements = iniFile.GetOrCreateSection("Enhancements");
enhancements->Get("ForceFiltering", &bForceFiltering, 0);
enhancements->Get("MaxAnisotropy", &iMaxAnisotropy, 0); // NOTE - this is x in (1 << x)
enhancements->Get("PostProcessingShader", &sPostProcessingShader, "");
IniFile::Section* enhancements = iniFile.GetOrCreateSection("Enhancements");
enhancements->Get("ForceFiltering", &bForceFiltering, 0);
enhancements->Get("MaxAnisotropy", &iMaxAnisotropy, 0); // NOTE - this is x in (1 << x)
enhancements->Get("PostProcessingShader", &sPostProcessingShader, "");
IniFile::Section* stereoscopy = iniFile.GetOrCreateSection("Stereoscopy");
stereoscopy->Get("StereoMode", &iStereoMode, 0);
stereoscopy->Get("StereoDepth", &iStereoDepth, 20);
stereoscopy->Get("StereoConvergencePercentage", &iStereoConvergencePercentage, 100);
stereoscopy->Get("StereoSwapEyes", &bStereoSwapEyes, false);
IniFile::Section* stereoscopy = iniFile.GetOrCreateSection("Stereoscopy");
stereoscopy->Get("StereoMode", &iStereoMode, 0);
stereoscopy->Get("StereoDepth", &iStereoDepth, 20);
stereoscopy->Get("StereoConvergencePercentage", &iStereoConvergencePercentage, 100);
stereoscopy->Get("StereoSwapEyes", &bStereoSwapEyes, false);
IniFile::Section* hacks = iniFile.GetOrCreateSection("Hacks");
hacks->Get("EFBAccessEnable", &bEFBAccessEnable, true);
hacks->Get("BBoxEnable", &bBBoxEnable, false);
hacks->Get("ForceProgressive", &bForceProgressive, true);
hacks->Get("EFBToTextureEnable", &bSkipEFBCopyToRam, true);
hacks->Get("EFBScaledCopy", &bCopyEFBScaled, true);
hacks->Get("EFBEmulateFormatChanges", &bEFBEmulateFormatChanges, false);
IniFile::Section* hacks = iniFile.GetOrCreateSection("Hacks");
hacks->Get("EFBAccessEnable", &bEFBAccessEnable, true);
hacks->Get("BBoxEnable", &bBBoxEnable, false);
hacks->Get("ForceProgressive", &bForceProgressive, true);
hacks->Get("EFBToTextureEnable", &bSkipEFBCopyToRam, true);
hacks->Get("EFBScaledCopy", &bCopyEFBScaled, true);
hacks->Get("EFBEmulateFormatChanges", &bEFBEmulateFormatChanges, false);
// hacks which are disabled by default
iPhackvalue[0] = 0;
bPerfQueriesEnable = false;
// hacks which are disabled by default
iPhackvalue[0] = 0;
bPerfQueriesEnable = false;
// Load common settings
iniFile.Load(File::GetUserPath(F_DOLPHINCONFIG_IDX));
IniFile::Section* interface = iniFile.GetOrCreateSection("Interface");
bool bTmp;
interface->Get("UsePanicHandlers", &bTmp, true);
SetEnableAlert(bTmp);
// Load common settings
iniFile.Load(File::GetUserPath(F_DOLPHINCONFIG_IDX));
IniFile::Section* interface = iniFile.GetOrCreateSection("Interface");
bool bTmp;
interface->Get("UsePanicHandlers", &bTmp, true);
SetEnableAlert(bTmp);
// Shader Debugging causes a huge slowdown and it's easy to forget about it
// since it's not exposed in the settings dialog. It's only used by
// developers, so displaying an obnoxious message avoids some confusion and
// is not too annoying/confusing for users.
//
// XXX(delroth): This is kind of a bad place to put this, but the current
// VideoCommon is a mess and we don't have a central initialization
// function to do these kind of checks. Instead, the init code is
// triplicated for each video backend.
if (bEnableShaderDebugging)
OSD::AddMessage("Warning: Shader Debugging is enabled, performance will suffer heavily", 15000);
// Shader Debugging causes a huge slowdown and it's easy to forget about it
// since it's not exposed in the settings dialog. It's only used by
// developers, so displaying an obnoxious message avoids some confusion and
// is not too annoying/confusing for users.
//
// XXX(delroth): This is kind of a bad place to put this, but the current
// VideoCommon is a mess and we don't have a central initialization
// function to do these kind of checks. Instead, the init code is
// triplicated for each video backend.
if (bEnableShaderDebugging)
OSD::AddMessage("Warning: Shader Debugging is enabled, performance will suffer heavily", 15000);
VerifyValidity();
VerifyValidity();
}
void VideoConfig::GameIniLoad()
{
bool gfx_override_exists = false;
bool gfx_override_exists = false;
// XXX: Again, bad place to put OSD messages at (see delroth's comment above)
// XXX: This will add an OSD message for each projection hack value... meh
#define CHECK_SETTING(section, key, var) do { \
decltype(var) temp = var; \
if (iniFile.GetIfExists(section, key, &var) && var != temp) { \
std::string msg = StringFromFormat("Note: Option \"%s\" is overridden by game ini.", key); \
OSD::AddMessage(msg, 7500); \
gfx_override_exists = true; \
} \
} while (0)
// XXX: Again, bad place to put OSD messages at (see delroth's comment above)
// XXX: This will add an OSD message for each projection hack value... meh
#define CHECK_SETTING(section, key, var) \
do \
{ \
decltype(var) temp = var; \
if (iniFile.GetIfExists(section, key, &var) && var != temp) \
{ \
std::string msg = StringFromFormat("Note: Option \"%s\" is overridden by game ini.", key); \
OSD::AddMessage(msg, 7500); \
gfx_override_exists = true; \
} \
} while (0)
IniFile iniFile = SConfig::GetInstance().LoadGameIni();
IniFile iniFile = SConfig::GetInstance().LoadGameIni();
CHECK_SETTING("Video_Hardware", "VSync", bVSync);
CHECK_SETTING("Video_Hardware", "VSync", bVSync);
CHECK_SETTING("Video_Settings", "wideScreenHack", bWidescreenHack);
CHECK_SETTING("Video_Settings", "AspectRatio", iAspectRatio);
CHECK_SETTING("Video_Settings", "Crop", bCrop);
CHECK_SETTING("Video_Settings", "UseXFB", bUseXFB);
CHECK_SETTING("Video_Settings", "UseRealXFB", bUseRealXFB);
CHECK_SETTING("Video_Settings", "SafeTextureCacheColorSamples", iSafeTextureCache_ColorSamples);
CHECK_SETTING("Video_Settings", "HiresTextures", bHiresTextures);
CHECK_SETTING("Video_Settings", "ConvertHiresTextures", bConvertHiresTextures);
CHECK_SETTING("Video_Settings", "CacheHiresTextures", bCacheHiresTextures);
CHECK_SETTING("Video_Settings", "EnablePixelLighting", bEnablePixelLighting);
CHECK_SETTING("Video_Settings", "FastDepthCalc", bFastDepthCalc);
CHECK_SETTING("Video_Settings", "MSAA", iMultisamples);
CHECK_SETTING("Video_Settings", "SSAA", bSSAA);
CHECK_SETTING("Video_Settings", "wideScreenHack", bWidescreenHack);
CHECK_SETTING("Video_Settings", "AspectRatio", iAspectRatio);
CHECK_SETTING("Video_Settings", "Crop", bCrop);
CHECK_SETTING("Video_Settings", "UseXFB", bUseXFB);
CHECK_SETTING("Video_Settings", "UseRealXFB", bUseRealXFB);
CHECK_SETTING("Video_Settings", "SafeTextureCacheColorSamples", iSafeTextureCache_ColorSamples);
CHECK_SETTING("Video_Settings", "HiresTextures", bHiresTextures);
CHECK_SETTING("Video_Settings", "ConvertHiresTextures", bConvertHiresTextures);
CHECK_SETTING("Video_Settings", "CacheHiresTextures", bCacheHiresTextures);
CHECK_SETTING("Video_Settings", "EnablePixelLighting", bEnablePixelLighting);
CHECK_SETTING("Video_Settings", "FastDepthCalc", bFastDepthCalc);
CHECK_SETTING("Video_Settings", "MSAA", iMultisamples);
CHECK_SETTING("Video_Settings", "SSAA", bSSAA);
int tmp = -9000;
CHECK_SETTING("Video_Settings", "EFBScale", tmp); // integral
if (tmp != -9000)
{
if (tmp != SCALE_FORCE_INTEGRAL)
{
iEFBScale = tmp;
}
else // Round down to multiple of native IR
{
switch (iEFBScale)
{
case SCALE_AUTO:
iEFBScale = SCALE_AUTO_INTEGRAL;
break;
case SCALE_1_5X:
iEFBScale = SCALE_1X;
break;
case SCALE_2_5X:
iEFBScale = SCALE_2X;
break;
default:
break;
}
}
}
int tmp = -9000;
CHECK_SETTING("Video_Settings", "EFBScale", tmp); // integral
if (tmp != -9000)
{
if (tmp != SCALE_FORCE_INTEGRAL)
{
iEFBScale = tmp;
}
else // Round down to multiple of native IR
{
switch (iEFBScale)
{
case SCALE_AUTO:
iEFBScale = SCALE_AUTO_INTEGRAL;
break;
case SCALE_1_5X:
iEFBScale = SCALE_1X;
break;
case SCALE_2_5X:
iEFBScale = SCALE_2X;
break;
default:
break;
}
}
}
CHECK_SETTING("Video_Settings", "DisableFog", bDisableFog);
CHECK_SETTING("Video_Settings", "DisableFog", bDisableFog);
CHECK_SETTING("Video_Enhancements", "ForceFiltering", bForceFiltering);
CHECK_SETTING("Video_Enhancements", "MaxAnisotropy", iMaxAnisotropy); // NOTE - this is x in (1 << x)
CHECK_SETTING("Video_Enhancements", "PostProcessingShader", sPostProcessingShader);
CHECK_SETTING("Video_Enhancements", "ForceFiltering", bForceFiltering);
CHECK_SETTING("Video_Enhancements", "MaxAnisotropy",
iMaxAnisotropy); // NOTE - this is x in (1 << x)
CHECK_SETTING("Video_Enhancements", "PostProcessingShader", sPostProcessingShader);
// These are not overrides, they are per-game stereoscopy parameters, hence no warning
iniFile.GetIfExists("Video_Stereoscopy", "StereoConvergence", &iStereoConvergence, 20);
iniFile.GetIfExists("Video_Stereoscopy", "StereoEFBMonoDepth", &bStereoEFBMonoDepth, false);
iniFile.GetIfExists("Video_Stereoscopy", "StereoDepthPercentage", &iStereoDepthPercentage, 100);
// These are not overrides, they are per-game stereoscopy parameters, hence no warning
iniFile.GetIfExists("Video_Stereoscopy", "StereoConvergence", &iStereoConvergence, 20);
iniFile.GetIfExists("Video_Stereoscopy", "StereoEFBMonoDepth", &bStereoEFBMonoDepth, false);
iniFile.GetIfExists("Video_Stereoscopy", "StereoDepthPercentage", &iStereoDepthPercentage, 100);
CHECK_SETTING("Video_Stereoscopy", "StereoMode", iStereoMode);
CHECK_SETTING("Video_Stereoscopy", "StereoDepth", iStereoDepth);
CHECK_SETTING("Video_Stereoscopy", "StereoSwapEyes", bStereoSwapEyes);
CHECK_SETTING("Video_Stereoscopy", "StereoMode", iStereoMode);
CHECK_SETTING("Video_Stereoscopy", "StereoDepth", iStereoDepth);
CHECK_SETTING("Video_Stereoscopy", "StereoSwapEyes", bStereoSwapEyes);
CHECK_SETTING("Video_Hacks", "EFBAccessEnable", bEFBAccessEnable);
CHECK_SETTING("Video_Hacks", "BBoxEnable", bBBoxEnable);
CHECK_SETTING("Video_Hacks", "ForceProgressive", bForceProgressive);
CHECK_SETTING("Video_Hacks", "EFBToTextureEnable", bSkipEFBCopyToRam);
CHECK_SETTING("Video_Hacks", "EFBScaledCopy", bCopyEFBScaled);
CHECK_SETTING("Video_Hacks", "EFBEmulateFormatChanges", bEFBEmulateFormatChanges);
CHECK_SETTING("Video_Hacks", "EFBAccessEnable", bEFBAccessEnable);
CHECK_SETTING("Video_Hacks", "BBoxEnable", bBBoxEnable);
CHECK_SETTING("Video_Hacks", "ForceProgressive", bForceProgressive);
CHECK_SETTING("Video_Hacks", "EFBToTextureEnable", bSkipEFBCopyToRam);
CHECK_SETTING("Video_Hacks", "EFBScaledCopy", bCopyEFBScaled);
CHECK_SETTING("Video_Hacks", "EFBEmulateFormatChanges", bEFBEmulateFormatChanges);
CHECK_SETTING("Video", "ProjectionHack", iPhackvalue[0]);
CHECK_SETTING("Video", "PH_SZNear", iPhackvalue[1]);
CHECK_SETTING("Video", "PH_SZFar", iPhackvalue[2]);
CHECK_SETTING("Video", "PH_ZNear", sPhackvalue[0]);
CHECK_SETTING("Video", "PH_ZFar", sPhackvalue[1]);
CHECK_SETTING("Video", "PerfQueriesEnable", bPerfQueriesEnable);
CHECK_SETTING("Video", "ProjectionHack", iPhackvalue[0]);
CHECK_SETTING("Video", "PH_SZNear", iPhackvalue[1]);
CHECK_SETTING("Video", "PH_SZFar", iPhackvalue[2]);
CHECK_SETTING("Video", "PH_ZNear", sPhackvalue[0]);
CHECK_SETTING("Video", "PH_ZFar", sPhackvalue[1]);
CHECK_SETTING("Video", "PerfQueriesEnable", bPerfQueriesEnable);
if (gfx_override_exists)
OSD::AddMessage("Warning: Opening the graphics configuration will reset settings and might cause issues!", 10000);
if (gfx_override_exists)
OSD::AddMessage(
"Warning: Opening the graphics configuration will reset settings and might cause issues!",
10000);
}
// Clamps settings to what the active backend reports in backend_info:
//  - iAdapter falls back to 0 when it is outside the Adapters list,
//  - iMultisamples falls back to 1 when it is not one of AAModes,
//  - stereoscopy (iStereoMode) is switched off, with an on-screen message, when
//    geometry shaders are unsupported or when Real XFB is in use.
// NOTE: this listing is a reformatting diff, so every statement below is shown
// twice (the pre-reformat line(s) followed by the reformatted line(s)).
void VideoConfig::VerifyValidity()
{
// TODO: Check iMaxAnisotropy value
if (iAdapter < 0 || iAdapter > ((int)backend_info.Adapters.size() - 1))
iAdapter = 0;
// TODO: Check iMaxAnisotropy value
if (iAdapter < 0 || iAdapter > ((int)backend_info.Adapters.size() - 1))
iAdapter = 0;
if (std::find(backend_info.AAModes.begin(), backend_info.AAModes.end(), iMultisamples) == backend_info.AAModes.end())
iMultisamples = 1;
if (std::find(backend_info.AAModes.begin(), backend_info.AAModes.end(), iMultisamples) ==
backend_info.AAModes.end())
iMultisamples = 1;
if (iStereoMode > 0)
{
if (!backend_info.bSupportsGeometryShaders)
{
OSD::AddMessage("Stereoscopic 3D isn't supported by your GPU, support for OpenGL 3.2 is required.", 10000);
iStereoMode = 0;
}
if (iStereoMode > 0)
{
if (!backend_info.bSupportsGeometryShaders)
{
OSD::AddMessage(
"Stereoscopic 3D isn't supported by your GPU, support for OpenGL 3.2 is required.",
10000);
iStereoMode = 0;
}
if (bUseXFB && bUseRealXFB)
{
OSD::AddMessage("Stereoscopic 3D isn't supported with Real XFB, turning off stereoscopy.", 10000);
iStereoMode = 0;
}
}
if (bUseXFB && bUseRealXFB)
{
OSD::AddMessage("Stereoscopic 3D isn't supported with Real XFB, turning off stereoscopy.",
10000);
iStereoMode = 0;
}
}
}
// Writes the current configuration to the INI file at `ini_file`.
// The file is loaded first, the keys below are set in the "Hardware",
// "Settings", "Enhancements", "Stereoscopy" and "Hacks" sections, and the
// result is saved back to the same path.
// NOTE(review): loading before writing presumably preserves keys that are not
// set here -- confirm against IniFile's behaviour.
// NOTE: this listing is a reformatting diff, so each group of statements is
// shown twice (pre-reformat lines first, reformatted lines second).
void VideoConfig::Save(const std::string& ini_file)
{
IniFile iniFile;
iniFile.Load(ini_file);
IniFile iniFile;
iniFile.Load(ini_file);
IniFile::Section* hardware = iniFile.GetOrCreateSection("Hardware");
hardware->Set("VSync", bVSync);
hardware->Set("Adapter", iAdapter);
IniFile::Section* hardware = iniFile.GetOrCreateSection("Hardware");
hardware->Set("VSync", bVSync);
hardware->Set("Adapter", iAdapter);
IniFile::Section* settings = iniFile.GetOrCreateSection("Settings");
settings->Set("AspectRatio", iAspectRatio);
settings->Set("Crop", bCrop);
settings->Set("wideScreenHack", bWidescreenHack);
settings->Set("UseXFB", bUseXFB);
settings->Set("UseRealXFB", bUseRealXFB);
settings->Set("SafeTextureCacheColorSamples", iSafeTextureCache_ColorSamples);
settings->Set("ShowFPS", bShowFPS);
settings->Set("LogRenderTimeToFile", bLogRenderTimeToFile);
settings->Set("OverlayStats", bOverlayStats);
settings->Set("OverlayProjStats", bOverlayProjStats);
settings->Set("DumpTextures", bDumpTextures);
settings->Set("HiresTextures", bHiresTextures);
settings->Set("ConvertHiresTextures", bConvertHiresTextures);
settings->Set("CacheHiresTextures", bCacheHiresTextures);
settings->Set("DumpEFBTarget", bDumpEFBTarget);
settings->Set("FreeLook", bFreeLook);
settings->Set("UseFFV1", bUseFFV1);
settings->Set("EnablePixelLighting", bEnablePixelLighting);
settings->Set("FastDepthCalc", bFastDepthCalc);
settings->Set("MSAA", iMultisamples);
settings->Set("SSAA", bSSAA);
settings->Set("EFBScale", iEFBScale);
settings->Set("TexFmtOverlayEnable", bTexFmtOverlayEnable);
settings->Set("TexFmtOverlayCenter", bTexFmtOverlayCenter);
settings->Set("Wireframe", bWireFrame);
settings->Set("DisableFog", bDisableFog);
settings->Set("EnableShaderDebugging", bEnableShaderDebugging);
settings->Set("BorderlessFullscreen", bBorderlessFullscreen);
IniFile::Section* settings = iniFile.GetOrCreateSection("Settings");
settings->Set("AspectRatio", iAspectRatio);
settings->Set("Crop", bCrop);
settings->Set("wideScreenHack", bWidescreenHack);
settings->Set("UseXFB", bUseXFB);
settings->Set("UseRealXFB", bUseRealXFB);
settings->Set("SafeTextureCacheColorSamples", iSafeTextureCache_ColorSamples);
settings->Set("ShowFPS", bShowFPS);
settings->Set("LogRenderTimeToFile", bLogRenderTimeToFile);
settings->Set("OverlayStats", bOverlayStats);
settings->Set("OverlayProjStats", bOverlayProjStats);
settings->Set("DumpTextures", bDumpTextures);
settings->Set("HiresTextures", bHiresTextures);
settings->Set("ConvertHiresTextures", bConvertHiresTextures);
settings->Set("CacheHiresTextures", bCacheHiresTextures);
settings->Set("DumpEFBTarget", bDumpEFBTarget);
settings->Set("FreeLook", bFreeLook);
settings->Set("UseFFV1", bUseFFV1);
settings->Set("EnablePixelLighting", bEnablePixelLighting);
settings->Set("FastDepthCalc", bFastDepthCalc);
settings->Set("MSAA", iMultisamples);
settings->Set("SSAA", bSSAA);
settings->Set("EFBScale", iEFBScale);
settings->Set("TexFmtOverlayEnable", bTexFmtOverlayEnable);
settings->Set("TexFmtOverlayCenter", bTexFmtOverlayCenter);
settings->Set("Wireframe", bWireFrame);
settings->Set("DisableFog", bDisableFog);
settings->Set("EnableShaderDebugging", bEnableShaderDebugging);
settings->Set("BorderlessFullscreen", bBorderlessFullscreen);
// Software-renderer ("SW"-prefixed) debugging keys also live in "Settings".
settings->Set("SWZComploc", bZComploc);
settings->Set("SWZFreeze", bZFreeze);
settings->Set("SWDumpObjects", bDumpObjects);
settings->Set("SWDumpTevStages", bDumpTevStages);
settings->Set("SWDumpTevTexFetches", bDumpTevTextureFetches);
settings->Set("SWDrawStart", drawStart);
settings->Set("SWDrawEnd", drawEnd);
settings->Set("SWZComploc", bZComploc);
settings->Set("SWZFreeze", bZFreeze);
settings->Set("SWDumpObjects", bDumpObjects);
settings->Set("SWDumpTevStages", bDumpTevStages);
settings->Set("SWDumpTevTexFetches", bDumpTevTextureFetches);
settings->Set("SWDrawStart", drawStart);
settings->Set("SWDrawEnd", drawEnd);
IniFile::Section* enhancements = iniFile.GetOrCreateSection("Enhancements");
enhancements->Set("ForceFiltering", bForceFiltering);
enhancements->Set("MaxAnisotropy", iMaxAnisotropy);
enhancements->Set("PostProcessingShader", sPostProcessingShader);
IniFile::Section* enhancements = iniFile.GetOrCreateSection("Enhancements");
enhancements->Set("ForceFiltering", bForceFiltering);
enhancements->Set("MaxAnisotropy", iMaxAnisotropy);
enhancements->Set("PostProcessingShader", sPostProcessingShader);
IniFile::Section* stereoscopy = iniFile.GetOrCreateSection("Stereoscopy");
stereoscopy->Set("StereoMode", iStereoMode);
stereoscopy->Set("StereoDepth", iStereoDepth);
stereoscopy->Set("StereoConvergencePercentage", iStereoConvergencePercentage);
stereoscopy->Set("StereoSwapEyes", bStereoSwapEyes);
IniFile::Section* stereoscopy = iniFile.GetOrCreateSection("Stereoscopy");
stereoscopy->Set("StereoMode", iStereoMode);
stereoscopy->Set("StereoDepth", iStereoDepth);
stereoscopy->Set("StereoConvergencePercentage", iStereoConvergencePercentage);
stereoscopy->Set("StereoSwapEyes", bStereoSwapEyes);
IniFile::Section* hacks = iniFile.GetOrCreateSection("Hacks");
hacks->Set("EFBAccessEnable", bEFBAccessEnable);
hacks->Set("BBoxEnable", bBBoxEnable);
hacks->Set("ForceProgressive", bForceProgressive);
hacks->Set("EFBToTextureEnable", bSkipEFBCopyToRam);
hacks->Set("EFBScaledCopy", bCopyEFBScaled);
hacks->Set("EFBEmulateFormatChanges", bEFBEmulateFormatChanges);
IniFile::Section* hacks = iniFile.GetOrCreateSection("Hacks");
hacks->Set("EFBAccessEnable", bEFBAccessEnable);
hacks->Set("BBoxEnable", bBBoxEnable);
hacks->Set("ForceProgressive", bForceProgressive);
hacks->Set("EFBToTextureEnable", bSkipEFBCopyToRam);
hacks->Set("EFBScaledCopy", bCopyEFBScaled);
hacks->Set("EFBEmulateFormatChanges", bEFBEmulateFormatChanges);
iniFile.Save(ini_file);
iniFile.Save(ini_file);
}
// Returns true only when the bVSync setting is on and
// Core::GetIsThrottlerTempDisabled() reports false.
// NOTE: the return statement is listed twice because this is a reformatting
// diff (pre-reformat line, then reformatted line).
bool VideoConfig::IsVSync()
{
return bVSync && !Core::GetIsThrottlerTempDisabled();
return bVSync && !Core::GetIsThrottlerTempDisabled();
}

View file

@ -2,7 +2,6 @@
// Licensed under GPLv2+
// Refer to the license.txt file included.
// IMPORTANT: UI etc should modify g_Config. Graphics code should read g_ActiveConfig.
// The reason for this is to get rid of race conditions etc when the configuration
// changes in the middle of a frame. This is done by copying g_Config to g_ActiveConfig
@ -18,167 +17,170 @@
#include "VideoCommon/VideoCommon.h"
// Log in two categories, and save three other options in the same byte
// These are bit flags (1, 2, 8, 16); VideoConfig::iLog is commented as holding
// "CONF_ bits". The four macros are listed twice because this is a
// reformatting diff (old lines, then reformatted lines).
#define CONF_LOG 1
#define CONF_PRIMLOG 2
#define CONF_SAVETARGETS 8
#define CONF_SAVESHADERS 16
#define CONF_LOG 1
#define CONF_PRIMLOG 2
#define CONF_SAVETARGETS 8
#define CONF_SAVESHADERS 16
// Aspect-ratio handling modes, with explicit values 0..3.
// NOTE(review): presumably the value stored in VideoConfig::iAspectRatio -- confirm.
// Enumerators are listed twice because this is a reformatting diff.
enum AspectMode
{
ASPECT_AUTO = 0,
ASPECT_ANALOG_WIDE = 1,
ASPECT_ANALOG = 2,
ASPECT_STRETCH = 3,
ASPECT_AUTO = 0,
ASPECT_ANALOG_WIDE = 1,
ASPECT_ANALOG = 2,
ASPECT_STRETCH = 3,
};
// Values for VideoConfig::iEFBScale.
// SCALE_FORCE_INTEGRAL (-1) is not a scale of its own: when a game INI supplies
// it, GameIniLoad keeps the current scale but rounds it down to an integral one
// (AUTO -> AUTO_INTEGRAL, 1.5x -> 1x, 2.5x -> 2x). The remaining enumerators
// count up from 0 in declaration order.
// Enumerators are listed twice because this is a reformatting diff.
enum EFBScale
{
SCALE_FORCE_INTEGRAL = -1,
SCALE_AUTO,
SCALE_AUTO_INTEGRAL,
SCALE_1X,
SCALE_1_5X,
SCALE_2X,
SCALE_2_5X,
SCALE_FORCE_INTEGRAL = -1,
SCALE_AUTO,
SCALE_AUTO_INTEGRAL,
SCALE_1X,
SCALE_1_5X,
SCALE_2X,
SCALE_2_5X,
};
// Stereoscopic 3D output modes. STEREO_OFF is 0; VideoConfig::VerifyValidity
// treats any iStereoMode greater than 0 as "stereoscopy enabled".
// Enumerators are listed twice because this is a reformatting diff.
enum StereoMode
{
STEREO_OFF = 0,
STEREO_SBS,
STEREO_TAB,
STEREO_ANAGLYPH,
STEREO_3DVISION
STEREO_OFF = 0,
STEREO_SBS,
STEREO_TAB,
STEREO_ANAGLYPH,
STEREO_3DVISION
};
// NEVER inherit from this class.
// Aggregate of all video settings plus the per-backend capability record
// (backend_info). Member names carry a type prefix: b = bool, i = int,
// s = std::string, f = float.
// NOTE: this listing is a reformatting diff, so each group of members is shown
// twice (pre-reformat lines first, reformatted lines second).
struct VideoConfig final
{
VideoConfig();
void Load(const std::string& ini_file);
void GameIniLoad();
void VerifyValidity();
void Save(const std::string& ini_file);
void UpdateProjectionHack();
bool IsVSync();
VideoConfig();
void Load(const std::string& ini_file);
void GameIniLoad();
void VerifyValidity();
void Save(const std::string& ini_file);
void UpdateProjectionHack();
bool IsVSync();
// General
bool bVSync;
bool bFullscreen;
bool bExclusiveMode;
bool bRunning;
bool bWidescreenHack;
int iAspectRatio;
bool bCrop; // Aspect ratio controls.
bool bUseXFB;
bool bUseRealXFB;
// General
bool bVSync;
bool bFullscreen;
bool bExclusiveMode;
bool bRunning;
bool bWidescreenHack;
int iAspectRatio;
bool bCrop; // Aspect ratio controls.
bool bUseXFB;
bool bUseRealXFB;
// Enhancements
int iMultisamples;
bool bSSAA;
int iEFBScale;
bool bForceFiltering;
int iMaxAnisotropy;
std::string sPostProcessingShader;
// Enhancements
int iMultisamples;
bool bSSAA;
int iEFBScale;
bool bForceFiltering;
int iMaxAnisotropy;
std::string sPostProcessingShader;
// Information
bool bShowFPS;
bool bOverlayStats;
bool bOverlayProjStats;
bool bTexFmtOverlayEnable;
bool bTexFmtOverlayCenter;
bool bLogRenderTimeToFile;
// Information
bool bShowFPS;
bool bOverlayStats;
bool bOverlayProjStats;
bool bTexFmtOverlayEnable;
bool bTexFmtOverlayCenter;
bool bLogRenderTimeToFile;
// Render
bool bWireFrame;
bool bDisableFog;
// Render
bool bWireFrame;
bool bDisableFog;
// Utility
bool bDumpTextures;
bool bHiresTextures;
bool bConvertHiresTextures;
bool bCacheHiresTextures;
bool bDumpEFBTarget;
bool bUseFFV1;
bool bFreeLook;
bool bBorderlessFullscreen;
// Utility
bool bDumpTextures;
bool bHiresTextures;
bool bConvertHiresTextures;
bool bCacheHiresTextures;
bool bDumpEFBTarget;
bool bUseFFV1;
bool bFreeLook;
bool bBorderlessFullscreen;
// Hacks
bool bEFBAccessEnable;
bool bPerfQueriesEnable;
bool bBBoxEnable;
bool bForceProgressive;
// Hacks
bool bEFBAccessEnable;
bool bPerfQueriesEnable;
bool bBBoxEnable;
bool bForceProgressive;
bool bEFBEmulateFormatChanges;
bool bSkipEFBCopyToRam;
bool bCopyEFBScaled;
int iSafeTextureCache_ColorSamples;
int iPhackvalue[3];
std::string sPhackvalue[2];
float fAspectRatioHackW, fAspectRatioHackH;
bool bEnablePixelLighting;
bool bFastDepthCalc;
int iLog; // CONF_ bits
int iSaveTargetId; // TODO: Should be dropped
bool bEFBEmulateFormatChanges;
bool bSkipEFBCopyToRam;
bool bCopyEFBScaled;
int iSafeTextureCache_ColorSamples;
int iPhackvalue[3];
std::string sPhackvalue[2];
float fAspectRatioHackW, fAspectRatioHackH;
bool bEnablePixelLighting;
bool bFastDepthCalc;
int iLog; // CONF_ bits
int iSaveTargetId; // TODO: Should be dropped
// Stereoscopy
int iStereoMode;
int iStereoDepth;
int iStereoConvergence;
int iStereoConvergencePercentage;
bool bStereoSwapEyes;
bool bStereoEFBMonoDepth;
int iStereoDepthPercentage;
// Stereoscopy
int iStereoMode;
int iStereoDepth;
int iStereoConvergence;
int iStereoConvergencePercentage;
bool bStereoSwapEyes;
bool bStereoEFBMonoDepth;
int iStereoDepthPercentage;
// D3D only config, mostly to be merged into the above
int iAdapter;
// D3D only config, mostly to be merged into the above
int iAdapter;
// Debugging
bool bEnableShaderDebugging;
// Debugging
bool bEnableShaderDebugging;
// VideoSW Debugging
int drawStart;
int drawEnd;
bool bZComploc;
bool bZFreeze;
bool bDumpObjects;
bool bDumpTevStages;
bool bDumpTevTextureFetches;
// VideoSW Debugging
int drawStart;
int drawEnd;
bool bZComploc;
bool bZFreeze;
bool bDumpObjects;
bool bDumpTevStages;
bool bDumpTevTextureFetches;
// Static config per API
// TODO: Move this out of VideoConfig
struct
{
API_TYPE APIType;
// Static config per API
// TODO: Move this out of VideoConfig
struct
{
API_TYPE APIType;
std::vector<std::string> Adapters; // for D3D
std::vector<int> AAModes;
std::vector<std::string> PPShaders; // post-processing shaders
std::vector<std::string> AnaglyphShaders; // anaglyph shaders
std::vector<std::string> Adapters; // for D3D
std::vector<int> AAModes;
std::vector<std::string> PPShaders; // post-processing shaders
std::vector<std::string> AnaglyphShaders; // anaglyph shaders
// TODO: merge AdapterName and Adapters array
std::string AdapterName; // for OpenGL
// TODO: merge AdapterName and Adapters array
std::string AdapterName; // for OpenGL
bool bSupportsExclusiveFullscreen;
bool bSupportsDualSourceBlend;
bool bSupportsPrimitiveRestart;
bool bSupportsOversizedViewports;
bool bSupportsGeometryShaders;
bool bSupports3DVision;
bool bSupportsEarlyZ; // needed by PixelShaderGen, so must stay in VideoCommon
bool bSupportsBindingLayout; // Needed by ShaderGen, so must stay in VideoCommon
bool bSupportsBBox;
bool bSupportsGSInstancing; // Needed by GeometryShaderGen, so must stay in VideoCommon
bool bSupportsPostProcessing;
bool bSupportsPaletteConversion;
bool bSupportsClipControl; // Needed by VertexShaderGen, so must stay in VideoCommon
bool bSupportsSSAA;
} backend_info;
bool bSupportsExclusiveFullscreen;
bool bSupportsDualSourceBlend;
bool bSupportsPrimitiveRestart;
bool bSupportsOversizedViewports;
bool bSupportsGeometryShaders;
bool bSupports3DVision;
bool bSupportsEarlyZ; // needed by PixelShaderGen, so must stay in VideoCommon
bool bSupportsBindingLayout; // Needed by ShaderGen, so must stay in VideoCommon
bool bSupportsBBox;
bool bSupportsGSInstancing; // Needed by GeometryShaderGen, so must stay in VideoCommon
bool bSupportsPostProcessing;
bool bSupportsPaletteConversion;
bool bSupportsClipControl; // Needed by VertexShaderGen, so must stay in VideoCommon
bool bSupportsSSAA;
} backend_info;
// Utility
// Real XFB = bUseXFB and bUseRealXFB; Virtual XFB = bUseXFB without bUseRealXFB.
// Exclusive fullscreen needs backend support and borderless mode turned off.
bool RealXFBEnabled() const { return bUseXFB && bUseRealXFB; }
bool VirtualXFBEnabled() const { return bUseXFB && !bUseRealXFB; }
bool ExclusiveFullscreenEnabled() const { return backend_info.bSupportsExclusiveFullscreen && !bBorderlessFullscreen; }
// Utility
bool RealXFBEnabled() const { return bUseXFB && bUseRealXFB; }
bool VirtualXFBEnabled() const { return bUseXFB && !bUseRealXFB; }
bool ExclusiveFullscreenEnabled() const
{
return backend_info.bSupportsExclusiveFullscreen && !bBorderlessFullscreen;
}
};
extern VideoConfig g_Config;

View file

@ -5,10 +5,10 @@
#include <cstring>
#include "Common/ChunkFile.h"
#include "VideoCommon/BoundingBox.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/BoundingBox.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/GeometryShaderManager.h"
#include "VideoCommon/PixelEngine.h"
@ -19,56 +19,56 @@
#include "VideoCommon/VideoState.h"
#include "VideoCommon/XFMemory.h"
// Passes all VideoCommon state through the PointerWrap `p`, in a fixed order:
// bpmem, CP state, xfmem, texMem, Fifo, CommandProcessor, PixelEngine, the
// pixel/vertex/geometry shader managers, VertexManagerBase and BoundingBox.
// Most sections are followed by a named p.DoMarker() call.
// NOTE: this listing is a reformatting diff. The signature appears twice (old
// `PointerWrap &p`, new `PointerWrap& p`) and so does every group of
// statements in the body.
void VideoCommon_DoState(PointerWrap &p)
void VideoCommon_DoState(PointerWrap& p)
{
// BP Memory
p.Do(bpmem);
p.DoMarker("BP Memory");
// BP Memory
p.Do(bpmem);
p.DoMarker("BP Memory");
// CP Memory
DoCPState(p);
// CP Memory
DoCPState(p);
// XF Memory
p.Do(xfmem);
p.DoMarker("XF Memory");
// XF Memory
p.Do(xfmem);
p.DoMarker("XF Memory");
// Texture decoder
p.DoArray(texMem);
p.DoMarker("texMem");
// Texture decoder
p.DoArray(texMem);
p.DoMarker("texMem");
// FIFO
Fifo::DoState(p);
p.DoMarker("Fifo");
// FIFO
Fifo::DoState(p);
p.DoMarker("Fifo");
CommandProcessor::DoState(p);
p.DoMarker("CommandProcessor");
CommandProcessor::DoState(p);
p.DoMarker("CommandProcessor");
PixelEngine::DoState(p);
p.DoMarker("PixelEngine");
PixelEngine::DoState(p);
p.DoMarker("PixelEngine");
// the old way of replaying current bpmem as writes to push side effects to pixel shader manager doesn't really work.
PixelShaderManager::DoState(p);
p.DoMarker("PixelShaderManager");
// the old way of replaying current bpmem as writes to push side effects to pixel shader manager
// doesn't really work.
PixelShaderManager::DoState(p);
p.DoMarker("PixelShaderManager");
VertexShaderManager::DoState(p);
p.DoMarker("VertexShaderManager");
VertexShaderManager::DoState(p);
p.DoMarker("VertexShaderManager");
GeometryShaderManager::DoState(p);
p.DoMarker("GeometryShaderManager");
GeometryShaderManager::DoState(p);
p.DoMarker("GeometryShaderManager");
VertexManagerBase::DoState(p);
p.DoMarker("VertexManager");
VertexManagerBase::DoState(p);
p.DoMarker("VertexManager");
BoundingBox::DoState(p);
p.DoMarker("BoundingBox");
BoundingBox::DoState(p);
p.DoMarker("BoundingBox");
// TODO: search for more data that should be saved and add it here
// TODO: search for more data that should be saved and add it here
}
// Zero-fills g_main_cp_state, g_preprocess_cp_state and the first TMEM_SIZE
// bytes of texMem.
// NOTE: the three memset calls are listed twice because this is a reformatting
// diff (old lines, then reformatted lines).
void VideoCommon_Init()
{
memset(&g_main_cp_state, 0, sizeof(g_main_cp_state));
memset(&g_preprocess_cp_state, 0, sizeof(g_preprocess_cp_state));
memset(texMem, 0, TMEM_SIZE);
memset(&g_main_cp_state, 0, sizeof(g_main_cp_state));
memset(&g_preprocess_cp_state, 0, sizeof(g_preprocess_cp_state));
memset(texMem, 0, TMEM_SIZE);
}

View file

@ -6,5 +6,5 @@
class PointerWrap;
void VideoCommon_DoState(PointerWrap &p);
void VideoCommon_DoState(PointerWrap& p);
void VideoCommon_Init();

View file

@ -14,300 +14,288 @@ class DataReader;
// Projection
// Values for TexMtxInfo::projection (annotated there as XF_TEXPROJ_X).
// Enumerators are listed twice because this is a reformatting diff.
enum : u32
{
XF_TEXPROJ_ST = 0,
XF_TEXPROJ_STQ = 1
XF_TEXPROJ_ST = 0,
XF_TEXPROJ_STQ = 1
};
// Input form
// Values for TexMtxInfo::inputform (annotated there as XF_TEXINPUT_X).
// Enumerators are listed twice because this is a reformatting diff.
enum : u32
{
XF_TEXINPUT_AB11 = 0,
XF_TEXINPUT_ABC1 = 1
XF_TEXINPUT_AB11 = 0,
XF_TEXINPUT_ABC1 = 1
};
// Texture generation type
// Values for TexMtxInfo::texgentype (annotated there as XF_TEXGEN_X).
// Enumerators are listed twice because this is a reformatting diff.
enum : u32
{
XF_TEXGEN_REGULAR = 0,
XF_TEXGEN_EMBOSS_MAP = 1, // Used when bump mapping
XF_TEXGEN_COLOR_STRGBC0 = 2,
XF_TEXGEN_COLOR_STRGBC1 = 3
XF_TEXGEN_REGULAR = 0,
XF_TEXGEN_EMBOSS_MAP = 1, // Used when bump mapping
XF_TEXGEN_COLOR_STRGBC0 = 2,
XF_TEXGEN_COLOR_STRGBC1 = 3
};
// Source row
// Values for TexMtxInfo::sourcerow (annotated there as XF_SRCGEOM_X).
// Enumerators are listed twice because this is a reformatting diff.
enum : u32
{
XF_SRCGEOM_INROW = 0, // Input is abc
XF_SRCNORMAL_INROW = 1, // Input is abc
XF_SRCCOLORS_INROW = 2,
XF_SRCBINORMAL_T_INROW = 3, // Input is abc
XF_SRCBINORMAL_B_INROW = 4, // Input is abc
XF_SRCTEX0_INROW = 5,
XF_SRCTEX1_INROW = 6,
XF_SRCTEX2_INROW = 7,
XF_SRCTEX3_INROW = 8,
XF_SRCTEX4_INROW = 9,
XF_SRCTEX5_INROW = 10,
XF_SRCTEX6_INROW = 11,
XF_SRCTEX7_INROW = 12
XF_SRCGEOM_INROW = 0, // Input is abc
XF_SRCNORMAL_INROW = 1, // Input is abc
XF_SRCCOLORS_INROW = 2,
XF_SRCBINORMAL_T_INROW = 3, // Input is abc
XF_SRCBINORMAL_B_INROW = 4, // Input is abc
XF_SRCTEX0_INROW = 5,
XF_SRCTEX1_INROW = 6,
XF_SRCTEX2_INROW = 7,
XF_SRCTEX3_INROW = 8,
XF_SRCTEX4_INROW = 9,
XF_SRCTEX5_INROW = 10,
XF_SRCTEX6_INROW = 11,
XF_SRCTEX7_INROW = 12
};
// Control source
// NOTE(review): presumably the values of the 1-bit LitChannel::matsource and
// LitChannel::ambsource fields -- confirm against their users.
// Enumerators are listed twice because this is a reformatting diff.
enum : u32
{
GX_SRC_REG = 0,
GX_SRC_VTX = 1
GX_SRC_REG = 0,
GX_SRC_VTX = 1
};
// Light diffuse attenuation function
// Values for LitChannel::diffusefunc (annotated there as LIGHTDIF_X).
// Enumerators are listed twice because this is a reformatting diff.
enum : u32
{
LIGHTDIF_NONE = 0,
LIGHTDIF_SIGN = 1,
LIGHTDIF_CLAMP = 2
LIGHTDIF_NONE = 0,
LIGHTDIF_SIGN = 1,
LIGHTDIF_CLAMP = 2
};
// Light attenuation function
// Values for LitChannel::attnfunc (annotated there as LIGHTATTN_X).
// Enumerators are listed twice because this is a reformatting diff.
enum : u32
{
LIGHTATTN_NONE = 0, // No attenuation
LIGHTATTN_SPEC = 1, // Point light attenuation
LIGHTATTN_DIR = 2, // Directional light attenuation
LIGHTATTN_SPOT = 3 // Spot light attenuation
LIGHTATTN_NONE = 0, // No attenuation
LIGHTATTN_SPEC = 1, // Point light attenuation
LIGHTATTN_DIR = 2, // Directional light attenuation
LIGHTATTN_SPOT = 3 // Spot light attenuation
};
// Projection type
// The only values allowed in Projection::type, per the comment on that member.
// Enumerators are listed twice because this is a reformatting diff.
enum : u32
{
GX_PERSPECTIVE = 0,
GX_ORTHOGRAPHIC = 1
GX_PERSPECTIVE = 0,
GX_ORTHOGRAPHIC = 1
};
// Registers and register ranges
// The values line up with the address comments on the members of struct
// XFMemory (for example XFMEM_ERROR = 0x1000 and `error; // 0x1000`).
// For the matrix and light ranges each *_END value is one past the last
// address named in those comments (XFMEM_LIGHTS_END = 0x680, while lights are
// commented as 0x0600 - 0x067f).
// Enumerators are listed twice because this is a reformatting diff.
enum
{
XFMEM_POSMATRICES = 0x000,
XFMEM_POSMATRICES_END = 0x100,
XFMEM_NORMALMATRICES = 0x400,
XFMEM_NORMALMATRICES_END = 0x460,
XFMEM_POSTMATRICES = 0x500,
XFMEM_POSTMATRICES_END = 0x600,
XFMEM_LIGHTS = 0x600,
XFMEM_LIGHTS_END = 0x680,
XFMEM_ERROR = 0x1000,
XFMEM_DIAG = 0x1001,
XFMEM_STATE0 = 0x1002,
XFMEM_STATE1 = 0x1003,
XFMEM_CLOCK = 0x1004,
XFMEM_CLIPDISABLE = 0x1005,
XFMEM_SETGPMETRIC = 0x1006,
XFMEM_VTXSPECS = 0x1008,
XFMEM_SETNUMCHAN = 0x1009,
XFMEM_SETCHAN0_AMBCOLOR = 0x100a,
XFMEM_SETCHAN1_AMBCOLOR = 0x100b,
XFMEM_SETCHAN0_MATCOLOR = 0x100c,
XFMEM_SETCHAN1_MATCOLOR = 0x100d,
XFMEM_SETCHAN0_COLOR = 0x100e,
XFMEM_SETCHAN1_COLOR = 0x100f,
XFMEM_SETCHAN0_ALPHA = 0x1010,
XFMEM_SETCHAN1_ALPHA = 0x1011,
XFMEM_DUALTEX = 0x1012,
XFMEM_SETMATRIXINDA = 0x1018,
XFMEM_SETMATRIXINDB = 0x1019,
XFMEM_SETVIEWPORT = 0x101a,
XFMEM_SETZSCALE = 0x101c,
XFMEM_SETZOFFSET = 0x101f,
XFMEM_SETPROJECTION = 0x1020,
// XFMEM_SETPROJECTIONB = 0x1021,
// XFMEM_SETPROJECTIONC = 0x1022,
// XFMEM_SETPROJECTIOND = 0x1023,
// XFMEM_SETPROJECTIONE = 0x1024,
// XFMEM_SETPROJECTIONF = 0x1025,
// XFMEM_SETPROJECTIONORTHO1 = 0x1026,
// XFMEM_SETPROJECTIONORTHO2 = 0x1027,
XFMEM_SETNUMTEXGENS = 0x103f,
XFMEM_SETTEXMTXINFO = 0x1040,
XFMEM_SETPOSMTXINFO = 0x1050,
XFMEM_POSMATRICES = 0x000,
XFMEM_POSMATRICES_END = 0x100,
XFMEM_NORMALMATRICES = 0x400,
XFMEM_NORMALMATRICES_END = 0x460,
XFMEM_POSTMATRICES = 0x500,
XFMEM_POSTMATRICES_END = 0x600,
XFMEM_LIGHTS = 0x600,
XFMEM_LIGHTS_END = 0x680,
XFMEM_ERROR = 0x1000,
XFMEM_DIAG = 0x1001,
XFMEM_STATE0 = 0x1002,
XFMEM_STATE1 = 0x1003,
XFMEM_CLOCK = 0x1004,
XFMEM_CLIPDISABLE = 0x1005,
XFMEM_SETGPMETRIC = 0x1006,
XFMEM_VTXSPECS = 0x1008,
XFMEM_SETNUMCHAN = 0x1009,
XFMEM_SETCHAN0_AMBCOLOR = 0x100a,
XFMEM_SETCHAN1_AMBCOLOR = 0x100b,
XFMEM_SETCHAN0_MATCOLOR = 0x100c,
XFMEM_SETCHAN1_MATCOLOR = 0x100d,
XFMEM_SETCHAN0_COLOR = 0x100e,
XFMEM_SETCHAN1_COLOR = 0x100f,
XFMEM_SETCHAN0_ALPHA = 0x1010,
XFMEM_SETCHAN1_ALPHA = 0x1011,
XFMEM_DUALTEX = 0x1012,
XFMEM_SETMATRIXINDA = 0x1018,
XFMEM_SETMATRIXINDB = 0x1019,
XFMEM_SETVIEWPORT = 0x101a,
XFMEM_SETZSCALE = 0x101c,
XFMEM_SETZOFFSET = 0x101f,
XFMEM_SETPROJECTION = 0x1020,
// XFMEM_SETPROJECTIONB = 0x1021,
// XFMEM_SETPROJECTIONC = 0x1022,
// XFMEM_SETPROJECTIOND = 0x1023,
// XFMEM_SETPROJECTIONE = 0x1024,
// XFMEM_SETPROJECTIONF = 0x1025,
// XFMEM_SETPROJECTIONORTHO1 = 0x1026,
// XFMEM_SETPROJECTIONORTHO2 = 0x1027,
XFMEM_SETNUMTEXGENS = 0x103f,
XFMEM_SETTEXMTXINFO = 0x1040,
XFMEM_SETPOSMTXINFO = 0x1050,
};
// Lighting-channel control word, exposed through three overlapping bit-field
// views: the individual fields (15 bits in total), the same 15 bits as `hex`,
// and `lightparams`, a 4-bit field placed after 7 bits of padding.
// NOTE(review): bit-field placement is implementation-defined; the views are
// presumably meant to alias the same low bits -- confirm for the target ABI.
// NOTE: this listing is a reformatting diff. The old form (`union LitChannel`
// with `{` on its own line) is followed by the reformatted form
// (`union LitChannel {`); both share the closing `};`.
union LitChannel
{
struct
{
u32 matsource : 1;
u32 enablelighting : 1;
u32 lightMask0_3 : 4;
u32 ambsource : 1;
u32 diffusefunc : 2; // LIGHTDIF_X
u32 attnfunc : 2; // LIGHTATTN_X
u32 lightMask4_7 : 4;
};
struct
{
u32 hex : 15;
u32 unused : 17;
};
struct
{
u32 dummy0 : 7;
u32 lightparams : 4;
u32 dummy1 : 21;
};
// Returns the 8-bit light mask (lightMask0_3 in the low nibble, lightMask4_7
// in the high nibble), or 0 when enablelighting is clear.
unsigned int GetFullLightMask() const
{
return enablelighting ? (lightMask0_3 | (lightMask4_7 << 4)) : 0;
}
union LitChannel {
struct
{
u32 matsource : 1;
u32 enablelighting : 1;
u32 lightMask0_3 : 4;
u32 ambsource : 1;
u32 diffusefunc : 2; // LIGHTDIF_X
u32 attnfunc : 2; // LIGHTATTN_X
u32 lightMask4_7 : 4;
};
struct
{
u32 hex : 15;
u32 unused : 17;
};
struct
{
u32 dummy0 : 7;
u32 lightparams : 4;
u32 dummy1 : 21;
};
unsigned int GetFullLightMask() const
{
return enablelighting ? (lightMask0_3 | (lightMask4_7 << 4)) : 0;
}
};
// Vertex-input description: counts of colors, normals and textures packed
// into bit-fields, with `hex` giving access to the whole 32-bit word.
// XFMemory::hostinfo has this type.
// NOTE: this listing is a reformatting diff. The old form of the union header
// is followed by the reformatted form; both share the closing `};`.
union INVTXSPEC
{
struct
{
u32 numcolors : 2;
u32 numnormals : 2; // 0 - nothing, 1 - just normal, 2 - normals and binormals
u32 numtextures : 4;
u32 unused : 24;
};
u32 hex;
union INVTXSPEC {
struct
{
u32 numcolors : 2;
u32 numnormals : 2; // 0 - nothing, 1 - just normal, 2 - normals and binormals
u32 numtextures : 4;
u32 unused : 24;
};
u32 hex;
};
// Per-texgen control word; the bit-fields select projection, input form,
// texgen type, source row and emboss parameters, and `hex` gives access to the
// whole 32-bit word. XFMemory::texMtxInfo is an array of eight of these.
// NOTE: this listing is a reformatting diff. The old form of the union header
// is followed by the reformatted form; both share the closing `};`.
union TexMtxInfo
{
struct
{
u32 unknown : 1;
u32 projection : 1; // XF_TEXPROJ_X
u32 inputform : 1; // XF_TEXINPUT_X
u32 unknown2 : 1;
u32 texgentype : 3; // XF_TEXGEN_X
u32 sourcerow : 5; // XF_SRCGEOM_X
u32 embosssourceshift : 3; // what generated texcoord to use
u32 embosslightshift : 3; // light index that is used
};
u32 hex;
union TexMtxInfo {
struct
{
u32 unknown : 1;
u32 projection : 1; // XF_TEXPROJ_X
u32 inputform : 1; // XF_TEXINPUT_X
u32 unknown2 : 1;
u32 texgentype : 3; // XF_TEXGEN_X
u32 sourcerow : 5; // XF_SRCGEOM_X
u32 embosssourceshift : 3; // what generated texcoord to use
u32 embosslightshift : 3; // light index that is used
};
u32 hex;
};
// Post-transform (dual texture transform) control word: matrix base row plus
// a normalize flag, with `hex` giving access to the whole 32-bit word.
// XFMemory::postMtxInfo is an array of eight of these.
// NOTE: this listing is a reformatting diff. The old form of the union header
// is followed by the reformatted form; both share the closing `};`.
union PostMtxInfo
{
struct
{
u32 index : 6; // base row of dual transform matrix
u32 unused : 2;
u32 normalize : 1; // normalize before send operation
};
u32 hex;
union PostMtxInfo {
struct
{
u32 index : 6; // base row of dual transform matrix
u32 unused : 2;
u32 normalize : 1; // normalize before send operation
};
u32 hex;
};
// 2-bit color-channel count with `hex` access to the whole 32-bit word.
// XFMemory::numChan has this type.
// NOTE: this listing is a reformatting diff. The old form of the union header
// is followed by the reformatted form; both share the closing `};`.
union NumColorChannel
{
struct
{
u32 numColorChans : 2;
};
u32 hex;
union NumColorChannel {
struct
{
u32 numColorChans : 2;
};
u32 hex;
};
// 4-bit texgen count with `hex` access to the whole 32-bit word.
// XFMemory::numTexGen has this type.
// NOTE: this listing is a reformatting diff. The old form of the union header
// is followed by the reformatted form; both share the closing `};`.
union NumTexGen
{
struct
{
u32 numTexGens : 4;
};
u32 hex;
union NumTexGen {
struct
{
u32 numTexGens : 4;
};
u32 hex;
};
// Single `enabled` bit with `hex` access to the whole 32-bit word.
// XFMemory::dualTexTrans has this type.
// NOTE: this listing is a reformatting diff. The old form of the union header
// is followed by the reformatted form; both share the closing `};`.
union DualTexInfo
{
struct
{
u32 enabled : 1;
};
u32 hex;
union DualTexInfo {
struct
{
u32 enabled : 1;
};
u32 hex;
};
// One light entry; XFMemory::lights is an array of eight of these.
// The trailing anonymous union gives two names to the same six floats:
// dpos/ddir and sdir/shalfangle.
// NOTE: this listing is a reformatting diff, so the members and the anonymous
// union are each shown twice (old lines, then reformatted lines).
struct Light
{
u32 useless[3];
u8 color[4];
float cosatt[3]; // cos attenuation
float distatt[3]; // dist attenuation
u32 useless[3];
u8 color[4];
float cosatt[3]; // cos attenuation
float distatt[3]; // dist attenuation
union
{
struct
{
float dpos[3];
float ddir[3]; // specular lights only
};
union {
struct
{
float dpos[3];
float ddir[3]; // specular lights only
};
struct
{
float sdir[3];
float shalfangle[3]; // specular lights only
};
};
struct
{
float sdir[3];
float shalfangle[3]; // specular lights only
};
};
};
// Six-float viewport record; XFMemory::viewport (commented 0x101a - 0x101f)
// has this type.
// NOTE(review): wd/ht presumably mean width/height -- confirm against users.
// Members are listed twice because this is a reformatting diff.
struct Viewport
{
float wd;
float ht;
float zRange;
float xOrig;
float yOrig;
float farZ;
float wd;
float ht;
float zRange;
float xOrig;
float yOrig;
float farZ;
};
// Raw projection parameters plus the projection type; XFMemory::projection
// (commented 0x1020 - 0x1026) has this type.
// Members are listed twice because this is a reformatting diff.
struct Projection
{
float rawProjection[6];
u32 type; // only GX_PERSPECTIVE or GX_ORTHOGRAPHIC are allowed
float rawProjection[6];
u32 type; // only GX_PERSPECTIVE or GX_ORTHOGRAPHIC are allowed
};
// In-memory image of the XF register space. Each member's trailing comment
// gives the address or address range it covers; the unkN members fill the
// gaps between the named ranges.
// NOTE: this listing is a reformatting diff, so the full member list is shown
// twice (old lines, then reformatted lines).
struct XFMemory
{
float posMatrices[256]; // 0x0000 - 0x00ff
u32 unk0[768]; // 0x0100 - 0x03ff
float normalMatrices[96]; // 0x0400 - 0x045f
u32 unk1[160]; // 0x0460 - 0x04ff
float postMatrices[256]; // 0x0500 - 0x05ff
Light lights[8]; // 0x0600 - 0x067f
u32 unk2[2432]; // 0x0680 - 0x0fff
u32 error; // 0x1000
u32 diag; // 0x1001
u32 state0; // 0x1002
u32 state1; // 0x1003
u32 xfClock; // 0x1004
u32 clipDisable; // 0x1005
u32 perf0; // 0x1006
u32 perf1; // 0x1007
INVTXSPEC hostinfo; // 0x1008 number of textures,colors,normals from vertex input
NumColorChannel numChan; // 0x1009
u32 ambColor[2]; // 0x100a, 0x100b
u32 matColor[2]; // 0x100c, 0x100d
LitChannel color[2]; // 0x100e, 0x100f
LitChannel alpha[2]; // 0x1010, 0x1011
DualTexInfo dualTexTrans; // 0x1012
u32 unk3; // 0x1013
u32 unk4; // 0x1014
u32 unk5; // 0x1015
u32 unk6; // 0x1016
u32 unk7; // 0x1017
TMatrixIndexA MatrixIndexA; // 0x1018
TMatrixIndexB MatrixIndexB; // 0x1019
Viewport viewport; // 0x101a - 0x101f
Projection projection; // 0x1020 - 0x1026
u32 unk8[24]; // 0x1027 - 0x103e
NumTexGen numTexGen; // 0x103f
TexMtxInfo texMtxInfo[8]; // 0x1040 - 0x1047
u32 unk9[8]; // 0x1048 - 0x104f
PostMtxInfo postMtxInfo[8]; // 0x1050 - 0x1057
float posMatrices[256]; // 0x0000 - 0x00ff
u32 unk0[768]; // 0x0100 - 0x03ff
float normalMatrices[96]; // 0x0400 - 0x045f
u32 unk1[160]; // 0x0460 - 0x04ff
float postMatrices[256]; // 0x0500 - 0x05ff
Light lights[8]; // 0x0600 - 0x067f
u32 unk2[2432]; // 0x0680 - 0x0fff
u32 error; // 0x1000
u32 diag; // 0x1001
u32 state0; // 0x1002
u32 state1; // 0x1003
u32 xfClock; // 0x1004
u32 clipDisable; // 0x1005
u32 perf0; // 0x1006
u32 perf1; // 0x1007
INVTXSPEC hostinfo; // 0x1008 number of textures,colors,normals from vertex input
NumColorChannel numChan; // 0x1009
u32 ambColor[2]; // 0x100a, 0x100b
u32 matColor[2]; // 0x100c, 0x100d
LitChannel color[2]; // 0x100e, 0x100f
LitChannel alpha[2]; // 0x1010, 0x1011
DualTexInfo dualTexTrans; // 0x1012
u32 unk3; // 0x1013
u32 unk4; // 0x1014
u32 unk5; // 0x1015
u32 unk6; // 0x1016
u32 unk7; // 0x1017
TMatrixIndexA MatrixIndexA; // 0x1018
TMatrixIndexB MatrixIndexB; // 0x1019
Viewport viewport; // 0x101a - 0x101f
Projection projection; // 0x1020 - 0x1026
u32 unk8[24]; // 0x1027 - 0x103e
NumTexGen numTexGen; // 0x103f
TexMtxInfo texMtxInfo[8]; // 0x1040 - 0x1047
u32 unk9[8]; // 0x1048 - 0x104f
PostMtxInfo postMtxInfo[8]; // 0x1050 - 0x1057
};
extern XFMemory xfmem;
void LoadXFReg(u32 transferSize, u32 address, DataReader src);

Some files were not shown because too many files have changed in this diff Show more