diff --git a/rpcs3/Emu/Audio/audio_resampler.cpp b/rpcs3/Emu/Audio/audio_resampler.cpp
index c2d318db40..b029172dfa 100644
--- a/rpcs3/Emu/Audio/audio_resampler.cpp
+++ b/rpcs3/Emu/Audio/audio_resampler.cpp
@@ -33,8 +33,7 @@ void audio_resampler::put_samples(const f32* buf, u32 sample_cnt)
std::pair audio_resampler::get_samples(u32 sample_cnt)
{
- f32 *const buf = resampler.bufBegin();
- return std::make_pair(buf, resampler.receiveSamples(sample_cnt));
+ return std::make_pair(resampler.bufBegin(), resampler.receiveSamples(sample_cnt));
}
u32 audio_resampler::samples_available() const
diff --git a/rpcs3/Emu/Cell/Modules/cellAudio.cpp b/rpcs3/Emu/Cell/Modules/cellAudio.cpp
index c46b09b8a5..f9083c8fc4 100644
--- a/rpcs3/Emu/Cell/Modules/cellAudio.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellAudio.cpp
@@ -5,6 +5,7 @@
#include "Emu/Cell/lv2/sys_process.h"
#include "Emu/Cell/lv2/sys_event.h"
#include "cellAudio.h"
+#include "util/video_provider.h"
#include
@@ -69,7 +70,7 @@ void cell_audio_config::reset(bool backend_changed)
const AudioFreq freq = AudioFreq::FREQ_48K;
const AudioSampleSize sample_size = raw.convert_to_s16 ? AudioSampleSize::S16 : AudioSampleSize::FLOAT;
- const auto [req_ch_cnt, downmix] = AudioBackend::get_channel_count_and_downmixer(0); // CELL_AUDIO_OUT_PRIMARY
+ const auto& [req_ch_cnt, downmix] = AudioBackend::get_channel_count_and_downmixer(0); // CELL_AUDIO_OUT_PRIMARY
f64 cb_frame_len = 0.0;
u32 ch_cnt = 2;
@@ -276,16 +277,23 @@ void audio_ringbuffer::process_resampled_data()
{
if (!cfg.time_stretching_enabled) return;
- const auto [buffer, samples] = resampler.get_samples(static_cast(cb_ringbuf.get_free_size() / (cfg.audio_sample_size * static_cast(cfg.backend_ch_cnt))));
+ const auto& [buffer, samples] = resampler.get_samples(static_cast(cb_ringbuf.get_free_size() / (cfg.audio_sample_size * static_cast(cfg.backend_ch_cnt))));
commit_data(buffer, samples);
}
void audio_ringbuffer::commit_data(f32* buf, u32 sample_cnt)
{
- sample_cnt *= cfg.audio_channels;
+ const u32 sample_cnt_in = sample_cnt * cfg.audio_channels;
+ const u32 sample_cnt_out = sample_cnt * static_cast(cfg.backend_ch_cnt);
// Dump audio if enabled
- m_dump.WriteData(buf, sample_cnt * static_cast(AudioSampleSize::FLOAT));
+ m_dump.WriteData(buf, sample_cnt_in * static_cast(AudioSampleSize::FLOAT));
+
+ // Record audio if enabled
+ if (utils::video_provider& provider = g_fxo->get(); provider.can_consume_sample())
+ {
+ provider.present_samples(reinterpret_cast(buf), sample_cnt, static_cast(cfg.audio_channels));
+ }
if (cfg.backend_ch_cnt < AudioChannelCnt{cfg.audio_channels})
{
@@ -293,11 +301,11 @@ void audio_ringbuffer::commit_data(f32* buf, u32 sample_cnt)
{
if (cfg.backend_ch_cnt == AudioChannelCnt::SURROUND_5_1)
{
- AudioBackend::downmix(sample_cnt, buf, buf);
+ AudioBackend::downmix(sample_cnt_in, buf, buf);
}
else if (cfg.backend_ch_cnt == AudioChannelCnt::STEREO)
{
- AudioBackend::downmix(sample_cnt, buf, buf);
+ AudioBackend::downmix(sample_cnt_in, buf, buf);
}
else
{
@@ -308,7 +316,7 @@ void audio_ringbuffer::commit_data(f32* buf, u32 sample_cnt)
{
if (cfg.backend_ch_cnt == AudioChannelCnt::STEREO)
{
- AudioBackend::downmix(sample_cnt, buf, buf);
+ AudioBackend::downmix(sample_cnt_in, buf, buf);
}
else
{
@@ -321,8 +329,6 @@ void audio_ringbuffer::commit_data(f32* buf, u32 sample_cnt)
}
}
- const u32 sample_cnt_out = sample_cnt / cfg.audio_channels * static_cast(cfg.backend_ch_cnt);
-
if (cfg.backend->get_convert_to_s16())
{
AudioBackend::convert_to_s16(sample_cnt_out, buf, buf);
diff --git a/rpcs3/Emu/Cell/Modules/cellRec.cpp b/rpcs3/Emu/Cell/Modules/cellRec.cpp
index 53165ef6a5..ebdeffb49f 100644
--- a/rpcs3/Emu/Cell/Modules/cellRec.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellRec.cpp
@@ -140,34 +140,26 @@ struct rec_param
constexpr u32 rec_framerate = 30; // Always 30 fps
-class rec_image_sink : public utils::image_sink
+class rec_video_sink : public utils::video_sink
{
public:
- rec_image_sink() : utils::image_sink()
+ rec_video_sink() : utils::video_sink()
{
m_framerate = rec_framerate;
+ m_sample_rate = 44100; // TODO
}
void stop(bool flush = true) override
{
- cellRec.notice("Stopping image sink. flush=%d", flush);
+ cellRec.notice("Stopping video sink. flush=%d", flush);
std::lock_guard lock(m_mtx);
m_flush = flush;
m_frames_to_encode.clear();
+ m_samples_to_encode.clear();
has_error = false;
}
- void add_frame(std::vector& frame, u32 pitch, u32 width, u32 height, s32 pixel_format, usz timestamp_ms) override
- {
- std::lock_guard lock(m_mtx);
-
- if (m_flush)
- return;
-
- m_frames_to_encode.emplace_back(timestamp_ms, pitch, width, height, pixel_format, std::move(frame));
- }
-
encoder_frame get_frame()
{
std::lock_guard lock(m_mtx);
@@ -196,7 +188,7 @@ struct rec_info
vm::bptr video_input_buffer{}; // Used by the game to inject a frame right before it would render a frame to the screen.
vm::bptr audio_input_buffer{}; // Used by the game to inject audio: 2-channel interleaved (left-right) * 256 samples * sizeof(f32) at 48000 kHz
- std::vector video_ringbuffer;
+ std::vector video_ringbuffer;
std::vector audio_ringbuffer;
usz video_ring_pos = 0;
usz video_ring_frame_count = 0;
@@ -209,9 +201,9 @@ struct rec_info
return pos;
}
- std::shared_ptr image_sink;
+ std::shared_ptr video_sink;
std::shared_ptr encoder;
- std::unique_ptr>> image_provider_thread;
+ std::unique_ptr>> video_provider_thread;
atomic_t paused = false;
s64 last_pts = -1;
@@ -240,9 +232,9 @@ struct rec_info
void set_video_params(s32 video_format);
void set_audio_params(s32 audio_format);
- void start_image_provider();
- void pause_image_provider();
- void stop_image_provider(bool flush);
+ void start_video_provider();
+ void pause_video_provider();
+ void stop_video_provider(bool flush);
};
void rec_info::set_video_params(s32 video_format)
@@ -507,29 +499,29 @@ void rec_info::set_audio_params(s32 audio_format)
cellRec.notice("set_audio_params: audio_format=0x%x, audio_codec_id=%d, sample_rate=%d, audio_bps=%d", audio_format, audio_codec_id, sample_rate, audio_bps);
}
-void rec_info::start_image_provider()
+void rec_info::start_video_provider()
{
const bool was_paused = paused.exchange(false);
utils::video_provider& video_provider = g_fxo->get();
- if (image_provider_thread && was_paused)
+ if (video_provider_thread && was_paused)
{
// Resume
const u64 pause_time_end = get_system_time();
ensure(pause_time_end > pause_time_start);
pause_time_total += (pause_time_end - pause_time_start);
video_provider.set_pause_time(pause_time_total / 1000);
- cellRec.notice("Resuming image provider.");
+ cellRec.notice("Resuming video provider.");
return;
}
- cellRec.notice("Starting image provider.");
+ cellRec.notice("Starting video provider.");
recording_time_start = get_system_time();
pause_time_total = 0;
video_provider.set_pause_time(0);
- image_provider_thread = std::make_unique>>("cellRec Image Provider", [this]()
+ video_provider_thread = std::make_unique>>("cellRec video provider", [this]()
{
const bool use_internal_audio = param.audio_input == CELL_REC_PARAM_AUDIO_INPUT_DISABLE || param.audio_input_mix_vol < 100;
const bool use_external_audio = param.audio_input != CELL_REC_PARAM_AUDIO_INPUT_DISABLE && param.audio_input_mix_vol > 0;
@@ -537,7 +529,7 @@ void rec_info::start_image_provider()
const bool use_ring_buffer = param.ring_sec > 0;
const usz frame_size = input_format.pitch * input_format.height;
- cellRec.notice("image_provider_thread: use_ring_buffer=%d, video_ringbuffer_size=%d, audio_ringbuffer_size=%d, ring_sec=%d, frame_size=%d, use_external_video=%d, use_external_audio=%d, use_internal_audio=%d", use_ring_buffer, video_ringbuffer.size(), audio_ringbuffer.size(), param.ring_sec, frame_size, use_external_video, use_external_audio, use_internal_audio);
+ cellRec.notice("video_provider_thread: use_ring_buffer=%d, video_ringbuffer_size=%d, audio_ringbuffer_size=%d, ring_sec=%d, frame_size=%d, use_external_video=%d, use_external_audio=%d, use_internal_audio=%d", use_ring_buffer, video_ringbuffer.size(), audio_ringbuffer.size(), param.ring_sec, frame_size, use_external_video, use_external_audio, use_internal_audio);
while (thread_ctrl::state() != thread_state::aborting && encoder)
{
@@ -575,7 +567,7 @@ void rec_info::start_image_provider()
{
if (use_ring_buffer)
{
- utils::image_sink::encoder_frame& frame_data = video_ringbuffer[next_video_ring_pos()];
+ utils::video_sink::encoder_frame& frame_data = video_ringbuffer[next_video_ring_pos()];
frame_data.pts = pts;
frame_data.width = input_format.width;
frame_data.height = input_format.height;
@@ -595,14 +587,14 @@ void rec_info::start_image_provider()
last_pts = pts;
}
}
- else if (use_ring_buffer && image_sink)
+ else if (use_ring_buffer && video_sink)
{
- utils::image_sink::encoder_frame frame = image_sink->get_frame();
+ utils::video_sink::encoder_frame frame = video_sink->get_frame();
if (const s64 pts = encoder->get_pts(frame.timestamp_ms); pts > last_pts && frame.data.size() > 0)
{
ensure(frame.data.size() == frame_size);
- utils::image_sink::encoder_frame& frame_data = video_ringbuffer[next_video_ring_pos()];
+ utils::video_sink::encoder_frame& frame_data = video_ringbuffer[next_video_ring_pos()];
frame_data = std::move(frame);
frame_data.pts = pts;
last_pts = pts;
@@ -635,34 +627,34 @@ void rec_info::start_image_provider()
}
// Update recording time
- recording_time_total = encoder->get_timestamp_ms(encoder->last_pts());
+ recording_time_total = encoder->get_timestamp_ms(encoder->last_video_pts());
thread_ctrl::wait_for(100);
}
});
}
-void rec_info::pause_image_provider()
+void rec_info::pause_video_provider()
{
-	cellRec.notice("Pausing image provider.");
+	cellRec.notice("Pausing video provider.");
- if (image_provider_thread)
+ if (video_provider_thread)
{
paused = true;
pause_time_start = get_system_time();
}
}
-void rec_info::stop_image_provider(bool flush)
+void rec_info::stop_video_provider(bool flush)
{
- cellRec.notice("Stopping image provider.");
+ cellRec.notice("Stopping video provider.");
- if (image_provider_thread)
+ if (video_provider_thread)
{
- auto& thread = *image_provider_thread;
+ auto& thread = *video_provider_thread;
thread = thread_state::aborting;
thread();
- image_provider_thread.reset();
+ video_provider_thread.reset();
}
if (flush && param.ring_sec > 0 && !video_ringbuffer.empty())
@@ -680,7 +672,7 @@ void rec_info::stop_image_provider(bool flush)
for (usz i = 0; i < frame_count; i++)
{
const usz pos = (start_offset + i) % video_ringbuffer.size();
- utils::image_sink::encoder_frame& frame_data = video_ringbuffer[pos];
+ utils::video_sink::encoder_frame& frame_data = video_ringbuffer[pos];
encoder->add_frame(frame_data.data, frame_data.pitch, frame_data.width, frame_data.height, frame_data.av_pixel_format, encoder->get_timestamp_ms(frame_data.pts - start_pts));
// TODO: add audio data to encoder
@@ -1073,7 +1065,7 @@ error_code cellRecOpen(vm::cptr pDirName, vm::cptr pFileName, vm::cp
rec.audio_ringbuffer.resize(audio_ring_buffer_size);
rec.audio_ring_step = audio_size_per_sample;
rec.video_ringbuffer.resize(video_ring_buffer_size, {});
- rec.image_sink = std::make_shared();
+ rec.video_sink = std::make_shared();
}
rec.encoder = std::make_shared();
@@ -1082,6 +1074,7 @@ error_code cellRecOpen(vm::cptr pDirName, vm::cptr pFileName, vm::cp
rec.encoder->set_video_bitrate(rec.video_bps);
rec.encoder->set_video_codec(rec.video_codec_id);
rec.encoder->set_sample_rate(rec.sample_rate);
+ rec.encoder->set_audio_channels(rec.channels);
rec.encoder->set_audio_bitrate(rec.audio_bps);
rec.encoder->set_audio_codec(rec.audio_codec_id);
rec.encoder->set_output_format(rec.output_format);
@@ -1114,12 +1107,12 @@ error_code cellRecClose(s32 isDiscard)
if (isDiscard)
{
// No need to flush
- rec.stop_image_provider(false);
+ rec.stop_video_provider(false);
rec.encoder->stop(false);
- if (rec.image_sink)
+ if (rec.video_sink)
{
- rec.image_sink->stop(false);
+ rec.video_sink->stop(false);
}
if (fs::is_file(rec.param.filename))
@@ -1135,18 +1128,18 @@ error_code cellRecClose(s32 isDiscard)
else
{
// Flush to make sure we encode all remaining frames
- rec.stop_image_provider(true);
+ rec.stop_video_provider(true);
rec.encoder->stop(true);
- rec.recording_time_total = rec.encoder->get_timestamp_ms(rec.encoder->last_pts());
+ rec.recording_time_total = rec.encoder->get_timestamp_ms(rec.encoder->last_video_pts());
- if (rec.image_sink)
+ if (rec.video_sink)
{
- rec.image_sink->stop(true);
+ rec.video_sink->stop(true);
}
const s64 start_pts = rec.encoder->get_pts(rec.param.scene_metadata.start_time);
const s64 end_pts = rec.encoder->get_pts(rec.param.scene_metadata.end_time);
- const s64 last_pts = rec.encoder->last_pts();
+ const s64 last_pts = rec.encoder->last_video_pts();
is_valid_range = start_pts >= 0 && end_pts <= last_pts;
}
@@ -1157,7 +1150,7 @@ error_code cellRecClose(s32 isDiscard)
g_fxo->need();
utils::video_provider& video_provider = g_fxo->get();
- // Release the image sink if it was used
+ // Release the video sink if it was used
if (rec.param.video_input == CELL_REC_PARAM_VIDEO_INPUT_DISABLE)
{
const recording_mode old_mode = g_recording_mode.exchange(recording_mode::stopped);
@@ -1167,15 +1160,15 @@ error_code cellRecClose(s32 isDiscard)
cellRec.error("cellRecClose: Unexpected recording mode %s found while stopping video capture.", old_mode);
}
- if (!video_provider.set_image_sink(nullptr, recording_mode::cell))
+ if (!video_provider.set_video_sink(nullptr, recording_mode::cell))
{
- cellRec.error("cellRecClose failed to release image sink");
+ cellRec.error("cellRecClose failed to release video sink");
}
}
rec.param = {};
rec.encoder.reset();
- rec.image_sink.reset();
+ rec.video_sink.reset();
rec.audio_ringbuffer.clear();
rec.video_ringbuffer.clear();
rec.state = rec_state::closed;
@@ -1207,7 +1200,7 @@ error_code cellRecStop()
sysutil_register_cb([&rec](ppu_thread& ppu) -> s32
{
- // Disable image sink if it was used
+ // Disable video sink if it was used
if (rec.param.video_input == CELL_REC_PARAM_VIDEO_INPUT_DISABLE)
{
const recording_mode old_mode = g_recording_mode.exchange(recording_mode::stopped);
@@ -1219,12 +1212,12 @@ error_code cellRecStop()
}
// cellRecStop actually just pauses the recording
- rec.pause_image_provider();
+ rec.pause_video_provider();
ensure(!!rec.encoder);
rec.encoder->pause(true);
- rec.recording_time_total = rec.encoder->get_timestamp_ms(rec.encoder->last_pts());
+ rec.recording_time_total = rec.encoder->get_timestamp_ms(rec.encoder->last_video_pts());
rec.state = rec_state::stopped;
rec.cb(ppu, CELL_REC_STATUS_STOP, CELL_OK, rec.cbUserData);
@@ -1254,15 +1247,15 @@ error_code cellRecStart()
g_fxo->need();
utils::video_provider& video_provider = g_fxo->get();
- // Setup an image sink if it is needed
+ // Setup a video sink if it is needed
if (rec.param.video_input == CELL_REC_PARAM_VIDEO_INPUT_DISABLE)
{
if (rec.param.ring_sec <= 0)
{
// Regular recording
- if (!video_provider.set_image_sink(rec.encoder, recording_mode::cell))
+ if (!video_provider.set_video_sink(rec.encoder, recording_mode::cell))
{
- cellRec.error("Failed to set image sink");
+ cellRec.error("Failed to set video sink");
rec.cb(ppu, CELL_REC_STATUS_ERR, CELL_REC_ERROR_FATAL, rec.cbUserData);
return CELL_OK;
}
@@ -1270,9 +1263,9 @@ error_code cellRecStart()
else
{
// Ringbuffer recording
- if (!video_provider.set_image_sink(rec.image_sink, recording_mode::cell))
+ if (!video_provider.set_video_sink(rec.video_sink, recording_mode::cell))
{
- cellRec.error("Failed to set image sink");
+ cellRec.error("Failed to set video sink");
rec.cb(ppu, CELL_REC_STATUS_ERR, CELL_REC_ERROR_FATAL, rec.cbUserData);
return CELL_OK;
}
@@ -1287,7 +1280,7 @@ error_code cellRecStart()
g_recording_mode = recording_mode::stopped;
}
- rec.start_image_provider();
+ rec.start_video_provider();
if (rec.encoder->has_error)
{
diff --git a/rpcs3/Emu/Io/recording_config.h b/rpcs3/Emu/Io/recording_config.h
index f1e2e58242..e08e73acfe 100644
--- a/rpcs3/Emu/Io/recording_config.h
+++ b/rpcs3/Emu/Io/recording_config.h
@@ -8,14 +8,31 @@ struct cfg_recording final : cfg::node
bool load();
void save() const;
- cfg::uint<0, 60> framerate{this, "Framerate", 30};
- cfg::uint<0, 7680> width{this, "Width", 1280};
- cfg::uint<0, 4320> height{this, "Height", 720};
- cfg::uint<0, 192> pixel_format{this, "AVPixelFormat", 0}; // AVPixelFormat::AV_PIX_FMT_YUV420P
- cfg::uint<0, 32813> video_codec{this, "AVCodecID", 12}; // AVCodecID::AV_CODEC_ID_MPEG4
- cfg::uint<0, 25000000> video_bps{this, "Video Bitrate", 4000000};
- cfg::uint<0, 5> max_b_frames{this, "Max B-Frames", 2};
- cfg::uint<0, 20> gop_size{this, "Group of Pictures Size", 12};
+ struct node_video : cfg::node
+ {
+ node_video(cfg::node* _this) : cfg::node(_this, "Video") {}
+
+ cfg::uint<0, 60> framerate{this, "Framerate", 30};
+ cfg::uint<0, 7680> width{this, "Width", 1280};
+ cfg::uint<0, 4320> height{this, "Height", 720};
+ cfg::uint<0, 192> pixel_format{this, "AVPixelFormat", 0}; // AVPixelFormat::AV_PIX_FMT_YUV420P
+ cfg::uint<0, 0xFFFF> video_codec{this, "AVCodecID", 12}; // AVCodecID::AV_CODEC_ID_MPEG4
+ cfg::uint<0, 25000000> video_bps{this, "Video Bitrate", 4000000};
+ cfg::uint<0, 5> max_b_frames{this, "Max B-Frames", 2};
+ cfg::uint<0, 20> gop_size{this, "Group of Pictures Size", 12};
+
+ } video{ this };
+
+ struct node_audio : cfg::node
+ {
+ node_audio(cfg::node* _this) : cfg::node(_this, "Audio") {}
+
+ cfg::uint<0x10000, 0x17000> audio_codec{this, "AVCodecID", 86019}; // AVCodecID::AV_CODEC_ID_AC3
+ cfg::uint<0, 8> channels{this, "Channels", 2};
+ cfg::uint<0, 25000000> audio_bps{this, "Audio Bitrate", 320000};
+ cfg::uint<0, 25000000> sample_rate{this, "Sample Rate", 48000};
+
+ } audio{ this };
const std::string path;
};
diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj
index cc7c60f51d..0d7a9a74ea 100644
--- a/rpcs3/emucore.vcxproj
+++ b/rpcs3/emucore.vcxproj
@@ -618,7 +618,7 @@
-
+
diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters
index 5a11ef535f..447430681e 100644
--- a/rpcs3/emucore.vcxproj.filters
+++ b/rpcs3/emucore.vcxproj.filters
@@ -2275,7 +2275,7 @@
Utilities
-
+
Utilities
diff --git a/rpcs3/rpcs3qt/gs_frame.cpp b/rpcs3/rpcs3qt/gs_frame.cpp
index f14213cb52..31b81518e5 100644
--- a/rpcs3/rpcs3qt/gs_frame.cpp
+++ b/rpcs3/rpcs3qt/gs_frame.cpp
@@ -12,6 +12,7 @@
#include "Emu/IdManager.h"
#include "Emu/Cell/Modules/cellScreenshot.h"
#include "Emu/Cell/Modules/cellVideoOut.h"
+#include "Emu/Cell/Modules/cellAudio.h"
#include "Emu/RSX/rsx_utils.h"
#include "Emu/RSX/Overlays/overlay_message.h"
#include "Emu/Io/recording_config.h"
@@ -445,9 +446,9 @@ void gs_frame::toggle_recording()
{
m_video_encoder->stop();
- if (!video_provider.set_image_sink(nullptr, recording_mode::rpcs3))
+ if (!video_provider.set_video_sink(nullptr, recording_mode::rpcs3))
{
- gui_log.warning("The video provider could not release the image sink. A sink with higher priority must have been set.");
+ gui_log.warning("The video provider could not release the video sink. A sink with higher priority must have been set.");
}
// Play a sound
@@ -489,21 +490,23 @@ void gs_frame::toggle_recording()
video_path += "recording_" + date_time::current_time_narrow<'_'>() + ".mp4";
utils::video_encoder::frame_format output_format{};
- output_format.av_pixel_format = static_cast(g_cfg_recording.pixel_format.get());
- output_format.width = g_cfg_recording.width;
- output_format.height = g_cfg_recording.height;
- output_format.pitch = g_cfg_recording.width * 4;
+ output_format.av_pixel_format = static_cast(g_cfg_recording.video.pixel_format.get());
+ output_format.width = g_cfg_recording.video.width;
+ output_format.height = g_cfg_recording.video.height;
+ output_format.pitch = g_cfg_recording.video.width * 4;
m_video_encoder->set_path(video_path);
- m_video_encoder->set_framerate(g_cfg_recording.framerate);
- m_video_encoder->set_video_bitrate(g_cfg_recording.video_bps);
- m_video_encoder->set_video_codec(g_cfg_recording.video_codec);
- m_video_encoder->set_max_b_frames(g_cfg_recording.max_b_frames);
- m_video_encoder->set_gop_size(g_cfg_recording.gop_size);
+ m_video_encoder->set_framerate(g_cfg_recording.video.framerate);
+ m_video_encoder->set_video_bitrate(g_cfg_recording.video.video_bps);
+ m_video_encoder->set_video_codec(g_cfg_recording.video.video_codec);
+ m_video_encoder->set_max_b_frames(g_cfg_recording.video.max_b_frames);
+ m_video_encoder->set_gop_size(g_cfg_recording.video.gop_size);
m_video_encoder->set_output_format(output_format);
- m_video_encoder->set_sample_rate(0); // TODO
- m_video_encoder->set_audio_bitrate(0); // TODO
- m_video_encoder->set_audio_codec(0); // TODO
+ m_video_encoder->set_sample_rate(g_cfg_recording.audio.sample_rate);
+ //m_video_encoder->set_audio_channels(static_cast(g_fxo->get().cfg.backend_ch_cnt));
+ m_video_encoder->set_audio_channels(static_cast(g_fxo->get().cfg.audio_channels));
+ m_video_encoder->set_audio_bitrate(g_cfg_recording.audio.audio_bps);
+ m_video_encoder->set_audio_codec(g_cfg_recording.audio.audio_codec);
m_video_encoder->encode();
if (m_video_encoder->has_error)
@@ -513,9 +516,9 @@ void gs_frame::toggle_recording()
return;
}
- if (!video_provider.set_image_sink(m_video_encoder, recording_mode::rpcs3))
+ if (!video_provider.set_video_sink(m_video_encoder, recording_mode::rpcs3))
{
- gui_log.warning("The video provider could not set the image sink. A sink with higher priority must have been set.");
+ gui_log.warning("The video provider could not set the video sink. A sink with higher priority must have been set.");
rsx::overlays::queue_message(tr("Recording not possible").toStdString());
m_video_encoder->stop();
return;
diff --git a/rpcs3/util/image_sink.h b/rpcs3/util/image_sink.h
deleted file mode 100644
index 3c23eca514..0000000000
--- a/rpcs3/util/image_sink.h
+++ /dev/null
@@ -1,54 +0,0 @@
-#pragma once
-
-#include "util/types.hpp"
-#include "util/atomic.hpp"
-#include "Utilities/mutex.h"
-
-#include
-#include
-
-namespace utils
-{
- class image_sink
- {
- public:
- image_sink() = default;
-
- virtual void stop(bool flush = true) = 0;
- virtual void add_frame(std::vector& frame, u32 pitch, u32 width, u32 height, s32 pixel_format, usz timestamp_ms) = 0;
-
- s64 get_pts(usz timestamp_ms) const
- {
- return static_cast(std::round((timestamp_ms * m_framerate) / 1000.f));
- }
-
- usz get_timestamp_ms(s64 pts) const
- {
- return static_cast(std::round((pts * 1000) / static_cast(m_framerate)));
- }
-
- atomic_t has_error{false};
-
- struct encoder_frame
- {
- encoder_frame() = default;
- encoder_frame(usz timestamp_ms, u32 pitch, u32 width, u32 height, s32 av_pixel_format, std::vector&& data)
- : timestamp_ms(timestamp_ms), pitch(pitch), width(width), height(height), av_pixel_format(av_pixel_format), data(std::move(data))
- {}
-
- s64 pts = -1; // Optional
- usz timestamp_ms = 0;
- u32 pitch = 0;
- u32 width = 0;
- u32 height = 0;
- s32 av_pixel_format = 0; // NOTE: Make sure this is a valid AVPixelFormat
- std::vector data;
- };
-
- protected:
- shared_mutex m_mtx;
- std::deque m_frames_to_encode;
- atomic_t m_flush = false;
- u32 m_framerate = 0;
- };
-}
diff --git a/rpcs3/util/media_utils.cpp b/rpcs3/util/media_utils.cpp
index 3dddd5b0f8..cc6207d369 100644
--- a/rpcs3/util/media_utils.cpp
+++ b/rpcs3/util/media_utils.cpp
@@ -32,6 +32,28 @@ LOG_CHANNEL(media_log, "Media");
namespace utils
{
+ template
+ static inline void write_byteswapped(const u8* src, u8* dst)
+ {
+ *reinterpret_cast(dst) = *reinterpret_cast*>(src);
+ }
+
+ template
+ static inline void copy_samples(const u8* src, u8* dst, usz sample_count, bool swap_endianness)
+ {
+ if (swap_endianness)
+ {
+ for (usz i = 0; i < sample_count; i++)
+ {
+ write_byteswapped(src + i * sizeof(T), dst + i * sizeof(T));
+ }
+ }
+ else
+ {
+ std::memcpy(dst, src, sample_count * sizeof(T));
+ }
+ }
+
template <>
std::string media_info::get_metadata(const std::string& key, const std::string& def) const
{
@@ -204,11 +226,19 @@ namespace utils
struct scoped_av
{
- AVFormatContext* format = nullptr;
- const AVCodec* codec = nullptr;
- AVCodecContext* context = nullptr;
- AVFrame* frame = nullptr;
- AVStream* stream = nullptr;
+ struct ctx
+ {
+ const AVCodec* codec = nullptr;
+ AVCodecContext* context = nullptr;
+ AVStream* stream = nullptr;
+ AVPacket* packet = nullptr;
+ AVFrame* frame = nullptr;
+ };
+
+ ctx audio{};
+ ctx video{};
+
+ AVFormatContext* format_context = nullptr;
SwrContext* swr = nullptr;
SwsContext* sws = nullptr;
std::function kill_callback = nullptr;
@@ -216,21 +246,38 @@ namespace utils
~scoped_av()
{
// Clean up
- if (frame)
+ if (audio.frame)
{
- av_frame_unref(frame);
- av_frame_free(&frame);
+ av_frame_unref(audio.frame);
+ av_frame_free(&audio.frame);
+ }
+ if (video.frame)
+ {
+ av_frame_unref(video.frame);
+ av_frame_free(&video.frame);
+ }
+ if (audio.packet)
+ {
+ av_packet_unref(audio.packet);
+ av_packet_free(&audio.packet);
+ }
+ if (video.packet)
+ {
+ av_packet_unref(video.packet);
+ av_packet_free(&video.packet);
}
if (swr)
swr_free(&swr);
if (sws)
sws_freeContext(sws);
- if (context)
- avcodec_close(context);
+ if (audio.context)
+ avcodec_close(audio.context);
+ if (video.context)
+ avcodec_close(video.context);
// AVCodec is managed by libavformat, no need to free it
// see: https://stackoverflow.com/a/18047320
- if (format)
- avformat_free_context(format);
+ if (format_context)
+ avformat_free_context(format_context);
//if (stream)
// av_free(stream);
if (kill_callback)
@@ -238,6 +285,53 @@ namespace utils
}
};
+ // check that a given sample format is supported by the encoder
+ static bool check_sample_fmt(const AVCodec* codec, enum AVSampleFormat sample_fmt)
+ {
+ for (const AVSampleFormat* p = codec->sample_fmts; p && *p != AV_SAMPLE_FMT_NONE; p++)
+ {
+ if (*p == sample_fmt)
+ {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ // just pick the highest supported samplerate
+ static int select_sample_rate(const AVCodec* codec)
+ {
+ if (!codec->supported_samplerates)
+ return 44100;
+
+ int best_samplerate = 0;
+ for (const int* samplerate = codec->supported_samplerates; samplerate && *samplerate != 0; samplerate++)
+ {
+ if (!best_samplerate || abs(44100 - *samplerate) < abs(44100 - best_samplerate))
+ {
+ best_samplerate = *samplerate;
+ }
+ }
+ return best_samplerate;
+ }
+
+ // select layout with the highest channel count
+ static const AVChannelLayout* select_channel_layout(const AVCodec* codec, int channels)
+ {
+ constexpr AVChannelLayout empty_ch_layout = {};
+
+ for (const AVChannelLayout* ch_layout = codec->ch_layouts;
+ ch_layout && memcmp(ch_layout, &empty_ch_layout, sizeof(AVChannelLayout)) != 0;
+ ch_layout++)
+ {
+ if (ch_layout->nb_channels == channels)
+ {
+ return ch_layout;
+ }
+ }
+ return nullptr;
+ }
+
audio_decoder::audio_decoder()
{
}
@@ -295,14 +389,14 @@ namespace utils
scoped_av av;
// Get format from audio file
- av.format = avformat_alloc_context();
- if (int err = avformat_open_input(&av.format, path.c_str(), nullptr, nullptr); err < 0)
+ av.format_context = avformat_alloc_context();
+ if (int err = avformat_open_input(&av.format_context, path.c_str(), nullptr, nullptr); err < 0)
{
media_log.error("audio_decoder: Could not open file '%s'. Error: %d='%s'", path, err, av_error_to_string(err));
has_error = true;
return;
}
- if (int err = avformat_find_stream_info(av.format, nullptr); err < 0)
+ if (int err = avformat_find_stream_info(av.format_context, nullptr); err < 0)
{
media_log.error("audio_decoder: Could not retrieve stream info from file '%s'. Error: %d='%s'", path, err, av_error_to_string(err));
has_error = true;
@@ -312,11 +406,11 @@ namespace utils
// Find the first audio stream
AVStream* stream = nullptr;
unsigned int stream_index;
- for (stream_index = 0; stream_index < av.format->nb_streams; stream_index++)
+ for (stream_index = 0; stream_index < av.format_context->nb_streams; stream_index++)
{
- if (av.format->streams[stream_index]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
+ if (av.format_context->streams[stream_index]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
{
- stream = av.format->streams[stream_index];
+ stream = av.format_context->streams[stream_index];
break;
}
}
@@ -328,8 +422,8 @@ namespace utils
}
// Find decoder
- av.codec = avcodec_find_decoder(stream->codecpar->codec_id);
- if (!av.codec)
+ av.audio.codec = avcodec_find_decoder(stream->codecpar->codec_id);
+ if (!av.audio.codec)
{
media_log.error("audio_decoder: Failed to find decoder for stream #%u in file '%s'", stream_index, path);
has_error = true;
@@ -337,8 +431,8 @@ namespace utils
}
// Allocate context
- av.context = avcodec_alloc_context3(av.codec);
- if (!av.context)
+ av.audio.context = avcodec_alloc_context3(av.audio.codec);
+ if (!av.audio.context)
{
media_log.error("audio_decoder: Failed to allocate context for stream #%u in file '%s'", stream_index, path);
has_error = true;
@@ -346,7 +440,7 @@ namespace utils
}
// Open decoder
- if (int err = avcodec_open2(av.context, av.codec, nullptr); err < 0)
+ if (int err = avcodec_open2(av.audio.context, av.audio.codec, nullptr); err < 0)
{
media_log.error("audio_decoder: Failed to open decoder for stream #%u in file '%s'. Error: %d='%s'", stream_index, path, err, av_error_to_string(err));
has_error = true;
@@ -389,8 +483,8 @@ namespace utils
}
// Prepare to read data
- av.frame = av_frame_alloc();
- if (!av.frame)
+ av.audio.frame = av_frame_alloc();
+ if (!av.audio.frame)
{
media_log.error("audio_decoder: Error allocating the frame");
has_error = true;
@@ -403,9 +497,9 @@ namespace utils
std::unique_ptr packet_(packet);
// Iterate through frames
- while (thread_ctrl::state() != thread_state::aborting && av_read_frame(av.format, packet) >= 0)
+ while (thread_ctrl::state() != thread_state::aborting && av_read_frame(av.format_context, packet) >= 0)
{
- if (int err = avcodec_send_packet(av.context, packet); err < 0)
+ if (int err = avcodec_send_packet(av.audio.context, packet); err < 0)
{
media_log.error("audio_decoder: Queuing error: %d='%s'", err, av_error_to_string(err));
has_error = true;
@@ -414,7 +508,7 @@ namespace utils
while (thread_ctrl::state() != thread_state::aborting)
{
- if (int err = avcodec_receive_frame(av.context, av.frame); err < 0)
+ if (int err = avcodec_receive_frame(av.audio.context, av.audio.frame); err < 0)
{
if (err == AVERROR(EAGAIN) || err == averror_eof)
break;
@@ -427,7 +521,7 @@ namespace utils
// Resample frames
u8* buffer;
const int align = 1;
- const int buffer_size = av_samples_alloc(&buffer, nullptr, dst_channels, av.frame->nb_samples, dst_format, align);
+ const int buffer_size = av_samples_alloc(&buffer, nullptr, dst_channels, av.audio.frame->nb_samples, dst_format, align);
if (buffer_size < 0)
{
media_log.error("audio_decoder: Error allocating buffer: %d='%s'", buffer_size, av_error_to_string(buffer_size));
@@ -435,7 +529,7 @@ namespace utils
return;
}
- const int frame_count = swr_convert(av.swr, &buffer, av.frame->nb_samples, const_cast(av.frame->data), av.frame->nb_samples);
+ const int frame_count = swr_convert(av.swr, &buffer, av.audio.frame->nb_samples, const_cast(av.audio.frame->data), av.audio.frame->nb_samples);
if (frame_count < 0)
{
media_log.error("audio_decoder: Error converting frame: %d='%s'", frame_count, av_error_to_string(frame_count));
@@ -450,25 +544,10 @@ namespace utils
std::scoped_lock lock(m_mtx);
data.resize(m_size + buffer_size);
- if (m_swap_endianness)
- {
- // The format is float 32bit per channel.
- const auto write_byteswapped = [](const void* src, void* dst) -> void
- {
- *static_cast(dst) = *static_cast*>(src);
- };
+ // The format is float 32bit per channel.
+ copy_samples(buffer, &data[m_size], buffer_size / sizeof(f32), m_swap_endianness);
- for (size_t i = 0; i < (buffer_size - sizeof(f32)); i += sizeof(f32))
- {
- write_byteswapped(buffer + i, data.data() + m_size + i);
- }
- }
- else
- {
- memcpy(&data[m_size], buffer, buffer_size);
- }
-
- const s64 timestamp_ms = stream->time_base.den ? (1000 * av.frame->best_effort_timestamp * stream->time_base.num) / stream->time_base.den : 0;
+ const s64 timestamp_ms = stream->time_base.den ? (1000 * av.audio.frame->best_effort_timestamp * stream->time_base.num) / stream->time_base.den : 0;
timestamps_ms.push_back({m_size, timestamp_ms});
m_size += buffer_size;
}
@@ -476,7 +555,7 @@ namespace utils
if (buffer)
av_free(buffer);
- media_log.notice("audio_decoder: decoded frame_count=%d buffer_size=%d timestamp_us=%d", frame_count, buffer_size, av.frame->best_effort_timestamp);
+ media_log.notice("audio_decoder: decoded frame_count=%d buffer_size=%d timestamp_us=%d", frame_count, buffer_size, av.audio.frame->best_effort_timestamp);
}
}
};
@@ -535,7 +614,7 @@ namespace utils
}
video_encoder::video_encoder()
- : utils::image_sink()
+ : utils::video_sink()
{
}
@@ -549,9 +628,9 @@ namespace utils
return m_path;
}
- s64 video_encoder::last_pts() const
+ s64 video_encoder::last_video_pts() const
{
- return m_last_pts;
+ return m_last_video_pts;
}
void video_encoder::set_path(const std::string& path)
@@ -594,6 +673,11 @@ namespace utils
m_sample_rate = sample_rate;
}
+ void video_encoder::set_audio_channels(u32 channels)
+ {
+ m_channels = channels;
+ }
+
void video_encoder::set_audio_bitrate(u32 bitrate)
{
m_audio_bitrate_bps = bitrate;
@@ -604,16 +688,6 @@ namespace utils
m_audio_codec_id = codec_id;
}
- void video_encoder::add_frame(std::vector& frame, u32 pitch, u32 width, u32 height, s32 pixel_format, usz timestamp_ms)
- {
- // Do not allow new frames while flushing
- if (m_flush)
- return;
-
- std::lock_guard lock(m_mtx);
- m_frames_to_encode.emplace_back(timestamp_ms, pitch, width, height, pixel_format, std::move(frame));
- }
-
void video_encoder::pause(bool flush)
{
if (m_thread)
@@ -658,6 +732,7 @@ namespace utils
std::lock_guard lock(m_mtx);
m_frames_to_encode.clear();
+ m_samples_to_encode.clear();
has_error = false;
m_flush = false;
m_paused = false;
@@ -675,7 +750,8 @@ namespace utils
return;
}
- m_last_pts = 0;
+ m_last_audio_pts = 0;
+ m_last_video_pts = 0;
stop();
@@ -692,7 +768,21 @@ namespace utils
{
m_running = true;
- // TODO: audio encoding
+ av_log_set_callback([](void* avcl, int level, const char* fmt, va_list vl) -> void
+ {
+ constexpr int line_size = 1024;
+ char line[line_size]{};
+ int print_prefix = 1;
+
+ if (int err = av_log_format_line2(avcl, level, fmt, vl, line, line_size, &print_prefix); err < 0)
+ {
+ media_log.error("av_log: av_log_format_line2 failed. Error: %d='%s'", err, av_error_to_string(err));
+ return;
+ }
+
+ media_log.error("av_log: %s", line);
+ });
+ av_log_set_level(AV_LOG_TRACE);
// Reset variables at all costs
scoped_av av;
@@ -702,38 +792,38 @@ namespace utils
m_running = false;
};
- const AVPixelFormat out_format = static_cast(m_out_format.av_pixel_format);
- const char* av_output_format = nullptr;
-
- const auto find_format = [&](const AVCodec* codec) -> const char*
+ // Let's list the encoders first
+ std::vector audio_codecs;
+ std::vector video_codecs;
+ void* opaque = nullptr;
+ while (const AVCodec* codec = av_codec_iterate(&opaque))
{
- if (!codec)
- return nullptr;
+ if (codec->type == AVMediaType::AVMEDIA_TYPE_AUDIO)
+ {
+ media_log.notice("video_encoder: Found audio codec %d = %s", static_cast(codec->id), codec->name);
+ audio_codecs.push_back(codec);
+ }
+ else if (codec->type == AVMediaType::AVMEDIA_TYPE_VIDEO)
+ {
+ media_log.notice("video_encoder: Found video codec %d = %s", static_cast(codec->id), codec->name);
+ video_codecs.push_back(codec);
+ }
+ }
+ const AVPixelFormat out_pix_format = static_cast(m_out_format.av_pixel_format);
+
+ const auto find_format = [&](AVCodecID video_codec, AVCodecID audio_codec) -> const AVOutputFormat*
+ {
// Try to find a preferable output format
std::vector oformats;
void* opaque = nullptr;
for (const AVOutputFormat* oformat = av_muxer_iterate(&opaque); !!oformat; oformat = av_muxer_iterate(&opaque))
{
- if (avformat_query_codec(oformat, codec->id, FF_COMPLIANCE_STRICT) == 1)
+ if (avformat_query_codec(oformat, video_codec, FF_COMPLIANCE_STRICT) == 1 &&
+ avformat_query_codec(oformat, audio_codec, FF_COMPLIANCE_STRICT) == 1)
{
- media_log.notice("video_encoder: Found output format '%s'", oformat->name);
-
- switch (codec->id)
- {
- case AV_CODEC_ID_MPEG4:
- if (strcmp(oformat->name, "avi") == 0)
- return oformat->name;
- break;
- case AV_CODEC_ID_H264:
- case AV_CODEC_ID_MJPEG:
- // TODO
- break;
- default:
- break;
- }
-
+ media_log.notice("video_encoder: Found output format '%s' (video_codec=%d, audio_codec=%d)", oformat->name, static_cast(video_codec), static_cast(audio_codec));
oformats.push_back(oformat);
}
}
@@ -742,168 +832,294 @@ namespace utils
if (!oformats.empty() && oformats.front())
{
const AVOutputFormat* oformat = oformats.front();
- media_log.notice("video_encoder: Falling back to output format '%s'", oformat->name);
- return oformat->name;
+ media_log.notice("video_encoder: Falling back to output format '%s' (video_codec=%d, audio_codec=%d)", oformat->name, static_cast(video_codec), static_cast(audio_codec));
+ return oformat;
}
return nullptr;
};
- AVCodecID used_codec = static_cast(m_video_codec_id);
+ const AVOutputFormat* out_format = find_format(static_cast(m_video_codec_id), static_cast(m_audio_codec_id));
- // Find specified codec first
- if (const AVCodec* encoder = avcodec_find_encoder(used_codec); !!encoder)
+ if (out_format)
{
- media_log.success("video_encoder: Found requested video_codec %d = %s", static_cast(used_codec), encoder->name);
- av_output_format = find_format(encoder);
-
- if (av_output_format)
- {
- media_log.success("video_encoder: Found requested output format '%s'", av_output_format);
- }
- else
- {
- media_log.error("video_encoder: Could not find a format for the requested video_codec %d = %s", static_cast(used_codec), encoder->name);
- }
+ media_log.success("video_encoder: Found requested output format '%s'", out_format->name);
}
else
{
- media_log.error("video_encoder: Could not find requested video_codec %d", static_cast(used_codec));
- }
+ media_log.error("video_encoder: Could not find a format for the requested video_codec %d and audio_codec %d", m_video_codec_id, m_audio_codec_id);
- // Fallback to some other codec
- if (!av_output_format)
- {
- void* opaque = nullptr;
- for (const AVCodec* codec = av_codec_iterate(&opaque); !!codec; codec = av_codec_iterate(&opaque))
+ // Fallback to some other codec
+ for (const AVCodec* video_codec : video_codecs)
{
- if (av_codec_is_encoder(codec))
+ for (const AVCodec* audio_codec : audio_codecs)
{
- media_log.notice("video_encoder: Found video_codec %d = %s", static_cast(codec->id), codec->name);
- av_output_format = find_format(codec);
+ out_format = find_format(video_codec->id, audio_codec->id);
- if (av_output_format)
+ if (out_format)
{
- media_log.success("video_encoder: Found fallback output format '%s'", av_output_format);
+ media_log.success("video_encoder: Found fallback output format '%s'", out_format->name);
break;
}
}
+
+ if (out_format)
+ {
+ break;
+ }
}
}
- if (!av_output_format)
+ if (!out_format)
{
media_log.error("video_encoder: Could not find any output format");
has_error = true;
return;
}
- if (int err = avformat_alloc_output_context2(&av.format, nullptr, av_output_format, path.c_str()); err < 0)
+ if (int err = avformat_alloc_output_context2(&av.format_context, out_format, nullptr, nullptr); err < 0)
{
- media_log.error("video_encoder: avformat_alloc_output_context2 failed. Error: %d='%s'", err, av_error_to_string(err));
+ media_log.error("video_encoder: avformat_alloc_output_context2 for '%s' failed. Error: %d='%s'", out_format->name, err, av_error_to_string(err));
has_error = true;
return;
}
- if (!av.format)
+ if (!av.format_context)
{
media_log.error("video_encoder: avformat_alloc_output_context2 failed");
has_error = true;
return;
}
- if (!(av.codec = avcodec_find_encoder(av.format->oformat->video_codec)))
+ const auto create_context = [this, &av](AVCodecID codec_id, bool is_video) -> bool
+ {
+ const std::string type = is_video ? "video" : "audio";
+ scoped_av::ctx& ctx = is_video ? av.video : av.audio;
+
+ if (is_video)
+ {
+ if (!(ctx.codec = avcodec_find_encoder(av.format_context->oformat->video_codec)))
+ {
+ media_log.error("video_encoder: avcodec_find_encoder for video failed. video_codec=%d", static_cast(av.format_context->oformat->video_codec));
+ return false;
+ }
+ }
+ else
+ {
+ if (!(ctx.codec = avcodec_find_encoder(av.format_context->oformat->audio_codec)))
+ {
+ media_log.error("video_encoder: avcodec_find_encoder for audio failed. audio_codec=%d", static_cast(av.format_context->oformat->audio_codec));
+ return false;
+ }
+ }
+
+ if (!(ctx.stream = avformat_new_stream(av.format_context, nullptr)))
+ {
+ media_log.error("video_encoder: avformat_new_stream for %s failed", type);
+ return false;
+ }
+
+ ctx.stream->id = is_video ? 0 : 1;
+
+ if (!(ctx.context = avcodec_alloc_context3(ctx.codec)))
+ {
+ media_log.error("video_encoder: avcodec_alloc_context3 for %s failed", type);
+ return false;
+ }
+
+ if (av.format_context->oformat->flags & AVFMT_GLOBALHEADER)
+ {
+ ctx.context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
+ }
+
+ return true;
+ };
+
+ if (!create_context(static_cast(m_video_codec_id), true))
{
- media_log.error("video_encoder: avcodec_find_encoder failed");
has_error = true;
return;
}
- if (!(av.stream = avformat_new_stream(av.format, nullptr)))
+ if (!create_context(static_cast(m_audio_codec_id), false))
{
- media_log.error("video_encoder: avformat_new_stream failed");
has_error = true;
return;
}
- av.stream->id = static_cast(av.format->nb_streams - 1);
+ media_log.error("video_encoder: using audio_codec = %d", static_cast(av.format_context->oformat->audio_codec));
+ media_log.error("video_encoder: using sample_rate = %d", m_sample_rate);
+ media_log.error("video_encoder: using audio_bitrate = %d", m_audio_bitrate_bps);
+ media_log.error("video_encoder: using audio channels = %d", m_channels);
+ media_log.error("video_encoder: using video_codec = %d", static_cast(av.format_context->oformat->video_codec));
+ media_log.error("video_encoder: using video_bitrate = %d", m_video_bitrate_bps);
+ media_log.error("video_encoder: using out width = %d", m_out_format.width);
+ media_log.error("video_encoder: using out height = %d", m_out_format.height);
+ media_log.error("video_encoder: using framerate = %d", m_framerate);
+ media_log.error("video_encoder: using gop_size = %d", m_gop_size);
+ media_log.error("video_encoder: using max_b_frames = %d", m_max_b_frames);
- if (!(av.context = avcodec_alloc_context3(av.codec)))
+ // select audio parameters supported by the encoder
+ if (av.audio.context)
{
- media_log.error("video_encoder: avcodec_alloc_context3 failed");
- has_error = true;
- return;
+ if (const AVChannelLayout* ch_layout = select_channel_layout(av.audio.codec, m_channels))
+ {
+ if (int err = av_channel_layout_copy(&av.audio.context->ch_layout, ch_layout); err != 0)
+ {
+ media_log.error("video_encoder: av_channel_layout_copy failed. Error: %d='%s'", err, av_error_to_string(err));
+ has_error = true;
+ return;
+ }
+ }
+ else
+ {
+ media_log.error("video_encoder: select_channel_layout returned nullptr");
+ has_error = true;
+ return;
+ }
+
+ m_sample_rate = select_sample_rate(av.audio.codec);
+
+ av.audio.context->codec_id = av.format_context->oformat->audio_codec;
+ av.audio.context->codec_type = AVMEDIA_TYPE_AUDIO;
+ av.audio.context->bit_rate = m_audio_bitrate_bps;
+ av.audio.context->sample_rate = m_sample_rate;
+ av.audio.context->time_base = {.num = 1, .den = av.audio.context->sample_rate};
+ av.audio.context->sample_fmt = AV_SAMPLE_FMT_FLTP; // AV_SAMPLE_FMT_FLT is not supported in regular AC3
+ av.audio.stream->time_base = av.audio.context->time_base;
+
+ // check that the encoder supports the format
+ if (!check_sample_fmt(av.audio.codec, av.audio.context->sample_fmt))
+ {
+ media_log.error("video_encoder: Audio encoder does not support sample format %s", av_get_sample_fmt_name(av.audio.context->sample_fmt));
+ has_error = true;
+ return;
+ }
+
+ if (int err = avcodec_open2(av.audio.context, av.audio.codec, nullptr); err != 0)
+ {
+ media_log.error("video_encoder: avcodec_open2 for audio failed. Error: %d='%s'", err, av_error_to_string(err));
+ has_error = true;
+ return;
+ }
+
+ if (!(av.audio.packet = av_packet_alloc()))
+ {
+ media_log.error("video_encoder: av_packet_alloc for audio packet failed");
+ has_error = true;
+ return;
+ }
+
+ if (!(av.audio.frame = av_frame_alloc()))
+ {
+ media_log.error("video_encoder: av_frame_alloc for audio frame failed");
+ has_error = true;
+ return;
+ }
+
+ av.audio.frame->format = AV_SAMPLE_FMT_FLTP;
+ av.audio.frame->nb_samples = av.audio.context->frame_size;
+
+ if (int err = av_channel_layout_copy(&av.audio.frame->ch_layout, &av.audio.context->ch_layout); err < 0)
+ {
+ media_log.error("video_encoder: av_channel_layout_copy for audio frame failed. Error: %d='%s'", err, av_error_to_string(err));
+ has_error = true;
+ return;
+ }
+
+ if (int err = av_frame_get_buffer(av.audio.frame, 0); err < 0)
+ {
+ media_log.error("video_encoder: av_frame_get_buffer for audio frame failed. Error: %d='%s'", err, av_error_to_string(err));
+ has_error = true;
+ return;
+ }
+
+ if (int err = avcodec_parameters_from_context(av.audio.stream->codecpar, av.audio.context); err < 0)
+ {
+ media_log.error("video_encoder: avcodec_parameters_from_context for audio failed. Error: %d='%s'", err, av_error_to_string(err));
+ has_error = true;
+ return;
+ }
}
- media_log.notice("video_encoder: using video_codec = %d", static_cast(av.format->oformat->video_codec));
- media_log.notice("video_encoder: using video_bitrate = %d", m_video_bitrate_bps);
- media_log.notice("video_encoder: using out width = %d", m_out_format.width);
- media_log.notice("video_encoder: using out height = %d", m_out_format.height);
- media_log.notice("video_encoder: using framerate = %d", m_framerate);
- media_log.notice("video_encoder: using gop_size = %d", m_gop_size);
- media_log.notice("video_encoder: using max_b_frames = %d", m_max_b_frames);
-
- av.context->codec_id = av.format->oformat->video_codec;
- av.context->bit_rate = m_video_bitrate_bps;
- av.context->width = static_cast(m_out_format.width);
- av.context->height = static_cast(m_out_format.height);
- av.context->time_base = {.num = 1, .den = static_cast(m_framerate)};
- av.context->framerate = {.num = static_cast(m_framerate), .den = 1};
- av.context->pix_fmt = out_format;
- av.context->gop_size = m_gop_size;
- av.context->max_b_frames = m_max_b_frames;
-
- if (av.format->oformat->flags & AVFMT_GLOBALHEADER)
+ // select video parameters supported by the encoder
+ if (av.video.context)
{
- av.context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
+ av.video.context->codec_id = av.format_context->oformat->video_codec;
+ av.video.context->codec_type = AVMEDIA_TYPE_VIDEO;
+ av.video.context->frame_number = 0;
+ av.video.context->bit_rate = m_video_bitrate_bps;
+ av.video.context->width = static_cast(m_out_format.width);
+ av.video.context->height = static_cast(m_out_format.height);
+ av.video.context->time_base = {.num = 1, .den = static_cast(m_framerate)};
+ av.video.context->framerate = {.num = static_cast(m_framerate), .den = 1};
+ av.video.context->pix_fmt = out_pix_format;
+ av.video.context->gop_size = m_gop_size;
+ av.video.context->max_b_frames = m_max_b_frames;
+ av.video.stream->time_base = av.video.context->time_base;
+
+ if (int err = avcodec_open2(av.video.context, av.video.codec, nullptr); err != 0)
+ {
+ media_log.error("video_encoder: avcodec_open2 for video failed. Error: %d='%s'", err, av_error_to_string(err));
+ has_error = true;
+ return;
+ }
+
+ if (!(av.video.packet = av_packet_alloc()))
+ {
+ media_log.error("video_encoder: av_packet_alloc for video packet failed");
+ has_error = true;
+ return;
+ }
+
+ if (!(av.video.frame = av_frame_alloc()))
+ {
+ media_log.error("video_encoder: av_frame_alloc for video frame failed");
+ has_error = true;
+ return;
+ }
+
+ av.video.frame->format = av.video.context->pix_fmt;
+ av.video.frame->width = av.video.context->width;
+ av.video.frame->height = av.video.context->height;
+
+ if (int err = av_frame_get_buffer(av.video.frame, 0); err < 0)
+ {
+ media_log.error("video_encoder: av_frame_get_buffer for video frame failed. Error: %d='%s'", err, av_error_to_string(err));
+ has_error = true;
+ return;
+ }
+
+ if (int err = avcodec_parameters_from_context(av.video.stream->codecpar, av.video.context); err < 0)
+ {
+ media_log.error("video_encoder: avcodec_parameters_from_context for video failed. Error: %d='%s'", err, av_error_to_string(err));
+ has_error = true;
+ return;
+ }
}
- if (int err = avcodec_open2(av.context, av.codec, nullptr); err != 0)
+ media_log.error("video_encoder: av_dump_format");
+ for (u32 i = 0; i < av.format_context->nb_streams; i++)
{
- media_log.error("video_encoder: avcodec_open2 failed. Error: %d='%s'", err, av_error_to_string(err));
- has_error = true;
- return;
+ av_dump_format(av.format_context, i, path.c_str(), 1);
}
- if (!(av.frame = av_frame_alloc()))
+ // open the output file, if needed
+ if (!(av.format_context->flags & AVFMT_NOFILE))
{
- media_log.error("video_encoder: av_frame_alloc failed");
- has_error = true;
- return;
+ if (int err = avio_open(&av.format_context->pb, path.c_str(), AVIO_FLAG_WRITE); err != 0)
+ {
+ media_log.error("video_encoder: avio_open failed. Error: %d='%s'", err, av_error_to_string(err));
+ has_error = true;
+ return;
+ }
}
- av.frame->format = av.context->pix_fmt;
- av.frame->width = av.context->width;
- av.frame->height = av.context->height;
-
- if (int err = av_frame_get_buffer(av.frame, 32); err < 0)
- {
- media_log.error("video_encoder: av_frame_get_buffer failed. Error: %d='%s'", err, av_error_to_string(err));
- has_error = true;
- return;
- }
-
- if (int err = avcodec_parameters_from_context(av.stream->codecpar, av.context); err < 0)
- {
- media_log.error("video_encoder: avcodec_parameters_from_context failed. Error: %d='%s'", err, av_error_to_string(err));
- has_error = true;
- return;
- }
-
- av_dump_format(av.format, 0, path.c_str(), 1);
-
- if (int err = avio_open(&av.format->pb, path.c_str(), AVIO_FLAG_WRITE); err != 0)
- {
- media_log.error("video_encoder: avio_open failed. Error: %d='%s'", err, av_error_to_string(err));
- has_error = true;
- return;
- }
-
- if (int err = avformat_write_header(av.format, nullptr); err < 0)
+ if (int err = avformat_write_header(av.format_context, nullptr); err < 0)
{
media_log.error("video_encoder: avformat_write_header failed. Error: %d='%s'", err, av_error_to_string(err));
- if (int err = avio_close(av.format->pb); err != 0)
+ if (int err = avio_close(av.format_context->pb); err != 0)
{
media_log.error("video_encoder: avio_close failed. Error: %d='%s'", err, av_error_to_string(err));
}
@@ -912,21 +1128,11 @@ namespace utils
return;
}
- const auto flush = [&]()
+ const auto flush = [&](scoped_av::ctx& ctx)
{
- while ((thread_ctrl::state() != thread_state::aborting || m_flush) && !has_error)
+ while ((thread_ctrl::state() != thread_state::aborting || m_flush) && !has_error && ctx.context)
{
- AVPacket* packet = av_packet_alloc();
- std::unique_ptr packet_(packet);
-
- if (!packet)
- {
- media_log.error("video_encoder: av_packet_alloc failed");
- has_error = true;
- return;
- }
-
- if (int err = avcodec_receive_packet(av.context, packet); err < 0)
+ if (int err = avcodec_receive_packet(ctx.context, ctx.packet); err < 0)
{
if (err == AVERROR(EAGAIN) || err == averror_eof)
break;
@@ -936,133 +1142,361 @@ namespace utils
return;
}
- av_packet_rescale_ts(packet, av.context->time_base, av.stream->time_base);
- packet->stream_index = av.stream->index;
+ av_packet_rescale_ts(ctx.packet, ctx.context->time_base, ctx.stream->time_base);
+ ctx.packet->stream_index = ctx.stream->index;
- if (int err = av_interleaved_write_frame(av.format, packet); err < 0)
+ if (int err = av_write_frame(av.format_context, ctx.packet); err < 0)
{
- media_log.error("video_encoder: av_interleaved_write_frame failed. Error: %d='%s'", err, av_error_to_string(err));
+ media_log.error("video_encoder: av_write_frame failed. Error: %d='%s'", err, av_error_to_string(err));
has_error = true;
return;
}
}
};
- s64 last_pts = -1;
+ u32 audio_sample_remainder = 0;
+ s64 last_audio_pts = -1;
+ s64 last_audio_frame_pts = 0;
+ s64 last_video_pts = -1;
+
+ // Allocate audio buffer for our audio frame
+ std::vector audio_samples;
+ u32 audio_samples_sample_count = 0;
+ const bool sample_fmt_is_planar = av.audio.context && av_sample_fmt_is_planar(av.audio.context->sample_fmt) != 0;
+ const int sample_fmt_bytes = av.audio.context ? av_get_bytes_per_sample(av.audio.context->sample_fmt) : 0;
+ ensure(sample_fmt_bytes == sizeof(f32)); // We only support FLT or FLTP for now
+
+ if (av.audio.frame)
+ {
+ audio_samples.resize(av.audio.frame->nb_samples * av.audio.frame->ch_layout.nb_channels * sizeof(f32));
+ last_audio_frame_pts -= av.audio.frame->nb_samples;
+ }
+
+ encoder_sample last_samples;
+ u32 leftover_sample_count = 0;
while ((thread_ctrl::state() != thread_state::aborting || m_flush) && !has_error)
{
+ // Fetch video frame
encoder_frame frame_data;
+ bool got_frame = false;
{
m_mtx.lock();
if (m_frames_to_encode.empty())
{
m_mtx.unlock();
+ }
+ else
+ {
+ frame_data = std::move(m_frames_to_encode.front());
+ m_frames_to_encode.pop_front();
+ m_mtx.unlock();
- if (m_flush)
+ // Calculate presentation timestamp.
+ const s64 pts = get_pts(frame_data.timestamp_ms);
+
+ // We need to skip this frame if it has the same timestamp.
+ if (pts <= last_video_pts)
{
- m_flush = false;
+ media_log.trace("video_encoder: skipping frame. last_pts=%d, pts=%d", last_video_pts, pts);
+ }
+ else if (av.video.context)
+ {
+ media_log.trace("video_encoder: adding new frame. timestamp=%d", frame_data.timestamp_ms);
- if (!m_paused)
+ got_frame = true;
+
+ if (int err = av_frame_make_writable(av.video.frame); err < 0)
{
- // We only stop the thread after a flush if we are not paused
+ media_log.error("video_encoder: av_frame_make_writable failed. Error: %d='%s'", err, av_error_to_string(err));
+ has_error = true;
break;
}
+
+ u8* in_data[4]{};
+ int in_line[4]{};
+
+ const AVPixelFormat in_format = static_cast(frame_data.av_pixel_format);
+
+ if (int ret = av_image_fill_linesizes(in_line, in_format, frame_data.width); ret < 0)
+ {
+ fmt::throw_exception("video_encoder: av_image_fill_linesizes failed (ret=0x%x): %s", ret, utils::av_error_to_string(ret));
+ }
+
+ if (int ret = av_image_fill_pointers(in_data, in_format, frame_data.height, frame_data.data.data(), in_line); ret < 0)
+ {
+ fmt::throw_exception("video_encoder: av_image_fill_pointers failed (ret=0x%x): %s", ret, utils::av_error_to_string(ret));
+ }
+
+ // Update the context in case the frame format has changed
+ av.sws = sws_getCachedContext(av.sws, frame_data.width, frame_data.height, in_format,
+ av.video.context->width, av.video.context->height, out_pix_format, SWS_BICUBIC, nullptr, nullptr, nullptr);
+ if (!av.sws)
+ {
+ media_log.error("video_encoder: sws_getCachedContext failed");
+ has_error = true;
+ break;
+ }
+
+ if (int err = sws_scale(av.sws, in_data, in_line, 0, frame_data.height, av.video.frame->data, av.video.frame->linesize); err < 0)
+ {
+ media_log.error("video_encoder: sws_scale failed. Error: %d='%s'", err, av_error_to_string(err));
+ has_error = true;
+ break;
+ }
+
+ av.video.frame->pts = pts;
+
+ if (int err = avcodec_send_frame(av.video.context, av.video.frame); err < 0)
+ {
+ media_log.error("video_encoder: avcodec_send_frame for video failed. Error: %d='%s'", err, av_error_to_string(err));
+ has_error = true;
+ break;
+ }
+
+ flush(av.video);
+
+ last_video_pts = av.video.frame->pts;
+ m_last_video_pts = last_video_pts;
+ }
+ }
+ }
+
+ // Fetch audio sample
+ encoder_sample sample_data;
+ bool got_sample = false;
+ {
+ m_audio_mtx.lock();
+
+ if (m_samples_to_encode.empty())
+ {
+ m_audio_mtx.unlock();
+ }
+ else
+ {
+ sample_data = std::move(m_samples_to_encode.front());
+ m_samples_to_encode.pop_front();
+ m_audio_mtx.unlock();
+
+ if (sample_data.channels != av.audio.frame->ch_layout.nb_channels)
+ {
+ fmt::throw_exception("video_encoder: Audio sample channel count %d does not match frame channel count %d", sample_data.channels, av.audio.frame->ch_layout.nb_channels);
}
- // We only actually pause after we process all frames
- const u64 sleeptime = m_paused ? 10000 : 1;
- thread_ctrl::wait_for(sleeptime);
- continue;
+ // Calculate presentation timestamp.
+ const s64 pts = get_audio_pts(sample_data.timestamp_us);
+
+ // We need to skip this frame if it has the same timestamp.
+ if (pts <= last_audio_pts)
+ {
+ media_log.error("video_encoder: skipping sample. last_pts=%d, pts=%d", last_audio_pts, pts);
+ }
+ else if (av.audio.context)
+ {
+ media_log.trace("video_encoder: adding new sample. timestamp_us=%d", sample_data.timestamp_us);
+
+ static constexpr bool swap_endianness = false;
+
+ const auto send_frame = [&]()
+ {
+ if (audio_samples_sample_count < static_cast(av.audio.frame->nb_samples))
+ {
+ return;
+ }
+
+ audio_samples_sample_count = 0;
+ got_sample = true;
+
+ if (int err = av_frame_make_writable(av.audio.frame); err < 0)
+ {
+ media_log.error("video_encoder: av_frame_make_writable failed. Error: %d='%s'", err, av_error_to_string(err));
+ has_error = true;
+ return;
+ }
+
+ if (sample_fmt_is_planar)
+ {
+ const int channels = av.audio.frame->ch_layout.nb_channels;
+ const int samples = av.audio.frame->nb_samples;
+
+ for (int ch = 0; ch < channels; ch++)
+ {
+ f32* dst = reinterpret_cast(av.audio.frame->data[ch]);
+
+ for (int sample = 0; sample < samples; sample++)
+ {
+ dst[sample] = *reinterpret_cast(&audio_samples[(sample * channels + ch) * sizeof(f32)]);
+ }
+ }
+ }
+ else
+ {
+ std::memcpy(av.audio.frame->data[0], audio_samples.data(), audio_samples.size());
+ }
+
+ av.audio.frame->pts = last_audio_frame_pts + av.audio.frame->nb_samples;
+
+ if (int err = avcodec_send_frame(av.audio.context, av.audio.frame); err < 0)
+ {
+ media_log.error("video_encoder: avcodec_send_frame failed: %d='%s'", err, av_error_to_string(err));
+ has_error = true;
+ return;
+ }
+
+ flush(av.audio);
+
+ last_audio_frame_pts = av.audio.frame->pts;
+ };
+
+ const auto add_encoder_sample = [&](bool add_new_sample, u32 silence_to_add = 0)
+ {
+ const auto update_last_pts = [&](u32 samples_to_add)
+ {
+ const u32 sample_count = audio_sample_remainder + samples_to_add;
+ const u32 pts_to_add = sample_count / m_samples_per_block;
+ audio_sample_remainder = sample_count % m_samples_per_block;
+ last_audio_pts += pts_to_add;
+ };
+
+ // Copy as many old samples to our audio frame as possible
+ if (leftover_sample_count > 0)
+ {
+ const u32 samples_to_add = std::min(leftover_sample_count, av.audio.frame->nb_samples - audio_samples_sample_count);
+
+ if (samples_to_add > 0)
+ {
+ const u8* src = &last_samples.data[(last_samples.sample_count - leftover_sample_count) * last_samples.channels * sizeof(f32)];
+ u8* dst = &audio_samples[audio_samples_sample_count * last_samples.channels * sizeof(f32)];
+ copy_samples(src, dst, samples_to_add * last_samples.channels, swap_endianness);
+ audio_samples_sample_count += samples_to_add;
+ leftover_sample_count -= samples_to_add;
+ update_last_pts(samples_to_add);
+ }
+
+ if (samples_to_add < leftover_sample_count)
+ {
+ media_log.error("video_encoder: audio frame buffer is already filled entirely by last sample package...");
+ }
+ }
+ else if (silence_to_add > 0)
+ {
+ const u32 samples_to_add = std::min(silence_to_add, av.audio.frame->nb_samples - audio_samples_sample_count);
+
+ if (samples_to_add > 0)
+ {
+ u8* dst = &audio_samples[audio_samples_sample_count * av.audio.frame->ch_layout.nb_channels * sizeof(f32)];
+ std::memset(dst, 0, samples_to_add * sample_data.channels * sizeof(f32));
+ audio_samples_sample_count += samples_to_add;
+ update_last_pts(samples_to_add);
+ }
+ }
+ else if (add_new_sample)
+ {
+ // Copy as many new samples to our audio frame as possible
+ const u32 samples_to_add = std::min(sample_data.sample_count, av.audio.frame->nb_samples - audio_samples_sample_count);
+
+ if (samples_to_add > 0)
+ {
+ const u8* src = sample_data.data.data();
+ u8* dst = &audio_samples[audio_samples_sample_count * sample_data.channels * sizeof(f32)];
+ copy_samples(src, dst, samples_to_add * sample_data.channels, swap_endianness);
+ audio_samples_sample_count += samples_to_add;
+ update_last_pts(samples_to_add);
+ }
+
+ if (samples_to_add < sample_data.sample_count)
+ {
+ // Save this sample package for the next loop if it wasn't fully used.
+ leftover_sample_count = sample_data.sample_count - samples_to_add;
+ }
+ else
+ {
+ // Mark this sample package as fully used.
+ leftover_sample_count = 0;
+ }
+
+ last_samples = std::move(sample_data);
+ }
+
+ send_frame();
+ };
+
+ for (u32 sample = 0; !has_error;)
+ {
+ if (leftover_sample_count > 0)
+ {
+ // Add leftover samples
+ add_encoder_sample(false);
+ }
+ else if (pts > (last_audio_pts + 1))
+ {
+ // Add silence to fill the gap
+ const u32 silence_to_add = pts - (last_audio_pts + 1);
+ add_encoder_sample(false, silence_to_add);
+ }
+ else if (sample == 0)
+ {
+ // Add new samples
+ add_encoder_sample(true);
+ sample++;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ m_last_audio_pts = last_audio_pts;
+ }
+ }
+ }
+
+ if (!got_frame && !got_sample)
+ {
+ if (m_flush)
+ {
+ m_flush = false;
+
+ if (!m_paused)
+ {
+ // We only stop the thread after a flush if we are not paused
+ break;
+ }
}
- frame_data = std::move(m_frames_to_encode.front());
- m_frames_to_encode.pop_front();
-
- m_mtx.unlock();
-
- media_log.trace("video_encoder: adding new frame. timestamp=%d", frame_data.timestamp_ms);
- }
-
- // Calculate presentation timestamp.
- const s64 pts = get_pts(frame_data.timestamp_ms);
-
- // We need to skip this frame if it has the same timestamp.
- if (pts <= last_pts)
- {
- media_log.notice("video_encoder: skipping frame. last_pts=%d, pts=%d", last_pts, pts);
+ // We only actually pause after we process all frames
+ const u64 sleeptime_us = m_paused ? 10000 : 1;
+ thread_ctrl::wait_for(sleeptime_us);
continue;
}
-
- if (int err = av_frame_make_writable(av.frame); err < 0)
- {
- media_log.error("video_encoder: av_frame_make_writable failed. Error: %d='%s'", err, av_error_to_string(err));
- has_error = true;
- break;
- }
-
- u8* in_data[4]{};
- int in_line[4]{};
-
- const AVPixelFormat in_format = static_cast(frame_data.av_pixel_format);
-
- if (int ret = av_image_fill_linesizes(in_line, in_format, frame_data.width); ret < 0)
- {
- fmt::throw_exception("video_encoder: av_image_fill_linesizes failed (ret=0x%x): %s", ret, utils::av_error_to_string(ret));
- }
-
- if (int ret = av_image_fill_pointers(in_data, in_format, frame_data.height, frame_data.data.data(), in_line); ret < 0)
- {
- fmt::throw_exception("video_encoder: av_image_fill_pointers failed (ret=0x%x): %s", ret, utils::av_error_to_string(ret));
- }
-
- // Update the context in case the frame format has changed
- av.sws = sws_getCachedContext(av.sws, frame_data.width, frame_data.height, in_format,
- av.context->width, av.context->height, out_format, SWS_BICUBIC, nullptr, nullptr, nullptr);
- if (!av.sws)
- {
- media_log.error("video_encoder: sws_getCachedContext failed");
- has_error = true;
- break;
- }
-
- if (int err = sws_scale(av.sws, in_data, in_line, 0, frame_data.height, av.frame->data, av.frame->linesize); err < 0)
- {
- media_log.error("video_encoder: sws_scale failed. Error: %d='%s'", err, av_error_to_string(err));
- has_error = true;
- break;
- }
-
- av.frame->pts = pts;
-
- if (int err = avcodec_send_frame(av.context, av.frame); err < 0)
- {
- media_log.error("video_encoder: avcodec_send_frame failed. Error: %d='%s'", err, av_error_to_string(err));
- has_error = true;
- break;
- }
-
- flush();
-
- last_pts = av.frame->pts;
-
- m_last_pts = last_pts;
}
- if (int err = avcodec_send_frame(av.context, nullptr); err != 0)
+ if (av.video.context)
{
- media_log.error("video_encoder: final avcodec_send_frame failed. Error: %d='%s'", err, av_error_to_string(err));
+ if (int err = avcodec_send_frame(av.video.context, nullptr); err != 0)
+ {
+ media_log.error("video_encoder: final avcodec_send_frame for video failed. Error: %d='%s'", err, av_error_to_string(err));
+ }
}
- flush();
+ if (av.audio.context)
+ {
+ if (int err = avcodec_send_frame(av.audio.context, nullptr); err != 0)
+ {
+ media_log.error("video_encoder: final avcodec_send_frame for audio failed. Error: %d='%s'", err, av_error_to_string(err));
+ }
+ }
- if (int err = av_write_trailer(av.format); err != 0)
+ flush(av.video);
+ flush(av.audio);
+
+ if (int err = av_write_trailer(av.format_context); err != 0)
{
media_log.error("video_encoder: av_write_trailer failed. Error: %d='%s'", err, av_error_to_string(err));
}
- if (int err = avio_close(av.format->pb); err != 0)
+ if (int err = avio_close(av.format_context->pb); err != 0)
{
media_log.error("video_encoder: avio_close failed. Error: %d='%s'", err, av_error_to_string(err));
}
diff --git a/rpcs3/util/media_utils.h b/rpcs3/util/media_utils.h
index 2718a80617..8bbea8c7ee 100644
--- a/rpcs3/util/media_utils.h
+++ b/rpcs3/util/media_utils.h
@@ -88,7 +88,7 @@ namespace utils
std::unique_ptr<named_thread<std::function<void()>>> m_thread;
};
- class video_encoder : public utils::image_sink
+ class video_encoder : public utils::video_sink
{
public:
video_encoder();
@@ -108,7 +108,7 @@ namespace utils
};
std::string path() const;
- s64 last_pts() const;
+ s64 last_video_pts() const;
void set_path(const std::string& path);
void set_framerate(u32 framerate);
@@ -118,16 +118,17 @@ namespace utils
void set_max_b_frames(s32 max_b_frames);
void set_gop_size(s32 gop_size);
void set_sample_rate(u32 sample_rate);
+ void set_audio_channels(u32 channels);
void set_audio_bitrate(u32 bitrate);
void set_audio_codec(s32 codec_id);
- void add_frame(std::vector<u8>& frame, u32 pitch, u32 width, u32 height, s32 pixel_format, usz timestamp_ms) override;
void pause(bool flush = true);
void stop(bool flush = true) override;
void encode();
private:
std::string m_path;
- s64 m_last_pts = 0;
+ s64 m_last_audio_pts = 0;
+ s64 m_last_video_pts = 0;
// Thread control
std::unique_ptr<named_thread<std::function<void()>>> m_thread;
@@ -136,14 +137,14 @@ namespace utils
// Video parameters
u32 m_video_bitrate_bps = 0;
- s32 m_video_codec_id = 12; // AV_CODEC_ID_MPEG4;
+ s32 m_video_codec_id = 12; // AV_CODEC_ID_MPEG4
s32 m_max_b_frames = 2;
s32 m_gop_size = 12;
frame_format m_out_format{};
// Audio parameters
- u32 m_sample_rate = 48000;
- u32 m_audio_bitrate_bps = 96000;
- s32 m_audio_codec_id = 86018; // AV_CODEC_ID_AAC
+ u32 m_channels = 2;
+ u32 m_audio_bitrate_bps = 320000;
+ s32 m_audio_codec_id = 86019; // AV_CODEC_ID_AC3
};
}
diff --git a/rpcs3/util/video_provider.cpp b/rpcs3/util/video_provider.cpp
index d919137733..d86da5ecf1 100644
--- a/rpcs3/util/video_provider.cpp
+++ b/rpcs3/util/video_provider.cpp
@@ -34,37 +34,37 @@ namespace utils
g_recording_mode = recording_mode::stopped;
}
- bool video_provider::set_image_sink(std::shared_ptr<image_sink> sink, recording_mode type)
+ bool video_provider::set_video_sink(std::shared_ptr<video_sink> sink, recording_mode type)
{
- media_log.notice("video_provider: setting new image sink. sink=%d, type=%s", !!sink, type);
+ media_log.notice("video_provider: setting new video sink. sink=%d, type=%s", !!sink, type);
if (type == recording_mode::stopped)
{
// Prevent misuse. type is supposed to be a valid state.
- media_log.error("video_provider: cannot set image sink with type %s", type);
+ media_log.error("video_provider: cannot set video sink with type %s", type);
return false;
}
std::lock_guard lock(m_mutex);
- if (m_image_sink)
+ if (m_video_sink)
{
// cell has preference
if (m_type == recording_mode::cell && m_type != type)
{
- media_log.warning("video_provider: cannot set image sink with type %s if type %s is active", type, m_type);
+ media_log.warning("video_provider: cannot set video sink with type %s if type %s is active", type, m_type);
return false;
}
- if (m_type != type || m_image_sink != sink)
+ if (m_type != type || m_video_sink != sink)
{
- media_log.warning("video_provider: stopping current image sink of type %s", m_type);
- m_image_sink->stop();
+ media_log.warning("video_provider: stopping current video sink of type %s", m_type);
+ m_video_sink->stop();
}
}
m_type = sink ? type : recording_mode::stopped;
- m_image_sink = sink;
+ m_video_sink = sink;
if (m_type == recording_mode::stopped)
{
@@ -84,19 +84,17 @@ namespace utils
{
std::lock_guard lock(m_mutex);
- if (!m_image_sink)
+ if (!m_video_sink)
return false;
const usz timestamp_ms = std::chrono::duration_cast<std::chrono::milliseconds>(steady_clock::now() - m_encoder_start).count() - m_pause_time_ms;
- const s64 pts = m_image_sink->get_pts(timestamp_ms);
- return pts > m_last_pts_incoming;
+ const s64 pts = m_video_sink->get_pts(timestamp_ms);
+ return pts > m_last_video_pts_incoming;
}
- void video_provider::present_frame(std::vector<u8>& data, u32 pitch, u32 width, u32 height, bool is_bgra)
+ recording_mode video_provider::check_state()
{
- std::lock_guard lock(m_mutex);
-
- if (!m_image_sink || m_image_sink->has_error)
+ if (!m_video_sink || m_video_sink->has_error)
{
g_recording_mode = recording_mode::stopped;
rsx::overlays::queue_message(localized_string_id::RECORDING_ABORTED);
@@ -105,33 +103,86 @@ namespace utils
if (g_recording_mode == recording_mode::stopped)
{
m_active = false;
- return;
+ return g_recording_mode;
}
if (!m_active.exchange(true))
{
m_current_encoder_frame = 0;
- m_last_pts_incoming = -1;
+ m_last_video_pts_incoming = -1;
+ m_last_audio_pts_incoming = -1;
}
- if (m_current_encoder_frame == 0)
+ if (m_current_encoder_frame == 0 && m_current_encoder_sample == 0)
{
m_encoder_start = steady_clock::now();
}
- // Calculate presentation timestamp.
- const usz timestamp_ms = std::chrono::duration_cast<std::chrono::milliseconds>(steady_clock::now() - m_encoder_start).count() - m_pause_time_ms;
- const s64 pts = m_image_sink->get_pts(timestamp_ms);
+ return g_recording_mode;
+ }
- // We can just skip this frame if it has the same timestamp.
- if (pts <= m_last_pts_incoming)
+ void video_provider::present_frame(std::vector<u8>& data, u32 pitch, u32 width, u32 height, bool is_bgra)
+ {
+ std::lock_guard lock(m_mutex);
+
+ if (check_state() == recording_mode::stopped)
{
return;
}
- m_last_pts_incoming = pts;
+ // Calculate presentation timestamp.
+ const usz timestamp_ms = std::chrono::duration_cast<std::chrono::milliseconds>(steady_clock::now() - m_encoder_start).count() - m_pause_time_ms;
+ const s64 pts = m_video_sink->get_pts(timestamp_ms);
+ // We can just skip this frame if it has the same timestamp.
+ if (pts <= m_last_video_pts_incoming)
+ {
+ return;
+ }
+
+ m_last_video_pts_incoming = pts;
m_current_encoder_frame++;
- m_image_sink->add_frame(data, pitch, width, height, is_bgra ? AVPixelFormat::AV_PIX_FMT_BGRA : AVPixelFormat::AV_PIX_FMT_RGBA, timestamp_ms);
+ m_video_sink->add_frame(data, pitch, width, height, is_bgra ? AVPixelFormat::AV_PIX_FMT_BGRA : AVPixelFormat::AV_PIX_FMT_RGBA, timestamp_ms);
+ }
+
+ bool video_provider::can_consume_sample()
+ {
+ std::lock_guard lock(m_mutex);
+
+ if (!m_video_sink)
+ return false;
+
+ const usz timestamp_us = std::chrono::duration_cast<std::chrono::microseconds>(steady_clock::now() - m_encoder_start).count() - (m_pause_time_ms * 1000ull);
+ const s64 pts = m_video_sink->get_audio_pts(timestamp_us);
+ return pts > m_last_audio_pts_incoming;
+ }
+
+ void video_provider::present_samples(u8* buf, u32 sample_count, u16 channels)
+ {
+ if (!buf || !sample_count || !channels)
+ {
+ return;
+ }
+
+ std::lock_guard lock(m_mutex);
+
+ if (check_state() == recording_mode::stopped)
+ {
+ return;
+ }
+
+ // Calculate presentation timestamp.
+ const usz timestamp_us = std::chrono::duration_cast<std::chrono::microseconds>(steady_clock::now() - m_encoder_start).count() - (m_pause_time_ms * 1000ull);
+ const s64 pts = m_video_sink->get_audio_pts(timestamp_us);
+
+ // We can just skip this sample if it has the same timestamp.
+ if (pts <= m_last_audio_pts_incoming)
+ {
+ return;
+ }
+
+ m_last_audio_pts_incoming = pts;
+ m_current_encoder_sample += sample_count;
+ m_video_sink->add_audio_samples(buf, sample_count, channels, timestamp_us);
}
}
diff --git a/rpcs3/util/video_provider.h b/rpcs3/util/video_provider.h
index 31a051a112..93955ab571 100644
--- a/rpcs3/util/video_provider.h
+++ b/rpcs3/util/video_provider.h
@@ -1,6 +1,6 @@
#pragma once
-#include "image_sink.h"
+#include "video_sink.h"
enum class recording_mode
{
@@ -17,19 +17,27 @@ namespace utils
video_provider() = default;
~video_provider();
- bool set_image_sink(std::shared_ptr<image_sink> sink, recording_mode type);
+ bool set_video_sink(std::shared_ptr<video_sink> sink, recording_mode type);
void set_pause_time(usz pause_time_ms);
+
bool can_consume_frame();
void present_frame(std::vector<u8>& data, u32 pitch, u32 width, u32 height, bool is_bgra);
+ bool can_consume_sample();
+ void present_samples(u8* buf, u32 sample_count, u16 channels);
+
private:
+ recording_mode check_state();
+
recording_mode m_type = recording_mode::stopped;
- std::shared_ptr<image_sink> m_image_sink;
+ std::shared_ptr<video_sink> m_video_sink;
shared_mutex m_mutex{};
atomic_t<bool> m_active{false};
atomic_t<u32> m_current_encoder_frame{0};
+ atomic_t<u32> m_current_encoder_sample{0};
steady_clock::time_point m_encoder_start{};
- s64 m_last_pts_incoming = -1;
+ s64 m_last_video_pts_incoming = -1;
+ s64 m_last_audio_pts_incoming = -1;
usz m_pause_time_ms = 0;
};
diff --git a/rpcs3/util/video_sink.h b/rpcs3/util/video_sink.h
new file mode 100644
index 0000000000..d3d69c82d1
--- /dev/null
+++ b/rpcs3/util/video_sink.h
@@ -0,0 +1,112 @@
+#pragma once
+
+#include "util/types.hpp"
+#include "util/atomic.hpp"
+#include "Utilities/mutex.h"
+
+#include <cmath>
+#include <deque>
+
+namespace utils
+{
+	// Common base for sinks that consume the emulator's video frames and audio samples
+	// (e.g. the ffmpeg video encoder). Producers queue data; the sink drains it asynchronously.
+	class video_sink
+	{
+	public:
+		video_sink() = default;
+
+		virtual void stop(bool flush = true) = 0;
+
+		// Queue one video frame for encoding. The frame buffer is moved into the queue.
+		void add_frame(std::vector<u8>& frame, u32 pitch, u32 width, u32 height, s32 pixel_format, usz timestamp_ms)
+		{
+			// Do not allow new frames while flushing
+			if (m_flush)
+				return;
+
+			std::lock_guard lock(m_mtx);
+			m_frames_to_encode.emplace_back(timestamp_ms, pitch, width, height, pixel_format, std::move(frame));
+		}
+
+		// Queue interleaved f32 audio samples for encoding. The buffer contents are copied.
+		void add_audio_samples(u8* buf, u32 sample_count, u16 channels, usz timestamp_us)
+		{
+			// Do not allow new samples while flushing
+			if (m_flush || !buf || !sample_count || !channels)
+				return;
+
+			std::vector<u8> sample(buf, buf + sample_count * channels * sizeof(f32));
+			std::lock_guard lock(m_audio_mtx);
+			m_samples_to_encode.emplace_back(timestamp_us, sample_count, channels, std::move(sample));
+		}
+
+		// Video pts for a given timestamp: one pts tick per frame at m_framerate.
+		s64 get_pts(usz timestamp_ms) const
+		{
+			return static_cast<s64>(std::round((timestamp_ms * m_framerate) / 1000.f));
+		}
+
+		// Audio pts for a given timestamp: one pts tick per block of m_samples_per_block samples.
+		s64 get_audio_pts(usz timestamp_us) const
+		{
+			static constexpr f32 us_per_sec = 1000000.0f;
+			const f32 us_per_block = us_per_sec / (m_sample_rate / static_cast<f32>(m_samples_per_block));
+			return static_cast<s64>(std::ceil(timestamp_us / us_per_block));
+		}
+
+		// Inverse of get_pts.
+		usz get_timestamp_ms(s64 pts) const
+		{
+			return static_cast<usz>(std::round((pts * 1000) / static_cast<f32>(m_framerate)));
+		}
+
+		// Inverse of get_audio_pts. NOTE(review): audio pts counts blocks of m_samples_per_block
+		// samples, so the conversion must scale by us-per-block, not by 1000/sample_rate.
+		usz get_audio_timestamp_us(s64 pts) const
+		{
+			return static_cast<usz>(std::round(pts * (1000000.f * m_samples_per_block) / static_cast<f32>(m_sample_rate)));
+		}
+
+		atomic_t<bool> has_error{false};
+
+		struct encoder_frame
+		{
+			encoder_frame() = default;
+			encoder_frame(usz timestamp_ms, u32 pitch, u32 width, u32 height, s32 av_pixel_format, std::vector<u8>&& data)
+				: timestamp_ms(timestamp_ms), pitch(pitch), width(width), height(height), av_pixel_format(av_pixel_format), data(std::move(data))
+			{}
+
+			s64 pts = -1; // Optional
+			usz timestamp_ms = 0;
+			u32 pitch = 0;
+			u32 width = 0;
+			u32 height = 0;
+			s32 av_pixel_format = 0; // NOTE: Make sure this is a valid AVPixelFormat
+			std::vector<u8> data;
+		};
+
+		struct encoder_sample
+		{
+			encoder_sample() = default;
+			encoder_sample(usz timestamp_us, u32 sample_count, u16 channels, std::vector<u8>&& data)
+				: timestamp_us(timestamp_us), sample_count(sample_count), channels(channels), data(std::move(data))
+			{
+			}
+
+			usz timestamp_us = 0;
+			u32 sample_count = 0;
+			u16 channels = 0;
+			std::vector<u8> data;
+		};
+
+	protected:
+		shared_mutex m_mtx;
+		std::deque<encoder_frame> m_frames_to_encode;
+		shared_mutex m_audio_mtx;
+		std::deque<encoder_sample> m_samples_to_encode;
+		atomic_t<bool> m_flush = false;
+		u32 m_framerate = 30;
+		u32 m_sample_rate = 48000;
+		static constexpr u32 m_samples_per_block = 256;
+	};
+}