mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-05-30 06:52:52 +00:00
Kernel+Profiler: Track lost time between profiler timer ticks
Profiling timer events can be lost for several reasons, for example disabled interrupts or system slowness. This commit accounts for the time lost between CPU samples by adding a `lost_samples` field to each profiling event, which tracks how many samples were lost immediately preceding that event.
This commit is contained in:
parent
8614d18956
commit
c41f13f10b
Notes:
sideshowbarker
2024-07-18 18:11:50 +09:00
Author: https://github.com/gunnarbeutner
Commit: c41f13f10b
Pull-request: https://github.com/SerenityOS/serenity/pull/7090
Reviewed-by: https://github.com/awesomekling
8 changed files with 30 additions and 11 deletions
|
@ -25,7 +25,7 @@ NEVER_INLINE KResult PerformanceEventBuffer::append(int type, FlatPtr arg1, Flat
|
||||||
FlatPtr ebp;
|
FlatPtr ebp;
|
||||||
asm volatile("movl %%ebp, %%eax"
|
asm volatile("movl %%ebp, %%eax"
|
||||||
: "=a"(ebp));
|
: "=a"(ebp));
|
||||||
return append_with_eip_and_ebp(current_thread->pid(), current_thread->tid(), 0, ebp, type, arg1, arg2, arg3);
|
return append_with_eip_and_ebp(current_thread->pid(), current_thread->tid(), 0, ebp, type, 0, arg1, arg2, arg3);
|
||||||
}
|
}
|
||||||
|
|
||||||
static Vector<FlatPtr, PerformanceEvent::max_stack_frame_count> raw_backtrace(FlatPtr ebp, FlatPtr eip)
|
static Vector<FlatPtr, PerformanceEvent::max_stack_frame_count> raw_backtrace(FlatPtr ebp, FlatPtr eip)
|
||||||
|
@ -55,13 +55,14 @@ static Vector<FlatPtr, PerformanceEvent::max_stack_frame_count> raw_backtrace(Fl
|
||||||
}
|
}
|
||||||
|
|
||||||
KResult PerformanceEventBuffer::append_with_eip_and_ebp(ProcessID pid, ThreadID tid,
|
KResult PerformanceEventBuffer::append_with_eip_and_ebp(ProcessID pid, ThreadID tid,
|
||||||
u32 eip, u32 ebp, int type, FlatPtr arg1, FlatPtr arg2, const StringView& arg3)
|
u32 eip, u32 ebp, int type, u32 lost_samples, FlatPtr arg1, FlatPtr arg2, const StringView& arg3)
|
||||||
{
|
{
|
||||||
if (count() >= capacity())
|
if (count() >= capacity())
|
||||||
return ENOBUFS;
|
return ENOBUFS;
|
||||||
|
|
||||||
PerformanceEvent event;
|
PerformanceEvent event;
|
||||||
event.type = type;
|
event.type = type;
|
||||||
|
event.lost_samples = lost_samples;
|
||||||
|
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case PERF_EVENT_SAMPLE:
|
case PERF_EVENT_SAMPLE:
|
||||||
|
@ -182,6 +183,7 @@ bool PerformanceEventBuffer::to_json_impl(Serializer& object) const
|
||||||
event_object.add("pid", event.pid);
|
event_object.add("pid", event.pid);
|
||||||
event_object.add("tid", event.tid);
|
event_object.add("tid", event.tid);
|
||||||
event_object.add("timestamp", event.timestamp);
|
event_object.add("timestamp", event.timestamp);
|
||||||
|
event_object.add("lost_samples", i != 0 ? event.lost_samples : 0);
|
||||||
auto stack_array = event_object.add_array("stack");
|
auto stack_array = event_object.add_array("stack");
|
||||||
for (size_t j = 0; j < event.stack_size; ++j) {
|
for (size_t j = 0; j < event.stack_size; ++j) {
|
||||||
stack_array.add(event.stack[j]);
|
stack_array.add(event.stack[j]);
|
||||||
|
@ -220,17 +222,17 @@ void PerformanceEventBuffer::add_process(const Process& process, ProcessEventTyp
|
||||||
|
|
||||||
[[maybe_unused]] auto rc = append_with_eip_and_ebp(process.pid(), 0, 0, 0,
|
[[maybe_unused]] auto rc = append_with_eip_and_ebp(process.pid(), 0, 0, 0,
|
||||||
event_type == ProcessEventType::Create ? PERF_EVENT_PROCESS_CREATE : PERF_EVENT_PROCESS_EXEC,
|
event_type == ProcessEventType::Create ? PERF_EVENT_PROCESS_CREATE : PERF_EVENT_PROCESS_EXEC,
|
||||||
process.pid().value(), 0, executable);
|
0, process.pid().value(), 0, executable);
|
||||||
|
|
||||||
process.for_each_thread([&](auto& thread) {
|
process.for_each_thread([&](auto& thread) {
|
||||||
[[maybe_unused]] auto rc = append_with_eip_and_ebp(process.pid(), thread.tid().value(),
|
[[maybe_unused]] auto rc = append_with_eip_and_ebp(process.pid(), thread.tid().value(),
|
||||||
0, 0, PERF_EVENT_THREAD_CREATE, 0, 0, nullptr);
|
0, 0, PERF_EVENT_THREAD_CREATE, 0, 0, 0, nullptr);
|
||||||
return IterationDecision::Continue;
|
return IterationDecision::Continue;
|
||||||
});
|
});
|
||||||
|
|
||||||
for (auto& region : process.space().regions()) {
|
for (auto& region : process.space().regions()) {
|
||||||
[[maybe_unused]] auto rc = append_with_eip_and_ebp(process.pid(), 0,
|
[[maybe_unused]] auto rc = append_with_eip_and_ebp(process.pid(), 0,
|
||||||
0, 0, PERF_EVENT_MMAP, region->range().base().get(), region->range().size(), region->name());
|
0, 0, PERF_EVENT_MMAP, 0, region->range().base().get(), region->range().size(), region->name());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -53,6 +53,7 @@ struct [[gnu::packed]] PerformanceEvent {
|
||||||
u32 pid { 0 };
|
u32 pid { 0 };
|
||||||
u32 tid { 0 };
|
u32 tid { 0 };
|
||||||
u64 timestamp;
|
u64 timestamp;
|
||||||
|
u32 lost_samples;
|
||||||
union {
|
union {
|
||||||
MallocPerformanceEvent malloc;
|
MallocPerformanceEvent malloc;
|
||||||
FreePerformanceEvent free;
|
FreePerformanceEvent free;
|
||||||
|
@ -77,7 +78,7 @@ public:
|
||||||
|
|
||||||
KResult append(int type, FlatPtr arg1, FlatPtr arg2, const StringView& arg3, Thread* current_thread = Thread::current());
|
KResult append(int type, FlatPtr arg1, FlatPtr arg2, const StringView& arg3, Thread* current_thread = Thread::current());
|
||||||
KResult append_with_eip_and_ebp(ProcessID pid, ThreadID tid, u32 eip, u32 ebp,
|
KResult append_with_eip_and_ebp(ProcessID pid, ThreadID tid, u32 eip, u32 ebp,
|
||||||
int type, FlatPtr arg1, FlatPtr arg2, const StringView& arg3);
|
int type, u32 lost_samples, FlatPtr arg1, FlatPtr arg2, const StringView& arg3);
|
||||||
|
|
||||||
void clear()
|
void clear()
|
||||||
{
|
{
|
||||||
|
|
|
@ -34,7 +34,7 @@ public:
|
||||||
if (g_profiling_all_threads) {
|
if (g_profiling_all_threads) {
|
||||||
VERIFY(g_global_perf_events);
|
VERIFY(g_global_perf_events);
|
||||||
[[maybe_unused]] auto rc = g_global_perf_events->append_with_eip_and_ebp(
|
[[maybe_unused]] auto rc = g_global_perf_events->append_with_eip_and_ebp(
|
||||||
process.pid(), 0, 0, 0, PERF_EVENT_PROCESS_EXIT, 0, 0, nullptr);
|
process.pid(), 0, 0, 0, PERF_EVENT_PROCESS_EXIT, 0, 0, 0, nullptr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -52,7 +52,7 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline static void add_cpu_sample_event(Thread& current_thread, const RegisterState& regs)
|
inline static void add_cpu_sample_event(Thread& current_thread, const RegisterState& regs, u32 lost_time)
|
||||||
{
|
{
|
||||||
PerformanceEventBuffer* perf_events = nullptr;
|
PerformanceEventBuffer* perf_events = nullptr;
|
||||||
|
|
||||||
|
@ -67,7 +67,7 @@ public:
|
||||||
if (perf_events) {
|
if (perf_events) {
|
||||||
[[maybe_unused]] auto rc = perf_events->append_with_eip_and_ebp(
|
[[maybe_unused]] auto rc = perf_events->append_with_eip_and_ebp(
|
||||||
current_thread.pid(), current_thread.tid(),
|
current_thread.pid(), current_thread.tid(),
|
||||||
regs.eip, regs.ebp, PERF_EVENT_SAMPLE, 0, 0, nullptr);
|
regs.eip, regs.ebp, PERF_EVENT_SAMPLE, lost_time, 0, 0, nullptr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -87,13 +87,20 @@ public:
|
||||||
|
|
||||||
inline static void timer_tick(RegisterState const& regs)
|
inline static void timer_tick(RegisterState const& regs)
|
||||||
{
|
{
|
||||||
|
static Time last_wakeup;
|
||||||
|
auto now = kgettimeofday();
|
||||||
|
constexpr auto ideal_interval = Time::from_microseconds(1000'000 / OPTIMAL_PROFILE_TICKS_PER_SECOND_RATE);
|
||||||
|
auto expected_wakeup = last_wakeup + ideal_interval;
|
||||||
|
auto delay = (now > expected_wakeup) ? now - expected_wakeup : Time::from_microseconds(0);
|
||||||
|
last_wakeup = now;
|
||||||
auto current_thread = Thread::current();
|
auto current_thread = Thread::current();
|
||||||
// FIXME: We currently don't collect samples while idle.
|
// FIXME: We currently don't collect samples while idle.
|
||||||
// That will be an interesting mode to add in the future. :^)
|
// That will be an interesting mode to add in the future. :^)
|
||||||
if (!current_thread || current_thread == Processor::current().idle_thread())
|
if (!current_thread || current_thread == Processor::current().idle_thread())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
PerformanceManager::add_cpu_sample_event(*current_thread, regs);
|
auto lost_samples = delay.to_microseconds() / ideal_interval.to_microseconds();
|
||||||
|
PerformanceManager::add_cpu_sample_event(*current_thread, regs, lost_samples);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -228,6 +228,7 @@ Result<NonnullOwnPtr<Profile>, String> Profile::load_from_perfcore_file(const St
|
||||||
Event event;
|
Event event;
|
||||||
|
|
||||||
event.timestamp = perf_event.get("timestamp").to_number<u64>();
|
event.timestamp = perf_event.get("timestamp").to_number<u64>();
|
||||||
|
event.lost_samples = perf_event.get("lost_samples").to_number<u32>();
|
||||||
event.type = perf_event.get("type").to_string();
|
event.type = perf_event.get("type").to_string();
|
||||||
event.pid = perf_event.get("pid").to_i32();
|
event.pid = perf_event.get("pid").to_i32();
|
||||||
event.tid = perf_event.get("tid").to_i32();
|
event.tid = perf_event.get("tid").to_i32();
|
||||||
|
|
|
@ -166,6 +166,7 @@ public:
|
||||||
String executable;
|
String executable;
|
||||||
int pid { 0 };
|
int pid { 0 };
|
||||||
int tid { 0 };
|
int tid { 0 };
|
||||||
|
u32 lost_samples { 0 };
|
||||||
bool in_kernel { false };
|
bool in_kernel { false };
|
||||||
Vector<Frame> frames;
|
Vector<Frame> frames;
|
||||||
};
|
};
|
||||||
|
|
|
@ -45,6 +45,8 @@ String SamplesModel::column_name(int column) const
|
||||||
return "TID";
|
return "TID";
|
||||||
case Column::ExecutableName:
|
case Column::ExecutableName:
|
||||||
return "Executable";
|
return "Executable";
|
||||||
|
case Column::LostSamples:
|
||||||
|
return "Lost Samples";
|
||||||
case Column::InnermostStackFrame:
|
case Column::InnermostStackFrame:
|
||||||
return "Innermost Frame";
|
return "Innermost Frame";
|
||||||
default:
|
default:
|
||||||
|
@ -81,6 +83,10 @@ GUI::Variant SamplesModel::data(const GUI::ModelIndex& index, GUI::ModelRole rol
|
||||||
return (u32)event.timestamp;
|
return (u32)event.timestamp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (index.column() == Column::LostSamples) {
|
||||||
|
return event.lost_samples;
|
||||||
|
}
|
||||||
|
|
||||||
if (index.column() == Column::InnermostStackFrame) {
|
if (index.column() == Column::InnermostStackFrame) {
|
||||||
return event.frames.last().symbol;
|
return event.frames.last().symbol;
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,6 +25,7 @@ public:
|
||||||
ProcessID,
|
ProcessID,
|
||||||
ThreadID,
|
ThreadID,
|
||||||
ExecutableName,
|
ExecutableName,
|
||||||
|
LostSamples,
|
||||||
InnermostStackFrame,
|
InnermostStackFrame,
|
||||||
__Count
|
__Count
|
||||||
};
|
};
|
||||||
|
|
|
@ -78,7 +78,7 @@ void TimelineTrack::paint_event(GUI::PaintEvent& event)
|
||||||
}
|
}
|
||||||
|
|
||||||
auto& histogram = event.in_kernel ? kernel_histogram : usermode_histogram;
|
auto& histogram = event.in_kernel ? kernel_histogram : usermode_histogram;
|
||||||
histogram.insert(clamp_timestamp(event.timestamp), 1);
|
histogram.insert(clamp_timestamp(event.timestamp), 1 + event.lost_samples);
|
||||||
}
|
}
|
||||||
|
|
||||||
decltype(kernel_histogram.at(0)) max_value = 0;
|
decltype(kernel_histogram.at(0)) max_value = 0;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue