From bbbc7fb1f2d23e043b4ccf5c7aa59bbff82246a6 Mon Sep 17 00:00:00 2001
From: offtkp
Date: Sat, 18 Nov 2023 15:18:04 +0200
Subject: [PATCH] KVM support

Co-authored-by: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com>
Co-authored-by: hazelwiss
---
 CMakeLists.txt           |   5 +-
 include/cpu.hpp          |   2 +-
 include/cpu_kvm.hpp      | 235 ++++++++++++++++++++++++++++++++++
 include/memory.hpp       |  11 +-
 src/core/CPU/cpu_kvm.cpp | 268 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 517 insertions(+), 4 deletions(-)
 create mode 100644 include/cpu_kvm.hpp
 create mode 100644 src/core/CPU/cpu_kvm.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index df0e2bb8..e68a2a35 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -43,6 +43,7 @@
 option(ENABLE_HTTP_SERVER "Enable HTTP server. Used for Discord bot support" OFF)
 option(ENABLE_DISCORD_RPC "Compile with Discord RPC support (disabled by default)" ON)
 option(ENABLE_LUAJIT "Enable scripting with the Lua programming language" ON)
 option(ENABLE_QT_GUI "Enable the Qt GUI. If not selected then the emulator uses a minimal SDL-based UI instead" OFF)
+option(USE_KVM "Use KVM instead of Dynarmic" OFF)
 option(BUILD_HYDRA_CORE "Build a Hydra core" OFF)
 option(BUILD_LIBRETRO_CORE "Build a Libretro core" OFF)
@@ -155,12 +156,14 @@ else()
     set(HOST_ARM64 FALSE)
 endif()
 
-if(HOST_X64 OR HOST_ARM64)
+if(NOT USE_KVM AND (HOST_X64 OR HOST_ARM64))
     set(DYNARMIC_TESTS OFF)
     #set(DYNARMIC_NO_BUNDLED_FMT ON)
     set(DYNARMIC_FRONTENDS "A32" CACHE STRING "")
     add_subdirectory(third_party/dynarmic)
     add_compile_definitions(CPU_DYNARMIC)
+elseif(USE_KVM AND HOST_ARM64)
+    add_compile_definitions(CPU_KVM)
 else()
     message(FATAL_ERROR "Currently unsupported CPU architecture")
 endif()
diff --git a/include/cpu.hpp b/include/cpu.hpp
index 14800e19..7c8a3f65 100644
--- a/include/cpu.hpp
+++ b/include/cpu.hpp
@@ -3,7 +3,7 @@
 #ifdef CPU_DYNARMIC
 #include "cpu_dynarmic.hpp"
 #elif defined(CPU_KVM)
-#error KVM CPU is not implemented yet
+#include "cpu_kvm.hpp"
 #else
 #error No CPU core implemented :(
 #endif
\ No newline at end of file
diff --git a/include/cpu_kvm.hpp b/include/cpu_kvm.hpp
new file mode 100644
index 00000000..5e3b2491
--- /dev/null
+++ b/include/cpu_kvm.hpp
@@ -0,0 +1,235 @@
+#pragma once
+
+#include <fcntl.h>
+#include <linux/kvm.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <array>
+#include <cstddef>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <span>
+#include <vector>
+
+#include "helpers.hpp"
+#include "kernel.hpp"
+#include "memory.hpp"
+
+#define AARCH64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
+
+struct MmuTables {
+	u32 level1[4096];
+	u32 level2SectionTables[256];
+};
+
+constexpr u32 hypervisorCodeAddress = 0xD0000000;
+constexpr u32 hypervisorDataAddress = 0xE0000000;
+constexpr u32 hypervisorCodeSize = hypervisorDataAddress - hypervisorCodeAddress;
+constexpr u32 hypervisorDataSize = hypervisorCodeSize;
+constexpr u32 mmuTableOffset = hypervisorDataSize - sizeof(MmuTables);
+constexpr u32 mmuTableAddress = hypervisorDataAddress + mmuTableOffset;
+constexpr u32 exitCodeOffset = 0;           // at start of hypervisor data segment
+constexpr u32 customEntryOffset = 0x100000; // arbitrary, far enough that the exit code won't ever overlap with this
+constexpr u32 guestStateOffset = 0x200000;  // also arbitrary, store the guest state here upon exit
+
+struct GuestState
+{
+	std::array<u32, 16> regs;
+	std::array<u32, 64> fprs;
+	u32 cpsr;
+	u32 fpscr;
+	// u32 tlsBase?
+	// u64 ticks?
+};
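+
+// Illustrative helper (an assumption of this sketch, not used elsewhere in the patch):
+// the guest-side exit stub is expected to dump a GuestState at guestStateOffset inside
+// the hypervisor data segment, so the host could read it back out of the mmap'd backing
+// memory roughly like this.
+static inline GuestState readGuestState(const void* hypervisorDataRegion) {
+	GuestState state;
+	memcpy(&state, (const u8*)hypervisorDataRegion + guestStateOffset, sizeof(GuestState));
+	return state;
+}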
+
+struct Environment {
+	Environment(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {
+		u32 currentMemorySlot = 0;
+
+		kvmDescriptor = open("/dev/kvm", O_RDWR);
+		if (kvmDescriptor < 0) {
+			Helpers::panic("Failed to open /dev/kvm");
+		}
+
+		vmDescriptor = ioctl(kvmDescriptor, KVM_CREATE_VM, 0);
+		if (vmDescriptor < 0) {
+			Helpers::panic("Failed to create KVM VM");
+		}
+
+		if (ioctl(vmDescriptor, KVM_CHECK_EXTENSION, KVM_CAP_ARM_EL1_32BIT) <= 0) {
+			Helpers::panic("CPU doesn't support EL1 32-bit mode, KVM won't work on this CPU");
+		}
+
+		// TODO: allocate these with mmap instead of malloc
+		kvm_userspace_memory_region vramRegionDesc = {
+			.slot = currentMemorySlot++,
+			.flags = 0,
+			.guest_phys_addr = PhysicalAddrs::VRAM,
+			.memory_size = PhysicalAddrs::VRAMSize,
+			.userspace_addr = (uint64_t)mem.getVRAM()};
+		if (ioctl(vmDescriptor, KVM_SET_USER_MEMORY_REGION, &vramRegionDesc) < 0) {
+			Helpers::panic("Failed to set VRAM memory region");
+		}
+
+		kvm_userspace_memory_region dspRegionDesc = {
+			.slot = currentMemorySlot++,
+			.flags = 0,
+			.guest_phys_addr = PhysicalAddrs::DSPMem,
+			.memory_size = PhysicalAddrs::DSPMemSize,
+			.userspace_addr = (uint64_t)mem.getDSPMem()};
+		if (ioctl(vmDescriptor, KVM_SET_USER_MEMORY_REGION, &dspRegionDesc) < 0) {
+			Helpers::panic("Failed to set DSP memory region");
+		}
+
+		kvm_userspace_memory_region fcramRegionDesc = {
+			.slot = currentMemorySlot++,
+			.flags = 0,
+			.guest_phys_addr = PhysicalAddrs::FCRAM,
+			.memory_size = PhysicalAddrs::FCRAMSize * 2,
+			.userspace_addr = (uint64_t)mem.getFCRAM()};
+		if (ioctl(vmDescriptor, KVM_SET_USER_MEMORY_REGION, &fcramRegionDesc) < 0) {
+			Helpers::panic("Failed to set FCRAM memory region");
+		}
+
+		hypervisorCodeRegion = mmap(NULL, hypervisorCodeSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
+		if (hypervisorCodeRegion == MAP_FAILED) {
+			Helpers::panic("Failed to allocate memory for the hypervisor code region");
+		}
+
+		kvm_userspace_memory_region hypervisorCodeRegionDesc = {
+			.slot = currentMemorySlot++,
+			.flags = KVM_MEM_READONLY, // We want writes here to cause VM exits
+			.guest_phys_addr = hypervisorCodeAddress,
+			.memory_size = hypervisorCodeSize,
+			.userspace_addr = (uint64_t)hypervisorCodeRegion
+		};
+
+		if (ioctl(vmDescriptor, KVM_SET_USER_MEMORY_REGION, &hypervisorCodeRegionDesc) < 0) {
+			Helpers::panic("Failed to set up hypervisor code memory region\n");
+			return;
+		}
+
+		hypervisorDataRegion = mmap(NULL, hypervisorDataSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+		if (hypervisorDataRegion == MAP_FAILED) {
+			Helpers::panic("Failed to allocate memory for the hypervisor data region");
+		}
+
+		kvm_userspace_memory_region hypervisorDataRegionDesc = {
+			.slot = currentMemorySlot++,
+			.flags = 0,
+			.guest_phys_addr = hypervisorDataAddress,
+			.memory_size = hypervisorDataSize,
+			.userspace_addr = (uint64_t)hypervisorDataRegion
+		};
+
+		if (ioctl(vmDescriptor, KVM_SET_USER_MEMORY_REGION, &hypervisorDataRegionDesc) < 0) {
+			Helpers::panic("Failed to set up hypervisor data memory region\n");
+			return;
+		}
+
+		cpuDescriptor = ioctl(vmDescriptor, KVM_CREATE_VCPU, 0);
+		if (cpuDescriptor < 0) {
+			Helpers::panic("Failed to create VCPU");
+		}
+
+		int mmapSize = ioctl(kvmDescriptor, KVM_GET_VCPU_MMAP_SIZE, 0);
+		if (mmapSize < 0) {
+			Helpers::panic("Failed to get KVM shared memory size");
+		}
+
+		runInfo = (kvm_run*)mmap(nullptr, mmapSize, PROT_READ | PROT_WRITE, MAP_SHARED, cpuDescriptor, 0);
+		if (runInfo == MAP_FAILED) {
+			Helpers::panic("Failed to map KVM shared memory");
+		}
+
+		kvm_vcpu_init initParams;
+		if (ioctl(vmDescriptor, KVM_ARM_PREFERRED_TARGET, &initParams) < 0) {
+			Helpers::panic("Failed to fetch initialization parameters for vCPU");
+		}
+		initParams.features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT;
+		initParams.features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
+
+		if (ioctl(cpuDescriptor, KVM_ARM_VCPU_INIT, &initParams) < 0) {
+			Helpers::panic("Failed to initialize vCPU");
+		}
+
+		kvm_reg_list tempRegList;
+		tempRegList.n = 0;
+		ioctl(cpuDescriptor, KVM_GET_REG_LIST, &tempRegList);
+
+		regList = (kvm_reg_list*)malloc(sizeof(kvm_reg_list) + tempRegList.n * sizeof(u64));
+		regList->n = tempRegList.n;
+		if (ioctl(cpuDescriptor, KVM_GET_REG_LIST, regList) < 0) {
+			Helpers::panic("Failed to get register list");
+		}
+	}
+
+	void setPC(u32 pc) {
+		u64 val = (u64)pc;
+		kvm_one_reg reg;
+
+		reg.id = AARCH64_CORE_REG(regs.pc);
+		reg.addr = (u64)&val;
+
+		if (ioctl(cpuDescriptor, KVM_SET_ONE_REG, &reg) < 0) [[unlikely]] {
+			printf("SetPC failed\n");
+		}
+	}
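+
+	// Illustrative counterpart to setPC (an addition for this sketch, not used elsewhere in
+	// the patch): reading a core register back with KVM_GET_ONE_REG works the same way as
+	// writing one, with the kernel filling the buffer pointed to by kvm_one_reg::addr instead.
+	u32 getPC() {
+		u64 val = 0;
+		kvm_one_reg reg;
+
+		reg.id = AARCH64_CORE_REG(regs.pc);
+		reg.addr = (u64)&val;
+
+		if (ioctl(cpuDescriptor, KVM_GET_ONE_REG, &reg) < 0) [[unlikely]] {
+			printf("GetPC failed\n");
+		}
+		return u32(val);
+	}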
+
+	void run() {
+		if (ioctl(cpuDescriptor, KVM_RUN, 0) < 0) {
+			Helpers::panic("Failed to run vCPU");
+		} else {
+			printf("KVM run succeeded\n");
+		}
+	}
+
+	void mapHypervisorCode(const std::vector<u8>& code, u32 offset)
+	{
+		if (code.size() > hypervisorCodeSize) {
+			Helpers::panic("Launch code is too big");
+		}
+		memcpy((void*)((uintptr_t)hypervisorCodeRegion + offset), code.data(), code.size());
+	}
+
+	Memory& mem;
+	Kernel& kernel;
+	kvm_run* runInfo = nullptr;
+	kvm_reg_list* regList = nullptr;
+	void* hypervisorCodeRegion;
+	void* hypervisorDataRegion;
+	int kvmDescriptor = -1;
+	int vmDescriptor = -1;
+	int cpuDescriptor = -1;
+};
+
+class CPU {
+	Memory& mem;
+	Environment env;
+	GuestState state;
+
+  public:
+	static constexpr u64 ticksPerSec = 268111856;
+
+	CPU(Memory& mem, Kernel& kernel);
+	void reset() {}
+
+	void setReg(int index, u32 value) {}
+	u32 getReg(int index) { return 0; }
+
+	std::span<u32, 16> regs() { return state.regs; }
+	std::span<u32, 64> fprs() { return state.fprs; }
+
+	void setCPSR(u32 value) { state.cpsr = value; }
+	u32 getCPSR() { return state.cpsr; }
+	void setFPSCR(u32 value) { state.fpscr = value; }
+	u32 getFPSCR() { return state.fpscr; }
+	void setTLSBase(u32 value) {}
+
+	u64 getTicks() { return 0; }
+	u64& getTicksRef() { static u64 dummy; return dummy; }
+
+	void clearCache() {}
+
+	void runFrame() {}
+
+	// TODO: remove
+	void romLoaded();
+};
+
+#undef AARCH64_CORE_REG
\ No newline at end of file
diff --git a/include/memory.hpp b/include/memory.hpp
index 33ccbae5..3ddd3df8 100644
--- a/include/memory.hpp
+++ b/include/memory.hpp
@@ -17,12 +17,17 @@ namespace PhysicalAddrs {
 	enum : u32 {
 		VRAM = 0x18000000,
-		VRAMEnd = VRAM + 0x005FFFFF,
+		VRAMSize = 0x00600000,
+		VRAMEnd = VRAM + VRAMSize - 1,
 		FCRAM = 0x20000000,
-		FCRAMEnd = FCRAM + 0x07FFFFFF,
 		DSP_RAM = 0x1FF00000,
-		DSP_RAM_End = DSP_RAM + 0x0007FFFF
+		DSP_RAM_End = DSP_RAM + 0x0007FFFF,
+		FCRAMSize = 0x08000000,
+		FCRAMEnd = FCRAM + FCRAMSize - 1,
+		DSPMem = 0x1FF00000,
+		DSPMemSize = 0x00080000,
+		DSPMemEnd = DSPMem + DSPMemSize - 1
 	};
 }
@@ -284,6 +290,7 @@ private:
 	u8* getDSPDataMem() { return &dspRam[DSP_DATA_MEMORY_OFFSET]; }
 	u8* getDSPCodeMem() { return &dspRam[DSP_CODE_MEMORY_OFFSET]; }
 	u32 getUsedUserMem() { return usedUserMemory; }
+	u8* getVRAM() { return vram; }
 
 	void setVRAM(u8* pointer) { vram = pointer; }
 	void setDSPMem(u8* pointer) { dspRam = pointer; }
diff --git a/src/core/CPU/cpu_kvm.cpp b/src/core/CPU/cpu_kvm.cpp
new file mode 100644
index 00000000..6bde71fd
--- /dev/null
+++ b/src/core/CPU/cpu_kvm.cpp
@@ -0,0 +1,268 @@
+// #ifdef CPU_KVM
+#include "cpu_kvm.hpp"
+
+MmuTables* mmuTables = nullptr;
+
+// The ARMv6 MMU supports up to two levels of address lookup with 4KiB pages.
+// The top level is called the level 1 table. It contains 4096 entries of 4 bytes each (16KiB total).
+// The bottom level is called level 2, which contains 256 entries of 4 bytes each (1KiB total).
+// The level 1 table supports 3 kinds of entries: Pages, Sections and Supersections, each corresponding to a page size.
+// Pages are for 4KiB pages, Sections are for 1MiB pages and Supersections are for 16MiB pages.
+
+// Sections and supersections don't use the level 2 table at all.
+// This is because with a 32 bit vaddr and 4KiB pages, the offset is 12 bits,
+// the level 2 index is 8 bits and the level 1 index is 12 bits -> 12 + 8 + 12 = 32 for the vaddr.
+// However for sections, the offset is 20 bits, so you can only use
+// the level 1 table (up to 4096 entries) because 20 for offset + 12 for level 1 -> 20 + 12 = 32 for the vaddr.
+// For supersections, you need a 24 bit offset, so the level 1 table actually has up to 256 entries because
+// you're left with 8 bits -> 24 + 8 = 32 for the vaddr.
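+
+// Worked example (illustrative only, not used by the emulator): decomposing a 32-bit
+// virtual address into the indices described above, assuming a 4KiB page mapping.
+constexpr u32 exampleVaddr = 0x20104ABC;
+constexpr u32 exampleLevel1Index = exampleVaddr >> 20;          // 0x201: index into the 4096-entry level 1 table
+constexpr u32 exampleLevel2Index = (exampleVaddr >> 12) & 0xFF; // 0x04:  index into the 256-entry level 2 table
+constexpr u32 examplePageOffset = exampleVaddr & 0xFFF;         // 0xABC: byte offset inside the 4KiB page
+static_assert(exampleLevel1Index == 0x201 && exampleLevel2Index == 0x04 && examplePageOffset == 0xABC);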
+
+// Level 2 entries
+// Bits:  31-12 11 10 9   8-6      5-4 3 2 1 0
+// Value: BADDR nG S  APX TEX[2:0] AP  C B 1 XN
+
+// Access permission table:
+/*
+	APX AP Privileged  Unprivileged Description
+	0   00 No access   No access    Permission fault
+	0   01 Read/Write  No access    Privileged Access only
+	0   10 Read/Write  Read         No user-mode write
+	0   11 Read/Write  Read/Write   Full access
+	1   00 -           -            Reserved
+	1   01 Read        No access    Privileged Read only
+	1   10 Read        Read         Read only
+	1   11 -           -            Reserved
+*/
+
+constexpr u32 APX = 1 << 9;
+constexpr u32 AP0 = 1 << 4;
+constexpr u32 AP1 = 1 << 5;
+
+enum Level2Flags : u32
+{
+	Level2Flags_ExecuteNever = 1 << 0,
+	Level2Flags_Bufferable = 1 << 2,
+	Level2Flags_Cacheable = 1 << 3,
+	Level2Flags_Shared = 1 << 10,
+	Level2Flags_AP_NoUserModeWrite = AP1,
+	Level2Flags_AP_FullAccess = AP1 | AP0,
+};
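+
+// Worked example (illustrative only): a small-page descriptor for physical address
+// 0x18000000 (VRAM) that is shared, cacheable, bufferable and fully accessible is
+// BADDR | S | C | B | AP=0b11 | 0b10:
+static_assert((0x18000000 | Level2Flags_Shared | Level2Flags_Cacheable | Level2Flags_Bufferable |
+               Level2Flags_AP_FullAccess | 0b10) == 0x1800043E);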
+
+// Generated by passing the following code to godbolt:
+// Thanks libn3ds
+/*
+	// FCSE PID Register (FCSE PID = 0)
+	// Note: This must be 0 before disabling the MMU otherwise UB
+	__asm__ volatile ("mcr p15, 0, %0, c13, c0, 0" : : "r"(0));
+
+	// Context ID Register (ASID = 0, PROCID = 0)
+	__asm__ volatile ("mcr p15, 0, %0, c13, c0, 1" : : "r"(0));
+
+	// TTBR0 address shared page table walk and outer cacheable write-through, no allocate on write
+	uint32_t ttbr0 = mmuTableAddress | 0x12;
+	__asm__ volatile ("mcr p15, 0, %0, c2, c0, 0" : : "r" (ttbr0) : "memory");
+
+	// Use the 16 KiB L1 table only
+	__asm__ volatile ("mcr p15, 0, %0, c2, c0, 2" : : "r"(0));
+
+	// Domain 0 = client, remaining domains all = no access
+	__asm__ volatile("mcr p15, 0, %0, c3, c0, 0" : : "r"(1));
+
+	uint32_t* d = (uint32_t*)hypervisorCodeAddress;
+	*d = hypervisorCodeAddress;
+*/
+constexpr u8 mmuCodeBefore[] = {
+	0x00, 0x30, 0xb0, 0xe3, // movs r3, #0
+	0x10, 0x3f, 0x0d, 0xee, // mcr p15, #0, r3, c13, c0, #0
+	0x30, 0x3f, 0x0d, 0xee, // mcr p15, #0, r3, c13, c0, #1
+	0x14, 0x20, 0x9f, 0xe5, // ldr r2, [pc, #0x14]
+	0x10, 0x2f, 0x02, 0xee, // mcr p15, #0, r2, c2, c0, #0
+	0x50, 0x3f, 0x02, 0xee, // mcr p15, #0, r3, c2, c0, #2
+	0x01, 0x30, 0xb0, 0xe3, // movs r3, #1
+	0x10, 0x3f, 0x03, 0xee, // mcr p15, #0, r3, c3, c0, #0
+	0x0d, 0x32, 0xa0, 0xe3, // mov r3, #-0x30000000 TODO: instead jump to exit code
+	0x00, 0x30, 0x83, 0xe5, // str r3, [r3]
+	(mmuTableAddress & 0xFF) | 0x12, (mmuTableAddress >> 8) & 0xFF, (mmuTableAddress >> 16) & 0xFF, (mmuTableAddress >> 24) & 0xFF,
+};
+
+// Generated by passing the following code to godbolt:
+// Thanks libn3ds
+/*
+	// Invalidate TLB
+	__asm__ volatile("mcr p15, 0, %0, c8, c7, 0" : : "r"(0));
+	__asm__ volatile("dsb");
+
+	// Get ACR
+	uint32_t reg;
+	__asm__ volatile("mrc p15, 0, %0, c1, c0, 1" : "=r"(reg));
+	// Enable Return stack, Dynamic branch prediction, Static branch prediction,
+	// Instruction folding and SMP mode: the CPU is taking part in coherency
+	reg |= 0x2F;
+	__asm__ volatile("mcr p15, 0, %0, c1, c0, 1" : : "r"(reg));
+
+	// Get CR
+	__asm__ volatile("mrc p15, 0, %0, c1, c0, 0" : "=r"(reg));
+	// Enable MMU, D-Cache, Program flow prediction,
+	// I-Cache, high exception vectors, Unaligned data access,
+	// subpage AP bits disabled
+	reg |= 0xC03805;
+	__asm__ volatile("mcr p15, 0, %0, c1, c0, 0" : : "r"(reg));
+
+	// Invalidate both caches
+	__asm__ volatile("mcr p15, 0, %0, c7, c7, 0" : : "r" (0) : "memory");
+	__asm__ volatile("dsb");
+	__asm__ volatile("isb");
+
+	uint32_t* d = (uint32_t*)hypervisorCodeAddress;
+	*d = hypervisorCodeAddress;
+*/
+constexpr u8 mmuCodeAfter[] = {
+	0x00, 0x00, 0xb0, 0xe3, // movs r0, #0
+	0x17, 0x0f, 0x08, 0xee, // mcr p15, #0, r0, c8, c7, #0
+	0x4f, 0xf0, 0x7f, 0xf5, // dsb sy
+	0x30, 0x3f, 0x11, 0xee, // mrc p15, #0, r3, c1, c0, #1
+	0x2f, 0x30, 0x83, 0xe3, // orr r3, r3, #0x2f
+	0x30, 0x3f, 0x01, 0xee, // mcr p15, #0, r3, c1, c0, #1
+	0x10, 0x2f, 0x11, 0xee, // mrc p15, #0, r2, c1, c0, #0
+	0x05, 0x38, 0x03, 0xe3, // movw r3, #0x3805
+	0xc0, 0x30, 0x40, 0xe3, // movt r3, #0xc0
+	0x02, 0x30, 0x93, 0xe1, // orrs r3, r3, r2
+	0x10, 0x3f, 0x01, 0xee, // mcr p15, #0, r3, c1, c0, #0
+	0x17, 0x0f, 0x07, 0xee, // mcr p15, #0, r0, c7, c7, #0
+	0x4f, 0xf0, 0x7f, 0xf5, // dsb sy
+	0x6f, 0xf0, 0x7f, 0xf5, // isb sy
+	0x0d, 0x32, 0xa0, 0xe3, // mov r3, #-0x30000000 TODO: instead jump to exit code
+	0x00, 0x30, 0x83, 0xe5, // str r3, [r3]
+};
+
+// Store the CPU state and exit the VM, then return from SVC
+// Generated from the following ARM32 assembly
+/*
+	push {r0}
+	ldr r0, GuestStateAddr + 4
+	stmfd r0, {r1-r12, sp, lr, pc}^
+	pop {r0}
+
+	push {r1}
+	ldr r1, GuestStateAddr
+	str r0, [r1]
+
+	// Exit the VM
+	ldr r1, CodeAddr
+	str r1, [r1]
+
+	pop {r1}
+
+	CodeAddr:
+		.word 0xD0000000
+	GuestStateAddr:
+		.word 0xE0200000
+*/
+// TODO: assemble the code above; the array is still empty in this draft.
+constexpr u8 svcHandlerCode[] = {
+};
+
+/// Level 1, page table entry
+/// Bits:  31-10 9   8-5    4   3  2   1 0
+/// Value: BADDR IMP Domain SBZ NS PXN 0 1
+/// We don't use domains, so we can set it to 0
+u32 pageTableEntry(u32 level2Address)
+{
+	// Level 2 tables have 256 entries of 4 bytes each, so they must be aligned to 1KiB
+	if ((level2Address & 0x3FF) != 0) {
+		Helpers::panic("level2Address is not aligned to 1KiB");
+	}
+
+	return level2Address | 0b1;
+}
+
+u32 level2Entry(u32 physicalAddress, Level2Flags flags)
+{
+	return (physicalAddress & 0xFFFFF000) | 0b10 | flags;
+}
+
+void mapPageTables(u32 virtualAddress, u32 physicalAddress, u8 pageCount, Level2Flags flags)
+{
+	// Both addresses must be aligned to 4KiB, so check the low 12 bits
+	if ((virtualAddress & 0xFFF) != 0) {
+		Helpers::panic("virtualAddress is not aligned to 4KiB");
+	}
+
+	if ((physicalAddress & 0xFFF) != 0) {
+		Helpers::panic("physicalAddress is not aligned to 4KiB");
+	}
+
+	for (u32 i = 0; i < pageCount * 4096; i += 4096)
+	{
+		u8 level2Index = ((virtualAddress + i) >> 12) & 0xFF;
+		mmuTables->level2SectionTables[level2Index] = level2Entry(physicalAddress + i, flags);
+	}
+
+	u32 level2TableAddressVm = mmuTableAddress + offsetof(MmuTables, level2SectionTables);
+	mmuTables->level1[virtualAddress >> 20] = pageTableEntry(level2TableAddressVm);
+}
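+
+// Illustrative example of how the helpers above combine (not called anywhere in this file):
+// mapPageTables(0x00100000, 0x00100000, 16, Level2Flags_AP_FullAccess) fills
+// level2SectionTables[0x00] through level2SectionTables[0x0F] with level2Entry() descriptors
+// for physical addresses 0x00100000 through 0x0010F000, then points level1[0x001] at the
+// level 2 table via pageTableEntry().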
+
+CPU::CPU(Memory& mem, Kernel& kernel)
+: mem(mem), env(mem, kernel)
+{
+}
+
+void CPU::romLoaded()
+{
+	NCCH* ncch = mem.getCXI();
+	if (!ncch) {
+		// TODO: what to do here?
+		Helpers::panic("Alber has decided to panic!");
+	}
+
+	// Map the VM exit code which stores all registers to shared hypervisor memory
+	// and exits the VM by writing to read-only memory.
+	// We map it at the start of hypervisorCodeAddress.
+	env.mapHypervisorCode(std::vector<u8>(svcHandlerCode, svcHandlerCode + sizeof(svcHandlerCode)), 0);
+
+	printf("Debug: Running pre MMU table code\n");
+	env.mapHypervisorCode(std::vector<u8>(mmuCodeBefore, mmuCodeBefore + sizeof(mmuCodeBefore)), customEntryOffset);
+	env.setPC(hypervisorCodeAddress + customEntryOffset);
+	env.run();
+
+	const auto& text = ncch->text;
+	const auto& rodata = ncch->rodata;
+	const auto& data = ncch->data;
+
+	mmuTables = (MmuTables*)((uintptr_t)env.hypervisorDataRegion + mmuTableOffset);
+	printf("Debug: level2SectionTables is at %p in host, %08x in guest\n", (void*)mmuTables->level2SectionTables,
+	       mmuTableAddress + offsetof(MmuTables, level2SectionTables));
+	mapPageTables(
+		text.address,
+		text.address,
+		text.pageCount,
+		(Level2Flags)(Level2Flags_Shared |
+					  Level2Flags_Bufferable |
+					  Level2Flags_Cacheable |
+					  Level2Flags_AP_NoUserModeWrite)
+	);
+	mapPageTables(
+		rodata.address,
+		rodata.address,
+		rodata.pageCount,
+		(Level2Flags)(Level2Flags_Shared |
+					  Level2Flags_Bufferable |
+					  Level2Flags_Cacheable |
+					  Level2Flags_AP_NoUserModeWrite |
+					  Level2Flags_ExecuteNever)
+	);
+	mapPageTables(
+		data.address,
+		data.address,
+		data.pageCount,
+		(Level2Flags)(Level2Flags_Shared |
+					  Level2Flags_Bufferable |
+					  Level2Flags_Cacheable |
+					  Level2Flags_AP_FullAccess)
+	);
+
+	printf("Debug: Running post MMU table code\n");
+	env.mapHypervisorCode(std::vector<u8>(mmuCodeAfter, mmuCodeAfter + sizeof(mmuCodeAfter)), customEntryOffset);
+	env.setPC(hypervisorCodeAddress + customEntryOffset);
+	env.run();
+	printf("Done\n");
+}
+
+// #endif