From f7534750c271e041f5df098a3fc4540eea633b16 Mon Sep 17 00:00:00 2001 From: NyanCatTW1 <17372086+NyanCatTW1@users.noreply.github.com> Date: Tue, 23 Apr 2024 20:43:26 +0300 Subject: [PATCH] Fix vprintf implementation --- ptx/lib/zluda_ptx_impl.bc | Bin 232076 -> 232076 bytes ptx/lib/zluda_ptx_impl.cpp | 20 +++++++++++--------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/ptx/lib/zluda_ptx_impl.bc b/ptx/lib/zluda_ptx_impl.bc index 1edcbd5cc43abb641e28b19d37651f905d1e0986..4e441511ffc37584dc5e8960d13debdc46e39a39 100644 GIT binary patch delta 482 zcmeB~%GWcMZ^M=pM#JWbh&0Xxd<_?R7+D!wcsM2`>M3=6T7PkW+JC8k*}v9* z-2d#qj0lf{D9-|Mo_Uc>8PgbEF>qYKV_pNB7PCV=uasHApTtDYR#lN{0TE7MMt#Dc z#*gjC<>l%t{yc;-fQsA1q@oHk5*M??2^o!rt;a>>!Yg7PLK$hCiyFlm zye-A?XYI8wJ6e4(%;` n&2k=96L?Du-WW_@Fonr=`im({Dr}}47ZOi1Y!{l!RP_}A8^5#e delta 483 zcmeB~%GWcMZ^M=pM&stKDeYTR7`JatVY1h&ztCpc;Z$@mq4J=^O9l=ui2w$N8LAmp z3{1*wA`AjX3Jz_Z9S*1VyZxW_H~Yu>m-~R8pGoN&nLRjQ>jix_{|^+V8>IoUwqrq4_wk%)|_rMS`3jDhd-D zT$qF)47OboCY2YCI8J8sm5OO7U_8XiA+7*nWK46A%3wU8P$@53yE;ez3}XaGk)k*R zSsQvU;GH7y#$W^UAwHpryi+`07`$Lw#6C$bg;|xyn&(Z!^u<${T$vdd7^c6N!lc4x Q!f_$-w83_vsZ3R00U5`uKL7v# diff --git a/ptx/lib/zluda_ptx_impl.cpp b/ptx/lib/zluda_ptx_impl.cpp index ecbe691..a6417f6 100644 --- a/ptx/lib/zluda_ptx_impl.cpp +++ b/ptx/lib/zluda_ptx_impl.cpp @@ -1,5 +1,5 @@ // Compile and disassemble: -// python3 ./cvt.py > cvt.h && /opt/rocm/llvm/bin/clang -std=c++17 -Xclang -no-opaque-pointers -Wall -Wextra -Wsign-compare -Wconversion -x hip zluda_ptx_impl.cpp -S -emit-llvm --cuda-device-only -nogpulib -O3 -Xclang -fallow-half-arguments-and-returns -o - | sed -e 's/define/define linkonce_odr/g' | sed -e '/@llvm.used/d' | sed -e 's/\"target-cpu\"=\"[^\"]*\"//g' | sed -e 's/\"target-features\"=\"[^\"]*\"//g' | sed -e 's/\"denormal-fp-math-f32\"=\"[^\"]*\"//g' | sed -e 's/!llvm.module.flags = !{!0, !1, !2, !3, !4}/!llvm.module.flags = !{ }/g' | sed -e 's/memory(none)/readnone/g' | sed -e 's/memory(argmem: readwrite, inaccessiblemem: readwrite)/inaccessiblemem_or_argmemonly/g' | sed -e 's/memory(read)/readonly/g' | sed -e 's/memory(argmem: readwrite)/argmemonly/g' | llvm-as-13 -o zluda_ptx_impl.bc && /opt/rocm/llvm/bin/llvm-dis zluda_ptx_impl.bc +// python3 ./cvt.py > cvt.h && /opt/rocm/llvm/bin/clang -std=c++20 -Xclang -no-opaque-pointers -Wall -Wextra -Wsign-compare -Wconversion -x hip zluda_ptx_impl.cpp -S -emit-llvm --cuda-device-only -nogpulib -O3 -Xclang -fallow-half-arguments-and-returns -o - | sed -e 's/define/define linkonce_odr/g' | sed -e '/@llvm.used/d' | sed -e 's/\"target-cpu\"=\"[^\"]*\"//g' | sed -e 's/\"target-features\"=\"[^\"]*\"//g' | sed -e 's/\"denormal-fp-math-f32\"=\"[^\"]*\"//g' | sed -e 's/!llvm.module.flags = !{!0, !1, !2, !3, !4}/!llvm.module.flags = !{ }/g' | sed -e 's/memory(none)/readnone/g' | sed -e 's/memory(argmem: readwrite, inaccessiblemem: readwrite)/inaccessiblemem_or_argmemonly/g' | sed -e 's/memory(read)/readonly/g' | sed -e 's/memory(argmem: readwrite)/argmemonly/g' | llvm-as-13 -o zluda_ptx_impl.bc && /opt/rocm/llvm/bin/llvm-dis zluda_ptx_impl.bc // Compile to binary: // /opt/rocm/llvm/bin/clang -x ir -target amdgcn-amd-amdhsa -Xlinker --no-undefined zluda_ptx_impl.bc -mno-wavefrontsize64 -mcpu=gfx1030 // Decompile: @@ -1260,6 +1260,7 @@ extern "C" default: return 0; } + return 2; case 'l': switch (s[1]) { @@ -1289,17 +1290,18 @@ extern "C" case 'X': case 'n': len = 8; - return 2; + break; default: return 0; } + return 3; default: return 0; } + return 2; default: return 0; } - return 1; } __device__ static bool parse_printf_specifier(const char *s, uint8_t &len) @@ -1407,16 +1409,16 @@ extern "C" { s += specifier_with_length; } - if (len > 0) - { - uint64_t value = read_valist(valist_ptr, valist_offset, len); - handle = __ockl_printf_append_args(handle, 1, value, 0, 0, 0, 0, 0, 0, 0); - } + } + + if (len > 0) + { + uint64_t value = read_valist(valist_ptr, valist_offset, len); + handle = __ockl_printf_append_args(handle, 1, value, 0, 0, 0, 0, 0, 0, 0); } } } return (uint32_t)__ockl_printf_append_args(handle, 0, 0, 0, 0, 0, 0, 0, 0, 1); - return 1; } int64_t __ockl_mul_hi_i64(int64_t x, int64_t y) __attribute__((device));